diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-23 19:11:50 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-23 19:11:50 -0800 |
commit | 3aacd625f20129f5a41ea3ff3b5353b0e4dabd01 (patch) | |
tree | 7cf4ea65397f80098b30494df31cfc8f5fa26d63 | |
parent | 7e21774db5cc9cf8fe93a64a2f0c6cf47db8ab24 (diff) | |
parent | 2a1d689c9ba42a6066540fb221b6ecbd6298b728 (diff) | |
download | op-kernel-dev-3aacd625f20129f5a41ea3ff3b5353b0e4dabd01.zip op-kernel-dev-3aacd625f20129f5a41ea3ff3b5353b0e4dabd01.tar.gz |
Merge branch 'akpm' (incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
- various misc bits
- the rest of MM
- add generic fixmap.h, use it
- backlight updates
- dynamic_debug updates
- printk() updates
- checkpatch updates
- binfmt_elf
- ramfs
- init/
- autofs4
- drivers/rtc
- nilfs
- hfsplus
- Documentation/
- coredump
- procfs
- fork
- exec
- kexec
- kdump
- partitions
- rapidio
- rbtree
- userns
- memstick
- w1
- decompressors
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (197 commits)
lib/decompress_unlz4.c: always set an error return code on failures
romfs: fix returm err while getting inode in fill_super
drivers/w1/masters/w1-gpio.c: add strong pullup emulation
drivers/memstick/host/rtsx_pci_ms.c: fix ms card data transfer bug
userns: relax the posix_acl_valid() checks
arch/sh/kernel/dwarf.c: use rbtree postorder iteration helper instead of solution using repeated rb_erase()
fs-ext3-use-rbtree-postorder-iteration-helper-instead-of-opencoding-fix
fs/ext3: use rbtree postorder iteration helper instead of opencoding
fs/jffs2: use rbtree postorder iteration helper instead of opencoding
fs/ext4: use rbtree postorder iteration helper instead of opencoding
fs/ubifs: use rbtree postorder iteration helper instead of opencoding
net/netfilter/ipset/ip_set_hash_netiface.c: use rbtree postorder iteration instead of opencoding
rbtree/test: test rbtree_postorder_for_each_entry_safe()
rbtree/test: move rb_node to the middle of the test struct
rapidio: add modular rapidio core build into powerpc and mips branches
partitions/efi: complete documentation of gpt kernel param purpose
kdump: add /sys/kernel/vmcoreinfo ABI documentation
kdump: fix exported size of vmcoreinfo note
kexec: add sysctl to disable kexec_load
fs/exec.c: call arch_pick_mmap_layout() only once
...
231 files changed, 3339 insertions, 2187 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo new file mode 100644 index 0000000..7bd81168 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo @@ -0,0 +1,14 @@ +What: /sys/kernel/vmcoreinfo +Date: October 2007 +KernelVersion: 2.6.24 +Contact: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp> + Kexec Mailing List <kexec@lists.infradead.org> + Vivek Goyal <vgoyal@redhat.com> +Description + Shows physical address and size of vmcoreinfo ELF note. + First value contains physical address of note in hex and + second value contains the size of note in hex. This ELF + note info is parsed by second kernel and exported to user + space as part of ELF note in /proc/vmcore file. This note + contains various information like struct size, symbol + values, page size etc. diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt index fa72e97..fe2ef97 100644 --- a/Documentation/blockdev/ramdisk.txt +++ b/Documentation/blockdev/ramdisk.txt @@ -36,21 +36,30 @@ allowing one to squeeze more programs onto an average installation or rescue floppy disk. -2) Kernel Command Line Parameters +2) Parameters --------------------------------- +2a) Kernel Command Line Parameters + ramdisk_size=N ============== This parameter tells the RAM disk driver to set up RAM disks of N k size. The -default is 4096 (4 MB) (8192 (8 MB) on S390). +default is 4096 (4 MB). + +2b) Module parameters - ramdisk_blocksize=N - =================== + rd_nr + ===== + /dev/ramX devices created. -This parameter tells the RAM disk driver how many bytes to use per block. The -default is 1024 (BLOCK_SIZE). + max_part + ======== + Maximum partition number. + rd_size + ======= + See ramdisk_size. 3) Using "rdev -r" ------------------ diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 8cb9938..be675d2 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -285,7 +285,7 @@ A: This is what you would need in your kernel code to receive notifications. return NOTIFY_OK; } - static struct notifier_block foobar_cpu_notifer = + static struct notifier_block foobar_cpu_notifier = { .notifier_call = foobar_cpu_callback, }; diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt new file mode 100644 index 0000000..31406fd --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt @@ -0,0 +1,27 @@ +Haoyu Microelectronics HYM8563 Real Time Clock + +The HYM8563 provides basic rtc and alarm functionality +as well as a clock output of up to 32kHz. + +Required properties: +- compatible: should be: "haoyu,hym8563" +- reg: i2c address +- interrupts: rtc alarm/event interrupt +- #clock-cells: the value should be 0 + +Example: + +hym8563: hym8563@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + + interrupts = <13 IRQ_TYPE_EDGE_FALLING>; + + #clock-cells = <0>; +}; + +device { +... + clocks = <&hym8563>; +... +}; diff --git a/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt b/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt new file mode 100644 index 0000000..d0f937c --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt @@ -0,0 +1,12 @@ +* Maxim (Dallas) DS1742/DS1743 Real Time Clock + +Required properties: +- compatible: Should contain "maxim,ds1742". +- reg: Physical base address of the RTC and length of memory + mapped region. + +Example: + rtc: rtc@10000000 { + compatible = "maxim,ds1742"; + reg = <0x10000000 0x800>; + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index ff415d1..520596d 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -34,6 +34,7 @@ fsl Freescale Semiconductor GEFanuc GE Fanuc Intelligent Platforms Embedded Systems, Inc. gef GE Fanuc Intelligent Platforms Embedded Systems, Inc. gmt Global Mixed-mode Technology, Inc. +haoyu Haoyu Microelectronic Co. Ltd. hisilicon Hisilicon Limited. hp Hewlett Packard ibm International Business Machines (IBM) diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt index 1bbdcfc..46325eb 100644 --- a/Documentation/dynamic-debug-howto.txt +++ b/Documentation/dynamic-debug-howto.txt @@ -108,6 +108,12 @@ If your query set is big, you can batch them too: ~# cat query-batch-file > <debugfs>/dynamic_debug/control +A another way is to use wildcard. The match rule support '*' (matches +zero or more characters) and '?' (matches exactly one character).For +example, you can match all usb drivers: + + ~# echo "file drivers/usb/* +p" > <debugfs>/dynamic_debug/control + At the syntactical level, a command comprises a sequence of match specifications, followed by a flags change specification. @@ -315,6 +321,9 @@ nullarbor:~ # echo -n 'func svc_process -p' > nullarbor:~ # echo -n 'format "nfsd: READ" +p' > <debugfs>/dynamic_debug/control +// enable messages in files of which the pathes include string "usb" +nullarbor:~ # echo -n '*usb* +p' > <debugfs>/dynamic_debug/control + // enable all messages nullarbor:~ # echo -n '+p' > <debugfs>/dynamic_debug/control diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 8042050..632211c 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -10,24 +10,32 @@ afs.txt - info and examples for the distributed AFS (Andrew File System) fs. affs.txt - info and mount options for the Amiga Fast File System. +autofs4-mount-control.txt + - info on device control operations for autofs4 module. automount-support.txt - information about filesystem automount support. befs.txt - information about the BeOS filesystem for Linux. bfs.txt - info for the SCO UnixWare Boot Filesystem (BFS). +btrfs.txt + - info for the BTRFS filesystem. +caching/ + - directory containing filesystem cache documentation. ceph.txt - - info for the Ceph Distributed File System -cifs.txt - - description of the CIFS filesystem. + - info for the Ceph Distributed File System. +cifs/ + - directory containing CIFS filesystem documentation and example code. coda.txt - description of the CODA filesystem. configfs/ - directory containing configfs documentation and example code. cramfs.txt - info on the cram filesystem for small storage (ROMs etc). -dentry-locking.txt - - info on the RCU-based dcache locking model. +debugfs.txt + - info on the debugfs filesystem. +devpts.txt + - info on the devpts filesystem. directory-locking - info about the locking scheme used for directory operations. dlmfs.txt @@ -35,7 +43,7 @@ dlmfs.txt dnotify.txt - info about directory notification in Linux. dnotify_test.c - - example program for dnotify + - example program for dnotify. ecryptfs.txt - docs on eCryptfs: stacked cryptographic filesystem for Linux. efivarfs.txt @@ -48,12 +56,18 @@ ext3.txt - info, mount options and specifications for the Ext3 filesystem. ext4.txt - info, mount options and specifications for the Ext4 filesystem. -files.txt - - info on file management in the Linux kernel. f2fs.txt - info and mount options for the F2FS filesystem. +fiemap.txt + - info on fiemap ioctl. +files.txt + - info on file management in the Linux kernel. fuse.txt - info on the Filesystem in User SpacE including mount options. +gfs2-glocks.txt + - info on the Global File System 2 - Glock internal locking rules. +gfs2-uevents.txt + - info on the Global File System 2 - uevents. gfs2.txt - info on the Global File System 2. hfs.txt @@ -84,40 +98,58 @@ ntfs.txt - info and mount options for the NTFS filesystem (Windows NT). ocfs2.txt - info and mount options for the OCFS2 clustered filesystem. +omfs.txt + - info on the Optimized MPEG FileSystem. +path-lookup.txt + - info on path walking and name lookup locking. +pohmelfs/ + - directory containing pohmelfs filesystem documentation. porting - various information on filesystem porting. proc.txt - info on Linux's /proc filesystem. +qnx6.txt + - info on the QNX6 filesystem. +quota.txt + - info on Quota subsystem. ramfs-rootfs-initramfs.txt - info on the 'in memory' filesystems ramfs, rootfs and initramfs. -reiser4.txt - - info on the Reiser4 filesystem based on dancing tree algorithms. relay.txt - info on relay, for efficient streaming from kernel to user space. romfs.txt - description of the ROMFS filesystem. seq_file.txt - - how to use the seq_file API + - how to use the seq_file API. sharedsubtree.txt - a description of shared subtrees for namespaces. spufs.txt - info and mount options for the SPU filesystem used on Cell. +squashfs.txt + - info on the squashfs filesystem. sysfs-pci.txt - info on accessing PCI device resources through sysfs. +sysfs-tagging.txt + - info on sysfs tagging to avoid duplicates. sysfs.txt - info on sysfs, a ram-based filesystem for exporting kernel objects. sysv-fs.txt - info on the SystemV/V7/Xenix/Coherent filesystem. tmpfs.txt - info on tmpfs, a filesystem that holds all files in virtual memory. +ubifs.txt + - info on the Unsorted Block Images FileSystem. udf.txt - info and mount options for the UDF filesystem. ufs.txt - info on the ufs filesystem. vfat.txt - - info on using the VFAT filesystem used in Windows NT and Windows 95 + - info on using the VFAT filesystem used in Windows NT and Windows 95. vfs.txt - - overview of the Virtual File System + - overview of the Virtual File System. +xfs-delayed-logging-design.txt + - info on the XFS Delayed Logging Design. +xfs-self-describing-metadata.txt + - info on XFS Self Describing Metadata. xfs.txt - info and mount options for the XFS filesystem. xip.txt diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 873a2ab..06887d4 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -81,6 +81,62 @@ nodiscard(*) The discard/TRIM commands are sent to the underlying block device when blocks are freed. This is useful for SSD devices and sparse/thinly-provisioned LUNs. +Ioctls +====== + +There is some NILFS2 specific functionality which can be accessed by applications +through the system call interfaces. The list of all NILFS2 specific ioctls are +shown in the table below. + +Table of NILFS2 specific ioctls +.............................................................................. + Ioctl Description + NILFS_IOCTL_CHANGE_CPMODE Change mode of given checkpoint between + checkpoint and snapshot state. This ioctl is + used in chcp and mkcp utilities. + + NILFS_IOCTL_DELETE_CHECKPOINT Remove checkpoint from NILFS2 file system. + This ioctl is used in rmcp utility. + + NILFS_IOCTL_GET_CPINFO Return info about requested checkpoints. This + ioctl is used in lscp utility and by + nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_CPSTAT Return checkpoints statistics. This ioctl is + used by lscp, rmcp utilities and by + nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_SUINFO Return segment usage info about requested + segments. This ioctl is used in lssu, + nilfs_resize utilities and by nilfs_cleanerd + daemon. + + NILFS_IOCTL_GET_SUSTAT Return segment usage statistics. This ioctl + is used in lssu, nilfs_resize utilities and + by nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_VINFO Return information on virtual block addresses. + This ioctl is used by nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_BDESCS Return information about descriptors of disk + block numbers. This ioctl is used by + nilfs_cleanerd daemon. + + NILFS_IOCTL_CLEAN_SEGMENTS Do garbage collection operation in the + environment of requested parameters from + userspace. This ioctl is used by + nilfs_cleanerd daemon. + + NILFS_IOCTL_SYNC Make a checkpoint. This ioctl is used in + mkcp utility. + + NILFS_IOCTL_RESIZE Resize NILFS2 volume. This ioctl is used + by nilfs_resize utility. + + NILFS_IOCTL_SET_ALLOC_RANGE Define lower limit of segments in bytes and + upper limit of segments in bytes. This ioctl + is used by nilfs_resize utility. + NILFS2 usage ============ diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index a6619b7..b35a64b 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -108,12 +108,12 @@ static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo); is equivalent to doing: static struct device_attribute dev_attr_foo = { - .attr = { + .attr = { .name = "foo", .mode = S_IWUSR | S_IRUGO, - .show = show_foo, - .store = store_foo, }, + .show = show_foo, + .store = store_foo, }; diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d4762d7..4473856 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1059,7 +1059,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. debugfs files are removed at module unload time. gpt [EFI] Forces disk with valid GPT signature but - invalid Protective MBR to be treated as GPT. + invalid Protective MBR to be treated as GPT. If the + primary GPT is corrupted, it enables the backup/alternate + GPT to be used instead. grcan.enable0= [HW] Configuration of physical interface 0. Determines the "Enable 0" bit of the configuration register. @@ -1461,6 +1463,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Valid arguments: on, off Default: on + kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode + Valid arguments: 0, 1, 2 + kmemcheck=0 (disabled) + kmemcheck=1 (enabled) + kmemcheck=2 (one-shot mode) + Default: 2 (one-shot mode) + kstack=N [X86] Print N words from the kernel stack in oops dumps. diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 445ad74..6f4eb32 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -55,14 +55,21 @@ Struct Resources: For printing struct resources. The 'R' and 'r' specifiers result in a printed resource with ('R') or without ('r') a decoded flags member. -Physical addresses: +Physical addresses types phys_addr_t: - %pa 0x01234567 or 0x0123456789abcdef + %pa[p] 0x01234567 or 0x0123456789abcdef For printing a phys_addr_t type (and its derivatives, such as resource_size_t) which can vary based on build options, regardless of the width of the CPU data path. Passed by reference. +DMA addresses types dma_addr_t: + + %pad 0x01234567 or 0x0123456789abcdef + + For printing a dma_addr_t type which can vary based on build options, + regardless of the width of the CPU data path. Passed by reference. + Raw buffer as a hex string: %*ph 00 01 02 ... 3f %*phC 00:01:02: ... :3f diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6d48640..ee9a2f9 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -33,6 +33,7 @@ show up in /proc/sys/kernel: - domainname - hostname - hotplug +- kexec_load_disabled - kptr_restrict - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] @@ -287,6 +288,18 @@ Default value is "/sbin/hotplug". ============================================================== +kexec_load_disabled: + +A toggle indicating if the kexec_load syscall has been disabled. This +value defaults to 0 (false: kexec_load enabled), but can be set to 1 +(true: kexec_load disabled). Once true, kexec can no longer be used, and +the toggle cannot be set back to false. This allows a kexec image to be +loaded before disabling the syscall, allowing a system to set up (and +later use) an image without it being altered. Generally used together +with the "modules_disabled" sysctl. + +============================================================== + kptr_restrict: This toggle indicates whether restrictions are placed on @@ -331,7 +344,7 @@ A toggle value indicating if modules are allowed to be loaded in an otherwise modular kernel. This toggle defaults to off (0), but can be set true (1). Once true, modules can be neither loaded nor unloaded, and the toggle cannot be set back -to false. +to false. Generally used with the "kexec_load_disabled" toggle. ============================================================== diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 4a37c47..00e425f 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -123,7 +123,7 @@ my $regex_writepage; # Static regex used. Specified like this for readability and for use with /o # (process_pid) (cpus ) ( time ) (tpoint ) (details) -my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; +my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])(\s*[dX.][Nnp.][Hhs.][0-9a-fA-F.]*|)\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; my $regex_statname = '[-0-9]*\s\((.*)\).*'; my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*'; @@ -270,8 +270,8 @@ EVENT_PROCESS: while ($traceevent = <STDIN>) { if ($traceevent =~ /$regex_traceevent/o) { $process_pid = $1; - $timestamp = $3; - $tracepoint = $4; + $timestamp = $4; + $tracepoint = $5; $process_pid =~ /(.*)-([0-9]*)$/; my $process = $1; @@ -299,7 +299,7 @@ EVENT_PROCESS: $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}++; $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN} = $timestamp; - $details = $5; + $details = $6; if ($details !~ /$regex_direct_begin/o) { print "WARNING: Failed to parse mm_vmscan_direct_reclaim_begin as expected\n"; print " $details\n"; @@ -322,7 +322,7 @@ EVENT_PROCESS: $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index] = "$order-$latency"; } } elsif ($tracepoint eq "mm_vmscan_kswapd_wake") { - $details = $5; + $details = $6; if ($details !~ /$regex_kswapd_wake/o) { print "WARNING: Failed to parse mm_vmscan_kswapd_wake as expected\n"; print " $details\n"; @@ -356,7 +356,7 @@ EVENT_PROCESS: } elsif ($tracepoint eq "mm_vmscan_wakeup_kswapd") { $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}++; - $details = $5; + $details = $6; if ($details !~ /$regex_wakeup_kswapd/o) { print "WARNING: Failed to parse mm_vmscan_wakeup_kswapd as expected\n"; print " $details\n"; @@ -366,7 +366,7 @@ EVENT_PROCESS: my $order = $3; $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]++; } elsif ($tracepoint eq "mm_vmscan_lru_isolate") { - $details = $5; + $details = $6; if ($details !~ /$regex_lru_isolate/o) { print "WARNING: Failed to parse mm_vmscan_lru_isolate as expected\n"; print " $details\n"; @@ -387,7 +387,7 @@ EVENT_PROCESS: } $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") { - $details = $5; + $details = $6; if ($details !~ /$regex_lru_shrink_inactive/o) { print "WARNING: Failed to parse mm_vmscan_lru_shrink_inactive as expected\n"; print " $details\n"; @@ -397,7 +397,7 @@ EVENT_PROCESS: my $nr_reclaimed = $4; $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed; } elsif ($tracepoint eq "mm_vmscan_writepage") { - $details = $5; + $details = $6; if ($details !~ /$regex_writepage/o) { print "WARNING: Failed to parse mm_vmscan_writepage as expected\n"; print " $details\n"; diff --git a/Documentation/vm/locking b/Documentation/vm/locking deleted file mode 100644 index f61228b..0000000 --- a/Documentation/vm/locking +++ /dev/null @@ -1,130 +0,0 @@ -Started Oct 1999 by Kanoj Sarcar <kanojsarcar@yahoo.com> - -The intent of this file is to have an uptodate, running commentary -from different people about how locking and synchronization is done -in the Linux vm code. - -page_table_lock & mmap_sem --------------------------------------- - -Page stealers pick processes out of the process pool and scan for -the best process to steal pages from. To guarantee the existence -of the victim mm, a mm_count inc and a mmdrop are done in swap_out(). -Page stealers hold kernel_lock to protect against a bunch of races. -The vma list of the victim mm is also scanned by the stealer, -and the page_table_lock is used to preserve list sanity against the -process adding/deleting to the list. This also guarantees existence -of the vma. Vma existence is not guaranteed once try_to_swap_out() -drops the page_table_lock. To guarantee the existence of the underlying -file structure, a get_file is done before the swapout() method is -invoked. The page passed into swapout() is guaranteed not to be reused -for a different purpose because the page reference count due to being -present in the user's pte is not released till after swapout() returns. - -Any code that modifies the vmlist, or the vm_start/vm_end/ -vm_flags:VM_LOCKED/vm_next of any vma *in the list* must prevent -kswapd from looking at the chain. - -The rules are: -1. To scan the vmlist (look but don't touch) you must hold the - mmap_sem with read bias, i.e. down_read(&mm->mmap_sem) -2. To modify the vmlist you need to hold the mmap_sem with - read&write bias, i.e. down_write(&mm->mmap_sem) *AND* - you need to take the page_table_lock. -3. The swapper takes _just_ the page_table_lock, this is done - because the mmap_sem can be an extremely long lived lock - and the swapper just cannot sleep on that. -4. The exception to this rule is expand_stack, which just - takes the read lock and the page_table_lock, this is ok - because it doesn't really modify fields anybody relies on. -5. You must be able to guarantee that while holding page_table_lock - or page_table_lock of mm A, you will not try to get either lock - for mm B. - -The caveats are: -1. find_vma() makes use of, and updates, the mmap_cache pointer hint. -The update of mmap_cache is racy (page stealer can race with other code -that invokes find_vma with mmap_sem held), but that is okay, since it -is a hint. This can be fixed, if desired, by having find_vma grab the -page_table_lock. - - -Code that add/delete elements from the vmlist chain are -1. callers of insert_vm_struct -2. callers of merge_segments -3. callers of avl_remove - -Code that changes vm_start/vm_end/vm_flags:VM_LOCKED of vma's on -the list: -1. expand_stack -2. mprotect -3. mlock -4. mremap - -It is advisable that changes to vm_start/vm_end be protected, although -in some cases it is not really needed. Eg, vm_start is modified by -expand_stack(), it is hard to come up with a destructive scenario without -having the vmlist protection in this case. - -The page_table_lock nests with the inode i_mmap_mutex and the kmem cache -c_spinlock spinlocks. This is okay, since the kmem code asks for pages after -dropping c_spinlock. The page_table_lock also nests with pagecache_lock and -pagemap_lru_lock spinlocks, and no code asks for memory with these locks -held. - -The page_table_lock is grabbed while holding the kernel_lock spinning monitor. - -The page_table_lock is a spin lock. - -Note: PTL can also be used to guarantee that no new clones using the -mm start up ... this is a loose form of stability on mm_users. For -example, it is used in copy_mm to protect against a racing tlb_gather_mmu -single address space optimization, so that the zap_page_range (from -truncate) does not lose sending ipi's to cloned threads that might -be spawned underneath it and go to user mode to drag in pte's into tlbs. - -swap_lock --------------- -The swap devices are chained in priority order from the "swap_list" header. -The "swap_list" is used for the round-robin swaphandle allocation strategy. -The #free swaphandles is maintained in "nr_swap_pages". These two together -are protected by the swap_lock. - -The swap_lock also protects all the device reference counts on the -corresponding swaphandles, maintained in the "swap_map" array, and the -"highest_bit" and "lowest_bit" fields. - -The swap_lock is a spinlock, and is never acquired from intr level. - -To prevent races between swap space deletion or async readahead swapins -deciding whether a swap handle is being used, ie worthy of being read in -from disk, and an unmap -> swap_free making the handle unused, the swap -delete and readahead code grabs a temp reference on the swaphandle to -prevent warning messages from swap_duplicate <- read_swap_cache_async. - -Swap cache locking ------------------- -Pages are added into the swap cache with kernel_lock held, to make sure -that multiple pages are not being added (and hence lost) by associating -all of them with the same swaphandle. - -Pages are guaranteed not to be removed from the scache if the page is -"shared": ie, other processes hold reference on the page or the associated -swap handle. The only code that does not follow this rule is shrink_mmap, -which deletes pages from the swap cache if no process has a reference on -the page (multiple processes might have references on the corresponding -swap handle though). lookup_swap_cache() races with shrink_mmap, when -establishing a reference on a scache page, so, it must check whether the -page it located is still in the swapcache, or shrink_mmap deleted it. -(This race is due to the fact that shrink_mmap looks at the page ref -count with pagecache_lock, but then drops pagecache_lock before deleting -the page from the scache). - -do_wp_page and do_swap_page have MP races in them while trying to figure -out whether a page is "shared", by looking at the page_count + swap_count. -To preserve the sum of the counts, the page lock _must_ be acquired before -calling is_page_shared (else processes might switch their swap_count refs -to the page count refs, after the page count ref has been snapshotted). - -Swap device deletion code currently breaks all the scache assumptions, -since it grabs neither mmap_sem nor page_table_lock. diff --git a/MAINTAINERS b/MAINTAINERS index 15802a3..6270a0b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -93,6 +93,11 @@ Descriptions of section entries: N: Files and directories with regex patterns. N: [^a-z]tegra all files whose path contains the word tegra One pattern per line. Multiple N: lines acceptable. + scripts/get_maintainer.pl has different behavior for files that + match F: pattern and matches of N: patterns. By default, + get_maintainer will not look at git log history when an F: pattern + match occurs. When an N: match occurs, git log history is used + to also notify the people that have git commit signatures. X: Files and directories that are NOT maintained, same rules as F: Files exclusions are tested before file matches. Can be useful for excluding a specific subdirectory, for instance: @@ -3375,7 +3380,6 @@ M: Jingoo Han <jg1.han@samsung.com> L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/exynos/exynos_dp* -F: include/video/exynos_dp* EXYNOS MIPI DISPLAY DRIVERS M: Inki Dae <inki.dae@samsung.com> @@ -3986,6 +3990,12 @@ S: Orphan F: Documentation/filesystems/hfs.txt F: fs/hfs/ +HFSPLUS FILESYSTEM +L: linux-fsdevel@vger.kernel.org +S: Orphan +F: Documentation/filesystems/hfsplus.txt +F: fs/hfsplus/ + HGA FRAMEBUFFER DRIVER M: Ferenc Bakonyi <fero@drama.obuda.kando.hu> L: linux-nvidia@lists.surfsouth.com diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index d39dc9b..3ba48fe 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -539,13 +539,13 @@ config SMP depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL ---help--- This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. See also the SMP-HOWTO available at diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9063ae6..5438cab 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -128,8 +128,8 @@ config SMP default n help This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. if SMP diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f9b0fd3..dc6ef9a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1470,14 +1470,14 @@ config SMP depends on MMU || ARM_MPU help This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on many, but not all, single - processor machines. On a single processor machine, the kernel will - run faster if you say N here. + you say Y here, the kernel will run on many, but not all, + uniprocessor machines. On a uniprocessor machine, the kernel + will run faster if you say N here. See also <file:Documentation/x86/i386/IO-APIC.txt>, <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h index 4353cf2..e59dba1 100644 --- a/arch/cris/include/asm/io.h +++ b/arch/cris/include/asm/io.h @@ -169,7 +169,11 @@ static inline void outsl(unsigned int port, const void *addr, } #define inb_p(port) inb(port) +#define inw_p(port) inw(port) +#define inl_p(port) inl(port) #define outb_p(val, port) outb((val), (port)) +#define outw_p(val, port) outw((val), (port)) +#define outl_p(val, port) outl((val), (port)) /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem diff --git a/arch/hexagon/include/asm/fixmap.h b/arch/hexagon/include/asm/fixmap.h index b75b6bf..1387f84 100644 --- a/arch/hexagon/include/asm/fixmap.h +++ b/arch/hexagon/include/asm/fixmap.h @@ -26,45 +26,7 @@ */ #include <asm/mem-layout.h> -/* - * Full fixmap support involves set_fixmap() functions, but - * these may not be needed if all we're after is an area for - * highmem kernel mappings. - */ -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/** - * fix_to_virt -- "index to address" translation. - * - * If anyone tries to use the idx directly without translation, - * we catch the bug with a NULL-deference kernel oops. Illegal - * ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * This branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #define kmap_get_fixmap_pte(vaddr) \ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), \ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a8c3a11..c063b05 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -104,6 +104,7 @@ config HAVE_SETUP_PER_CPU_AREA config DMI bool default y + select DMI_SCAN_MACHINE_NON_EFI_FALLBACK config EFI bool diff --git a/arch/ia64/include/asm/dmi.h b/arch/ia64/include/asm/dmi.h index 185d3d1..f365a61 100644 --- a/arch/ia64/include/asm/dmi.h +++ b/arch/ia64/include/asm/dmi.h @@ -5,8 +5,10 @@ #include <asm/io.h> /* Use normal IO mappings for DMI */ -#define dmi_ioremap ioremap -#define dmi_iounmap(x,l) iounmap(x) -#define dmi_alloc(l) kzalloc(l, GFP_ATOMIC) +#define dmi_early_remap ioremap +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap ioremap +#define dmi_unmap iounmap +#define dmi_alloc(l) kzalloc(l, GFP_ATOMIC) #endif diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 5a84b3a..efd1b92 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -71,6 +71,7 @@ #include <linux/compiler.h> #include <linux/threads.h> #include <linux/types.h> +#include <linux/bitops.h> #include <asm/fpu.h> #include <asm/page.h> diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 09ef94a..ca45044 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -277,13 +277,13 @@ config SMP bool "Symmetric multi-processing support" ---help--- This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. People using multiprocessor machines who say Y here should also say diff --git a/arch/metag/include/asm/fixmap.h b/arch/metag/include/asm/fixmap.h index 3331275..af621b0 100644 --- a/arch/metag/include/asm/fixmap.h +++ b/arch/metag/include/asm/fixmap.h @@ -51,37 +51,7 @@ enum fixed_addresses { #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START ((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #define kmap_get_fixmap_pte(vaddr) \ pte_offset_kernel( \ diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index e23cccd..8d581ab 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -30,6 +30,7 @@ config MICROBLAZE select MODULES_USE_ELF_RELA select CLONE_BACKWARDS3 select CLKSRC_OF + select BUILDTIME_EXTABLE_SORT config SWAP def_bool n diff --git a/arch/microblaze/include/asm/fixmap.h b/arch/microblaze/include/asm/fixmap.h index f2b312e..06c0e2b 100644 --- a/arch/microblaze/include/asm/fixmap.h +++ b/arch/microblaze/include/asm/fixmap.h @@ -58,52 +58,12 @@ enum fixed_addresses { extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_CI) - -#define clear_fixmap(idx) \ - __set_fixmap(idx, 0, __pgprot(0)) - #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_CI -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c93d92b..52dac06 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2129,13 +2129,13 @@ config SMP depends on SYS_SUPPORTS_SMP help This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. People using multiprocessor machines who say Y here should also say @@ -2430,7 +2430,7 @@ source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" config RAPIDIO - bool "RapidIO support" + tristate "RapidIO support" depends on PCI default n help diff --git a/arch/mips/include/asm/fixmap.h b/arch/mips/include/asm/fixmap.h index dfaaf49..8c012af 100644 --- a/arch/mips/include/asm/fixmap.h +++ b/arch/mips/include/asm/fixmap.h @@ -71,38 +71,7 @@ enum fixed_addresses { #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #define kmap_get_fixmap_pte(vaddr) \ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)) diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 8bde923..a648de1 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -184,13 +184,13 @@ config SMP depends on MN10300_PROC_MN2WS0038 || MN10300_PROC_MN2WS0050 ---help--- This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. See also <file:Documentation/x86/i386/IO-APIC.txt>, diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b5f1858..bb2a8ec 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -229,13 +229,13 @@ config SMP bool "Symmetric multi-processing support" ---help--- This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b2be8e8..bedc62b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -794,7 +794,7 @@ config HAS_RAPIDIO default n config RAPIDIO - bool "RapidIO support" + tristate "RapidIO support" depends on HAS_RAPIDIO || PCI help If you say Y here, the kernel will include drivers and @@ -802,7 +802,7 @@ config RAPIDIO config FSL_RIO bool "Freescale Embedded SRIO Controller support" - depends on RAPIDIO && HAS_RAPIDIO + depends on RAPIDIO = y && HAS_RAPIDIO default "n" ---help--- Include support for RapidIO controller on Freescale embedded diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 5c2c023..90f604b 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -58,52 +58,12 @@ enum fixed_addresses { extern void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NCG) - -#define clear_fixmap(idx) \ - __set_fixmap(idx, 0, __pgprot(0)) - #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e9f3125..4f858f7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -334,10 +334,10 @@ config SMP a system with only one CPU, like most personal computers, say N. If you have a system with more than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. See also the SMP-HOWTO available at diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index ce29831..6357710 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -701,13 +701,13 @@ config SMP depends on SYS_SUPPORTS_SMP ---help--- This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. People using multiprocessor machines who say Y here should also say diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h index cbe0186..4daf91c 100644 --- a/arch/sh/include/asm/fixmap.h +++ b/arch/sh/include/asm/fixmap.h @@ -79,13 +79,6 @@ extern void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags); extern void __clear_fixmap(enum fixed_addresses idx, pgprot_t flags); -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) /* * used by vmalloc.c. * @@ -101,36 +94,8 @@ extern void __clear_fixmap(enum fixed_addresses idx, pgprot_t flags); #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE - return __fix_to_virt(idx); -} +#include <asm-generic/fixmap.h> -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} #endif diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index 49c09c7..67a049e 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -995,29 +995,19 @@ static struct unwinder dwarf_unwinder = { static void dwarf_unwinder_cleanup(void) { - struct rb_node **fde_rb_node = &fde_root.rb_node; - struct rb_node **cie_rb_node = &cie_root.rb_node; + struct dwarf_fde *fde, *next_fde; + struct dwarf_cie *cie, *next_cie; /* * Deallocate all the memory allocated for the DWARF unwinder. * Traverse all the FDE/CIE lists and remove and free all the * memory associated with those data structures. */ - while (*fde_rb_node) { - struct dwarf_fde *fde; - - fde = rb_entry(*fde_rb_node, struct dwarf_fde, node); - rb_erase(*fde_rb_node, &fde_root); + rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node) kfree(fde); - } - while (*cie_rb_node) { - struct dwarf_cie *cie; - - cie = rb_entry(*cie_rb_node, struct dwarf_cie, node); - rb_erase(*cie_rb_node, &cie_root); + rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node) kfree(cie); - } kmem_cache_destroy(dwarf_reg_cachep); kmem_cache_destroy(dwarf_frame_cachep); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index d4f7a6a..63dfe68 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -152,10 +152,10 @@ config SMP a system with only one CPU, say N. If you have a system with more than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. People using multiprocessor machines who say Y here should also say diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index c6b9c1b..ffe2637 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -25,9 +25,6 @@ #include <asm/kmap_types.h> #endif -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at @@ -83,35 +80,7 @@ enum fixed_addresses { #define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE) #define FIXADDR_BOOT_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE) -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h index 21a423b..3094ea3c 100644 --- a/arch/um/include/asm/fixmap.h +++ b/arch/um/include/asm/fixmap.h @@ -43,13 +43,6 @@ enum fixed_addresses { extern void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags); -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) /* * used by vmalloc.c. * @@ -62,37 +55,6 @@ extern void __set_fixmap (enum fixed_addresses idx, #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d3b9186..3e97a3d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -279,13 +279,13 @@ config SMP bool "Symmetric multi-processing support" ---help--- This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. Note that if you say Y here and choose architecture "586" or @@ -731,6 +731,7 @@ config APB_TIMER # The code disables itself when not needed. config DMI default y + select DMI_SCAN_MACHINE_NON_EFI_FALLBACK bool "Enable DMI scanning" if EXPERT ---help--- Enabled scanning of DMI to identify machine quirks. Say Y diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index fd8f9e2..535192f 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h @@ -13,7 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len) } /* Use early IO mappings for DMI because it's initialized early */ -#define dmi_ioremap early_ioremap -#define dmi_iounmap early_iounmap +#define dmi_early_remap early_ioremap +#define dmi_early_unmap early_iounmap +#define dmi_remap ioremap +#define dmi_unmap iounmap #endif /* _ASM_X86_DMI_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e846225..7252cd3 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -175,64 +175,7 @@ static inline void __set_fixmap(enum fixed_addresses idx, } #endif -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) - -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - -#define clear_fixmap(idx) \ - __set_fixmap(idx, 0, __pgprot(0)) - -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without translation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} - -/* Return an pointer with offset calculated */ -static __always_inline unsigned long -__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) -{ - __set_fixmap(idx, phys, flags); - return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); -} - -#define set_fixmap_offset(idx, phys) \ - __set_fixmap_offset(idx, phys, PAGE_KERNEL) - -#define set_fixmap_offset_nocache(idx, phys) \ - __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE) +#include <asm-generic/fixmap.h> #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0596e8e..207d9aef 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -108,8 +108,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, static inline void get_head_page_multiple(struct page *page, int nr) { - VM_BUG_ON(page != compound_head(page)); - VM_BUG_ON(page_count(page) == 0); + VM_BUG_ON_PAGE(page != compound_head(page), page); + VM_BUG_ON_PAGE(page_count(page) == 0, page); atomic_add(nr, &page->_count); SetPageReferenced(page); } @@ -135,7 +135,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, head = pte_page(pte); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { - VM_BUG_ON(compound_head(page) != head); + VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; if (PageTail(page)) get_huge_page_tail(page); @@ -212,7 +212,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, head = pte_page(pte); page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { - VM_BUG_ON(compound_head(page) != head); + VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; if (PageTail(page)) get_huge_page_tail(page); diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 86b9f37..9ffa90c 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -368,7 +368,8 @@ config BLK_DEV_RAM For details, read <file:Documentation/blockdev/ramdisk.txt>. To compile this driver as a module, choose M here: the - module will be called rd. + module will be called brd. An alias "rd" has been defined + for historical reasons. Most normal users won't need the RAM disk functionality, and can thus say N here. diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 0747872..5a29fac 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -108,6 +108,9 @@ config DMI_SYSFS under /sys/firmware/dmi when this option is enabled and loaded. +config DMI_SCAN_MACHINE_NON_EFI_FALLBACK + bool + config ISCSI_IBFT_FIND bool "iSCSI Boot Firmware Table Attributes" depends on X86 diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index c7e81ff..17afc51 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -116,7 +116,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, { u8 *buf; - buf = dmi_ioremap(dmi_base, dmi_len); + buf = dmi_early_remap(dmi_base, dmi_len); if (buf == NULL) return -1; @@ -124,7 +124,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, add_device_randomness(buf, dmi_len); - dmi_iounmap(buf, dmi_len); + dmi_early_unmap(buf, dmi_len); return 0; } @@ -527,18 +527,18 @@ void __init dmi_scan_machine(void) * needed during early boot. This also means we can * iounmap the space when we're done with it. */ - p = dmi_ioremap(efi.smbios, 32); + p = dmi_early_remap(efi.smbios, 32); if (p == NULL) goto error; memcpy_fromio(buf, p, 32); - dmi_iounmap(p, 32); + dmi_early_unmap(p, 32); if (!dmi_present(buf)) { dmi_available = 1; goto out; } - } else { - p = dmi_ioremap(0xF0000, 0x10000); + } else if (IS_ENABLED(CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK)) { + p = dmi_early_remap(0xF0000, 0x10000); if (p == NULL) goto error; @@ -554,12 +554,12 @@ void __init dmi_scan_machine(void) memcpy_fromio(buf + 16, q, 16); if (!dmi_present(buf)) { dmi_available = 1; - dmi_iounmap(p, 0x10000); + dmi_early_unmap(p, 0x10000); goto out; } memcpy(buf, buf + 16, 16); } - dmi_iounmap(p, 0x10000); + dmi_early_unmap(p, 0x10000); } error: pr_info("DMI not present or invalid.\n"); @@ -831,13 +831,13 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *), if (!dmi_available) return -1; - buf = ioremap(dmi_base, dmi_len); + buf = dmi_remap(dmi_base, dmi_len); if (buf == NULL) return -1; dmi_table(buf, dmi_len, dmi_num, decode, private_data); - iounmap(buf); + dmi_unmap(buf); return 0; } EXPORT_SYMBOL_GPL(dmi_walk); diff --git a/drivers/gpu/drm/gma500/backlight.c b/drivers/gpu/drm/gma500/backlight.c index 143eba3..ea7dfc59 100644 --- a/drivers/gpu/drm/gma500/backlight.c +++ b/drivers/gpu/drm/gma500/backlight.c @@ -26,13 +26,13 @@ #include "intel_bios.h" #include "power.h" +#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE static void do_gma_backlight_set(struct drm_device *dev) { -#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE struct drm_psb_private *dev_priv = dev->dev_private; backlight_update_status(dev_priv->backlight_device); -#endif } +#endif void gma_backlight_enable(struct drm_device *dev) { diff --git a/drivers/mailbox/omap-mbox.h b/drivers/mailbox/omap-mbox.h index 6cd38fc..86d7518 100644 --- a/drivers/mailbox/omap-mbox.h +++ b/drivers/mailbox/omap-mbox.h @@ -52,7 +52,7 @@ struct omap_mbox_queue { struct omap_mbox { const char *name; - unsigned int irq; + int irq; struct omap_mbox_queue *txq, *rxq; struct omap_mbox_ops *ops; struct device *dev; diff --git a/drivers/memstick/host/rtsx_pci_ms.c b/drivers/memstick/host/rtsx_pci_ms.c index 25f8f93..2a635b6 100644 --- a/drivers/memstick/host/rtsx_pci_ms.c +++ b/drivers/memstick/host/rtsx_pci_ms.c @@ -145,6 +145,8 @@ static int ms_transfer_data(struct realtek_pci_ms *host, unsigned char data_dir, unsigned int length = sg->length; u16 sec_cnt = (u16)(length / 512); u8 val, trans_mode, dma_dir; + struct memstick_dev *card = host->msh->card; + bool pro_card = card->id.type == MEMSTICK_TYPE_PRO; dev_dbg(ms_dev(host), "%s: tpc = 0x%02x, data_dir = %s, length = %d\n", __func__, tpc, (data_dir == READ) ? "READ" : "WRITE", @@ -152,19 +154,21 @@ static int ms_transfer_data(struct realtek_pci_ms *host, unsigned char data_dir, if (data_dir == READ) { dma_dir = DMA_DIR_FROM_CARD; - trans_mode = MS_TM_AUTO_READ; + trans_mode = pro_card ? MS_TM_AUTO_READ : MS_TM_NORMAL_READ; } else { dma_dir = DMA_DIR_TO_CARD; - trans_mode = MS_TM_AUTO_WRITE; + trans_mode = pro_card ? MS_TM_AUTO_WRITE : MS_TM_NORMAL_WRITE; } rtsx_pci_init_cmd(pcr); rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_TPC, 0xFF, tpc); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_H, - 0xFF, (u8)(sec_cnt >> 8)); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_L, - 0xFF, (u8)sec_cnt); + if (pro_card) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_H, + 0xFF, (u8)(sec_cnt >> 8)); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_SECTOR_CNT_L, + 0xFF, (u8)sec_cnt); + } rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, MS_TRANS_CFG, 0xFF, cfg); rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0, @@ -192,8 +196,14 @@ static int ms_transfer_data(struct realtek_pci_ms *host, unsigned char data_dir, } rtsx_pci_read_register(pcr, MS_TRANS_CFG, &val); - if (val & (MS_INT_CMDNK | MS_INT_ERR | MS_CRC16_ERR | MS_RDY_TIMEOUT)) - return -EIO; + if (pro_card) { + if (val & (MS_INT_CMDNK | MS_INT_ERR | + MS_CRC16_ERR | MS_RDY_TIMEOUT)) + return -EIO; + } else { + if (val & (MS_CRC16_ERR | MS_RDY_TIMEOUT)) + return -EIO; + } return 0; } @@ -462,8 +472,8 @@ static int rtsx_pci_ms_set_param(struct memstick_host *msh, clock = 19000000; ssc_depth = RTSX_SSC_DEPTH_500K; - err = rtsx_pci_write_register(pcr, MS_CFG, - 0x18, MS_BUS_WIDTH_1); + err = rtsx_pci_write_register(pcr, MS_CFG, 0x58, + MS_BUS_WIDTH_1 | PUSH_TIME_DEFAULT); if (err < 0) return err; } else if (value == MEMSTICK_PAR4) { diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index f47eaa7..612ca40 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -175,7 +175,7 @@ static inline int max8998_i2c_get_driver_data(struct i2c_client *i2c, if (IS_ENABLED(CONFIG_OF) && i2c->dev.of_node) { const struct of_device_id *match; match = of_match_node(max8998_dt_match, i2c->dev.of_node); - return (int)match->data; + return (int)(long)match->data; } return (int)id->driver_data; diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c index 6939ae5..966cf65 100644 --- a/drivers/mfd/tps65217.c +++ b/drivers/mfd/tps65217.c @@ -170,7 +170,7 @@ static int tps65217_probe(struct i2c_client *client, "Failed to find matching dt id\n"); return -EINVAL; } - chip_id = (unsigned int)match->data; + chip_id = (unsigned int)(unsigned long)match->data; status_off = of_property_read_bool(client->dev.of_node, "ti,pmic-shutdown-controller"); } diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index b1328a4..db933de 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -212,6 +212,17 @@ config RTC_DRV_DS3232 This driver can also be built as a module. If so, the module will be called rtc-ds3232. +config RTC_DRV_HYM8563 + tristate "Haoyu Microelectronics HYM8563" + depends on I2C && OF + help + Say Y to enable support for the HYM8563 I2C RTC chip. Apart + from the usual rtc functions it provides a clock output of + up to 32kHz. + + This driver can also be built as a module. If so, the module + will be called rtc-hym8563. + config RTC_DRV_LP8788 tristate "TI LP8788 RTC driver" depends on MFD_LP8788 @@ -637,7 +648,7 @@ comment "Platform RTC drivers" config RTC_DRV_CMOS tristate "PC-style 'CMOS'" - depends on X86 || ARM || M32R || ATARI || PPC || MIPS || SPARC64 + depends on X86 || ARM || M32R || PPC || MIPS || SPARC64 default y if X86 help Say "yes" here to get direct support for the real time clock diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index c00741a..b427bf7 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_RTC_DRV_EP93XX) += rtc-ep93xx.o obj-$(CONFIG_RTC_DRV_FM3130) += rtc-fm3130.o obj-$(CONFIG_RTC_DRV_GENERIC) += rtc-generic.o obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o +obj-$(CONFIG_RTC_DRV_HYM8563) += rtc-hym8563.o obj-$(CONFIG_RTC_DRV_IMXDI) += rtc-imxdi.o obj-$(CONFIG_RTC_DRV_ISL1208) += rtc-isl1208.o obj-$(CONFIG_RTC_DRV_ISL12022) += rtc-isl12022.o diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 0242681..589351e 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> +#include <linux/of.h> #include <linux/rtc.h> #include <linux/kdev_t.h> #include <linux/idr.h> @@ -157,12 +158,27 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev, { struct rtc_device *rtc; struct rtc_wkalrm alrm; - int id, err; + int of_id = -1, id = -1, err; + + if (dev->of_node) + of_id = of_alias_get_id(dev->of_node, "rtc"); + else if (dev->parent && dev->parent->of_node) + of_id = of_alias_get_id(dev->parent->of_node, "rtc"); + + if (of_id >= 0) { + id = ida_simple_get(&rtc_ida, of_id, of_id + 1, + GFP_KERNEL); + if (id < 0) + dev_warn(dev, "/aliases ID %d not available\n", + of_id); + } - id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL); if (id < 0) { - err = id; - goto exit; + id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + err = id; + goto exit; + } } rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL); diff --git a/drivers/rtc/rtc-as3722.c b/drivers/rtc/rtc-as3722.c index 9cfa817..4af0169 100644 --- a/drivers/rtc/rtc-as3722.c +++ b/drivers/rtc/rtc-as3722.c @@ -198,7 +198,7 @@ static int as3722_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - as3722_rtc->rtc = rtc_device_register("as3722", &pdev->dev, + as3722_rtc->rtc = devm_rtc_device_register(&pdev->dev, "as3722-rtc", &as3722_rtc_ops, THIS_MODULE); if (IS_ERR(as3722_rtc->rtc)) { ret = PTR_ERR(as3722_rtc->rtc); @@ -209,28 +209,16 @@ static int as3722_rtc_probe(struct platform_device *pdev) as3722_rtc->alarm_irq = platform_get_irq(pdev, 0); dev_info(&pdev->dev, "RTC interrupt %d\n", as3722_rtc->alarm_irq); - ret = request_threaded_irq(as3722_rtc->alarm_irq, NULL, + ret = devm_request_threaded_irq(&pdev->dev, as3722_rtc->alarm_irq, NULL, as3722_alarm_irq, IRQF_ONESHOT | IRQF_EARLY_RESUME, "rtc-alarm", as3722_rtc); if (ret < 0) { dev_err(&pdev->dev, "Failed to request alarm IRQ %d: %d\n", as3722_rtc->alarm_irq, ret); - goto scrub; + return ret; } disable_irq(as3722_rtc->alarm_irq); return 0; -scrub: - rtc_device_unregister(as3722_rtc->rtc); - return ret; -} - -static int as3722_rtc_remove(struct platform_device *pdev) -{ - struct as3722_rtc *as3722_rtc = platform_get_drvdata(pdev); - - free_irq(as3722_rtc->alarm_irq, as3722_rtc); - rtc_device_unregister(as3722_rtc->rtc); - return 0; } #ifdef CONFIG_PM_SLEEP @@ -260,7 +248,6 @@ static const struct dev_pm_ops as3722_rtc_pm_ops = { static struct platform_driver as3722_rtc_driver = { .probe = as3722_rtc_probe, - .remove = as3722_rtc_remove, .driver = { .name = "as3722-rtc", .pm = &as3722_rtc_pm_ops, diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index a2325bc..cae212f 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -756,11 +756,9 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) irq_handler_t rtc_cmos_int_handler; if (is_hpet_enabled()) { - int err; - rtc_cmos_int_handler = hpet_rtc_interrupt; - err = hpet_register_irq_handler(cmos_interrupt); - if (err != 0) { + retval = hpet_register_irq_handler(cmos_interrupt); + if (retval) { dev_warn(dev, "hpet_register_irq_handler " " failed in rtc_init()."); goto cleanup1; @@ -1175,7 +1173,7 @@ static struct platform_driver cmos_platform_driver = { .remove = __exit_p(cmos_platform_remove), .shutdown = cmos_platform_shutdown, .driver = { - .name = (char *) driver_name, + .name = driver_name, #ifdef CONFIG_PM .pm = &cmos_pm_ops, #endif diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c index 80f3237..2dd586a 100644 --- a/drivers/rtc/rtc-ds1305.c +++ b/drivers/rtc/rtc-ds1305.c @@ -787,7 +787,6 @@ static int ds1305_remove(struct spi_device *spi) cancel_work_sync(&ds1305->work); } - spi_set_drvdata(spi, NULL); return 0; } diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c index 17b73fd..5a1f3b2 100644 --- a/drivers/rtc/rtc-ds1742.c +++ b/drivers/rtc/rtc-ds1742.c @@ -13,12 +13,13 @@ */ #include <linux/bcd.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/delay.h> #include <linux/jiffies.h> #include <linux/rtc.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/io.h> #include <linux/module.h> @@ -215,12 +216,19 @@ static int ds1742_rtc_remove(struct platform_device *pdev) return 0; } +static struct of_device_id __maybe_unused ds1742_rtc_of_match[] = { + { .compatible = "maxim,ds1742", }, + { } +}; +MODULE_DEVICE_TABLE(of, ds1742_rtc_of_match); + static struct platform_driver ds1742_rtc_driver = { .probe = ds1742_rtc_probe, .remove = ds1742_rtc_remove, .driver = { .name = "rtc-ds1742", .owner = THIS_MODULE, + .of_match_table = ds1742_rtc_of_match, }, }; diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c new file mode 100644 index 0000000..bd628a6 --- /dev/null +++ b/drivers/rtc/rtc-hym8563.c @@ -0,0 +1,606 @@ +/* + * Haoyu HYM8563 RTC driver + * + * Copyright (C) 2013 MundoReader S.L. + * Author: Heiko Stuebner <heiko@sntech.de> + * + * based on rtc-HYM8563 + * Copyright (C) 2010 ROCKCHIP, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/clk-provider.h> +#include <linux/i2c.h> +#include <linux/bcd.h> +#include <linux/rtc.h> + +#define HYM8563_CTL1 0x00 +#define HYM8563_CTL1_TEST BIT(7) +#define HYM8563_CTL1_STOP BIT(5) +#define HYM8563_CTL1_TESTC BIT(3) + +#define HYM8563_CTL2 0x01 +#define HYM8563_CTL2_TI_TP BIT(4) +#define HYM8563_CTL2_AF BIT(3) +#define HYM8563_CTL2_TF BIT(2) +#define HYM8563_CTL2_AIE BIT(1) +#define HYM8563_CTL2_TIE BIT(0) + +#define HYM8563_SEC 0x02 +#define HYM8563_SEC_VL BIT(7) +#define HYM8563_SEC_MASK 0x7f + +#define HYM8563_MIN 0x03 +#define HYM8563_MIN_MASK 0x7f + +#define HYM8563_HOUR 0x04 +#define HYM8563_HOUR_MASK 0x3f + +#define HYM8563_DAY 0x05 +#define HYM8563_DAY_MASK 0x3f + +#define HYM8563_WEEKDAY 0x06 +#define HYM8563_WEEKDAY_MASK 0x07 + +#define HYM8563_MONTH 0x07 +#define HYM8563_MONTH_CENTURY BIT(7) +#define HYM8563_MONTH_MASK 0x1f + +#define HYM8563_YEAR 0x08 + +#define HYM8563_ALM_MIN 0x09 +#define HYM8563_ALM_HOUR 0x0a +#define HYM8563_ALM_DAY 0x0b +#define HYM8563_ALM_WEEK 0x0c + +/* Each alarm check can be disabled by setting this bit in the register */ +#define HYM8563_ALM_BIT_DISABLE BIT(7) + +#define HYM8563_CLKOUT 0x0d +#define HYM8563_CLKOUT_DISABLE BIT(7) +#define HYM8563_CLKOUT_32768 0 +#define HYM8563_CLKOUT_1024 1 +#define HYM8563_CLKOUT_32 2 +#define HYM8563_CLKOUT_1 3 +#define HYM8563_CLKOUT_MASK 3 + +#define HYM8563_TMR_CTL 0x0e +#define HYM8563_TMR_CTL_ENABLE BIT(7) +#define HYM8563_TMR_CTL_4096 0 +#define HYM8563_TMR_CTL_64 1 +#define HYM8563_TMR_CTL_1 2 +#define HYM8563_TMR_CTL_1_60 3 +#define HYM8563_TMR_CTL_MASK 3 + +#define HYM8563_TMR_CNT 0x0f + +struct hym8563 { + struct i2c_client *client; + struct rtc_device *rtc; + bool valid; +#ifdef CONFIG_COMMON_CLK + struct clk_hw clkout_hw; +#endif +}; + +/* + * RTC handling + */ + +static int hym8563_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct i2c_client *client = to_i2c_client(dev); + struct hym8563 *hym8563 = i2c_get_clientdata(client); + u8 buf[7]; + int ret; + + if (!hym8563->valid) { + dev_warn(&client->dev, "no valid clock/calendar values available\n"); + return -EPERM; + } + + ret = i2c_smbus_read_i2c_block_data(client, HYM8563_SEC, 7, buf); + + tm->tm_sec = bcd2bin(buf[0] & HYM8563_SEC_MASK); + tm->tm_min = bcd2bin(buf[1] & HYM8563_MIN_MASK); + tm->tm_hour = bcd2bin(buf[2] & HYM8563_HOUR_MASK); + tm->tm_mday = bcd2bin(buf[3] & HYM8563_DAY_MASK); + tm->tm_wday = bcd2bin(buf[4] & HYM8563_WEEKDAY_MASK); /* 0 = Sun */ + tm->tm_mon = bcd2bin(buf[5] & HYM8563_MONTH_MASK) - 1; /* 0 = Jan */ + tm->tm_year = bcd2bin(buf[6]) + 100; + + return 0; +} + +static int hym8563_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct i2c_client *client = to_i2c_client(dev); + struct hym8563 *hym8563 = i2c_get_clientdata(client); + u8 buf[7]; + int ret; + + /* Years >= 2100 are to far in the future, 19XX is to early */ + if (tm->tm_year < 100 || tm->tm_year >= 200) + return -EINVAL; + + buf[0] = bin2bcd(tm->tm_sec); + buf[1] = bin2bcd(tm->tm_min); + buf[2] = bin2bcd(tm->tm_hour); + buf[3] = bin2bcd(tm->tm_mday); + buf[4] = bin2bcd(tm->tm_wday); + buf[5] = bin2bcd(tm->tm_mon + 1); + + /* + * While the HYM8563 has a century flag in the month register, + * it does not seem to carry it over a subsequent write/read. + * So we'll limit ourself to 100 years, starting at 2000 for now. + */ + buf[6] = tm->tm_year - 100; + + /* + * CTL1 only contains TEST-mode bits apart from stop, + * so no need to read the value first + */ + ret = i2c_smbus_write_byte_data(client, HYM8563_CTL1, + HYM8563_CTL1_STOP); + if (ret < 0) + return ret; + + ret = i2c_smbus_write_i2c_block_data(client, HYM8563_SEC, 7, buf); + if (ret < 0) + return ret; + + ret = i2c_smbus_write_byte_data(client, HYM8563_CTL1, 0); + if (ret < 0) + return ret; + + hym8563->valid = true; + + return 0; +} + +static int hym8563_rtc_alarm_irq_enable(struct device *dev, + unsigned int enabled) +{ + struct i2c_client *client = to_i2c_client(dev); + int data; + + data = i2c_smbus_read_byte_data(client, HYM8563_CTL2); + if (data < 0) + return data; + + if (enabled) + data |= HYM8563_CTL2_AIE; + else + data &= ~HYM8563_CTL2_AIE; + + return i2c_smbus_write_byte_data(client, HYM8563_CTL2, data); +}; + +static int hym8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) +{ + struct i2c_client *client = to_i2c_client(dev); + struct rtc_time *alm_tm = &alm->time; + u8 buf[4]; + int ret; + + ret = i2c_smbus_read_i2c_block_data(client, HYM8563_ALM_MIN, 4, buf); + if (ret < 0) + return ret; + + /* The alarm only has a minute accuracy */ + alm_tm->tm_sec = -1; + + alm_tm->tm_min = (buf[0] & HYM8563_ALM_BIT_DISABLE) ? + -1 : + bcd2bin(buf[0] & HYM8563_MIN_MASK); + alm_tm->tm_hour = (buf[1] & HYM8563_ALM_BIT_DISABLE) ? + -1 : + bcd2bin(buf[1] & HYM8563_HOUR_MASK); + alm_tm->tm_mday = (buf[2] & HYM8563_ALM_BIT_DISABLE) ? + -1 : + bcd2bin(buf[2] & HYM8563_DAY_MASK); + alm_tm->tm_wday = (buf[3] & HYM8563_ALM_BIT_DISABLE) ? + -1 : + bcd2bin(buf[3] & HYM8563_WEEKDAY_MASK); + + alm_tm->tm_mon = -1; + alm_tm->tm_year = -1; + + ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2); + if (ret < 0) + return ret; + + if (ret & HYM8563_CTL2_AIE) + alm->enabled = 1; + + return 0; +} + +static int hym8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) +{ + struct i2c_client *client = to_i2c_client(dev); + struct rtc_time *alm_tm = &alm->time; + u8 buf[4]; + int ret; + + /* + * The alarm has no seconds so deal with it + */ + if (alm_tm->tm_sec) { + alm_tm->tm_sec = 0; + alm_tm->tm_min++; + if (alm_tm->tm_min >= 60) { + alm_tm->tm_min = 0; + alm_tm->tm_hour++; + if (alm_tm->tm_hour >= 24) { + alm_tm->tm_hour = 0; + alm_tm->tm_mday++; + if (alm_tm->tm_mday > 31) + alm_tm->tm_mday = 0; + } + } + } + + ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2); + if (ret < 0) + return ret; + + ret &= ~HYM8563_CTL2_AIE; + + ret = i2c_smbus_write_byte_data(client, HYM8563_CTL2, ret); + if (ret < 0) + return ret; + + buf[0] = (alm_tm->tm_min < 60 && alm_tm->tm_min >= 0) ? + bin2bcd(alm_tm->tm_min) : HYM8563_ALM_BIT_DISABLE; + + buf[1] = (alm_tm->tm_hour < 24 && alm_tm->tm_hour >= 0) ? + bin2bcd(alm_tm->tm_hour) : HYM8563_ALM_BIT_DISABLE; + + buf[2] = (alm_tm->tm_mday <= 31 && alm_tm->tm_mday >= 1) ? + bin2bcd(alm_tm->tm_mday) : HYM8563_ALM_BIT_DISABLE; + + buf[3] = (alm_tm->tm_wday < 7 && alm_tm->tm_wday >= 0) ? + bin2bcd(alm_tm->tm_wday) : HYM8563_ALM_BIT_DISABLE; + + ret = i2c_smbus_write_i2c_block_data(client, HYM8563_ALM_MIN, 4, buf); + if (ret < 0) + return ret; + + return hym8563_rtc_alarm_irq_enable(dev, alm->enabled); +} + +static const struct rtc_class_ops hym8563_rtc_ops = { + .read_time = hym8563_rtc_read_time, + .set_time = hym8563_rtc_set_time, + .alarm_irq_enable = hym8563_rtc_alarm_irq_enable, + .read_alarm = hym8563_rtc_read_alarm, + .set_alarm = hym8563_rtc_set_alarm, +}; + +/* + * Handling of the clkout + */ + +#ifdef CONFIG_COMMON_CLK +#define clkout_hw_to_hym8563(_hw) container_of(_hw, struct hym8563, clkout_hw) + +static int clkout_rates[] = { + 32768, + 1024, + 32, + 1, +}; + +static unsigned long hym8563_clkout_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw); + struct i2c_client *client = hym8563->client; + int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT); + + if (ret < 0 || ret & HYM8563_CLKOUT_DISABLE) + return 0; + + ret &= HYM8563_CLKOUT_MASK; + return clkout_rates[ret]; +} + +static long hym8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) + if (clkout_rates[i] <= rate) + return clkout_rates[i]; + + return 0; +} + +static int hym8563_clkout_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw); + struct i2c_client *client = hym8563->client; + int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT); + int i; + + if (ret < 0) + return ret; + + for (i = 0; i < ARRAY_SIZE(clkout_rates); i++) + if (clkout_rates[i] == rate) { + ret &= ~HYM8563_CLKOUT_MASK; + ret |= i; + return i2c_smbus_write_byte_data(client, + HYM8563_CLKOUT, ret); + } + + return -EINVAL; +} + +static int hym8563_clkout_control(struct clk_hw *hw, bool enable) +{ + struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw); + struct i2c_client *client = hym8563->client; + int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT); + + if (ret < 0) + return ret; + + if (enable) + ret &= ~HYM8563_CLKOUT_DISABLE; + else + ret |= HYM8563_CLKOUT_DISABLE; + + return i2c_smbus_write_byte_data(client, HYM8563_CLKOUT, ret); +} + +static int hym8563_clkout_prepare(struct clk_hw *hw) +{ + return hym8563_clkout_control(hw, 1); +} + +static void hym8563_clkout_unprepare(struct clk_hw *hw) +{ + hym8563_clkout_control(hw, 0); +} + +static int hym8563_clkout_is_prepared(struct clk_hw *hw) +{ + struct hym8563 *hym8563 = clkout_hw_to_hym8563(hw); + struct i2c_client *client = hym8563->client; + int ret = i2c_smbus_read_byte_data(client, HYM8563_CLKOUT); + + if (ret < 0) + return ret; + + return !(ret & HYM8563_CLKOUT_DISABLE); +} + +static const struct clk_ops hym8563_clkout_ops = { + .prepare = hym8563_clkout_prepare, + .unprepare = hym8563_clkout_unprepare, + .is_prepared = hym8563_clkout_is_prepared, + .recalc_rate = hym8563_clkout_recalc_rate, + .round_rate = hym8563_clkout_round_rate, + .set_rate = hym8563_clkout_set_rate, +}; + +static struct clk *hym8563_clkout_register_clk(struct hym8563 *hym8563) +{ + struct i2c_client *client = hym8563->client; + struct device_node *node = client->dev.of_node; + struct clk *clk; + struct clk_init_data init; + int ret; + + ret = i2c_smbus_write_byte_data(client, HYM8563_CLKOUT, + HYM8563_CLKOUT_DISABLE); + if (ret < 0) + return ERR_PTR(ret); + + init.name = "hym8563-clkout"; + init.ops = &hym8563_clkout_ops; + init.flags = CLK_IS_ROOT; + init.parent_names = NULL; + init.num_parents = 0; + hym8563->clkout_hw.init = &init; + + /* register the clock */ + clk = clk_register(&client->dev, &hym8563->clkout_hw); + + if (!IS_ERR(clk)) + of_clk_add_provider(node, of_clk_src_simple_get, clk); + + return clk; +} +#endif + +/* + * The alarm interrupt is implemented as a level-low interrupt in the + * hym8563, while the timer interrupt uses a falling edge. + * We don't use the timer at all, so the interrupt is requested to + * use the level-low trigger. + */ +static irqreturn_t hym8563_irq(int irq, void *dev_id) +{ + struct hym8563 *hym8563 = (struct hym8563 *)dev_id; + struct i2c_client *client = hym8563->client; + struct mutex *lock = &hym8563->rtc->ops_lock; + int data, ret; + + mutex_lock(lock); + + /* Clear the alarm flag */ + + data = i2c_smbus_read_byte_data(client, HYM8563_CTL2); + if (data < 0) { + dev_err(&client->dev, "%s: error reading i2c data %d\n", + __func__, data); + goto out; + } + + data &= ~HYM8563_CTL2_AF; + + ret = i2c_smbus_write_byte_data(client, HYM8563_CTL2, data); + if (ret < 0) { + dev_err(&client->dev, "%s: error writing i2c data %d\n", + __func__, ret); + } + +out: + mutex_unlock(lock); + return IRQ_HANDLED; +} + +static int hym8563_init_device(struct i2c_client *client) +{ + int ret; + + /* Clear stop flag if present */ + ret = i2c_smbus_write_byte_data(client, HYM8563_CTL1, 0); + if (ret < 0) + return ret; + + ret = i2c_smbus_read_byte_data(client, HYM8563_CTL2); + if (ret < 0) + return ret; + + /* Disable alarm and timer interrupts */ + ret &= ~HYM8563_CTL2_AIE; + ret &= ~HYM8563_CTL2_TIE; + + /* Clear any pending alarm and timer flags */ + if (ret & HYM8563_CTL2_AF) + ret &= ~HYM8563_CTL2_AF; + + if (ret & HYM8563_CTL2_TF) + ret &= ~HYM8563_CTL2_TF; + + ret &= ~HYM8563_CTL2_TI_TP; + + return i2c_smbus_write_byte_data(client, HYM8563_CTL2, ret); +} + +#ifdef CONFIG_PM_SLEEP +static int hym8563_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + + if (device_may_wakeup(dev)) { + ret = enable_irq_wake(client->irq); + if (ret) { + dev_err(dev, "enable_irq_wake failed, %d\n", ret); + return ret; + } + } + + return 0; +} + +static int hym8563_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + if (device_may_wakeup(dev)) + disable_irq_wake(client->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(hym8563_pm_ops, hym8563_suspend, hym8563_resume); + +static int hym8563_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct hym8563 *hym8563; + int ret; + + hym8563 = devm_kzalloc(&client->dev, sizeof(*hym8563), GFP_KERNEL); + if (!hym8563) + return -ENOMEM; + + hym8563->client = client; + i2c_set_clientdata(client, hym8563); + + device_set_wakeup_capable(&client->dev, true); + + ret = hym8563_init_device(client); + if (ret) { + dev_err(&client->dev, "could not init device, %d\n", ret); + return ret; + } + + ret = devm_request_threaded_irq(&client->dev, client->irq, + NULL, hym8563_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + client->name, hym8563); + if (ret < 0) { + dev_err(&client->dev, "irq %d request failed, %d\n", + client->irq, ret); + return ret; + } + + /* check state of calendar information */ + ret = i2c_smbus_read_byte_data(client, HYM8563_SEC); + if (ret < 0) + return ret; + + hym8563->valid = !(ret & HYM8563_SEC_VL); + dev_dbg(&client->dev, "rtc information is %s\n", + hym8563->valid ? "valid" : "invalid"); + + hym8563->rtc = devm_rtc_device_register(&client->dev, client->name, + &hym8563_rtc_ops, THIS_MODULE); + if (IS_ERR(hym8563->rtc)) + return PTR_ERR(hym8563->rtc); + +#ifdef CONFIG_COMMON_CLK + hym8563_clkout_register_clk(hym8563); +#endif + + return 0; +} + +static const struct i2c_device_id hym8563_id[] = { + { "hym8563", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, hym8563_id); + +static struct of_device_id hym8563_dt_idtable[] = { + { .compatible = "haoyu,hym8563" }, + {}, +}; +MODULE_DEVICE_TABLE(of, hym8563_dt_idtable); + +static struct i2c_driver hym8563_driver = { + .driver = { + .name = "rtc-hym8563", + .owner = THIS_MODULE, + .pm = &hym8563_pm_ops, + .of_match_table = hym8563_dt_idtable, + }, + .probe = hym8563_probe, + .id_table = hym8563_id, +}; + +module_i2c_driver(hym8563_driver); + +MODULE_AUTHOR("Heiko Stuebner <heiko@sntech.de>"); +MODULE_DESCRIPTION("HYM8563 RTC driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-max8907.c b/drivers/rtc/rtc-max8907.c index 8e45b3c..3032178 100644 --- a/drivers/rtc/rtc-max8907.c +++ b/drivers/rtc/rtc-max8907.c @@ -51,7 +51,7 @@ static irqreturn_t max8907_irq_handler(int irq, void *data) { struct max8907_rtc *rtc = data; - regmap_update_bits(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0x7f, 0); + regmap_write(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0); rtc_update_irq(rtc->rtc_dev, 1, RTC_IRQF | RTC_AF); @@ -64,7 +64,7 @@ static void regs_to_tm(u8 *regs, struct rtc_time *tm) bcd2bin(regs[RTC_YEAR1]) - 1900; tm->tm_mon = bcd2bin(regs[RTC_MONTH] & 0x1f) - 1; tm->tm_mday = bcd2bin(regs[RTC_DATE] & 0x3f); - tm->tm_wday = (regs[RTC_WEEKDAY] & 0x07) - 1; + tm->tm_wday = (regs[RTC_WEEKDAY] & 0x07); if (regs[RTC_HOUR] & HOUR_12) { tm->tm_hour = bcd2bin(regs[RTC_HOUR] & 0x01f); if (tm->tm_hour == 12) @@ -88,7 +88,7 @@ static void tm_to_regs(struct rtc_time *tm, u8 *regs) regs[RTC_YEAR1] = bin2bcd(low); regs[RTC_MONTH] = bin2bcd(tm->tm_mon + 1); regs[RTC_DATE] = bin2bcd(tm->tm_mday); - regs[RTC_WEEKDAY] = tm->tm_wday + 1; + regs[RTC_WEEKDAY] = tm->tm_wday; regs[RTC_HOUR] = bin2bcd(tm->tm_hour); regs[RTC_MIN] = bin2bcd(tm->tm_min); regs[RTC_SEC] = bin2bcd(tm->tm_sec); @@ -153,7 +153,7 @@ static int max8907_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) tm_to_regs(&alrm->time, regs); /* Disable alarm while we update the target time */ - ret = regmap_update_bits(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0x7f, 0); + ret = regmap_write(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0); if (ret < 0) return ret; @@ -163,8 +163,7 @@ static int max8907_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) return ret; if (alrm->enabled) - ret = regmap_update_bits(rtc->regmap, MAX8907_REG_ALARM0_CNTL, - 0x7f, 0x7f); + ret = regmap_write(rtc->regmap, MAX8907_REG_ALARM0_CNTL, 0x77); return ret; } diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 50c5726..419874f 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -391,11 +391,13 @@ static int mxc_rtc_probe(struct platform_device *pdev) pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { dev_err(&pdev->dev, "unable to get clock!\n"); - ret = PTR_ERR(pdata->clk); - goto exit_free_pdata; + return PTR_ERR(pdata->clk); } - clk_prepare_enable(pdata->clk); + ret = clk_prepare_enable(pdata->clk); + if (ret) + return ret; + rate = clk_get_rate(pdata->clk); if (rate == 32768) @@ -447,8 +449,6 @@ static int mxc_rtc_probe(struct platform_device *pdev) exit_put_clk: clk_disable_unprepare(pdata->clk); -exit_free_pdata: - return ret; } diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 1ee514a..9bd842e 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -197,10 +197,7 @@ static int pcf2127_probe(struct i2c_client *client, pcf2127_driver.driver.name, &pcf2127_rtc_ops, THIS_MODULE); - if (IS_ERR(pcf2127->rtc)) - return PTR_ERR(pcf2127->rtc); - - return 0; + return PTR_ERR_OR_ZERO(pcf2127->rtc); } static const struct i2c_device_id pcf2127_id[] = { diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c index 00b0eb7..de8d9c4 100644 --- a/drivers/rtc/rtc-rx8581.c +++ b/drivers/rtc/rtc-rx8581.c @@ -52,8 +52,45 @@ #define RX8581_CTRL_STOP 0x02 /* STOP bit */ #define RX8581_CTRL_RESET 0x01 /* RESET bit */ +struct rx8581 { + struct i2c_client *client; + struct rtc_device *rtc; + s32 (*read_block_data)(const struct i2c_client *client, u8 command, + u8 length, u8 *values); + s32 (*write_block_data)(const struct i2c_client *client, u8 command, + u8 length, const u8 *values); +}; + static struct i2c_driver rx8581_driver; +static int rx8581_read_block_data(const struct i2c_client *client, u8 command, + u8 length, u8 *values) +{ + s32 i, data; + + for (i = 0; i < length; i++) { + data = i2c_smbus_read_byte_data(client, command + i); + if (data < 0) + return data; + values[i] = data; + } + return i; +} + +static int rx8581_write_block_data(const struct i2c_client *client, u8 command, + u8 length, const u8 *values) +{ + s32 i, ret; + + for (i = 0; i < length; i++) { + ret = i2c_smbus_write_byte_data(client, command + i, + values[i]); + if (ret < 0) + return ret; + } + return length; +} + /* * In the routines that deal directly with the rx8581 hardware, we use * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch. @@ -62,6 +99,7 @@ static int rx8581_get_datetime(struct i2c_client *client, struct rtc_time *tm) { unsigned char date[7]; int data, err; + struct rx8581 *rx8581 = i2c_get_clientdata(client); /* First we ensure that the "update flag" is not set, we read the * time and date then re-read the "update flag". If the update flag @@ -80,14 +118,13 @@ static int rx8581_get_datetime(struct i2c_client *client, struct rtc_time *tm) err = i2c_smbus_write_byte_data(client, RX8581_REG_FLAG, (data & ~RX8581_FLAG_UF)); if (err != 0) { - dev_err(&client->dev, "Unable to write device " - "flags\n"); + dev_err(&client->dev, "Unable to write device flags\n"); return -EIO; } } /* Now read time and date */ - err = i2c_smbus_read_i2c_block_data(client, RX8581_REG_SC, + err = rx8581->read_block_data(client, RX8581_REG_SC, 7, date); if (err < 0) { dev_err(&client->dev, "Unable to read date\n"); @@ -140,6 +177,7 @@ static int rx8581_set_datetime(struct i2c_client *client, struct rtc_time *tm) { int data, err; unsigned char buf[7]; + struct rx8581 *rx8581 = i2c_get_clientdata(client); dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, " "mday=%d, mon=%d, year=%d, wday=%d\n", @@ -176,7 +214,7 @@ static int rx8581_set_datetime(struct i2c_client *client, struct rtc_time *tm) } /* write register's data */ - err = i2c_smbus_write_i2c_block_data(client, RX8581_REG_SC, 7, buf); + err = rx8581->write_block_data(client, RX8581_REG_SC, 7, buf); if (err < 0) { dev_err(&client->dev, "Unable to write to date registers\n"); return -EIO; @@ -231,22 +269,39 @@ static const struct rtc_class_ops rx8581_rtc_ops = { static int rx8581_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct rtc_device *rtc; + struct rx8581 *rx8581; dev_dbg(&client->dev, "%s\n", __func__); - if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) - return -ENODEV; + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA) + && !i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) + return -EIO; - dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); + rx8581 = devm_kzalloc(&client->dev, sizeof(struct rx8581), GFP_KERNEL); + if (!rx8581) + return -ENOMEM; - rtc = devm_rtc_device_register(&client->dev, rx8581_driver.driver.name, - &rx8581_rtc_ops, THIS_MODULE); + i2c_set_clientdata(client, rx8581); + rx8581->client = client; - if (IS_ERR(rtc)) - return PTR_ERR(rtc); + if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) { + rx8581->read_block_data = i2c_smbus_read_i2c_block_data; + rx8581->write_block_data = i2c_smbus_write_i2c_block_data; + } else { + rx8581->read_block_data = rx8581_read_block_data; + rx8581->write_block_data = rx8581_write_block_data; + } - i2c_set_clientdata(client, rtc); + dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); + + rx8581->rtc = devm_rtc_device_register(&client->dev, + rx8581_driver.driver.name, &rx8581_rtc_ops, THIS_MODULE); + + if (IS_ERR(rx8581->rtc)) { + dev_err(&client->dev, + "unable to register the class device\n"); + return PTR_ERR(rx8581->rtc); + } return 0; } diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index ae8119d..476af93 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -639,6 +639,7 @@ static void s5m_rtc_shutdown(struct platform_device *pdev) s5m_rtc_enable_smpl(info, false); } +#ifdef CONFIG_PM_SLEEP static int s5m_rtc_resume(struct device *dev) { struct s5m_rtc_info *info = dev_get_drvdata(dev); @@ -660,6 +661,7 @@ static int s5m_rtc_suspend(struct device *dev) return ret; } +#endif /* CONFIG_PM_SLEEP */ static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume); diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index c2e80d7..1915464 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -479,7 +479,7 @@ static int twl_rtc_probe(struct platform_device *pdev) u8 rd_reg; if (irq <= 0) - goto out1; + return ret; /* Initialize the register map */ if (twl_class_is_4030()) @@ -489,7 +489,7 @@ static int twl_rtc_probe(struct platform_device *pdev) ret = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG); if (ret < 0) - goto out1; + return ret; if (rd_reg & BIT_RTC_STATUS_REG_POWER_UP_M) dev_warn(&pdev->dev, "Power up reset detected.\n"); @@ -500,7 +500,7 @@ static int twl_rtc_probe(struct platform_device *pdev) /* Clear RTC Power up reset and pending alarm interrupts */ ret = twl_rtc_write_u8(rd_reg, REG_RTC_STATUS_REG); if (ret < 0) - goto out1; + return ret; if (twl_class_is_6030()) { twl6030_interrupt_unmask(TWL6030_RTC_INT_MASK, @@ -512,7 +512,7 @@ static int twl_rtc_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Enabling TWL-RTC\n"); ret = twl_rtc_write_u8(BIT_RTC_CTRL_REG_STOP_RTC_M, REG_RTC_CTRL_REG); if (ret < 0) - goto out1; + return ret; /* ensure interrupts are disabled, bootloaders can be strange */ ret = twl_rtc_write_u8(0, REG_RTC_INTERRUPTS_REG); @@ -522,34 +522,29 @@ static int twl_rtc_probe(struct platform_device *pdev) /* init cached IRQ enable bits */ ret = twl_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG); if (ret < 0) - goto out1; + return ret; device_init_wakeup(&pdev->dev, 1); - rtc = rtc_device_register(pdev->name, - &pdev->dev, &twl_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, pdev->name, + &twl_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { - ret = PTR_ERR(rtc); dev_err(&pdev->dev, "can't register RTC device, err %ld\n", PTR_ERR(rtc)); - goto out1; + return PTR_ERR(rtc); } - ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt, - IRQF_TRIGGER_RISING | IRQF_ONESHOT, - dev_name(&rtc->dev), rtc); + ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, + twl_rtc_interrupt, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + dev_name(&rtc->dev), rtc); if (ret < 0) { dev_err(&pdev->dev, "IRQ is not free.\n"); - goto out2; + return ret; } platform_set_drvdata(pdev, rtc); return 0; - -out2: - rtc_device_unregister(rtc); -out1: - return ret; } /* @@ -559,9 +554,6 @@ out1: static int twl_rtc_remove(struct platform_device *pdev) { /* leave rtc running, but disable irqs */ - struct rtc_device *rtc = platform_get_drvdata(pdev); - int irq = platform_get_irq(pdev, 0); - mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_ALARM_M); mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M); if (twl_class_is_6030()) { @@ -571,10 +563,6 @@ static int twl_rtc_remove(struct platform_device *pdev) REG_INT_MSK_STS_A); } - - free_irq(irq, rtc); - - rtc_device_unregister(rtc); return 0; } diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index aabc22c..88c9c92 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -293,7 +293,7 @@ static int rtc_probe(struct platform_device *pdev) if (!res) return -EBUSY; - rtc1_base = ioremap(res->start, resource_size(res)); + rtc1_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!rtc1_base) return -EBUSY; @@ -303,13 +303,14 @@ static int rtc_probe(struct platform_device *pdev) goto err_rtc1_iounmap; } - rtc2_base = ioremap(res->start, resource_size(res)); + rtc2_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!rtc2_base) { retval = -EBUSY; goto err_rtc1_iounmap; } - rtc = rtc_device_register(rtc_name, &pdev->dev, &vr41xx_rtc_ops, THIS_MODULE); + rtc = devm_rtc_device_register(&pdev->dev, rtc_name, &vr41xx_rtc_ops, + THIS_MODULE); if (IS_ERR(rtc)) { retval = PTR_ERR(rtc); goto err_iounmap_all; @@ -330,24 +331,24 @@ static int rtc_probe(struct platform_device *pdev) aie_irq = platform_get_irq(pdev, 0); if (aie_irq <= 0) { retval = -EBUSY; - goto err_device_unregister; + goto err_iounmap_all; } - retval = request_irq(aie_irq, elapsedtime_interrupt, 0, - "elapsed_time", pdev); + retval = devm_request_irq(&pdev->dev, aie_irq, elapsedtime_interrupt, 0, + "elapsed_time", pdev); if (retval < 0) - goto err_device_unregister; + goto err_iounmap_all; pie_irq = platform_get_irq(pdev, 1); if (pie_irq <= 0) { retval = -EBUSY; - goto err_free_irq; + goto err_iounmap_all; } - retval = request_irq(pie_irq, rtclong1_interrupt, 0, - "rtclong1", pdev); + retval = devm_request_irq(&pdev->dev, pie_irq, rtclong1_interrupt, 0, + "rtclong1", pdev); if (retval < 0) - goto err_free_irq; + goto err_iounmap_all; platform_set_drvdata(pdev, rtc); @@ -358,47 +359,20 @@ static int rtc_probe(struct platform_device *pdev) return 0; -err_free_irq: - free_irq(aie_irq, pdev); - -err_device_unregister: - rtc_device_unregister(rtc); - err_iounmap_all: - iounmap(rtc2_base); rtc2_base = NULL; err_rtc1_iounmap: - iounmap(rtc1_base); rtc1_base = NULL; return retval; } -static int rtc_remove(struct platform_device *pdev) -{ - struct rtc_device *rtc; - - rtc = platform_get_drvdata(pdev); - if (rtc) - rtc_device_unregister(rtc); - - free_irq(aie_irq, pdev); - free_irq(pie_irq, pdev); - if (rtc1_base) - iounmap(rtc1_base); - if (rtc2_base) - iounmap(rtc2_base); - - return 0; -} - /* work with hotplug and coldplug */ MODULE_ALIAS("platform:RTC"); static struct platform_driver rtc_platform_driver = { .probe = rtc_probe, - .remove = rtc_remove, .driver = { .name = rtc_name, .owner = THIS_MODULE, diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 12ca031..52108be 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -357,11 +357,13 @@ static int default_lcd_on = 1; static bool mtrr = true; #endif +#ifdef CONFIG_FB_ATY128_BACKLIGHT #ifdef CONFIG_PMAC_BACKLIGHT static int backlight = 1; #else static int backlight = 0; #endif +#endif /* PLL constants */ struct aty128_constants { @@ -1671,7 +1673,9 @@ static int aty128fb_setup(char *options) default_crt_on = simple_strtoul(this_opt+4, NULL, 0); continue; } else if (!strncmp(this_opt, "backlight:", 10)) { +#ifdef CONFIG_FB_ATY128_BACKLIGHT backlight = simple_strtoul(this_opt+10, NULL, 0); +#endif continue; } #ifdef CONFIG_MTRR diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c index 00076ec..8ea42b8 100644 --- a/drivers/video/backlight/hp680_bl.c +++ b/drivers/video/backlight/hp680_bl.c @@ -110,8 +110,8 @@ static int hp680bl_probe(struct platform_device *pdev) memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = HP680_MAX_INTENSITY; - bd = backlight_device_register("hp680-bl", &pdev->dev, NULL, - &hp680bl_ops, &props); + bd = devm_backlight_device_register(&pdev->dev, "hp680-bl", &pdev->dev, + NULL, &hp680bl_ops, &props); if (IS_ERR(bd)) return PTR_ERR(bd); @@ -131,8 +131,6 @@ static int hp680bl_remove(struct platform_device *pdev) bd->props.power = 0; hp680bl_send_intensity(bd); - backlight_device_unregister(bd); - return 0; } diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c index 3ccb893..6ce96b4 100644 --- a/drivers/video/backlight/jornada720_bl.c +++ b/drivers/video/backlight/jornada720_bl.c @@ -115,9 +115,10 @@ static int jornada_bl_probe(struct platform_device *pdev) memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = BL_MAX_BRIGHT; - bd = backlight_device_register(S1D_DEVICENAME, &pdev->dev, NULL, - &jornada_bl_ops, &props); + bd = devm_backlight_device_register(&pdev->dev, S1D_DEVICENAME, + &pdev->dev, NULL, &jornada_bl_ops, + &props); if (IS_ERR(bd)) { ret = PTR_ERR(bd); dev_err(&pdev->dev, "failed to register device, err=%x\n", ret); @@ -139,18 +140,8 @@ static int jornada_bl_probe(struct platform_device *pdev) return 0; } -static int jornada_bl_remove(struct platform_device *pdev) -{ - struct backlight_device *bd = platform_get_drvdata(pdev); - - backlight_device_unregister(bd); - - return 0; -} - static struct platform_driver jornada_bl_driver = { .probe = jornada_bl_probe, - .remove = jornada_bl_remove, .driver = { .name = "jornada_bl", }, diff --git a/drivers/video/backlight/jornada720_lcd.c b/drivers/video/backlight/jornada720_lcd.c index b061413..da3876c 100644 --- a/drivers/video/backlight/jornada720_lcd.c +++ b/drivers/video/backlight/jornada720_lcd.c @@ -100,7 +100,8 @@ static int jornada_lcd_probe(struct platform_device *pdev) struct lcd_device *lcd_device; int ret; - lcd_device = lcd_device_register(S1D_DEVICENAME, &pdev->dev, NULL, &jornada_lcd_props); + lcd_device = devm_lcd_device_register(&pdev->dev, S1D_DEVICENAME, + &pdev->dev, NULL, &jornada_lcd_props); if (IS_ERR(lcd_device)) { ret = PTR_ERR(lcd_device); @@ -119,18 +120,8 @@ static int jornada_lcd_probe(struct platform_device *pdev) return 0; } -static int jornada_lcd_remove(struct platform_device *pdev) -{ - struct lcd_device *lcd_device = platform_get_drvdata(pdev); - - lcd_device_unregister(lcd_device); - - return 0; -} - static struct platform_driver jornada_lcd_driver = { .probe = jornada_lcd_probe, - .remove = jornada_lcd_remove, .driver = { .name = "jornada_lcd", }, diff --git a/drivers/video/backlight/kb3886_bl.c b/drivers/video/backlight/kb3886_bl.c index 7592cc2..84a110a 100644 --- a/drivers/video/backlight/kb3886_bl.c +++ b/drivers/video/backlight/kb3886_bl.c @@ -78,7 +78,7 @@ static struct kb3886bl_machinfo *bl_machinfo; static unsigned long kb3886bl_flags; #define KB3886BL_SUSPENDED 0x01 -static struct dmi_system_id __initdata kb3886bl_device_table[] = { +static struct dmi_system_id kb3886bl_device_table[] __initdata = { { .ident = "Sahara Touch-iT", .matches = { diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c index b5fc13b..63e7638 100644 --- a/drivers/video/backlight/l4f00242t03.c +++ b/drivers/video/backlight/l4f00242t03.c @@ -223,8 +223,8 @@ static int l4f00242t03_probe(struct spi_device *spi) return PTR_ERR(priv->core_reg); } - priv->ld = lcd_device_register("l4f00242t03", - &spi->dev, priv, &l4f_ops); + priv->ld = devm_lcd_device_register(&spi->dev, "l4f00242t03", &spi->dev, + priv, &l4f_ops); if (IS_ERR(priv->ld)) return PTR_ERR(priv->ld); @@ -243,8 +243,6 @@ static int l4f00242t03_remove(struct spi_device *spi) struct l4f00242t03_priv *priv = spi_get_drvdata(spi); l4f00242t03_lcd_power_set(priv->ld, FB_BLANK_POWERDOWN); - lcd_device_unregister(priv->ld); - return 0; } diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c index cae80d5..2ca3a04 100644 --- a/drivers/video/backlight/lp855x_bl.c +++ b/drivers/video/backlight/lp855x_bl.c @@ -125,7 +125,7 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr) return false; } - return (addr >= start && addr <= end); + return addr >= start && addr <= end; } static int lp8557_bl_off(struct lp855x *lp) diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c index e49905d..daba34d 100644 --- a/drivers/video/backlight/lp8788_bl.c +++ b/drivers/video/backlight/lp8788_bl.c @@ -63,13 +63,13 @@ static struct lp8788_bl_config default_bl_config = { static inline bool is_brightness_ctrl_by_pwm(enum lp8788_bl_ctrl_mode mode) { - return (mode == LP8788_BL_COMB_PWM_BASED); + return mode == LP8788_BL_COMB_PWM_BASED; } static inline bool is_brightness_ctrl_by_register(enum lp8788_bl_ctrl_mode mode) { - return (mode == LP8788_BL_REGISTER_ONLY || - mode == LP8788_BL_COMB_REGISTER_BASED); + return mode == LP8788_BL_REGISTER_ONLY || + mode == LP8788_BL_COMB_REGISTER_BASED; } static int lp8788_backlight_configure(struct lp8788_bl *bl) diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c index ac11a46..a0dcd88 100644 --- a/drivers/video/backlight/omap1_bl.c +++ b/drivers/video/backlight/omap1_bl.c @@ -146,8 +146,8 @@ static int omapbl_probe(struct platform_device *pdev) memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = OMAPBL_MAX_INTENSITY; - dev = backlight_device_register("omap-bl", &pdev->dev, bl, &omapbl_ops, - &props); + dev = devm_backlight_device_register(&pdev->dev, "omap-bl", &pdev->dev, + bl, &omapbl_ops, &props); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -170,20 +170,10 @@ static int omapbl_probe(struct platform_device *pdev) return 0; } -static int omapbl_remove(struct platform_device *pdev) -{ - struct backlight_device *dev = platform_get_drvdata(pdev); - - backlight_device_unregister(dev); - - return 0; -} - static SIMPLE_DEV_PM_OPS(omapbl_pm_ops, omapbl_suspend, omapbl_resume); static struct platform_driver omapbl_driver = { .probe = omapbl_probe, - .remove = omapbl_remove, .driver = { .name = "omap-bl", .pm = &omapbl_pm_ops, diff --git a/drivers/video/backlight/ot200_bl.c b/drivers/video/backlight/ot200_bl.c index fdbb6ee..f5a5202 100644 --- a/drivers/video/backlight/ot200_bl.c +++ b/drivers/video/backlight/ot200_bl.c @@ -118,8 +118,9 @@ static int ot200_backlight_probe(struct platform_device *pdev) props.brightness = 100; props.type = BACKLIGHT_RAW; - bl = backlight_device_register(dev_name(&pdev->dev), &pdev->dev, data, - &ot200_backlight_ops, &props); + bl = devm_backlight_device_register(&pdev->dev, dev_name(&pdev->dev), + &pdev->dev, data, &ot200_backlight_ops, + &props); if (IS_ERR(bl)) { dev_err(&pdev->dev, "failed to register backlight\n"); retval = PTR_ERR(bl); @@ -137,10 +138,6 @@ error_devm_kzalloc: static int ot200_backlight_remove(struct platform_device *pdev) { - struct backlight_device *bl = platform_get_drvdata(pdev); - - backlight_device_unregister(bl); - /* on module unload set brightness to 100% */ cs5535_mfgpt_write(pwm_timer, MFGPT_REG_COUNTER, 0); cs5535_mfgpt_write(pwm_timer, MFGPT_REG_SETUP, MFGPT_SETUP_CNTEN); diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c index b8db933..3ad6765 100644 --- a/drivers/video/backlight/tosa_bl.c +++ b/drivers/video/backlight/tosa_bl.c @@ -105,8 +105,9 @@ static int tosa_bl_probe(struct i2c_client *client, memset(&props, 0, sizeof(struct backlight_properties)); props.type = BACKLIGHT_RAW; props.max_brightness = 512 - 1; - data->bl = backlight_device_register("tosa-bl", &client->dev, data, - &bl_ops, &props); + data->bl = devm_backlight_device_register(&client->dev, "tosa-bl", + &client->dev, data, &bl_ops, + &props); if (IS_ERR(data->bl)) { ret = PTR_ERR(data->bl); goto err_reg; @@ -128,9 +129,7 @@ static int tosa_bl_remove(struct i2c_client *client) { struct tosa_bl_data *data = i2c_get_clientdata(client); - backlight_device_unregister(data->bl); data->bl = NULL; - return 0; } diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index be5d636..f08d641 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -206,8 +206,8 @@ static int tosa_lcd_probe(struct spi_device *spi) tosa_lcd_tg_on(data); - data->lcd = lcd_device_register("tosa-lcd", &spi->dev, data, - &tosa_lcd_ops); + data->lcd = devm_lcd_device_register(&spi->dev, "tosa-lcd", &spi->dev, + data, &tosa_lcd_ops); if (IS_ERR(data->lcd)) { ret = PTR_ERR(data->lcd); @@ -226,8 +226,6 @@ static int tosa_lcd_remove(struct spi_device *spi) { struct tosa_lcd_data *data = spi_get_drvdata(spi); - lcd_device_unregister(data->lcd); - if (data->i2c) i2c_unregister_device(data->i2c); diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c index 7b07135..c0227f9 100644 --- a/drivers/vlynq/vlynq.c +++ b/drivers/vlynq/vlynq.c @@ -762,7 +762,8 @@ static int vlynq_remove(struct platform_device *pdev) device_unregister(&dev->dev); iounmap(dev->local); - release_mem_region(dev->regs_start, dev->regs_end - dev->regs_start); + release_mem_region(dev->regs_start, + dev->regs_end - dev->regs_start + 1); kfree(dev); diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c index e36b18b..9709b8b 100644 --- a/drivers/w1/masters/w1-gpio.c +++ b/drivers/w1/masters/w1-gpio.c @@ -18,10 +18,31 @@ #include <linux/of_gpio.h> #include <linux/err.h> #include <linux/of.h> +#include <linux/delay.h> #include "../w1.h" #include "../w1_int.h" +static u8 w1_gpio_set_pullup(void *data, int delay) +{ + struct w1_gpio_platform_data *pdata = data; + + if (delay) { + pdata->pullup_duration = delay; + } else { + if (pdata->pullup_duration) { + gpio_direction_output(pdata->pin, 1); + + msleep(pdata->pullup_duration); + + gpio_direction_input(pdata->pin); + } + pdata->pullup_duration = 0; + } + + return 0; +} + static void w1_gpio_write_bit_dir(void *data, u8 bit) { struct w1_gpio_platform_data *pdata = data; @@ -132,6 +153,7 @@ static int w1_gpio_probe(struct platform_device *pdev) } else { gpio_direction_input(pdata->pin); master->write_bit = w1_gpio_write_bit_dir; + master->set_pullup = w1_gpio_set_pullup; } err = w1_add_master_device(master); diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 5a98649..590bd8a 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -117,18 +117,6 @@ int w1_add_master_device(struct w1_bus_master *master) printk(KERN_ERR "w1_add_master_device: invalid function set\n"); return(-EINVAL); } - /* While it would be electrically possible to make a device that - * generated a strong pullup in bit bang mode, only hardware that - * controls 1-wire time frames are even expected to support a strong - * pullup. w1_io.c would need to support calling set_pullup before - * the last write_bit operation of a w1_write_8 which it currently - * doesn't. - */ - if (!master->write_byte && !master->touch_bit && master->set_pullup) { - printk(KERN_ERR "w1_add_master_device: set_pullup requires " - "write_byte or touch_bit, disabling\n"); - master->set_pullup = NULL; - } /* Lock until the device is added (or not) to w1_masters. */ mutex_lock(&w1_mlock); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 4218e26..acf3205 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -104,7 +104,7 @@ struct autofs_sb_info { u32 magic; int pipefd; struct file *pipe; - pid_t oz_pgrp; + struct pid *oz_pgrp; int catatonic; int version; int sub_version; @@ -140,7 +140,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) filesystem without "magic".) */ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { - return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp; + return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; } /* Does a dentry have some pending activity? */ diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 1818ce7..3182c0e 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -346,6 +346,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, { int pipefd; int err = 0; + struct pid *new_pid = NULL; if (param->setpipefd.pipefd == -1) return -EINVAL; @@ -357,7 +358,17 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, mutex_unlock(&sbi->wq_mutex); return -EBUSY; } else { - struct file *pipe = fget(pipefd); + struct file *pipe; + + new_pid = get_task_pid(current, PIDTYPE_PGID); + + if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) { + AUTOFS_WARN("Not allowed to change PID namespace"); + err = -EINVAL; + goto out; + } + + pipe = fget(pipefd); if (!pipe) { err = -EBADF; goto out; @@ -367,12 +378,13 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, fput(pipe); goto out; } - sbi->oz_pgrp = task_pgrp_nr(current); + swap(sbi->oz_pgrp, new_pid); sbi->pipefd = pipefd; sbi->pipe = pipe; sbi->catatonic = 0; } out: + put_pid(new_pid); mutex_unlock(&sbi->wq_mutex); return err; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 3d9d3f5..394e90b 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -402,6 +402,20 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, goto next; } + if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) { + DPRINTK("checking symlink %p %.*s", + dentry, (int)dentry->d_name.len, dentry->d_name.name); + /* + * A symlink can't be "busy" in the usual sense so + * just check last used for expire timeout. + */ + if (autofs4_can_expire(dentry, timeout, do_now)) { + expired = dentry; + goto found; + } + goto next; + } + if (simple_empty(dentry)) goto next; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 3b9cc9b..d7bd395 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -56,8 +56,11 @@ void autofs4_kill_sb(struct super_block *sb) * just call kill_anon_super when we are called from * deactivate_super. */ - if (sbi) /* Free wait queues, close pipe */ + if (sbi) { + /* Free wait queues, close pipe */ autofs4_catatonic_mode(sbi); + put_pid(sbi->oz_pgrp); + } DPRINTK("shutting down"); kill_litter_super(sb); @@ -80,7 +83,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, root_inode->i_gid)); - seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); + seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp)); seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); seq_printf(m, ",minproto=%d", sbi->min_proto); seq_printf(m, ",maxproto=%d", sbi->max_proto); @@ -124,7 +127,8 @@ static const match_table_t tokens = { }; static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, - pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) + int *pgrp, bool *pgrp_set, unsigned int *type, + int *minproto, int *maxproto) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -132,7 +136,6 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, *uid = current_uid(); *gid = current_gid(); - *pgrp = task_pgrp_nr(current); *minproto = AUTOFS_MIN_PROTO_VERSION; *maxproto = AUTOFS_MAX_PROTO_VERSION; @@ -171,6 +174,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, if (match_int(args, &option)) return 1; *pgrp = option; + *pgrp_set = true; break; case Opt_minproto: if (match_int(args, &option)) @@ -206,10 +210,13 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) int pipefd; struct autofs_sb_info *sbi; struct autofs_info *ino; + int pgrp; + bool pgrp_set = false; + int ret = -EINVAL; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - goto fail_unlock; + return -ENOMEM; DPRINTK("starting up, sbi = %p",sbi); s->s_fs_info = sbi; @@ -218,7 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->pipe = NULL; sbi->catatonic = 1; sbi->exp_timeout = 0; - sbi->oz_pgrp = task_pgrp_nr(current); + sbi->oz_pgrp = NULL; sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; @@ -243,8 +250,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) * Get the root inode and dentry, but defer checking for errors. */ ino = autofs4_new_ino(sbi); - if (!ino) + if (!ino) { + ret = -ENOMEM; goto fail_free; + } root_inode = autofs4_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); if (!root) @@ -255,12 +264,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) /* Can this call block? */ if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, - &sbi->oz_pgrp, &sbi->type, &sbi->min_proto, - &sbi->max_proto)) { + &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, + &sbi->max_proto)) { printk("autofs: called with bogus options\n"); goto fail_dput; } + if (pgrp_set) { + sbi->oz_pgrp = find_get_pid(pgrp); + if (!sbi->oz_pgrp) { + pr_warn("autofs: could not find process group %d\n", + pgrp); + goto fail_dput; + } + } else { + sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID); + } + if (autofs_type_trigger(sbi->type)) __managed_dentry_set_managed(root); @@ -284,14 +304,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->version = sbi->max_proto; sbi->sub_version = AUTOFS_PROTO_SUBVERSION; - DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp); + DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp)); pipe = fget(pipefd); - + if (!pipe) { printk("autofs: could not open pipe file descriptor\n"); goto fail_dput; } - if (autofs_prepare_pipe(pipe) < 0) + ret = autofs_prepare_pipe(pipe); + if (ret < 0) goto fail_fput; sbi->pipe = pipe; sbi->pipefd = pipefd; @@ -316,10 +337,10 @@ fail_dput: fail_ino: kfree(ino); fail_free: + put_pid(sbi->oz_pgrp); kfree(sbi); s->s_fs_info = NULL; -fail_unlock: - return -EINVAL; + return ret; } struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 92ef341..2caf36a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -558,7 +558,7 @@ static int autofs4_dir_symlink(struct inode *dir, dget(dentry); atomic_inc(&ino->count); p_ino = autofs4_dentry_ino(dentry->d_parent); - if (p_ino && dentry->d_parent != dentry) + if (p_ino && !IS_ROOT(dentry)) atomic_inc(&p_ino->count); dir->i_mtime = CURRENT_TIME; @@ -593,7 +593,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); - if (p_ino && dentry->d_parent != dentry) + if (p_ino && !IS_ROOT(dentry)) atomic_dec(&p_ino->count); } dput(ino->dentry); @@ -732,7 +732,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m dget(dentry); atomic_inc(&ino->count); p_ino = autofs4_dentry_ino(dentry->d_parent); - if (p_ino && dentry->d_parent != dentry) + if (p_ino && !IS_ROOT(dentry)) atomic_inc(&p_ino->count); inc_nlink(dir); dir->i_mtime = CURRENT_TIME; diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index f27c094..1e8ea19 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c @@ -14,6 +14,10 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + struct autofs_info *ino = autofs4_dentry_ino(dentry); + if (ino && !autofs4_oz_mode(sbi)) + ino->last_used = jiffies; nd_set_link(nd, dentry->d_inode->i_private); return NULL; } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 689e40d..116fd38 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -347,11 +347,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, struct qstr qstr; char *name; int status, ret, type; + pid_t pid; + pid_t tgid; /* In catatonic mode, we don't wait for nobody */ if (sbi->catatonic) return -ENOENT; + /* + * Try translating pids to the namespace of the daemon. + * + * Zero means failure: we are in an unrelated pid namespace. + */ + pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp)); + tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp)); + if (pid == 0 || tgid == 0) + return -ENOENT; + if (!dentry->d_inode) { /* * A wait for a negative dentry is invalid for certain @@ -417,8 +429,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->ino = autofs4_get_ino(sbi); wq->uid = current_uid(); wq->gid = current_gid(); - wq->pid = current->pid; - wq->tgid = current->tgid; + wq->pid = pid; + wq->tgid = tgid; wq->status = -EINTR; /* Status return if interrupted */ wq->wait_ctr = 2; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 571a423..67be295 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -543,9 +543,6 @@ out: * libraries. There is no binary dependent code anywhere else. */ -#define INTERPRETER_NONE 0 -#define INTERPRETER_ELF 2 - #ifndef STACK_RND_MASK #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */ #endif diff --git a/fs/coredump.c b/fs/coredump.c index bc3fbcd..e3ad709 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -40,7 +40,6 @@ #include <trace/events/task.h> #include "internal.h" -#include "coredump.h" #include <trace/events/sched.h> diff --git a/fs/coredump.h b/fs/coredump.h deleted file mode 100644 index e39ff07..0000000 --- a/fs/coredump.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _FS_COREDUMP_H -#define _FS_COREDUMP_H - -extern int __get_dumpable(unsigned long mm_flags); - -#endif @@ -62,7 +62,6 @@ #include <trace/events/task.h> #include "internal.h" -#include "coredump.h" #include <trace/events/sched.h> @@ -843,7 +842,6 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); - arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); BUG_ON(active_mm != old_mm); @@ -1088,8 +1086,8 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ set_fs(USER_DS); - current->flags &= - ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE); + current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | + PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1139,9 +1137,7 @@ void setup_new_exec(struct linux_binprm * bprm) /* An exec changes our domain. We are no longer part of the thread group */ - current->self_exec_id++; - flush_signal_handlers(current, 0); do_close_on_exec(current->files); } @@ -1173,6 +1169,10 @@ void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } + if (bprm->file) { + allow_write_access(bprm->file); + fput(bprm->file); + } /* If a binfmt changed the interp, free it. */ if (bprm->interp != bprm->filename) kfree(bprm->interp); @@ -1224,11 +1224,10 @@ EXPORT_SYMBOL(install_exec_creds); * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH */ -static int check_unsafe_exec(struct linux_binprm *bprm) +static void check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; unsigned n_fs; - int res = 0; if (p->ptrace) { if (p->ptrace & PT_PTRACE_CAP) @@ -1244,31 +1243,25 @@ static int check_unsafe_exec(struct linux_binprm *bprm) if (current->no_new_privs) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; + t = p; n_fs = 1; spin_lock(&p->fs->lock); rcu_read_lock(); - for (t = next_thread(p); t != p; t = next_thread(t)) { + while_each_thread(p, t) { if (t->fs == p->fs) n_fs++; } rcu_read_unlock(); - if (p->fs->users > n_fs) { + if (p->fs->users > n_fs) bprm->unsafe |= LSM_UNSAFE_SHARE; - } else { - res = -EAGAIN; - if (!p->fs->in_exec) { - p->fs->in_exec = 1; - res = 1; - } - } + else + p->fs->in_exec = 1; spin_unlock(&p->fs->lock); - - return res; } -/* - * Fill the binprm structure from the inode. +/* + * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes * * This may be called multiple times for binary chains (scripts for example). @@ -1430,14 +1423,7 @@ static int exec_binprm(struct linux_binprm *bprm) audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); - current->did_exec = 1; proc_exec_connector(current); - - if (bprm->file) { - allow_write_access(bprm->file); - fput(bprm->file); - bprm->file = NULL; /* to catch use-after-free */ - } } return ret; @@ -1453,7 +1439,6 @@ static int do_execve_common(const char *filename, struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; - bool clear_in_exec; int retval; /* @@ -1485,10 +1470,7 @@ static int do_execve_common(const char *filename, if (retval) goto out_free; - retval = check_unsafe_exec(bprm); - if (retval < 0) - goto out_free; - clear_in_exec = retval; + check_unsafe_exec(bprm); current->in_execve = 1; file = open_exec(filename); @@ -1504,7 +1486,7 @@ static int do_execve_common(const char *filename, retval = bprm_mm_init(bprm); if (retval) - goto out_file; + goto out_unmark; bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) @@ -1551,15 +1533,8 @@ out: mmput(bprm->mm); } -out_file: - if (bprm->file) { - allow_write_access(bprm->file); - fput(bprm->file); - } - out_unmark: - if (clear_in_exec) - current->fs->in_exec = 0; + current->fs->in_exec = 0; current->in_execve = 0; out_free: @@ -1609,67 +1584,22 @@ void set_binfmt(struct linux_binfmt *new) if (new) __module_get(new->module); } - EXPORT_SYMBOL(set_binfmt); /* - * set_dumpable converts traditional three-value dumpable to two flags and - * stores them into mm->flags. It modifies lower two bits of mm->flags, but - * these bits are not changed atomically. So get_dumpable can observe the - * intermediate state. To avoid doing unexpected behavior, get get_dumpable - * return either old dumpable or new one by paying attention to the order of - * modifying the bits. - * - * dumpable | mm->flags (binary) - * old new | initial interim final - * ---------+----------------------- - * 0 1 | 00 01 01 - * 0 2 | 00 10(*) 11 - * 1 0 | 01 00 00 - * 1 2 | 01 11 11 - * 2 0 | 11 10(*) 00 - * 2 1 | 11 11 01 - * - * (*) get_dumpable regards interim value of 10 as 11. + * set_dumpable stores three-value SUID_DUMP_* into mm->flags. */ void set_dumpable(struct mm_struct *mm, int value) { - switch (value) { - case SUID_DUMP_DISABLE: - clear_bit(MMF_DUMPABLE, &mm->flags); - smp_wmb(); - clear_bit(MMF_DUMP_SECURELY, &mm->flags); - break; - case SUID_DUMP_USER: - set_bit(MMF_DUMPABLE, &mm->flags); - smp_wmb(); - clear_bit(MMF_DUMP_SECURELY, &mm->flags); - break; - case SUID_DUMP_ROOT: - set_bit(MMF_DUMP_SECURELY, &mm->flags); - smp_wmb(); - set_bit(MMF_DUMPABLE, &mm->flags); - break; - } -} - -int __get_dumpable(unsigned long mm_flags) -{ - int ret; + unsigned long old, new; - ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; -} + if (WARN_ON((unsigned)value > SUID_DUMP_ROOT)) + return; -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. - */ -int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); + do { + old = ACCESS_ONCE(mm->flags); + new = (old & ~MMF_DUMPABLE_MASK) | value; + } while (cmpxchg(&mm->flags, old, new) != old); } SYSCALL_DEFINE3(execve, diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index bafdd48..e66e480 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -309,43 +309,17 @@ struct fname { */ static void free_rb_tree_fname(struct rb_root *root) { - struct rb_node *n = root->rb_node; - struct rb_node *parent; - struct fname *fname; - - while (n) { - /* Do the node's children first */ - if (n->rb_left) { - n = n->rb_left; - continue; - } - if (n->rb_right) { - n = n->rb_right; - continue; - } - /* - * The node has no children; free it, and then zero - * out parent's link to it. Finally go to the - * beginning of the loop and try to free the parent - * node. - */ - parent = rb_parent(n); - fname = rb_entry(n, struct fname, rb_hash); - while (fname) { - struct fname * old = fname; + struct fname *fname, *next; + + rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash) + do { + struct fname *old = fname; fname = fname->next; - kfree (old); - } - if (!parent) - *root = RB_ROOT; - else if (parent->rb_left == n) - parent->rb_left = NULL; - else if (parent->rb_right == n) - parent->rb_right = NULL; - n = parent; - } -} + kfree(old); + } while (fname); + *root = RB_ROOT; +} static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, loff_t pos) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 3f11656..41eb9dc 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -180,37 +180,12 @@ int ext4_setup_system_zone(struct super_block *sb) /* Called when the filesystem is unmounted */ void ext4_release_system_zone(struct super_block *sb) { - struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node; - struct rb_node *parent; - struct ext4_system_zone *entry; + struct ext4_system_zone *entry, *n; - while (n) { - /* Do the node's children first */ - if (n->rb_left) { - n = n->rb_left; - continue; - } - if (n->rb_right) { - n = n->rb_right; - continue; - } - /* - * The node has no children; free it, and then zero - * out parent's link to it. Finally go to the - * beginning of the loop and try to free the parent - * node. - */ - parent = rb_parent(n); - entry = rb_entry(n, struct ext4_system_zone, node); + rbtree_postorder_for_each_entry_safe(entry, n, + &EXT4_SB(sb)->system_blks, node) kmem_cache_free(ext4_system_zone_cachep, entry); - if (!parent) - EXT4_SB(sb)->system_blks = RB_ROOT; - else if (parent->rb_left == n) - parent->rb_left = NULL; - else if (parent->rb_right == n) - parent->rb_right = NULL; - n = parent; - } + EXT4_SB(sb)->system_blks = RB_ROOT; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 680bb33..d638c57 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -353,41 +353,16 @@ struct fname { */ static void free_rb_tree_fname(struct rb_root *root) { - struct rb_node *n = root->rb_node; - struct rb_node *parent; - struct fname *fname; - - while (n) { - /* Do the node's children first */ - if (n->rb_left) { - n = n->rb_left; - continue; - } - if (n->rb_right) { - n = n->rb_right; - continue; - } - /* - * The node has no children; free it, and then zero - * out parent's link to it. Finally go to the - * beginning of the loop and try to free the parent - * node. - */ - parent = rb_parent(n); - fname = rb_entry(n, struct fname, rb_hash); + struct fname *fname, *next; + + rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash) while (fname) { struct fname *old = fname; fname = fname->next; kfree(old); } - if (!parent) - *root = RB_ROOT; - else if (parent->rb_left == n) - parent->rb_left = NULL; - else if (parent->rb_right == n) - parent->rb_right = NULL; - n = parent; - } + + *root = RB_ROOT; } diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 37213d0..3ebda92 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -178,64 +178,6 @@ const struct dentry_operations hfsplus_dentry_operations = { .d_compare = hfsplus_compare_dentry, }; -static struct dentry *hfsplus_file_lookup(struct inode *dir, - struct dentry *dentry, unsigned int flags) -{ - struct hfs_find_data fd; - struct super_block *sb = dir->i_sb; - struct inode *inode = NULL; - struct hfsplus_inode_info *hip; - int err; - - if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) - goto out; - - inode = HFSPLUS_I(dir)->rsrc_inode; - if (inode) - goto out; - - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); - - hip = HFSPLUS_I(inode); - inode->i_ino = dir->i_ino; - INIT_LIST_HEAD(&hip->open_dir_list); - mutex_init(&hip->extents_lock); - hip->extent_state = 0; - hip->flags = 0; - hip->userflags = 0; - set_bit(HFSPLUS_I_RSRC, &hip->flags); - - err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - if (!err) { - err = hfsplus_find_cat(sb, dir->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); - } - if (err) { - iput(inode); - return ERR_PTR(err); - } - hip->rsrc_inode = dir; - HFSPLUS_I(dir)->rsrc_inode = inode; - igrab(dir); - - /* - * __mark_inode_dirty expects inodes to be hashed. Since we don't - * want resource fork inodes in the regular inode space, we make them - * appear hashed, but do not put on any lists. hlist_del() - * will work fine and require no locking. - */ - hlist_add_fake(&inode->i_hash); - - mark_inode_dirty(inode); -out: - d_add(dentry, inode); - return NULL; -} - static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) { @@ -385,7 +327,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, } static const struct inode_operations hfsplus_file_inode_operations = { - .lookup = hfsplus_file_lookup, .setattr = hfsplus_setattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index 975a1f5..9a5449b 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -564,25 +564,10 @@ struct jffs2_node_frag *jffs2_lookup_node_frag(struct rb_root *fragtree, uint32_ they're killed. */ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c) { - struct jffs2_node_frag *frag; - struct jffs2_node_frag *parent; - - if (!root->rb_node) - return; + struct jffs2_node_frag *frag, *next; dbg_fragtree("killing\n"); - - frag = (rb_entry(root->rb_node, struct jffs2_node_frag, rb)); - while(frag) { - if (frag->rb.rb_left) { - frag = frag_left(frag); - continue; - } - if (frag->rb.rb_right) { - frag = frag_right(frag); - continue; - } - + rbtree_postorder_for_each_entry_safe(frag, next, root, rb) { if (frag->node && !(--frag->node->frags)) { /* Not a hole, and it's the final remaining frag of this node. Free the node */ @@ -591,17 +576,8 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c) jffs2_free_full_dnode(frag->node); } - parent = frag_parent(frag); - if (parent) { - if (frag_left(parent) == frag) - parent->rb.rb_left = NULL; - else - parent->rb.rb_right = NULL; - } jffs2_free_node_frag(frag); - frag = parent; - cond_resched(); } } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index ae81b01..386303d 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -543,33 +543,13 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c, static void jffs2_free_tmp_dnode_info_list(struct rb_root *list) { - struct rb_node *this; - struct jffs2_tmp_dnode_info *tn; - - this = list->rb_node; + struct jffs2_tmp_dnode_info *tn, *next; - /* Now at bottom of tree */ - while (this) { - if (this->rb_left) - this = this->rb_left; - else if (this->rb_right) - this = this->rb_right; - else { - tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb); + rbtree_postorder_for_each_entry_safe(tn, next, list, rb) { jffs2_free_full_dnode(tn->fn); jffs2_free_tmp_dnode_info(tn); - - this = rb_parent(this); - if (!this) - break; - - if (this->rb_left == &tn->rb) - this->rb_left = NULL; - else if (this->rb_right == &tn->rb) - this->rb_right = NULL; - else BUG(); - } } + *list = RB_ROOT; } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index d448a77..7f9b096 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -62,7 +62,8 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, page = read_cache_page(mapping, index, filler, sb); else { page = find_or_create_page(mapping, index, GFP_NOFS); - unlock_page(page); + if (page) + unlock_page(page); } return page; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index b44bdb29..2b34021 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -37,7 +37,26 @@ #include "sufile.h" #include "dat.h" - +/** + * nilfs_ioctl_wrap_copy - wrapping function of get/set metadata info + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @dir: set of direction flags + * @dofunc: concrete function of get/set metadata info + * + * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of + * calling dofunc() function on the basis of @argv argument. + * + * Return Value: On success, 0 is returned and requested metadata info + * is copied into userspace. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, struct nilfs_argv *argv, int dir, ssize_t (*dofunc)(struct the_nilfs *, @@ -57,6 +76,14 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_size > PAGE_SIZE) return -EINVAL; + /* + * Reject pairs of a start item position (argv->v_index) and a + * total count (argv->v_nmembs) which leads position 'pos' to + * overflow by the increment at the end of the loop. + */ + if (argv->v_index > ~(__u64)0 - argv->v_nmembs) + return -EINVAL; + buf = (void *)__get_free_pages(GFP_NOFS, 0); if (unlikely(!buf)) return -ENOMEM; @@ -99,6 +126,9 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, return ret; } +/** + * nilfs_ioctl_getflags - ioctl to support lsattr + */ static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp) { unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE; @@ -106,6 +136,9 @@ static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp) return put_user(flags, (int __user *)argp); } +/** + * nilfs_ioctl_setflags - ioctl to support chattr + */ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, void __user *argp) { @@ -158,11 +191,33 @@ out: return ret; } +/** + * nilfs_ioctl_getversion - get info about a file's version (generation number) + */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { return put_user(inode->i_generation, (int __user *)argp); } +/** + * nilfs_ioctl_change_cpmode - change checkpoint mode (checkpoint/snapshot) + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_change_cpmode() function changes mode of + * given checkpoint between checkpoint and snapshot state. This ioctl + * is used in chcp and mkcp utilities. + * + * Return Value: On success, 0 is returned and mode of a checkpoint is + * changed. On error, one of the following negative error codes + * is returned. + * + * %-EPERM - Operation not permitted. + * + * %-EFAULT - Failure during checkpoint mode changing. + */ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -198,6 +253,25 @@ out: return ret; } +/** + * nilfs_ioctl_delete_checkpoint - remove checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_delete_checkpoint() function removes + * checkpoint from NILFS2 file system. This ioctl is used in rmcp + * utility. + * + * Return Value: On success, 0 is returned and a checkpoint is + * removed. On error, one of the following negative error codes + * is returned. + * + * %-EPERM - Operation not permitted. + * + * %-EFAULT - Failure during checkpoint removing. + */ static int nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) @@ -229,6 +303,21 @@ out: return ret; } +/** + * nilfs_ioctl_do_get_cpinfo - callback method getting info about checkpoints + * @nilfs: nilfs object + * @posp: pointer on array of checkpoint's numbers + * @flags: checkpoint mode (checkpoint or snapshot) + * @buf: buffer for storing checkponts' info + * @size: size in bytes of one checkpoint info item in array + * @nmembs: number of checkpoints in array (numbers and infos) + * + * Description: nilfs_ioctl_do_get_cpinfo() function returns info about + * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in + * lscp utility and by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_cpinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -242,6 +331,27 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_get_cpstat - get checkpoints statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints. + * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities + * and by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and checkpoints information is + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting checkpoints statistics. + */ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -260,6 +370,21 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_suinfo - callback method getting segment usage info + * @nilfs: nilfs object + * @posp: pointer on array of segment numbers + * @flags: *not used* + * @buf: buffer for storing suinfo array + * @size: size in bytes of one suinfo item in array + * @nmembs: count of segment numbers and suinfos in array + * + * Description: nilfs_ioctl_do_get_suinfo() function returns segment usage + * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used + * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_suinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -273,6 +398,27 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_get_sustat - get segment usage statistics + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_get_sustat() returns segment usage statistics. + * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities + * and by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and segment usage information is + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting segment usage statistics. + */ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -291,6 +437,21 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_do_get_vinfo - callback method getting virtual blocks info + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_vinfo structures + * @size: size in bytes of one vinfo item in array + * @nmembs: count of vinfos in array + * + * Description: nilfs_ioctl_do_get_vinfo() function returns information + * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used + * by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_vinfo structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -303,6 +464,21 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } +/** + * nilfs_ioctl_do_get_bdescs - callback method getting disk block descriptors + * @nilfs: nilfs object + * @posp: *not used* + * @flags: *not used* + * @buf: buffer for storing array of nilfs_bdesc structures + * @size: size in bytes of one bdesc item in array + * @nmembs: count of bdescs in array + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. + * + * Return value: count of nilfs_bdescs structures in output buffer. + */ static ssize_t nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) @@ -329,6 +505,29 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, return nmembs; } +/** + * nilfs_ioctl_get_bdescs - get disk block descriptors + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_do_get_bdescs() function returns information + * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl + * is used by nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned, and disk block descriptors are + * copied into userspace pointer @argp. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during getting disk block descriptors. + */ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -352,6 +551,26 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, return ret; } +/** + * nilfs_ioctl_move_inode_block - prepare data/node block for moving by GC + * @inode: inode object + * @vdesc: descriptor of virtual block number + * @buffers: list of moving buffers + * + * Description: nilfs_ioctl_move_inode_block() function registers data/node + * buffer in the GC pagecache and submit read request. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - Requested block doesn't exist. + * + * %-EEXIST - Blocks conflict is detected. + */ static int nilfs_ioctl_move_inode_block(struct inode *inode, struct nilfs_vdesc *vdesc, struct list_head *buffers) @@ -397,6 +616,19 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } +/** + * nilfs_ioctl_move_blocks - move valid inode's blocks during garbage collection + * @sb: superblock object + * @argv: vector of arguments from userspace + * @buf: array of nilfs_vdesc structures + * + * Description: nilfs_ioctl_move_blocks() function reads valid data/node + * blocks that garbage collector specified with the array of nilfs_vdesc + * structures and stores them into page caches of GC inodes. + * + * Return Value: Number of processed nilfs_vdesc structures or + * error code, otherwise. + */ static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) { @@ -462,6 +694,25 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, return ret; } +/** + * nilfs_ioctl_delete_checkpoints - delete checkpoints + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of periods of checkpoints numbers + * + * Description: nilfs_ioctl_delete_checkpoints() function deletes checkpoints + * in the period from p_start to p_end, excluding p_end itself. The checkpoints + * which have been already deleted are ignored. + * + * Return Value: Number of processed nilfs_period structures or + * error code, otherwise. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - invalid checkpoints. + */ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { @@ -479,6 +730,24 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, return nmembs; } +/** + * nilfs_ioctl_free_vblocknrs - free virtual block numbers + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of virtual block numbers + * + * Description: nilfs_ioctl_free_vblocknrs() function frees + * the virtual block numbers specified by @buf and @argv->v_nmembs. + * + * Return Value: Number of processed virtual block numbers or + * error code, otherwise. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-ENOENT - The virtual block number have not been allocated. + */ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { @@ -490,6 +759,24 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, return (ret < 0) ? ret : nmembs; } +/** + * nilfs_ioctl_mark_blocks_dirty - mark blocks dirty + * @nilfs: nilfs object + * @argv: vector of arguments from userspace + * @buf: array of block descriptors + * + * Description: nilfs_ioctl_mark_blocks_dirty() function marks + * metadata file or data blocks as dirty. + * + * Return Value: Number of processed block descriptors or + * error code, otherwise. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EIO - I/O error + * + * %-ENOENT - the specified block does not exist (hole block) + */ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, struct nilfs_argv *argv, void *buf) { @@ -571,6 +858,20 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, return ret; } +/** + * nilfs_ioctl_clean_segments - clean segments + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_clean_segments() function makes garbage + * collection operation in the environment of requested parameters + * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by + * nilfs_cleanerd daemon. + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -682,6 +983,33 @@ out: return ret; } +/** + * nilfs_ioctl_sync - make a checkpoint + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * + * Description: nilfs_ioctl_sync() function constructs a logical segment + * for checkpointing. This function guarantees that all modified data + * and metadata are written out to the device when it successfully + * returned. + * + * Return Value: On success, 0 is retured. On errors, one of the following + * negative error code is returned. + * + * %-EROFS - Read only filesystem. + * + * %-EIO - I/O error + * + * %-ENOSPC - No space left on device (only in a panic state). + * + * %-ERESTARTSYS - Interrupted. + * + * %-ENOMEM - Insufficient memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { @@ -710,6 +1038,14 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +/** + * nilfs_ioctl_resize - resize NILFS2 volume + * @inode: inode object + * @filp: file object + * @argp: pointer on argument from userspace + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp, void __user *argp) { @@ -735,6 +1071,17 @@ out: return ret; } +/** + * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated + * @inode: inode object + * @argp: pointer on argument from userspace + * + * Decription: nilfs_ioctl_set_alloc_range() function defines lower limit + * of segments in bytes and upper limit of segments in bytes. + * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility. + * + * Return Value: On success, 0 is returned or error code, otherwise. + */ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) { struct the_nilfs *nilfs = inode->i_sb->s_fs_info; @@ -767,6 +1114,28 @@ out: return ret; } +/** + * nilfs_ioctl_get_info - wrapping function of get metadata info + * @inode: inode object + * @filp: file object + * @cmd: ioctl's request code + * @argp: pointer on argument from userspace + * @membsz: size of an item in bytes + * @dofunc: concrete function of getting metadata info + * + * Description: nilfs_ioctl_get_info() gets metadata info by means of + * calling dofunc() function. + * + * Return Value: On success, 0 is returned and requested metadata info + * is copied into userspace. On error, one of the following + * negative error codes is returned. + * + * %-EINVAL - Invalid arguments from userspace. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EFAULT - Failure during execution of requested operation. + */ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, size_t membsz, @@ -663,10 +663,11 @@ out: wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - if (ret > 0) { + if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { int err = file_update_time(filp); if (err) ret = err; + sb_end_write(file_inode(filp)->i_sb); } return ret; } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 021e7c0..551e61b 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -149,8 +149,6 @@ posix_acl_valid(const struct posix_acl *acl) { const struct posix_acl_entry *pa, *pe; int state = ACL_USER_OBJ; - kuid_t prev_uid = INVALID_UID; - kgid_t prev_gid = INVALID_GID; int needs_mask = 0; FOREACH_ACL_ENTRY(pa, acl, pe) { @@ -169,10 +167,6 @@ posix_acl_valid(const struct posix_acl *acl) return -EINVAL; if (!uid_valid(pa->e_uid)) return -EINVAL; - if (uid_valid(prev_uid) && - uid_lte(pa->e_uid, prev_uid)) - return -EINVAL; - prev_uid = pa->e_uid; needs_mask = 1; break; @@ -188,10 +182,6 @@ posix_acl_valid(const struct posix_acl *acl) return -EINVAL; if (!gid_valid(pa->e_gid)) return -EINVAL; - if (gid_valid(prev_gid) && - gid_lte(pa->e_gid, prev_gid)) - return -EINVAL; - prev_gid = pa->e_gid; needs_mask = 1; break; diff --git a/fs/proc/array.c b/fs/proc/array.c index 1bd2077..656e401 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -140,24 +140,15 @@ static const char * const task_state_array[] = { "t (tracing stop)", /* 8 */ "Z (zombie)", /* 16 */ "X (dead)", /* 32 */ - "x (dead)", /* 64 */ - "K (wakekill)", /* 128 */ - "W (waking)", /* 256 */ - "P (parked)", /* 512 */ }; static inline const char *get_task_state(struct task_struct *tsk) { - unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; - const char * const *p = &task_state_array[0]; + unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT; - BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); + BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1); - while (state) { - p++; - state >>= 1; - } - return *p; + return task_state_array[fls(state)]; } static inline void task_state(struct seq_file *m, struct pid_namespace *ns, @@ -453,8 +444,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += t->min_flt; maj_flt += t->maj_flt; gtime += task_gtime(t); - t = next_thread(t); - } while (t != task); + } while_each_thread(task, t); min_flt += sig->min_flt; maj_flt += sig->maj_flt; diff --git a/fs/proc/base.c b/fs/proc/base.c index 03c8d74..5150706 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1658,13 +1658,18 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) return 0; } +static inline bool proc_inode_is_dead(struct inode *inode) +{ + return !proc_pid(inode)->tasks[PIDTYPE_PID].first; +} + int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; + return proc_inode_is_dead(dentry->d_inode); } const struct dentry_operations pid_dentry_operations = @@ -3092,34 +3097,42 @@ out_no_task: * In the case of a seek we start with the leader and walk nr * threads past it. */ -static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr, struct pid_namespace *ns) +static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, + struct pid_namespace *ns) { - struct task_struct *pos; + struct task_struct *pos, *task; + unsigned long nr = f_pos; + + if (nr != f_pos) /* 32bit overflow? */ + return NULL; rcu_read_lock(); - /* Attempt to start with the pid of a thread */ - if (tid && (nr > 0)) { + task = pid_task(pid, PIDTYPE_PID); + if (!task) + goto fail; + + /* Attempt to start with the tid of a thread */ + if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); - if (pos && (pos->group_leader == leader)) + if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ - pos = NULL; - if (nr && nr >= get_nr_threads(leader)) - goto out; + if (nr >= get_nr_threads(task)) + goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ - for (pos = leader; nr > 0; --nr) { - pos = next_thread(pos); - if (pos == leader) { - pos = NULL; - goto out; - } - } + pos = task = task->group_leader; + do { + if (!nr--) + goto found; + } while_each_thread(task, pos); +fail: + pos = NULL; + goto out; found: get_task_struct(pos); out: @@ -3152,25 +3165,16 @@ static struct task_struct *next_tid(struct task_struct *start) /* for the /proc/TGID/task/ directories */ static int proc_task_readdir(struct file *file, struct dir_context *ctx) { - struct task_struct *leader = NULL; - struct task_struct *task = get_proc_task(file_inode(file)); + struct inode *inode = file_inode(file); + struct task_struct *task; struct pid_namespace *ns; int tid; - if (!task) - return -ENOENT; - rcu_read_lock(); - if (pid_alive(task)) { - leader = task->group_leader; - get_task_struct(leader); - } - rcu_read_unlock(); - put_task_struct(task); - if (!leader) + if (proc_inode_is_dead(inode)) return -ENOENT; if (!dir_emit_dots(file, ctx)) - goto out; + return 0; /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. @@ -3178,7 +3182,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) ns = file->f_dentry->d_sb->s_fs_info; tid = (int)file->f_version; file->f_version = 0; - for (task = first_tid(leader, tid, ctx->pos - 2, ns); + for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { char name[PROC_NUMBUF]; @@ -3194,8 +3198,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) break; } } -out: - put_task_struct(leader); + return 0; } diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 82676e3..cbd82df 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -26,4 +26,4 @@ static int __init proc_cmdline_init(void) proc_create("cmdline", 0, NULL, &cmdline_proc_fops); return 0; } -module_init(proc_cmdline_init); +fs_initcall(proc_cmdline_init); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index 51942d5..290ba85 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -109,4 +109,4 @@ static int __init proc_consoles_init(void) proc_create("consoles", 0, NULL, &proc_consoles_operations); return 0; } -module_init(proc_consoles_init); +fs_initcall(proc_consoles_init); diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c index 5a1e539..06f4d31 100644 --- a/fs/proc/cpuinfo.c +++ b/fs/proc/cpuinfo.c @@ -21,4 +21,4 @@ static int __init proc_cpuinfo_init(void) proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); return 0; } -module_init(proc_cpuinfo_init); +fs_initcall(proc_cpuinfo_init); diff --git a/fs/proc/devices.c b/fs/proc/devices.c index b143471..50493ed 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -67,4 +67,4 @@ static int __init proc_devices_init(void) proc_create("devices", 0, NULL, &proc_devinfo_operations); return 0; } -module_init(proc_devices_init); +fs_initcall(proc_devices_init); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index cca93b6..b7f268e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -49,8 +49,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) setattr_copy(inode, iattr); mark_inode_dirty(inode); - de->uid = inode->i_uid; - de->gid = inode->i_gid; + proc_set_user(de, inode->i_uid, inode->i_gid); de->mode = inode->i_mode; return 0; } diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c index 05029c0..a352d57 100644 --- a/fs/proc/interrupts.c +++ b/fs/proc/interrupts.c @@ -50,4 +50,4 @@ static int __init proc_interrupts_init(void) proc_create("interrupts", 0, NULL, &proc_interrupts_operations); return 0; } -module_init(proc_interrupts_init); +fs_initcall(proc_interrupts_init); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 5ed0e52..39e6ef3 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -639,4 +639,4 @@ static int __init proc_kcore_init(void) return 0; } -module_init(proc_kcore_init); +fs_initcall(proc_kcore_init); diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index bdfabda..05f8dcd 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -61,4 +61,4 @@ static int __init proc_kmsg_init(void) proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); return 0; } -module_init(proc_kmsg_init); +fs_initcall(proc_kmsg_init); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 1afa4dd..aec66e6 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -42,4 +42,4 @@ static int __init proc_loadavg_init(void) proc_create("loadavg", 0, NULL, &loadavg_proc_fops); return 0; } -module_init(proc_loadavg_init); +fs_initcall(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 24270ec..136e548 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -220,4 +220,4 @@ static int __init proc_meminfo_init(void) proc_create("meminfo", 0, NULL, &meminfo_proc_fops); return 0; } -module_init(proc_meminfo_init); +fs_initcall(proc_meminfo_init); diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 5f9bc8a..d4a3574 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -131,4 +131,4 @@ static int __init proc_nommu_init(void) return 0; } -module_init(proc_nommu_init); +fs_initcall(proc_nommu_init); diff --git a/fs/proc/page.c b/fs/proc/page.c index b8730d9..02174a6 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -118,10 +118,12 @@ u64 stable_page_flags(struct page *page) /* * PageTransCompound can be true for non-huge compound pages (slab * pages or pages allocated by drivers with __GFP_COMP) because it - * just checks PG_head/PG_tail, so we need to check PageLRU to make - * sure a given page is a thp, not a non-huge compound page. + * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon + * to make sure a given page is a thp, not a non-huge compound page. */ - else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) + else if (PageTransCompound(page) && + (PageLRU(compound_trans_head(page)) || + PageAnon(compound_trans_head(page)))) u |= 1 << KPF_THP; /* @@ -217,4 +219,4 @@ static int __init proc_page_init(void) proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); return 0; } -module_init(proc_page_init); +fs_initcall(proc_page_init); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 70779b2..c82dd51 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -74,9 +74,9 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp, return NULL; if (!strncmp(name, "security-", 9)) - ent->size = 0; /* don't leak number of password chars */ + proc_set_size(ent, 0); /* don't leak number of password chars */ else - ent->size = pp->length; + proc_set_size(ent, pp->length); return ent; } @@ -232,6 +232,7 @@ void __init proc_device_tree_init(void) return; root = of_find_node_by_path("/"); if (root == NULL) { + remove_proc_entry("device-tree", NULL); pr_debug("/proc/device-tree: can't find root\n"); return; } diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 62604be..ad8a77f 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -41,4 +41,4 @@ static int __init proc_softirqs_init(void) proc_create("softirqs", 0, NULL, &proc_softirqs_operations); return 0; } -module_init(proc_softirqs_init); +fs_initcall(proc_softirqs_init); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 1cf86c0..6f599c6 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -221,4 +221,4 @@ static int __init proc_stat_init(void) proc_create("stat", 0, NULL, &proc_stat_operations); return 0; } -module_init(proc_stat_init); +fs_initcall(proc_stat_init); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 0618946..7141b8d 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -49,4 +49,4 @@ static int __init proc_uptime_init(void) proc_create("uptime", 0, NULL, &uptime_proc_fops); return 0; } -module_init(proc_uptime_init); +fs_initcall(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c index 76817a6..d2154eb 100644 --- a/fs/proc/version.c +++ b/fs/proc/version.c @@ -31,4 +31,4 @@ static int __init proc_version_init(void) proc_create("version", 0, NULL, &version_proc_fops); return 0; } -module_init(proc_version_init); +fs_initcall(proc_version_init); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9100d69..2ca7ba0 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1082,7 +1082,7 @@ static int __init vmcore_init(void) proc_vmcore->size = vmcore_size; return 0; } -module_init(vmcore_init) +fs_initcall(vmcore_init); /* Cleanup function for vmcore module. */ void vmcore_cleanup(void) diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 439406e..7be26f0 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -234,17 +234,12 @@ static int mounts_open_common(struct inode *inode, struct file *file, rcu_read_lock(); nsp = task_nsproxy(task); - if (!nsp) { + if (!nsp || !nsp->mnt_ns) { rcu_read_unlock(); put_task_struct(task); goto err; } ns = nsp->mnt_ns; - if (!ns) { - rcu_read_unlock(); - put_task_struct(task); - goto err; - } get_mnt_ns(ns); rcu_read_unlock(); task_lock(task); diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 4884ac5..1e56a4e 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -30,13 +30,6 @@ #include "internal.h" -const struct address_space_operations ramfs_aops = { - .readpage = simple_readpage, - .write_begin = simple_write_begin, - .write_end = simple_write_end, - .set_page_dirty = __set_page_dirty_no_writeback, -}; - const struct file_operations ramfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 8d5b438..0b3d8e4 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -27,13 +27,12 @@ #include "internal.h" static int ramfs_nommu_setattr(struct dentry *, struct iattr *); - -const struct address_space_operations ramfs_aops = { - .readpage = simple_readpage, - .write_begin = simple_write_begin, - .write_end = simple_write_end, - .set_page_dirty = __set_page_dirty_no_writeback, -}; +static unsigned long ramfs_nommu_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags); +static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); const struct file_operations ramfs_file_operations = { .mmap = ramfs_nommu_mmap, @@ -197,7 +196,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) * - the pages to be mapped must exist * - the pages be physically contiguous in sequence */ -unsigned long ramfs_nommu_get_unmapped_area(struct file *file, +static unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { @@ -256,7 +255,7 @@ out: /* * set up a mapping for shared memory segments */ -int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) +static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) { if (!(vma->vm_flags & VM_SHARED)) return -ENOSYS; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 6a3e2c4..d365b1c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -43,6 +43,13 @@ static const struct super_operations ramfs_ops; static const struct inode_operations ramfs_dir_inode_operations; +static const struct address_space_operations ramfs_aops = { + .readpage = simple_readpage, + .write_begin = simple_write_begin, + .write_end = simple_write_end, + .set_page_dirty = __set_page_dirty_no_writeback, +}; + static struct backing_dev_info ramfs_backing_dev_info = { .name = "ramfs", .ra_pages = 0, /* No readahead */ diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h index 6b33063..a9d8ae8 100644 --- a/fs/ramfs/internal.h +++ b/fs/ramfs/internal.h @@ -10,5 +10,4 @@ */ -extern const struct address_space_operations ramfs_aops; extern const struct inode_operations ramfs_file_inode_operations; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index f8adaee..dfb617b 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1958,8 +1958,6 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} #define MAX_US_INT 0xffff // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset -#define U32_MAX (~(__u32)0) - static inline loff_t max_reiserfs_offset(struct inode *inode) { if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index ff1d3d4..d841878 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -533,16 +533,14 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) root = romfs_iget(sb, pos); if (IS_ERR(root)) - goto error; + return PTR_ERR(root); sb->s_root = d_make_root(root); if (!sb->s_root) - goto error; + return -ENOMEM; return 0; -error: - return -EINVAL; error_rsb_inval: ret = -EINVAL; error_rsb: diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index cc1febd..5157b86 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2118,26 +2118,10 @@ out_free: */ static void free_inodes(struct fsck_data *fsckd) { - struct rb_node *this = fsckd->inodes.rb_node; - struct fsck_inode *fscki; + struct fsck_inode *fscki, *n; - while (this) { - if (this->rb_left) - this = this->rb_left; - else if (this->rb_right) - this = this->rb_right; - else { - fscki = rb_entry(this, struct fsck_inode, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &fscki->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - kfree(fscki); - } - } + rbtree_postorder_for_each_entry_safe(fscki, n, &fsckd->inodes, rb) + kfree(fscki); } /** diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 36bd4ef..a902c59 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -574,27 +574,10 @@ static int done_already(struct rb_root *done_tree, int lnum) */ static void destroy_done_tree(struct rb_root *done_tree) { - struct rb_node *this = done_tree->rb_node; - struct done_ref *dr; + struct done_ref *dr, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - dr = rb_entry(this, struct done_ref, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &dr->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } + rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb) kfree(dr); - } } /** diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index ba32da3..f1c3e5a1 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -815,27 +815,10 @@ static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) static void dbg_free_check_tree(struct rb_root *root) { - struct rb_node *this = root->rb_node; - struct check_orphan *o; + struct check_orphan *o, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - o = rb_entry(this, struct check_orphan, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &o->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } + rbtree_postorder_for_each_entry_safe(o, n, root, rb) kfree(o); - } } static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 065096e..c14adb2 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1335,29 +1335,14 @@ static void remove_ino(struct ubifs_info *c, ino_t inum) */ void ubifs_destroy_size_tree(struct ubifs_info *c) { - struct rb_node *this = c->size_tree.rb_node; - struct size_entry *e; + struct size_entry *e, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - e = rb_entry(this, struct size_entry, rb); + rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) { if (e->inode) iput(e->inode); - this = rb_parent(this); - if (this) { - if (this->rb_left == &e->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } kfree(e); } + c->size_tree = RB_ROOT; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f69daa5..5ded849 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -873,26 +873,10 @@ static void free_orphans(struct ubifs_info *c) */ static void free_buds(struct ubifs_info *c) { - struct rb_node *this = c->buds.rb_node; - struct ubifs_bud *bud; - - while (this) { - if (this->rb_left) - this = this->rb_left; - else if (this->rb_right) - this = this->rb_right; - else { - bud = rb_entry(this, struct ubifs_bud, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &bud->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - kfree(bud); - } - } + struct ubifs_bud *bud, *n; + + rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) + kfree(bud); } /** diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 349f31a..9083bc7 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -178,27 +178,11 @@ static int ins_clr_old_idx_znode(struct ubifs_info *c, */ void destroy_old_idx(struct ubifs_info *c) { - struct rb_node *this = c->old_idx.rb_node; - struct ubifs_old_idx *old_idx; + struct ubifs_old_idx *old_idx, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - old_idx = rb_entry(this, struct ubifs_old_idx, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &old_idx->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } + rbtree_postorder_for_each_entry_safe(old_idx, n, &c->old_idx, rb) kfree(old_idx); - } + c->old_idx = RB_ROOT; } diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h new file mode 100644 index 0000000..5a64ca4 --- /dev/null +++ b/include/asm-generic/fixmap.h @@ -0,0 +1,97 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 + * Break out common bits to asm-generic by Mark Salter, November 2013 + */ + +#ifndef __ASM_GENERIC_FIXMAP_H +#define __ASM_GENERIC_FIXMAP_H + +#include <linux/bug.h> + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + +#ifndef __ASSEMBLY__ +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without translation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static __always_inline unsigned long fix_to_virt(const unsigned int idx) +{ + BUILD_BUG_ON(idx >= __end_of_fixed_addresses); + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} + +/* + * Provide some reasonable defaults for page flags. + * Not all architectures use all of these different types and some + * architectures use different names. + */ +#ifndef FIXMAP_PAGE_NORMAL +#define FIXMAP_PAGE_NORMAL PAGE_KERNEL +#endif +#ifndef FIXMAP_PAGE_NOCACHE +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE +#endif +#ifndef FIXMAP_PAGE_IO +#define FIXMAP_PAGE_IO PAGE_KERNEL_IO +#endif +#ifndef FIXMAP_PAGE_CLEAR +#define FIXMAP_PAGE_CLEAR __pgprot(0) +#endif + +#ifndef set_fixmap +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, FIXMAP_PAGE_NORMAL) +#endif + +#ifndef clear_fixmap +#define clear_fixmap(idx) \ + __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR) +#endif + +/* Return a pointer with offset calculated */ +#define __set_fixmap_offset(idx, phys, flags) \ +({ \ + unsigned long addr; \ + __set_fixmap(idx, phys, flags); \ + addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ + addr; \ +}) + +#define set_fixmap_offset(idx, phys) \ + __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NORMAL) + +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, FIXMAP_PAGE_NOCACHE) + +#define set_fixmap_offset_nocache(idx, phys) \ + __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NOCACHE) + +/* + * Some fixmaps are for IO + */ +#define set_fixmap_io(idx, phys) \ + __set_fixmap(idx, phys, FIXMAP_PAGE_IO) + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_GENERIC_FIXMAP_H */ diff --git a/include/asm-generic/int-l64.h b/include/asm-generic/int-l64.h deleted file mode 100644 index 27d4ec0..0000000 --- a/include/asm-generic/int-l64.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * asm-generic/int-l64.h - * - * Integer declarations for architectures which use "long" - * for 64-bit types. - */ -#ifndef _ASM_GENERIC_INT_L64_H -#define _ASM_GENERIC_INT_L64_H - -#include <uapi/asm-generic/int-l64.h> - - -#ifndef __ASSEMBLY__ - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long s64; -typedef unsigned long u64; - -#define S8_C(x) x -#define U8_C(x) x ## U -#define S16_C(x) x -#define U16_C(x) x ## U -#define S32_C(x) x -#define U32_C(x) x ## U -#define S64_C(x) x ## L -#define U64_C(x) x ## UL - -#else /* __ASSEMBLY__ */ - -#define S8_C(x) x -#define U8_C(x) x -#define S16_C(x) x -#define U16_C(x) x -#define S32_C(x) x -#define U32_C(x) x -#define S64_C(x) x -#define U64_C(x) x - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_GENERIC_INT_L64_H */ diff --git a/include/linux/cache.h b/include/linux/cache.h index 4c570653..17e7e82 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -1,11 +1,11 @@ #ifndef __LINUX_CACHE_H #define __LINUX_CACHE_H -#include <linux/kernel.h> +#include <uapi/linux/kernel.h> #include <asm/cache.h> #ifndef L1_CACHE_ALIGN -#define L1_CACHE_ALIGN(x) ALIGN(x, L1_CACHE_BYTES) +#define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES) #endif #ifndef SMP_CACHE_BYTES diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 0442c3d..a6ef9cc 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -8,23 +8,6 @@ #include <linux/ceph/types.h> -/* This seemed to be the easiest place to define these */ - -#define U8_MAX ((u8)(~0U)) -#define U16_MAX ((u16)(~0U)) -#define U32_MAX ((u32)(~0U)) -#define U64_MAX ((u64)(~0ULL)) - -#define S8_MAX ((s8)(U8_MAX >> 1)) -#define S16_MAX ((s16)(U16_MAX >> 1)) -#define S32_MAX ((s32)(U32_MAX >> 1)) -#define S64_MAX ((s64)(U64_MAX >> 1LL)) - -#define S8_MIN ((s8)(-S8_MAX - 1)) -#define S16_MIN ((s16)(-S16_MAX - 1)) -#define S32_MIN ((s32)(-S32_MAX - 1)) -#define S64_MIN ((s64)(-S64_MAX - 1LL)) - /* * in all cases, * void **p pointer to position pointer diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 1eda33d..1c2fdaa 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -30,6 +30,8 @@ #ifndef __GENALLOC_H__ #define __GENALLOC_H__ +#include <linux/spinlock_types.h> + struct device; struct device_node; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 9b4dd49..0437439 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -1,6 +1,7 @@ #ifndef __LINUX_GFP_H #define __LINUX_GFP_H +#include <linux/mmdebug.h> #include <linux/mmzone.h> #include <linux/stddef.h> #include <linux/linkage.h> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d01cc97..8c43cc4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -2,6 +2,7 @@ #define _LINUX_HUGETLB_H #include <linux/mm_types.h> +#include <linux/mmdebug.h> #include <linux/fs.h> #include <linux/hugetlb_inline.h> #include <linux/cgroup.h> @@ -354,7 +355,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, static inline struct hstate *page_hstate(struct page *page) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); return size_to_hstate(PAGE_SIZE << compound_order(page)); } diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index ce8217f..787bba3 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -15,6 +15,7 @@ #ifndef _LINUX_HUGETLB_CGROUP_H #define _LINUX_HUGETLB_CGROUP_H +#include <linux/mmdebug.h> #include <linux/res_counter.h> struct hugetlb_cgroup; @@ -28,7 +29,7 @@ struct hugetlb_cgroup; static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; @@ -38,7 +39,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) static inline int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2aa3d4b0..f74bb58 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -29,6 +29,19 @@ #define ULLONG_MAX (~0ULL) #define SIZE_MAX (~(size_t)0) +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX>>1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX>>1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX>>1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX>>1)) +#define S64_MIN ((s64)(-S64_MAX - 1)) + #define STACK_MAGIC 0xdeadbeef #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 5fd33dc..6d4066c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -170,6 +170,7 @@ unsigned long paddr_vmcoreinfo_note(void); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; +extern int kexec_load_disabled; #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cd0274b..1ef6636 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,7 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); +phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b3e7a66..abd0113 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -497,10 +497,11 @@ void __memcg_kmem_commit_charge(struct page *page, void __memcg_kmem_uncharge_pages(struct page *page, int order); int memcg_cache_id(struct mem_cgroup *memcg); -int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, - struct kmem_cache *root_cache); -void memcg_release_cache(struct kmem_cache *cachep); -void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep); +int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, + struct kmem_cache *root_cache); +void memcg_free_cache_params(struct kmem_cache *s); +void memcg_register_cache(struct kmem_cache *s); +void memcg_unregister_cache(struct kmem_cache *s); int memcg_update_cache_size(struct kmem_cache *s, int num_groups); void memcg_update_array_size(int num_groups); @@ -640,19 +641,21 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } -static inline int -memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, - struct kmem_cache *root_cache) +static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg, + struct kmem_cache *s, struct kmem_cache *root_cache) { return 0; } -static inline void memcg_release_cache(struct kmem_cache *cachep) +static inline void memcg_free_cache_params(struct kmem_cache *s) +{ +} + +static inline void memcg_register_cache(struct kmem_cache *s) { } -static inline void memcg_cache_list_add(struct mem_cgroup *memcg, - struct kmem_cache *s) +static inline void memcg_unregister_cache(struct kmem_cache *s) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index a512dd8..d9992fc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5,6 +5,7 @@ #ifdef __KERNEL__ +#include <linux/mmdebug.h> #include <linux/gfp.h> #include <linux/bug.h> #include <linux/list.h> @@ -303,7 +304,7 @@ static inline int get_freepage_migratetype(struct page *page) */ static inline int put_page_testzero(struct page *page) { - VM_BUG_ON(atomic_read(&page->_count) == 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page); return atomic_dec_and_test(&page->_count); } @@ -364,7 +365,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline void compound_lock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(PageSlab(page)); + VM_BUG_ON_PAGE(PageSlab(page), page); bit_spin_lock(PG_compound_lock, &page->flags); #endif } @@ -372,7 +373,7 @@ static inline void compound_lock(struct page *page) static inline void compound_unlock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(PageSlab(page)); + VM_BUG_ON_PAGE(PageSlab(page), page); bit_spin_unlock(PG_compound_lock, &page->flags); #endif } @@ -447,7 +448,7 @@ static inline bool __compound_tail_refcounted(struct page *page) */ static inline bool compound_tail_refcounted(struct page *page) { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); return __compound_tail_refcounted(page); } @@ -456,9 +457,9 @@ static inline void get_huge_page_tail(struct page *page) /* * __split_huge_page_refcount() cannot run from under us. */ - VM_BUG_ON(!PageTail(page)); - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); + VM_BUG_ON_PAGE(!PageTail(page), page); + VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); + VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); if (compound_tail_refcounted(page->first_page)) atomic_inc(&page->_mapcount); } @@ -474,7 +475,7 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON(atomic_read(&page->_count) <= 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); atomic_inc(&page->_count); } @@ -511,13 +512,13 @@ static inline int PageBuddy(struct page *page) static inline void __SetPageBuddy(struct page *page) { - VM_BUG_ON(atomic_read(&page->_mapcount) != -1); + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); } static inline void __ClearPageBuddy(struct page *page) { - VM_BUG_ON(!PageBuddy(page)); + VM_BUG_ON_PAGE(!PageBuddy(page), page); atomic_set(&page->_mapcount, -1); } @@ -1401,7 +1402,7 @@ static inline bool ptlock_init(struct page *page) * slab code uses page->slab_cache and page->first_page (for tail * pages), which share storage with page->ptl. */ - VM_BUG_ON(*(unsigned long *)&page->ptl); + VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); if (!ptlock_alloc(page)) return false; spin_lock_init(ptlock_ptr(page)); @@ -1492,7 +1493,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) static inline void pgtable_pmd_page_dtor(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(page->pmd_huge_pte); + VM_BUG_ON_PAGE(page->pmd_huge_pte, page); #endif ptlock_free(page); } @@ -2029,8 +2030,6 @@ extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages; extern int soft_offline_page(struct page *page, int flags); -extern void dump_page(struct page *page); - #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, unsigned long addr, diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 580bd58..5042c03 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -1,10 +1,19 @@ #ifndef LINUX_MM_DEBUG_H #define LINUX_MM_DEBUG_H 1 +struct page; + +extern void dump_page(struct page *page, char *reason); +extern void dump_page_badflags(struct page *page, char *reason, + unsigned long badflags); + #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) +#define VM_BUG_ON_PAGE(cond, page) \ + do { if (unlikely(cond)) { dump_page(page, NULL); BUG(); } } while (0) #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) #endif #ifdef CONFIG_DEBUG_VIRTUAL diff --git a/include/linux/of.h b/include/linux/of.h index 276c546..70c64ba 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -377,8 +377,13 @@ static inline bool of_have_populated_dt(void) return false; } +/* Kill an unused variable warning on a device_node pointer */ +static inline void __of_use_dn(const struct device_node *np) +{ +} + #define for_each_child_of_node(parent, child) \ - while (0) + while (__of_use_dn(parent), __of_use_dn(child), 0) #define for_each_available_child_of_node(parent, child) \ while (0) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 98ada58..e464b4e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -412,7 +412,7 @@ static inline void ClearPageCompound(struct page *page) */ static inline int PageTransHuge(struct page *page) { - VM_BUG_ON(PageTail(page)); + VM_BUG_ON_PAGE(PageTail(page), page); return PageHead(page); } @@ -460,25 +460,25 @@ static inline int PageTransTail(struct page *page) */ static inline int PageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); return PageActive(page); } static inline void SetPageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); SetPageActive(page); } static inline void __ClearPageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); __ClearPageActive(page); } static inline void ClearPageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); ClearPageActive(page); } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e3dea75..1710d1b 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -162,7 +162,7 @@ static inline int page_cache_get_speculative(struct page *page) * disabling preempt, and hence no need for the "speculative get" that * SMP requires. */ - VM_BUG_ON(page_count(page) == 0); + VM_BUG_ON_PAGE(page_count(page) == 0, page); atomic_inc(&page->_count); #else @@ -175,7 +175,7 @@ static inline int page_cache_get_speculative(struct page *page) return 0; } #endif - VM_BUG_ON(PageTail(page)); + VM_BUG_ON_PAGE(PageTail(page), page); return 1; } @@ -191,14 +191,14 @@ static inline int page_cache_add_speculative(struct page *page, int count) # ifdef CONFIG_PREEMPT_COUNT VM_BUG_ON(!in_atomic()); # endif - VM_BUG_ON(page_count(page) == 0); + VM_BUG_ON_PAGE(page_count(page) == 0, page); atomic_add(count, &page->_count); #else if (unlikely(!atomic_add_unless(&page->_count, count, 0))) return 0; #endif - VM_BUG_ON(PageCompound(page) && page != compound_head(page)); + VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page); return 1; } @@ -210,7 +210,7 @@ static inline int page_freeze_refs(struct page *page, int count) static inline void page_unfreeze_refs(struct page *page, int count) { - VM_BUG_ON(page_count(page) != 0); + VM_BUG_ON_PAGE(page_count(page) != 0, page); VM_BUG_ON(count == 0); atomic_set(&page->_count, count); diff --git a/include/linux/parser.h b/include/linux/parser.h index ea2281e..39d5b79 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -29,5 +29,6 @@ int match_token(char *, const match_table_t table, substring_t args[]); int match_int(substring_t *, int *result); int match_octal(substring_t *, int *result); int match_hex(substring_t *, int *result); +bool match_wildcard(const char *pattern, const char *str); size_t match_strlcpy(char *, const substring_t *, size_t); char *match_strdup(const substring_t *); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9e4761c..e3817d2 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -1,6 +1,7 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H +#include <linux/mmdebug.h> #include <linux/preempt.h> #include <linux/smp.h> #include <linux/cpumask.h> diff --git a/include/linux/printk.h b/include/linux/printk.h index 6949258..cc6f74d 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -5,6 +5,7 @@ #include <linux/init.h> #include <linux/kern_levels.h> #include <linux/linkage.h> +#include <linux/cache.h> extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -253,17 +254,17 @@ extern asmlinkage void dump_stack(void) __cold; */ #ifdef CONFIG_PRINTK -#define printk_once(fmt, ...) \ -({ \ - static bool __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - printk(fmt, ##__VA_ARGS__); \ - } \ +#define printk_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk(fmt, ##__VA_ARGS__); \ + } \ }) #else -#define printk_once(fmt, ...) \ +#define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) #endif diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 753207c..ecc7309 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -14,13 +14,6 @@ ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) } #else extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); -extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags); - -extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); #endif extern const struct file_operations ramfs_file_operations; diff --git a/include/linux/sched.h b/include/linux/sched.h index 485234d..68a0e84 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -229,7 +229,7 @@ extern char ___assert_task_state[1 - 2*!!( /* get_task_state() */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED) + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) @@ -391,22 +391,33 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif - -extern void set_dumpable(struct mm_struct *mm, int value); -extern int get_dumpable(struct mm_struct *mm); - #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ /* mm flags */ -/* dumpable bits */ -#define MMF_DUMPABLE 0 /* core dump is permitted */ -#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ +/* for SUID_DUMP_* above */ #define MMF_DUMPABLE_BITS 2 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 @@ -1228,7 +1239,6 @@ struct task_struct { /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; - unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ unsigned in_iowait:1; @@ -2284,8 +2294,6 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); -/* Allocate a new mm structure and copy contents from tsk->mm */ -extern struct mm_struct *dup_mm(struct task_struct *tsk); extern int copy_thread(unsigned long, unsigned long, unsigned long, struct task_struct *); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 31e0193..b13cf43 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -99,4 +99,8 @@ extern int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int sysctl_numa_balancing(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + #endif /* _SCHED_SYSCTL_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 1e2f4fe..a060142 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -513,7 +513,9 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * * Both the root cache and the child caches will have it. For the root cache, * this will hold a dynamically allocated array large enough to hold - * information about the currently limited memcgs in the system. + * information about the currently limited memcgs in the system. To allow the + * array to be accessed without taking any locks, on relocation we free the old + * version only after a grace period. * * Child caches will hold extra metadata needed for its operation. Fields are: * @@ -528,7 +530,10 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) struct memcg_cache_params { bool is_root_cache; union { - struct kmem_cache *memcg_caches[0]; + struct { + struct rcu_head rcu_head; + struct kmem_cache *memcg_caches[0]; + }; struct { struct mem_cgroup *memcg; struct list_head list; diff --git a/include/linux/w1-gpio.h b/include/linux/w1-gpio.h index 065e3ae..d58594a 100644 --- a/include/linux/w1-gpio.h +++ b/include/linux/w1-gpio.h @@ -20,6 +20,7 @@ struct w1_gpio_platform_data { unsigned int is_open_drain:1; void (*enable_external_pullup)(int enable); unsigned int ext_pullup_enable_pin; + unsigned int pullup_duration; }; #endif /* _LINUX_W1_GPIO_H */ diff --git a/include/uapi/asm-generic/types.h b/include/uapi/asm-generic/types.h index bd39806..a387792 100644 --- a/include/uapi/asm-generic/types.h +++ b/include/uapi/asm-generic/types.h @@ -1,8 +1,7 @@ #ifndef _ASM_GENERIC_TYPES_H #define _ASM_GENERIC_TYPES_H /* - * int-ll64 is used practically everywhere now, - * so use it as a reasonable default. + * int-ll64 is used everywhere now. */ #include <asm-generic/int-ll64.h> diff --git a/include/uapi/linux/dn.h b/include/uapi/linux/dn.h index 5fbdd3d..4295c74 100644 --- a/include/uapi/linux/dn.h +++ b/include/uapi/linux/dn.h @@ -1,6 +1,7 @@ #ifndef _LINUX_DN_H #define _LINUX_DN_H +#include <linux/ioctl.h> #include <linux/types.h> #include <linux/if_ether.h> diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 788128e..35f5f4c 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -150,7 +150,7 @@ #define NFS4_SECINFO_STYLE4_CURRENT_FH 0 #define NFS4_SECINFO_STYLE4_PARENT 1 -#define NFS4_MAX_UINT64 (~(u64)0) +#define NFS4_MAX_UINT64 (~(__u64)0) /* An NFS4 sessions server must support at least NFS4_MAX_OPS operations. * If a compound requires more operations, adjust NFS4_MAX_OPS accordingly. diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e244ed4..853bc1c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -788,7 +788,7 @@ union perf_mem_data_src { #define PERF_MEM_TLB_SHIFT 26 #define PERF_MEM_S(a, s) \ - (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) + (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* * single taken branch record layout: diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h index 2d9a885..63a23a3 100644 --- a/include/uapi/linux/ppp-ioctl.h +++ b/include/uapi/linux/ppp-ioctl.h @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/compiler.h> +#include <linux/ppp_defs.h> /* * Bit definitions for flags argument to PPPIOCGFLAGS/PPPIOCSFLAGS. diff --git a/init/initramfs.c b/init/initramfs.c index a67ef9d..93b6139 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -583,7 +583,7 @@ static int __init populate_rootfs(void) { char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) - panic(err); /* Failed to decompress INTERNAL initramfs */ + panic("%s", err); /* Failed to decompress INTERNAL initramfs */ if (initrd_start) { #ifdef CONFIG_BLK_DEV_RAM int fd; diff --git a/init/main.c b/init/main.c index f865261..f333385 100644 --- a/init/main.c +++ b/init/main.c @@ -99,10 +99,6 @@ extern void radix_tree_init(void); static inline void mark_rodata_ro(void) { } #endif -#ifdef CONFIG_TC -extern void tc_init(void); -#endif - /* * Debug helper: via this flag we know that we are in 'early bootup code' * where only the boot processor is running with IRQ disabled. This means @@ -282,7 +278,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused) unsigned int i; for (i = 0; envp_init[i]; i++) { if (i == MAX_INIT_ENVS) { - panic_later = "Too many boot env vars at `%s'"; + panic_later = "env"; panic_param = param; } if (!strncmp(param, envp_init[i], val - param)) @@ -294,7 +290,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused) unsigned int i; for (i = 0; argv_init[i]; i++) { if (i == MAX_INIT_ARGS) { - panic_later = "Too many boot init vars at `%s'"; + panic_later = "init"; panic_param = param; } } @@ -586,7 +582,8 @@ asmlinkage void __init start_kernel(void) */ console_init(); if (panic_later) - panic(panic_later, panic_param); + panic("Too many boot %s vars at `%s'", panic_later, + panic_param); lockdep_info(); diff --git a/kernel/fork.c b/kernel/fork.c index 2f11bbe..a17621c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -800,14 +800,11 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. */ -struct mm_struct *dup_mm(struct task_struct *tsk) +static struct mm_struct *dup_mm(struct task_struct *tsk) { struct mm_struct *mm, *oldmm = current->mm; int err; - if (!oldmm) - return NULL; - mm = allocate_mm(); if (!mm) goto fail_nomem; @@ -1229,7 +1226,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; - p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); @@ -1654,7 +1650,7 @@ SYSCALL_DEFINE0(fork) return do_fork(SIGCHLD, 0, 0, NULL, NULL); #else /* can not support in nommu mode */ - return(-EINVAL); + return -EINVAL; #endif } #endif @@ -1662,7 +1658,7 @@ SYSCALL_DEFINE0(fork) #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL); } #endif diff --git a/kernel/kexec.c b/kernel/kexec.c index 9c97016..ac73878 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -932,6 +932,7 @@ static int kimage_load_segment(struct kimage *image, */ struct kimage *kexec_image; struct kimage *kexec_crash_image; +int kexec_load_disabled; static DEFINE_MUTEX(kexec_mutex); @@ -942,7 +943,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, int result; /* We only trust the superuser with rebooting the system. */ - if (!capable(CAP_SYS_BOOT)) + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) return -EPERM; /* diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 9659d38..d945a94 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -126,7 +126,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj, { return sprintf(buf, "%lx %x\n", paddr_vmcoreinfo_note(), - (unsigned int)vmcoreinfo_max_size); + (unsigned int)sizeof(vmcoreinfo_note)); } KERNEL_ATTR_RO(vmcoreinfo); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f8b41bd..b1d255f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1595,10 +1595,13 @@ asmlinkage int vprintk_emit(int facility, int level, * either merge it with the current buffer and flush, or if * there was a race with interrupts (prefix == true) then just * flush it out and store this line separately. + * If the preceding printk was from a different task and missed + * a newline, flush and append the newline. */ - if (cont.len && cont.owner == current) { - if (!(lflags & LOG_PREFIX)) - stored = cont_add(facility, level, text, text_len); + if (cont.len) { + if (cont.owner == current && !(lflags & LOG_PREFIX)) + stored = cont_add(facility, level, text, + text_len); cont_flush(LOG_NEWLINE); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d6964e..7fea865 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1770,7 +1770,29 @@ void set_numabalancing_state(bool enabled) numabalancing_enabled = enabled; } #endif /* CONFIG_SCHED_DEBUG */ -#endif /* CONFIG_NUMA_BALANCING */ + +#ifdef CONFIG_PROC_SYSCTL +int sysctl_numa_balancing(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; + int state = numabalancing_enabled; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + if (write) + set_numabalancing_state(state); + return err; +} +#endif +#endif /* * fork()/clone()-time setup: diff --git a/kernel/signal.c b/kernel/signal.c index 940b30e..52f881d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2047,8 +2047,8 @@ static bool do_signal_stop(int signr) if (task_set_jobctl_pending(current, signr | gstop)) sig->group_stop_count++; - for (t = next_thread(current); t != current; - t = next_thread(t)) { + t = current; + while_each_thread(current, t) { /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, @@ -3125,8 +3125,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) rm_from_queue_full(&mask, &t->signal->shared_pending); do { rm_from_queue_full(&mask, &t->pending); - t = next_thread(t); - } while (t != current); + } while_each_thread(current, t); } } diff --git a/kernel/sys.c b/kernel/sys.c index c723113..c0a58be 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -895,8 +895,7 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf) * only important on a multi-user system anyway, to make sure one user * can't send a signal to a process owned by another. -TYT, 12/12/91 * - * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. - * LBT 04.03.94 + * !PF_FORKNOEXEC check to conform completely to POSIX. */ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) { @@ -932,7 +931,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) if (task_session(p) != task_session(group_leader)) goto out; err = -EACCES; - if (p->did_exec) + if (!(p->flags & PF_FORKNOEXEC)) goto out; } else { err = -ESRCH; @@ -1572,8 +1571,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) t = p; do { accumulate_thread_rusage(t, r); - t = next_thread(t); - } while (t != p); + } while_each_thread(p, t); break; default: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 332cefc..096db74 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -62,6 +62,7 @@ #include <linux/capability.h> #include <linux/binfmts.h> #include <linux/sched/sysctl.h> +#include <linux/kexec.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -389,6 +390,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "numa_balancing", + .data = NULL, /* filled in by handler */ + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_numa_balancing, + .extra1 = &zero, + .extra2 = &one, + }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ { @@ -605,6 +615,18 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_KEXEC + { + .procname = "kexec_load_disabled", + .data = &kexec_load_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, +#endif #ifdef CONFIG_MODULES { .procname = "modprobe", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 900b63c..e0e2eeb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1595,6 +1595,33 @@ config DMA_API_DEBUG If unsure, say N. +config TEST_MODULE + tristate "Test module loading with 'hello world' module" + default n + depends on m + help + This builds the "test_module" module that emits "Hello, world" + on printk when loaded. It is designed to be used for basic + evaluation of the module loading subsystem (for example when + validating module verification). It lacks any extra dependencies, + and will not normally be loaded by the system unless explicitly + requested by name. + + If unsure, say N. + +config TEST_USER_COPY + tristate "Test user/kernel boundary protections" + default n + depends on m + help + This builds the "test_user_copy" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. If it fails to load, + a regression has been detected in the user/kernel memory boundary + protections. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index a459c31..98ec3b8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -31,6 +31,8 @@ obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o +obj-$(CONFIG_TEST_MODULE) += test_module.o +obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/cmdline.c b/lib/cmdline.c index eb67911..d4932f7 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -49,13 +49,13 @@ static int get_range(char **str, int *pint) * 3 - hyphen found to denote a range */ -int get_option (char **str, int *pint) +int get_option(char **str, int *pint) { char *cur = *str; if (!cur || !(*cur)) return 0; - *pint = simple_strtol (cur, str, 0); + *pint = simple_strtol(cur, str, 0); if (cur == *str) return 0; if (**str == ',') { @@ -67,6 +67,7 @@ int get_option (char **str, int *pint) return 1; } +EXPORT_SYMBOL(get_option); /** * get_options - Parse a string into a list of integers @@ -84,13 +85,13 @@ int get_option (char **str, int *pint) * the parse to end (typically a null terminator, if @str is * completely parseable). */ - + char *get_options(const char *str, int nints, int *ints) { int res, i = 1; while (i < nints) { - res = get_option ((char **)&str, ints + i); + res = get_option((char **)&str, ints + i); if (res == 0) break; if (res == 3) { @@ -112,6 +113,7 @@ char *get_options(const char *str, int nints, int *ints) ints[0] = i - 1; return (char *)str; } +EXPORT_SYMBOL(get_options); /** * memparse - parse a string with mem suffixes into a number @@ -152,8 +154,4 @@ unsigned long long memparse(const char *ptr, char **retptr) return ret; } - - EXPORT_SYMBOL(memparse); -EXPORT_SYMBOL(get_option); -EXPORT_SYMBOL(get_options); diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 3e67cfad1..7d1e83c 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -141,6 +141,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, goto exit_2; } + ret = -1; if (flush && flush(outp, dest_len) != dest_len) goto exit_2; if (output) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c37aeac..600ac57 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -8,6 +8,7 @@ * By Greg Banks <gnb@melbourne.sgi.com> * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. * Copyright (C) 2011 Bart Van Assche. All Rights Reserved. + * Copyright (C) 2013 Du, Changbin <changbin.du@gmail.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -24,6 +25,7 @@ #include <linux/sysctl.h> #include <linux/ctype.h> #include <linux/string.h> +#include <linux/parser.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/dynamic_debug.h> @@ -147,7 +149,8 @@ static int ddebug_change(const struct ddebug_query *query, list_for_each_entry(dt, &ddebug_tables, link) { /* match against the module name */ - if (query->module && strcmp(query->module, dt->mod_name)) + if (query->module && + !match_wildcard(query->module, dt->mod_name)) continue; for (i = 0; i < dt->num_ddebugs; i++) { @@ -155,14 +158,16 @@ static int ddebug_change(const struct ddebug_query *query, /* match against the source filename */ if (query->filename && - strcmp(query->filename, dp->filename) && - strcmp(query->filename, kbasename(dp->filename)) && - strcmp(query->filename, trim_prefix(dp->filename))) + !match_wildcard(query->filename, dp->filename) && + !match_wildcard(query->filename, + kbasename(dp->filename)) && + !match_wildcard(query->filename, + trim_prefix(dp->filename))) continue; /* match against the function */ if (query->function && - strcmp(query->function, dp->function)) + !match_wildcard(query->function, dp->function)) continue; /* match against the format */ diff --git a/lib/kstrtox.c b/lib/kstrtox.c index f78ae0c..ec8da78 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -92,7 +92,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) rv = _parse_integer(s, base, &_res); if (rv & KSTRTOX_OVERFLOW) return -ERANGE; - rv &= ~KSTRTOX_OVERFLOW; if (rv == 0) return -EINVAL; s += rv; diff --git a/lib/parser.c b/lib/parser.c index 807b2aa..b6d1163 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -113,6 +113,7 @@ int match_token(char *s, const match_table_t table, substring_t args[]) return p->token; } +EXPORT_SYMBOL(match_token); /** * match_number: scan a number in the given base from a substring_t @@ -163,6 +164,7 @@ int match_int(substring_t *s, int *result) { return match_number(s, result, 0); } +EXPORT_SYMBOL(match_int); /** * match_octal: - scan an octal representation of an integer from a substring_t @@ -177,6 +179,7 @@ int match_octal(substring_t *s, int *result) { return match_number(s, result, 8); } +EXPORT_SYMBOL(match_octal); /** * match_hex: - scan a hex representation of an integer from a substring_t @@ -191,6 +194,58 @@ int match_hex(substring_t *s, int *result) { return match_number(s, result, 16); } +EXPORT_SYMBOL(match_hex); + +/** + * match_wildcard: - parse if a string matches given wildcard pattern + * @pattern: wildcard pattern + * @str: the string to be parsed + * + * Description: Parse the string @str to check if matches wildcard + * pattern @pattern. The pattern may contain two type wildcardes: + * '*' - matches zero or more characters + * '?' - matches one character + * If it's matched, return true, else return false. + */ +bool match_wildcard(const char *pattern, const char *str) +{ + const char *s = str; + const char *p = pattern; + bool star = false; + + while (*s) { + switch (*p) { + case '?': + s++; + p++; + break; + case '*': + star = true; + str = s; + if (!*++p) + return true; + pattern = p; + break; + default: + if (*s == *p) { + s++; + p++; + } else { + if (!star) + return false; + str++; + s = str; + p = pattern; + } + break; + } + } + + if (*p == '*') + ++p; + return !*p; +} +EXPORT_SYMBOL(match_wildcard); /** * match_strlcpy: - Copy the characters from a substring_t to a sized buffer @@ -213,6 +268,7 @@ size_t match_strlcpy(char *dest, const substring_t *src, size_t size) } return ret; } +EXPORT_SYMBOL(match_strlcpy); /** * match_strdup: - allocate a new string with the contents of a substring_t @@ -230,10 +286,4 @@ char *match_strdup(const substring_t *s) match_strlcpy(p, s, sz); return p; } - -EXPORT_SYMBOL(match_token); -EXPORT_SYMBOL(match_int); -EXPORT_SYMBOL(match_octal); -EXPORT_SYMBOL(match_hex); -EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 31dd4cc..8b3c9dc 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -8,8 +8,8 @@ #define CHECK_LOOPS 100 struct test_node { - struct rb_node rb; u32 key; + struct rb_node rb; /* following fields used for testing augmented rbtree functionality */ u32 val; @@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb) return count; } +static void check_postorder_foreach(int nr_nodes) +{ + struct test_node *cur, *n; + int count = 0; + rbtree_postorder_for_each_entry_safe(cur, n, &root, rb) + count++; + + WARN_ON_ONCE(count != nr_nodes); +} + static void check_postorder(int nr_nodes) { struct rb_node *rb; @@ -148,6 +158,7 @@ static void check(int nr_nodes) WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); check_postorder(nr_nodes); + check_postorder_foreach(nr_nodes); } static void check_augmented(int nr_nodes) diff --git a/lib/test_module.c b/lib/test_module.c new file mode 100644 index 0000000..319b66f --- /dev/null +++ b/lib/test_module.c @@ -0,0 +1,33 @@ +/* + * This module emits "Hello, world" on printk when loaded. + * + * It is designed to be used for basic evaluation of the module loading + * subsystem (for example when validating module signing/verification). It + * lacks any extra dependencies, and will not normally be loaded by the + * system unless explicitly requested by name. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> + +static int __init test_module_init(void) +{ + pr_warn("Hello, world\n"); + + return 0; +} + +module_init(test_module_init); + +static void __exit test_module_exit(void) +{ + pr_warn("Goodbye\n"); +} + +module_exit(test_module_exit); + +MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c new file mode 100644 index 0000000..0ecef3e --- /dev/null +++ b/lib/test_user_copy.c @@ -0,0 +1,110 @@ +/* + * Kernel module for testing copy_to/from_user infrastructure. + * + * Copyright 2013 Google Inc. All Rights Reserved + * + * Authors: + * Kees Cook <keescook@chromium.org> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#define test(condition, msg) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("%s\n", msg); \ + cond; \ +}) + +static int __init test_user_copy_init(void) +{ + int ret = 0; + char *kmem; + char __user *usermem; + char *bad_usermem; + unsigned long user_addr; + unsigned long value = 0x5A; + + kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); + if (!kmem) + return -ENOMEM; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= (unsigned long)(TASK_SIZE)) { + pr_warn("Failed to allocate user memory\n"); + kfree(kmem); + return -ENOMEM; + } + + usermem = (char __user *)user_addr; + bad_usermem = (char *)user_addr; + + /* Legitimate usage: none of these should fail. */ + ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE), + "legitimate copy_from_user failed"); + ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE), + "legitimate copy_to_user failed"); + ret |= test(get_user(value, (unsigned long __user *)usermem), + "legitimate get_user failed"); + ret |= test(put_user(value, (unsigned long __user *)usermem), + "legitimate put_user failed"); + + /* Invalid usage: none of these should succeed. */ + ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE), + PAGE_SIZE), + "illegal all-kernel copy_from_user passed"); + ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem, + PAGE_SIZE), + "illegal reversed copy_from_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE, + PAGE_SIZE), + "illegal all-kernel copy_to_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, bad_usermem, + PAGE_SIZE), + "illegal reversed copy_to_user passed"); + ret |= test(!get_user(value, (unsigned long __user *)kmem), + "illegal get_user passed"); + ret |= test(!put_user(value, (unsigned long __user *)kmem), + "illegal put_user passed"); + + vm_munmap(user_addr, PAGE_SIZE * 2); + kfree(kmem); + + if (ret == 0) { + pr_info("tests passed.\n"); + return 0; + } + + return -EINVAL; +} + +module_init(test_user_copy_init); + +static void __exit test_user_copy_exit(void) +{ + pr_info("unloaded.\n"); +} + +module_exit(test_user_copy_exit); + +MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 10909c5..185b6d3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1155,6 +1155,30 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr, return number(buf, end, *(const netdev_features_t *)addr, spec); } +static noinline_for_stack +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) +{ + unsigned long long num; + + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + + switch (fmt[1]) { + case 'd': + num = *(const dma_addr_t *)addr; + spec.field_width = sizeof(dma_addr_t) * 2 + 2; + break; + case 'p': + default: + num = *(const phys_addr_t *)addr; + spec.field_width = sizeof(phys_addr_t) * 2 + 2; + break; + } + + return number(buf, end, num, spec); +} + int kptr_restrict __read_mostly; /* @@ -1218,7 +1242,8 @@ int kptr_restrict __read_mostly; * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. - * - 'a' For a phys_addr_t type and its derivative types (passed by reference) + * - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives + * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file * @@ -1353,11 +1378,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } break; case 'a': - spec.flags |= SPECIAL | SMALL | ZEROPAD; - spec.field_width = sizeof(phys_addr_t) * 2 + 2; - spec.base = 16; - return number(buf, end, - (unsigned long long) *((phys_addr_t *)ptr), spec); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 'D': diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 07dbc8e..6e45a50 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -267,7 +267,7 @@ void balloon_page_putback(struct page *page) put_page(page); } else { WARN_ON(1); - dump_page(page); + dump_page(page, "not movable balloon page"); } unlock_page(page); } @@ -287,7 +287,7 @@ int balloon_page_migrate(struct page *newpage, BUG_ON(!trylock_page(newpage)); if (WARN_ON(!__is_movable_balloon_page(page))) { - dump_page(page); + dump_page(page, "not movable balloon page"); unlock_page(newpage); return rc; } diff --git a/mm/cleancache.c b/mm/cleancache.c index 5875f48..d0eac43 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -237,7 +237,7 @@ int __cleancache_get_page(struct page *page) goto out; } - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; if (fake_pool_id < 0) goto out; @@ -279,7 +279,7 @@ void __cleancache_put_page(struct page *page) return; } - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; if (fake_pool_id < 0) return; @@ -318,7 +318,7 @@ void __cleancache_invalidate_page(struct address_space *mapping, if (pool_id < 0) return; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (cleancache_get_key(mapping->host, &key) >= 0) { cleancache_ops->invalidate_page(pool_id, key, page->index); diff --git a/mm/compaction.c b/mm/compaction.c index 3a91a2e..b48c525 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -523,7 +523,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, if (!isolation_suitable(cc, page)) goto next_pageblock; - /* Skip if free */ + /* + * Skip if free. page_order cannot be used without zone->lock + * as nothing prevents parallel allocations or buddy merging. + */ if (PageBuddy(page)) continue; @@ -601,7 +604,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, if (__isolate_lru_page(page, mode) != 0) continue; - VM_BUG_ON(PageTransCompound(page)); + VM_BUG_ON_PAGE(PageTransCompound(page), page); /* Successfully isolated */ cc->finished_update_migrate = true; diff --git a/mm/filemap.c b/mm/filemap.c index b7749a9..7a7f3e0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -409,9 +409,9 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) { int error; - VM_BUG_ON(!PageLocked(old)); - VM_BUG_ON(!PageLocked(new)); - VM_BUG_ON(new->mapping); + VM_BUG_ON_PAGE(!PageLocked(old), old); + VM_BUG_ON_PAGE(!PageLocked(new), new); + VM_BUG_ON_PAGE(new->mapping, new); error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); if (!error) { @@ -461,8 +461,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, { int error; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageSwapBacked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageSwapBacked(page), page); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); @@ -607,7 +607,7 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); */ void unlock_page(struct page *page) { - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); clear_bit_unlock(PG_locked, &page->flags); smp_mb__after_clear_bit(); wake_up_page(page, PG_locked); @@ -760,7 +760,7 @@ repeat: page_cache_release(page); goto repeat; } - VM_BUG_ON(page->index != offset); + VM_BUG_ON_PAGE(page->index != offset, page); } return page; } @@ -1656,7 +1656,7 @@ retry_find: put_page(page); goto retry_find; } - VM_BUG_ON(page->index != offset); + VM_BUG_ON_PAGE(page->index != offset, page); /* * We have a locked page in the page cache, now we need to check diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 95d1acb..65c98eb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -130,8 +130,14 @@ static int set_recommended_min_free_kbytes(void) (unsigned long) nr_free_buffer_pages() / 20); recommended_min <<= (PAGE_SHIFT-10); - if (recommended_min > min_free_kbytes) + if (recommended_min > min_free_kbytes) { + if (user_min_free_kbytes >= 0) + pr_info("raising min_free_kbytes from %d to %lu " + "to help transparent hugepage allocations\n", + min_free_kbytes, recommended_min); + min_free_kbytes = recommended_min; + } setup_per_zone_wmarks(); return 0; } @@ -655,7 +661,7 @@ out: hugepage_exit_sysfs(hugepage_kobj); return err; } -module_init(hugepage_init) +subsys_initcall(hugepage_init); static int __init setup_transparent_hugepage(char *str) { @@ -712,7 +718,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pgtable_t pgtable; spinlock_t *ptl; - VM_BUG_ON(!PageCompound(page)); + VM_BUG_ON_PAGE(!PageCompound(page), page); pgtable = pte_alloc_one(mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; @@ -893,7 +899,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out; } src_page = pmd_page(pmd); - VM_BUG_ON(!PageHead(src_page)); + VM_BUG_ON_PAGE(!PageHead(src_page), src_page); get_page(src_page); page_dup_rmap(src_page); add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); @@ -1067,7 +1073,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_free_pages; - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); pmdp_clear_flush(vma, haddr, pmd); /* leave pmd empty until pte is filled */ @@ -1133,7 +1139,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_unlock; page = pmd_page(orig_pmd); - VM_BUG_ON(!PageCompound(page) || !PageHead(page)); + VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page); if (page_mapcount(page) == 1) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); @@ -1211,7 +1217,7 @@ alloc: add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); put_huge_zero_page(); } else { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); page_remove_rmap(page); put_page(page); } @@ -1249,7 +1255,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, goto out; page = pmd_page(*pmd); - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); if (flags & FOLL_TOUCH) { pmd_t _pmd; /* @@ -1274,7 +1280,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, } } page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; - VM_BUG_ON(!PageCompound(page)); + VM_BUG_ON_PAGE(!PageCompound(page), page); if (flags & FOLL_GET) get_page_foll(page); @@ -1432,9 +1438,9 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, } else { page = pmd_page(orig_pmd); page_remove_rmap(page); - VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(ptl); tlb_remove_page(tlb, page); @@ -2176,9 +2182,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, if (unlikely(!page)) goto out; - VM_BUG_ON(PageCompound(page)); - BUG_ON(!PageAnon(page)); - VM_BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); + VM_BUG_ON_PAGE(!PageAnon(page), page); + VM_BUG_ON_PAGE(!PageSwapBacked(page), page); /* cannot use mapcount: can't collapse if there's a gup pin */ if (page_count(page) != 1) @@ -2201,8 +2207,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, } /* 0 stands for page_is_file_cache(page) == false */ inc_zone_page_state(page, NR_ISOLATED_ANON + 0); - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageLRU(page), page); /* If there is no mapped pte young don't collapse the page */ if (pte_young(pteval) || PageReferenced(page) || @@ -2232,7 +2238,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, } else { src_page = pte_page(pteval); copy_user_highpage(page, src_page, address, vma); - VM_BUG_ON(page_mapcount(src_page) != 1); + VM_BUG_ON_PAGE(page_mapcount(src_page) != 1, src_page); release_pte_page(src_page); /* * ptl mostly unnecessary, but preempt has to @@ -2311,7 +2317,7 @@ static struct page struct vm_area_struct *vma, unsigned long address, int node) { - VM_BUG_ON(*hpage); + VM_BUG_ON_PAGE(*hpage, *hpage); /* * Allocate the page while the vma is still valid and under * the mmap_sem read mode so there is no memory allocation @@ -2580,7 +2586,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, */ node = page_to_nid(page); khugepaged_node_load[node]++; - VM_BUG_ON(PageCompound(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) goto out_unmap; /* cannot use mapcount: can't collapse if there's a gup pin */ @@ -2876,7 +2882,7 @@ again: return; } page = pmd_page(*pmd); - VM_BUG_ON(!page_count(page)); + VM_BUG_ON_PAGE(!page_count(page), page); get_page(page); spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 04306b9..c01cb9f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -584,7 +584,7 @@ static void update_and_free_page(struct hstate *h, struct page *page) 1 << PG_active | 1 << PG_reserved | 1 << PG_private | 1 << PG_writeback); } - VM_BUG_ON(hugetlb_cgroup_from_page(page)); + VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); set_compound_page_dtor(page, NULL); set_page_refcounted(page); arch_release_hugepage(page); @@ -1089,7 +1089,7 @@ retry: * no users -- drop the buddy allocator's reference. */ put_page_testzero(page); - VM_BUG_ON(page_count(page)); + VM_BUG_ON_PAGE(page_count(page), page); enqueue_huge_page(h, page); } free: @@ -3503,7 +3503,7 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage) bool isolate_huge_page(struct page *page, struct list_head *list) { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); if (!get_page_unless_zero(page)) return false; spin_lock(&hugetlb_lock); @@ -3514,7 +3514,7 @@ bool isolate_huge_page(struct page *page, struct list_head *list) void putback_active_hugepage(struct page *page) { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); spin_lock(&hugetlb_lock); list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist); spin_unlock(&hugetlb_lock); @@ -3523,7 +3523,7 @@ void putback_active_hugepage(struct page *page) bool is_hugepage_active(struct page *page) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); /* * This function can be called for a tail page because the caller, * scan_movable_pages, scans through a given pfn-range which typically diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index d747a84..cb00829 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -390,7 +390,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) if (hugetlb_cgroup_disabled()) return; - VM_BUG_ON(!PageHuge(oldhpage)); + VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage); spin_lock(&hugetlb_lock); h_cg = hugetlb_cgroup_from_page(oldhpage); set_hugetlb_cgroup(oldhpage, NULL); diff --git a/mm/internal.h b/mm/internal.h index a346ba1..612c14f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -27,8 +27,8 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - VM_BUG_ON(PageTail(page)); - VM_BUG_ON(atomic_read(&page->_count)); + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(atomic_read(&page->_count), page); set_page_count(page, 1); } @@ -46,7 +46,7 @@ static inline void __get_page_tail_foll(struct page *page, * speculative page access (like in * page_cache_get_speculative()) on tail pages. */ - VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); + VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page); if (get_page_head) atomic_inc(&page->first_page->_count); get_huge_page_tail(page); @@ -71,7 +71,7 @@ static inline void get_page_foll(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON(atomic_read(&page->_count) <= 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); atomic_inc(&page->_count); } } @@ -99,6 +99,7 @@ extern void prep_compound_page(struct page *page, unsigned long order); #ifdef CONFIG_MEMORY_FAILURE extern bool is_free_buddy_page(struct page *page); #endif +extern int user_min_free_kbytes; #if defined CONFIG_COMPACTION || defined CONFIG_CMA @@ -142,9 +143,11 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, #endif /* - * function for dealing with page's order in buddy system. - * zone->lock is already acquired when we use these. - * So, we don't need atomic page->flags operations here. + * This function returns the order of a free page in the buddy system. In + * general, page_zone(page)->lock must be held by the caller to prevent the + * page from being allocated in parallel and returning garbage as the order. + * If a caller does not hold page_zone(page)->lock, it must guarantee that the + * page cannot be allocated or merged in parallel. */ static inline unsigned long page_order(struct page *page) { @@ -173,7 +176,7 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) static inline int mlocked_vma_newpage(struct vm_area_struct *vma, struct page *page) { - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) return 0; @@ -1898,13 +1898,13 @@ int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) int ret = SWAP_AGAIN; int search_new_forks = 0; - VM_BUG_ON(!PageKsm(page)); + VM_BUG_ON_PAGE(!PageKsm(page), page); /* * Rely on the page lock to protect against concurrent modifications * to that page's node of the stable tree. */ - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); stable_node = page_stable_node(page); if (!stable_node) @@ -1958,13 +1958,13 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) { struct stable_node *stable_node; - VM_BUG_ON(!PageLocked(oldpage)); - VM_BUG_ON(!PageLocked(newpage)); - VM_BUG_ON(newpage->mapping != oldpage->mapping); + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage); stable_node = page_stable_node(newpage); if (stable_node) { - VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); + VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage); stable_node->kpfn = page_to_pfn(newpage); /* * newpage->mapping was set in advance; now we need smp_wmb() @@ -2345,4 +2345,4 @@ out_free: out: return err; } -module_init(ksm_init) +subsys_initcall(ksm_init); diff --git a/mm/memblock.c b/mm/memblock.c index 1c2ef2c..9c0aeef 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -266,31 +266,34 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } } +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( phys_addr_t *addr) { if (memblock.reserved.regions == memblock_reserved_init_regions) return 0; - /* - * Don't allow nobootmem allocator to free reserved memory regions - * array if - * - CONFIG_DEBUG_FS is enabled; - * - CONFIG_ARCH_DISCARD_MEMBLOCK is not enabled; - * - reserved memory regions array have been resized during boot. - * Otherwise debug_fs entry "sys/kernel/debug/memblock/reserved" - * will show garbage instead of state of memory reservations. - */ - if (IS_ENABLED(CONFIG_DEBUG_FS) && - !IS_ENABLED(CONFIG_ARCH_DISCARD_MEMBLOCK)) - return 0; - *addr = __pa(memblock.reserved.regions); return PAGE_ALIGN(sizeof(struct memblock_region) * memblock.reserved.max); } +phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( + phys_addr_t *addr) +{ + if (memblock.memory.regions == memblock_memory_init_regions) + return 0; + + *addr = __pa(memblock.memory.regions); + + return PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); +} + +#endif + /** * memblock_double_array - double the size of the memblock regions array * @type: memblock type of the regions array being doubled diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 67dd2a8..19d5d42 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -49,7 +49,6 @@ #include <linux/sort.h> #include <linux/fs.h> #include <linux/seq_file.h> -#include <linux/vmalloc.h> #include <linux/vmpressure.h> #include <linux/mm_inline.h> #include <linux/page_cgroup.h> @@ -150,7 +149,7 @@ struct mem_cgroup_reclaim_iter { * matches memcg->dead_count of the hierarchy root group. */ struct mem_cgroup *last_visited; - unsigned long last_dead_count; + int last_dead_count; /* scan generation, increased every round-trip */ unsigned int generation; @@ -381,23 +380,12 @@ struct mem_cgroup { /* WARNING: nodeinfo must be the last member here */ }; -static size_t memcg_size(void) -{ - return sizeof(struct mem_cgroup) + - nr_node_ids * sizeof(struct mem_cgroup_per_node *); -} - /* internal only representation about the status of kmem accounting. */ enum { - KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */ - KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */ + KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */ KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */ }; -/* We account when limit is on, but only after call sites are patched */ -#define KMEM_ACCOUNTED_MASK \ - ((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED)) - #ifdef CONFIG_MEMCG_KMEM static inline void memcg_kmem_set_active(struct mem_cgroup *memcg) { @@ -409,16 +397,6 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg) return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags); } -static void memcg_kmem_set_activated(struct mem_cgroup *memcg) -{ - set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags); -} - -static void memcg_kmem_clear_activated(struct mem_cgroup *memcg) -{ - clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags); -} - static void memcg_kmem_mark_dead(struct mem_cgroup *memcg) { /* @@ -1139,16 +1117,22 @@ skip_node: * skipped and we should continue the tree walk. * last_visited css is safe to use because it is * protected by css_get and the tree walk is rcu safe. + * + * We do not take a reference on the root of the tree walk + * because we might race with the root removal when it would + * be the only node in the iterated hierarchy and mem_cgroup_iter + * would end up in an endless loop because it expects that at + * least one valid node will be returned. Root cannot disappear + * because caller of the iterator should hold it already so + * skipping css reference should be safe. */ if (next_css) { - struct mem_cgroup *mem = mem_cgroup_from_css(next_css); + if ((next_css->flags & CSS_ONLINE) && + (next_css == &root->css || css_tryget(next_css))) + return mem_cgroup_from_css(next_css); - if (css_tryget(&mem->css)) - return mem; - else { - prev_css = next_css; - goto skip_node; - } + prev_css = next_css; + goto skip_node; } return NULL; @@ -1182,7 +1166,15 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, if (iter->last_dead_count == *sequence) { smp_rmb(); position = iter->last_visited; - if (position && !css_tryget(&position->css)) + + /* + * We cannot take a reference to root because we might race + * with root removal and returning NULL would end up in + * an endless loop on the iterator user level when root + * would be returned all the time. + */ + if (position && position != root && + !css_tryget(&position->css)) position = NULL; } return position; @@ -1191,9 +1183,11 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, struct mem_cgroup *last_visited, struct mem_cgroup *new_position, + struct mem_cgroup *root, int sequence) { - if (last_visited) + /* root reference counting symmetric to mem_cgroup_iter_load */ + if (last_visited && last_visited != root) css_put(&last_visited->css); /* * We store the sequence count from the time @last_visited was @@ -1268,7 +1262,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, memcg = __mem_cgroup_iter_next(root, last_visited); if (reclaim) { - mem_cgroup_iter_update(iter, last_visited, memcg, seq); + mem_cgroup_iter_update(iter, last_visited, memcg, root, + seq); if (!memcg) iter->generation++; @@ -1865,13 +1860,18 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, break; }; points = oom_badness(task, memcg, NULL, totalpages); - if (points > chosen_points) { - if (chosen) - put_task_struct(chosen); - chosen = task; - chosen_points = points; - get_task_struct(chosen); - } + if (!points || points < chosen_points) + continue; + /* Prefer thread group leaders for display purposes */ + if (points == chosen_points && + thread_group_leader(chosen)) + continue; + + if (chosen) + put_task_struct(chosen); + chosen = task; + chosen_points = points; + get_task_struct(chosen); } css_task_iter_end(&it); } @@ -2904,7 +2904,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) unsigned short id; swp_entry_t ent; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); pc = lookup_page_cgroup(page); lock_page_cgroup(pc); @@ -2938,7 +2938,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, bool anon; lock_page_cgroup(pc); - VM_BUG_ON(PageCgroupUsed(pc)); + VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); /* * we don't need page_cgroup_lock about tail pages, becase they are not * accessed by any other context at this point. @@ -2973,7 +2973,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, if (lrucare) { if (was_on_lru) { lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); add_page_to_lru_list(page, lruvec, page_lru(page)); } @@ -2999,11 +2999,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, static DEFINE_MUTEX(set_limit_mutex); #ifdef CONFIG_MEMCG_KMEM +static DEFINE_MUTEX(activate_kmem_mutex); + static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg) { return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) && - (memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK) == - KMEM_ACCOUNTED_MASK; + memcg_kmem_is_active(memcg); } /* @@ -3102,16 +3103,6 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) css_put(&memcg->css); } -void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep) -{ - if (!memcg) - return; - - mutex_lock(&memcg->slab_caches_mutex); - list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches); - mutex_unlock(&memcg->slab_caches_mutex); -} - /* * helper for acessing a memcg's index. It will be used as an index in the * child cache array in kmem_cache, and also to derive its name. This function @@ -3122,43 +3113,6 @@ int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -/* - * This ends up being protected by the set_limit mutex, during normal - * operation, because that is its main call site. - * - * But when we create a new cache, we can call this as well if its parent - * is kmem-limited. That will have to hold set_limit_mutex as well. - */ -static int memcg_update_cache_sizes(struct mem_cgroup *memcg) -{ - int num, ret; - - num = ida_simple_get(&kmem_limited_groups, - 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); - if (num < 0) - return num; - /* - * After this point, kmem_accounted (that we test atomically in - * the beginning of this conditional), is no longer 0. This - * guarantees only one process will set the following boolean - * to true. We don't need test_and_set because we're protected - * by the set_limit_mutex anyway. - */ - memcg_kmem_set_activated(memcg); - - ret = memcg_update_all_caches(num+1); - if (ret) { - ida_simple_remove(&kmem_limited_groups, num); - memcg_kmem_clear_activated(memcg); - return ret; - } - - memcg->kmemcg_id = num; - INIT_LIST_HEAD(&memcg->memcg_slab_caches); - mutex_init(&memcg->slab_caches_mutex); - return 0; -} - static size_t memcg_caches_array_size(int num_groups) { ssize_t size; @@ -3195,18 +3149,17 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) if (num_groups > memcg_limited_groups_array_size) { int i; + struct memcg_cache_params *new_params; ssize_t size = memcg_caches_array_size(num_groups); size *= sizeof(void *); size += offsetof(struct memcg_cache_params, memcg_caches); - s->memcg_params = kzalloc(size, GFP_KERNEL); - if (!s->memcg_params) { - s->memcg_params = cur_params; + new_params = kzalloc(size, GFP_KERNEL); + if (!new_params) return -ENOMEM; - } - s->memcg_params->is_root_cache = true; + new_params->is_root_cache = true; /* * There is the chance it will be bigger than @@ -3220,7 +3173,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) for (i = 0; i < memcg_limited_groups_array_size; i++) { if (!cur_params->memcg_caches[i]) continue; - s->memcg_params->memcg_caches[i] = + new_params->memcg_caches[i] = cur_params->memcg_caches[i]; } @@ -3233,13 +3186,15 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) * bigger than the others. And all updates will reset this * anyway. */ - kfree(cur_params); + rcu_assign_pointer(s->memcg_params, new_params); + if (cur_params) + kfree_rcu(cur_params, rcu_head); } return 0; } -int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, - struct kmem_cache *root_cache) +int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, + struct kmem_cache *root_cache) { size_t size; @@ -3267,35 +3222,85 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, return 0; } -void memcg_release_cache(struct kmem_cache *s) +void memcg_free_cache_params(struct kmem_cache *s) +{ + kfree(s->memcg_params); +} + +void memcg_register_cache(struct kmem_cache *s) { struct kmem_cache *root; struct mem_cgroup *memcg; int id; - /* - * This happens, for instance, when a root cache goes away before we - * add any memcg. - */ - if (!s->memcg_params) + if (is_root_cache(s)) return; - if (s->memcg_params->is_root_cache) - goto out; + /* + * Holding the slab_mutex assures nobody will touch the memcg_caches + * array while we are modifying it. + */ + lockdep_assert_held(&slab_mutex); + root = s->memcg_params->root_cache; memcg = s->memcg_params->memcg; - id = memcg_cache_id(memcg); + id = memcg_cache_id(memcg); + + css_get(&memcg->css); + + + /* + * Since readers won't lock (see cache_from_memcg_idx()), we need a + * barrier here to ensure nobody will see the kmem_cache partially + * initialized. + */ + smp_wmb(); + + /* + * Initialize the pointer to this cache in its parent's memcg_params + * before adding it to the memcg_slab_caches list, otherwise we can + * fail to convert memcg_params_to_cache() while traversing the list. + */ + VM_BUG_ON(root->memcg_params->memcg_caches[id]); + root->memcg_params->memcg_caches[id] = s; + + mutex_lock(&memcg->slab_caches_mutex); + list_add(&s->memcg_params->list, &memcg->memcg_slab_caches); + mutex_unlock(&memcg->slab_caches_mutex); +} + +void memcg_unregister_cache(struct kmem_cache *s) +{ + struct kmem_cache *root; + struct mem_cgroup *memcg; + int id; + + if (is_root_cache(s)) + return; + + /* + * Holding the slab_mutex assures nobody will touch the memcg_caches + * array while we are modifying it. + */ + lockdep_assert_held(&slab_mutex); root = s->memcg_params->root_cache; - root->memcg_params->memcg_caches[id] = NULL; + memcg = s->memcg_params->memcg; + id = memcg_cache_id(memcg); mutex_lock(&memcg->slab_caches_mutex); list_del(&s->memcg_params->list); mutex_unlock(&memcg->slab_caches_mutex); + /* + * Clear the pointer to this cache in its parent's memcg_params only + * after removing it from the memcg_slab_caches list, otherwise we can + * fail to convert memcg_params_to_cache() while traversing the list. + */ + VM_BUG_ON(!root->memcg_params->memcg_caches[id]); + root->memcg_params->memcg_caches[id] = NULL; + css_put(&memcg->css); -out: - kfree(s->memcg_params); } /* @@ -3354,11 +3359,9 @@ static void kmem_cache_destroy_work_func(struct work_struct *w) * So if we aren't down to zero, we'll just schedule a worker and try * again */ - if (atomic_read(&cachep->memcg_params->nr_pages) != 0) { + if (atomic_read(&cachep->memcg_params->nr_pages) != 0) kmem_cache_shrink(cachep); - if (atomic_read(&cachep->memcg_params->nr_pages) == 0) - return; - } else + else kmem_cache_destroy(cachep); } @@ -3394,27 +3397,16 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) schedule_work(&cachep->memcg_params->destroy); } -/* - * This lock protects updaters, not readers. We want readers to be as fast as - * they can, and they will either see NULL or a valid cache value. Our model - * allow them to see NULL, in which case the root memcg will be selected. - * - * We need this lock because multiple allocations to the same cache from a non - * will span more than one worker. Only one of them can create the cache. - */ -static DEFINE_MUTEX(memcg_cache_mutex); - -/* - * Called with memcg_cache_mutex held - */ -static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, - struct kmem_cache *s) +static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, + struct kmem_cache *s) { struct kmem_cache *new; static char *tmp_name = NULL; + static DEFINE_MUTEX(mutex); /* protects tmp_name */ - lockdep_assert_held(&memcg_cache_mutex); + BUG_ON(!memcg_can_account_kmem(memcg)); + mutex_lock(&mutex); /* * kmem_cache_create_memcg duplicates the given name and * cgroup_name for this name requires RCU context. @@ -3437,47 +3429,13 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, if (new) new->allocflags |= __GFP_KMEMCG; + else + new = s; + mutex_unlock(&mutex); return new; } -static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, - struct kmem_cache *cachep) -{ - struct kmem_cache *new_cachep; - int idx; - - BUG_ON(!memcg_can_account_kmem(memcg)); - - idx = memcg_cache_id(memcg); - - mutex_lock(&memcg_cache_mutex); - new_cachep = cache_from_memcg_idx(cachep, idx); - if (new_cachep) { - css_put(&memcg->css); - goto out; - } - - new_cachep = kmem_cache_dup(memcg, cachep); - if (new_cachep == NULL) { - new_cachep = cachep; - css_put(&memcg->css); - goto out; - } - - atomic_set(&new_cachep->memcg_params->nr_pages , 0); - - cachep->memcg_params->memcg_caches[idx] = new_cachep; - /* - * the readers won't lock, make sure everybody sees the updated value, - * so they won't put stuff in the queue again for no reason - */ - wmb(); -out: - mutex_unlock(&memcg_cache_mutex); - return new_cachep; -} - void kmem_cache_destroy_memcg_children(struct kmem_cache *s) { struct kmem_cache *c; @@ -3495,9 +3453,10 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) * * Still, we don't want anyone else freeing memcg_caches under our * noses, which can happen if a new memcg comes to life. As usual, - * we'll take the set_limit_mutex to protect ourselves against this. + * we'll take the activate_kmem_mutex to protect ourselves against + * this. */ - mutex_lock(&set_limit_mutex); + mutex_lock(&activate_kmem_mutex); for_each_memcg_cache_index(i) { c = cache_from_memcg_idx(s, i); if (!c) @@ -3520,7 +3479,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) cancel_work_sync(&c->memcg_params->destroy); kmem_cache_destroy(c); } - mutex_unlock(&set_limit_mutex); + mutex_unlock(&activate_kmem_mutex); } struct create_work { @@ -3552,6 +3511,7 @@ static void memcg_create_cache_work_func(struct work_struct *w) cw = container_of(w, struct create_work, work); memcg_create_kmem_cache(cw->memcg, cw->cachep); + css_put(&cw->memcg->css); kfree(cw); } @@ -3611,7 +3571,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { struct mem_cgroup *memcg; - int idx; + struct kmem_cache *memcg_cachep; VM_BUG_ON(!cachep->memcg_params); VM_BUG_ON(!cachep->memcg_params->is_root_cache); @@ -3625,15 +3585,9 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep, if (!memcg_can_account_kmem(memcg)) goto out; - idx = memcg_cache_id(memcg); - - /* - * barrier to mare sure we're always seeing the up to date value. The - * code updating memcg_caches will issue a write barrier to match this. - */ - read_barrier_depends(); - if (likely(cache_from_memcg_idx(cachep, idx))) { - cachep = cache_from_memcg_idx(cachep, idx); + memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg)); + if (likely(memcg_cachep)) { + cachep = memcg_cachep; goto out; } @@ -3787,7 +3741,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order) if (!memcg) return; - VM_BUG_ON(mem_cgroup_is_root(memcg)); + VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page); memcg_uncharge_kmem(memcg, PAGE_SIZE << order); } #else @@ -3866,7 +3820,7 @@ static int mem_cgroup_move_account(struct page *page, bool anon = PageAnon(page); VM_BUG_ON(from == to); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); /* * The page is isolated from LRU. So, collapse function * will not handle this page. But page splitting can happen. @@ -3959,7 +3913,7 @@ static int mem_cgroup_move_parent(struct page *page, parent = root_mem_cgroup; if (nr_pages > 1) { - VM_BUG_ON(!PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); flags = compound_lock_irqsave(page); } @@ -3993,7 +3947,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, if (PageTransHuge(page)) { nr_pages <<= compound_order(page); - VM_BUG_ON(!PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); /* * Never OOM-kill a process for a huge page. The * fault handler will fall back to regular pages. @@ -4013,8 +3967,8 @@ int mem_cgroup_newpage_charge(struct page *page, { if (mem_cgroup_disabled()) return 0; - VM_BUG_ON(page_mapped(page)); - VM_BUG_ON(page->mapping && !PageAnon(page)); + VM_BUG_ON_PAGE(page_mapped(page), page); + VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); VM_BUG_ON(!mm); return mem_cgroup_charge_common(page, mm, gfp_mask, MEM_CGROUP_CHARGE_TYPE_ANON); @@ -4218,7 +4172,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, if (PageTransHuge(page)) { nr_pages <<= compound_order(page); - VM_BUG_ON(!PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); } /* * Check if our page_cgroup is valid @@ -4310,7 +4264,7 @@ void mem_cgroup_uncharge_page(struct page *page) /* early check. */ if (page_mapped(page)) return; - VM_BUG_ON(page->mapping && !PageAnon(page)); + VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); /* * If the page is in swap cache, uncharge should be deferred * to the swap path, which also properly accounts swap usage @@ -4330,8 +4284,8 @@ void mem_cgroup_uncharge_page(struct page *page) void mem_cgroup_uncharge_cache_page(struct page *page) { - VM_BUG_ON(page_mapped(page)); - VM_BUG_ON(page->mapping); + VM_BUG_ON_PAGE(page_mapped(page), page); + VM_BUG_ON_PAGE(page->mapping, page); __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); } @@ -5189,11 +5143,23 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, return val; } -static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) -{ - int ret = -EINVAL; #ifdef CONFIG_MEMCG_KMEM - struct mem_cgroup *memcg = mem_cgroup_from_css(css); +/* should be called with activate_kmem_mutex held */ +static int __memcg_activate_kmem(struct mem_cgroup *memcg, + unsigned long long limit) +{ + int err = 0; + int memcg_id; + + if (memcg_kmem_is_active(memcg)) + return 0; + + /* + * We are going to allocate memory for data shared by all memory + * cgroups so let's stop accounting here. + */ + memcg_stop_kmem_account(); + /* * For simplicity, we won't allow this to be disabled. It also can't * be changed if the cgroup has children already, or if tasks had @@ -5207,72 +5173,101 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) * of course permitted. */ mutex_lock(&memcg_create_mutex); - mutex_lock(&set_limit_mutex); - if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) { - if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) { - ret = -EBUSY; - goto out; - } - ret = res_counter_set_limit(&memcg->kmem, val); - VM_BUG_ON(ret); + if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg)) + err = -EBUSY; + mutex_unlock(&memcg_create_mutex); + if (err) + goto out; - ret = memcg_update_cache_sizes(memcg); - if (ret) { - res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX); - goto out; - } - static_key_slow_inc(&memcg_kmem_enabled_key); - /* - * setting the active bit after the inc will guarantee no one - * starts accounting before all call sites are patched - */ - memcg_kmem_set_active(memcg); - } else - ret = res_counter_set_limit(&memcg->kmem, val); + memcg_id = ida_simple_get(&kmem_limited_groups, + 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); + if (memcg_id < 0) { + err = memcg_id; + goto out; + } + + /* + * Make sure we have enough space for this cgroup in each root cache's + * memcg_params. + */ + err = memcg_update_all_caches(memcg_id + 1); + if (err) + goto out_rmid; + + memcg->kmemcg_id = memcg_id; + INIT_LIST_HEAD(&memcg->memcg_slab_caches); + mutex_init(&memcg->slab_caches_mutex); + + /* + * We couldn't have accounted to this cgroup, because it hasn't got the + * active bit set yet, so this should succeed. + */ + err = res_counter_set_limit(&memcg->kmem, limit); + VM_BUG_ON(err); + + static_key_slow_inc(&memcg_kmem_enabled_key); + /* + * Setting the active bit after enabling static branching will + * guarantee no one starts accounting before all call sites are + * patched. + */ + memcg_kmem_set_active(memcg); out: - mutex_unlock(&set_limit_mutex); - mutex_unlock(&memcg_create_mutex); -#endif + memcg_resume_kmem_account(); + return err; + +out_rmid: + ida_simple_remove(&kmem_limited_groups, memcg_id); + goto out; +} + +static int memcg_activate_kmem(struct mem_cgroup *memcg, + unsigned long long limit) +{ + int ret; + + mutex_lock(&activate_kmem_mutex); + ret = __memcg_activate_kmem(memcg, limit); + mutex_unlock(&activate_kmem_mutex); + return ret; +} + +static int memcg_update_kmem_limit(struct mem_cgroup *memcg, + unsigned long long val) +{ + int ret; + + if (!memcg_kmem_is_active(memcg)) + ret = memcg_activate_kmem(memcg, val); + else + ret = res_counter_set_limit(&memcg->kmem, val); return ret; } -#ifdef CONFIG_MEMCG_KMEM static int memcg_propagate_kmem(struct mem_cgroup *memcg) { int ret = 0; struct mem_cgroup *parent = parent_mem_cgroup(memcg); - if (!parent) - goto out; - memcg->kmem_account_flags = parent->kmem_account_flags; - /* - * When that happen, we need to disable the static branch only on those - * memcgs that enabled it. To achieve this, we would be forced to - * complicate the code by keeping track of which memcgs were the ones - * that actually enabled limits, and which ones got it from its - * parents. - * - * It is a lot simpler just to do static_key_slow_inc() on every child - * that is accounted. - */ - if (!memcg_kmem_is_active(memcg)) - goto out; + if (!parent) + return 0; + mutex_lock(&activate_kmem_mutex); /* - * __mem_cgroup_free() will issue static_key_slow_dec() because this - * memcg is active already. If the later initialization fails then the - * cgroup core triggers the cleanup so we do not have to do it here. + * If the parent cgroup is not kmem-active now, it cannot be activated + * after this point, because it has at least one child already. */ - static_key_slow_inc(&memcg_kmem_enabled_key); - - mutex_lock(&set_limit_mutex); - memcg_stop_kmem_account(); - ret = memcg_update_cache_sizes(memcg); - memcg_resume_kmem_account(); - mutex_unlock(&set_limit_mutex); -out: + if (memcg_kmem_is_active(parent)) + ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX); + mutex_unlock(&activate_kmem_mutex); return ret; } +#else +static int memcg_update_kmem_limit(struct mem_cgroup *memcg, + unsigned long long val) +{ + return -EINVAL; +} #endif /* CONFIG_MEMCG_KMEM */ /* @@ -5306,7 +5301,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, else if (type == _MEMSWAP) ret = mem_cgroup_resize_memsw_limit(memcg, val); else if (type == _KMEM) - ret = memcg_update_kmem_limit(css, val); + ret = memcg_update_kmem_limit(memcg, val); else return -EINVAL; break; @@ -6405,14 +6400,12 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *memcg; - size_t size = memcg_size(); + size_t size; - /* Can be very big if nr_node_ids is very big */ - if (size < PAGE_SIZE) - memcg = kzalloc(size, GFP_KERNEL); - else - memcg = vzalloc(size); + size = sizeof(struct mem_cgroup); + size += nr_node_ids * sizeof(struct mem_cgroup_per_node *); + memcg = kzalloc(size, GFP_KERNEL); if (!memcg) return NULL; @@ -6423,10 +6416,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) return memcg; out_free: - if (size < PAGE_SIZE) - kfree(memcg); - else - vfree(memcg); + kfree(memcg); return NULL; } @@ -6444,7 +6434,6 @@ out_free: static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; - size_t size = memcg_size(); mem_cgroup_remove_from_trees(memcg); @@ -6465,10 +6454,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) * the cgroup_lock. */ disarm_static_keys(memcg); - if (size < PAGE_SIZE) - kfree(memcg); - else - vfree(memcg); + kfree(memcg); } /* @@ -6549,7 +6535,6 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css)); - int error = 0; if (css->cgroup->id > MEM_CGROUP_ID_MAX) return -ENOSPC; @@ -6584,10 +6569,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) if (parent != root_mem_cgroup) mem_cgroup_subsys.broken_hierarchy = true; } - - error = memcg_init_kmem(memcg, &mem_cgroup_subsys); mutex_unlock(&memcg_create_mutex); - return error; + + return memcg_init_kmem(memcg, &mem_cgroup_subsys); } /* @@ -6896,7 +6880,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, enum mc_target_type ret = MC_TARGET_NONE; page = pmd_page(pmd); - VM_BUG_ON(!page || !PageHead(page)); + VM_BUG_ON_PAGE(!page || !PageHead(page), page); if (!move_anon()) return ret; pc = lookup_page_cgroup(page); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b25ed32..4f08a2d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -856,14 +856,14 @@ static int page_action(struct page_state *ps, struct page *p, * the pages and send SIGBUS to the processes if the data was dirty. */ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, - int trapno, int flags) + int trapno, int flags, struct page **hpagep) { enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; struct address_space *mapping; LIST_HEAD(tokill); int ret; int kill = 1, forcekill; - struct page *hpage = compound_head(p); + struct page *hpage = *hpagep; struct page *ppage; if (PageReserved(p) || PageSlab(p)) @@ -942,11 +942,14 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, * We pinned the head page for hwpoison handling, * now we split the thp and we are interested in * the hwpoisoned raw page, so move the refcount - * to it. + * to it. Similarly, page lock is shifted. */ if (hpage != p) { put_page(hpage); get_page(p); + lock_page(p); + unlock_page(hpage); + *hpagep = p; } /* THP is split, so ppage should be the real poisoned page. */ ppage = p; @@ -964,17 +967,11 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, if (kill) collect_procs(ppage, &tokill); - if (hpage != ppage) - lock_page(ppage); - ret = try_to_unmap(ppage, ttu); if (ret != SWAP_SUCCESS) printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", pfn, page_mapcount(ppage)); - if (hpage != ppage) - unlock_page(ppage); - /* * Now that the dirty bit has been propagated to the * struct page and all unmaps done we can decide if @@ -1193,8 +1190,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags) /* * Now take care of user space mappings. * Abort on fail: __delete_from_page_cache() assumes unmapped page. + * + * When the raw error page is thp tail page, hpage points to the raw + * page after thp split. */ - if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) { + if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage) + != SWAP_SUCCESS) { printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); res = -EBUSY; goto out; diff --git a/mm/memory.c b/mm/memory.c index 86487df..be6a0c0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -289,7 +289,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) return 0; batch = tlb->active; } - VM_BUG_ON(batch->nr > batch->max); + VM_BUG_ON_PAGE(batch->nr > batch->max, page); return batch->max - batch->nr; } @@ -671,7 +671,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, current->comm, (long long)pte_val(pte), (long long)pmd_val(*pmd)); if (page) - dump_page(page); + dump_page(page, "bad pte"); printk(KERN_ALERT "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); @@ -2702,7 +2702,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, goto unwritable_page; } } else - VM_BUG_ON(!PageLocked(old_page)); + VM_BUG_ON_PAGE(!PageLocked(old_page), old_page); /* * Since we dropped the lock we need to revalidate @@ -3358,7 +3358,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!(ret & VM_FAULT_LOCKED))) lock_page(vmf.page); else - VM_BUG_ON(!PageLocked(vmf.page)); + VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page); /* * Should we do an early C-O-W break? @@ -3395,7 +3395,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto unwritable_page; } } else - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); page_mkwrite = 1; } } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index cc2ab37..a650db2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1107,17 +1107,18 @@ int __ref add_memory(int nid, u64 start, u64 size) if (ret) return ret; - lock_memory_hotplug(); - res = register_memory_resource(start, size); ret = -EEXIST; if (!res) - goto out; + return ret; { /* Stupid hack to suppress address-never-null warning */ void *p = NODE_DATA(nid); new_pgdat = !p; } + + lock_memory_hotplug(); + new_node = !node_online(nid); if (new_node) { pgdat = hotadd_new_pgdat(nid, start); @@ -1309,7 +1310,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) #ifdef CONFIG_DEBUG_VM printk(KERN_ALERT "removing pfn %lx from LRU failed\n", pfn); - dump_page(page); + dump_page(page, "failed to remove from LRU"); #endif put_page(page); /* Because we don't have big zone->lock. we should diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0cd2c4d..463b7fb 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1199,10 +1199,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int * } if (PageHuge(page)) { - if (vma) - return alloc_huge_page_noerr(vma, address, 1); - else - return NULL; + BUG_ON(!vma); + return alloc_huge_page_noerr(vma, address, 1); } /* * if !vma, alloc_page_vma() will use task or system default policy @@ -2668,7 +2666,7 @@ static void __init check_numabalancing_enable(void) if (nr_node_ids > 1 && !numabalancing_override) { printk(KERN_INFO "Enabling automatic NUMA balancing. " - "Configure with numa_balancing= or sysctl"); + "Configure with numa_balancing= or the kernel.numa_balancing sysctl"); set_numabalancing_state(numabalancing_default); } } diff --git a/mm/migrate.c b/mm/migrate.c index a8025be..734704f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -499,7 +499,7 @@ void migrate_page_copy(struct page *newpage, struct page *page) if (PageUptodate(page)) SetPageUptodate(newpage); if (TestClearPageActive(page)) { - VM_BUG_ON(PageUnevictable(page)); + VM_BUG_ON_PAGE(PageUnevictable(page), page); SetPageActive(newpage); } else if (TestClearPageUnevictable(page)) SetPageUnevictable(newpage); @@ -871,7 +871,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * free the metadata, so the page can be freed. */ if (!page->mapping) { - VM_BUG_ON(PageAnon(page)); + VM_BUG_ON_PAGE(PageAnon(page), page); if (page_has_private(page)) { try_to_free_buffers(page); goto uncharge; @@ -1618,7 +1618,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) { int page_lru; - VM_BUG_ON(compound_order(page) && !PageTransHuge(page)); + VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page); /* Avoid migrating to a node that is nearly full */ if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) @@ -1753,8 +1753,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, if (!new_page) goto out_fail; - page_cpupid_xchg_last(new_page, page_cpupid_last(page)); - isolated = numamigrate_isolate_page(pgdat, page); if (!isolated) { put_page(new_page); diff --git a/mm/mincore.c b/mm/mincore.c index da2be56..1016233 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -225,13 +225,6 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); - if (is_vm_hugetlb_page(vma)) { - mincore_hugetlb_page_range(vma, addr, end, vec); - return (end - addr) >> PAGE_SHIFT; - } - - end = pmd_addr_end(addr, end); - if (is_vm_hugetlb_page(vma)) mincore_hugetlb_page_range(vma, addr, end, vec); else @@ -91,6 +91,26 @@ void mlock_vma_page(struct page *page) } /* + * Isolate a page from LRU with optional get_page() pin. + * Assumes lru_lock already held and page already pinned. + */ +static bool __munlock_isolate_lru_page(struct page *page, bool getpage) +{ + if (PageLRU(page)) { + struct lruvec *lruvec; + + lruvec = mem_cgroup_page_lruvec(page, page_zone(page)); + if (getpage) + get_page(page); + ClearPageLRU(page); + del_page_from_lru_list(page, lruvec, page_lru(page)); + return true; + } + + return false; +} + +/* * Finish munlock after successful page isolation * * Page must be locked. This is a wrapper for try_to_munlock() @@ -126,9 +146,9 @@ static void __munlock_isolated_page(struct page *page) static void __munlock_isolation_failed(struct page *page) { if (PageUnevictable(page)) - count_vm_event(UNEVICTABLE_PGSTRANDED); + __count_vm_event(UNEVICTABLE_PGSTRANDED); else - count_vm_event(UNEVICTABLE_PGMUNLOCKED); + __count_vm_event(UNEVICTABLE_PGMUNLOCKED); } /** @@ -152,28 +172,34 @@ static void __munlock_isolation_failed(struct page *page) unsigned int munlock_vma_page(struct page *page) { unsigned int nr_pages; + struct zone *zone = page_zone(page); BUG_ON(!PageLocked(page)); - if (TestClearPageMlocked(page)) { - nr_pages = hpage_nr_pages(page); - mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); - if (!isolate_lru_page(page)) - __munlock_isolated_page(page); - else - __munlock_isolation_failed(page); - } else { - nr_pages = hpage_nr_pages(page); - } - /* - * Regardless of the original PageMlocked flag, we determine nr_pages - * after touching the flag. This leaves a possible race with a THP page - * split, such that a whole THP page was munlocked, but nr_pages == 1. - * Returning a smaller mask due to that is OK, the worst that can - * happen is subsequent useless scanning of the former tail pages. - * The NR_MLOCK accounting can however become broken. + * Serialize with any parallel __split_huge_page_refcount() which + * might otherwise copy PageMlocked to part of the tail pages before + * we clear it in the head page. It also stabilizes hpage_nr_pages(). */ + spin_lock_irq(&zone->lru_lock); + + nr_pages = hpage_nr_pages(page); + if (!TestClearPageMlocked(page)) + goto unlock_out; + + __mod_zone_page_state(zone, NR_MLOCK, -nr_pages); + + if (__munlock_isolate_lru_page(page, true)) { + spin_unlock_irq(&zone->lru_lock); + __munlock_isolated_page(page); + goto out; + } + __munlock_isolation_failed(page); + +unlock_out: + spin_unlock_irq(&zone->lru_lock); + +out: return nr_pages - 1; } @@ -253,8 +279,8 @@ static int __mlock_posix_error_return(long retval) static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec, int *pgrescued) { - VM_BUG_ON(PageLRU(page)); - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (page_mapcount(page) <= 1 && page_evictable(page)) { pagevec_add(pvec, page); @@ -310,34 +336,24 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) struct page *page = pvec->pages[i]; if (TestClearPageMlocked(page)) { - struct lruvec *lruvec; - int lru; - - if (PageLRU(page)) { - lruvec = mem_cgroup_page_lruvec(page, zone); - lru = page_lru(page); - /* - * We already have pin from follow_page_mask() - * so we can spare the get_page() here. - */ - ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, lru); - } else { - __munlock_isolation_failed(page); - goto skip_munlock; - } - - } else { -skip_munlock: /* - * We won't be munlocking this page in the next phase - * but we still need to release the follow_page_mask() - * pin. We cannot do it under lru_lock however. If it's - * the last pin, __page_cache_release would deadlock. + * We already have pin from follow_page_mask() + * so we can spare the get_page() here. */ - pagevec_add(&pvec_putback, pvec->pages[i]); - pvec->pages[i] = NULL; + if (__munlock_isolate_lru_page(page, false)) + continue; + else + __munlock_isolation_failed(page); } + + /* + * We won't be munlocking this page in the next phase + * but we still need to release the follow_page_mask() + * pin. We cannot do it under lru_lock however. If it's + * the last pin, __page_cache_release() would deadlock. + */ + pagevec_add(&pvec_putback, pvec->pages[i]); + pvec->pages[i] = NULL; } delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); diff --git a/mm/mm_init.c b/mm/mm_init.c index 68562e9..857a643 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -202,5 +202,4 @@ static int __init mm_sysfs_init(void) return 0; } - -__initcall(mm_sysfs_init); +pure_initcall(mm_sysfs_init); @@ -894,7 +894,15 @@ again: remove_next = 1 + (end > next->vm_end); static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { - if (vma->vm_flags ^ vm_flags) + /* + * VM_SOFTDIRTY should not prevent from VMA merging, if we + * match the flags but dirty bit -- the caller should mark + * merged VMA as dirty. If dirty bit won't be excluded from + * comparison, we increase pressue on the memory system forcing + * the kernel to generate new VMAs when old one could be + * extended instead. + */ + if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY) return 0; if (vma->vm_file != file) return 0; @@ -1083,7 +1091,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct * return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && - !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) && + !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } @@ -3142,7 +3150,7 @@ static int init_user_reserve(void) sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); return 0; } -module_init(init_user_reserve) +subsys_initcall(init_user_reserve); /* * Initialise sysctl_admin_reserve_kbytes. @@ -3163,7 +3171,7 @@ static int init_admin_reserve(void) sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); return 0; } -module_init(init_admin_reserve) +subsys_initcall(init_admin_reserve); /* * Reinititalise user and admin reserves if memory is added or removed. @@ -3233,4 +3241,4 @@ static int __meminit init_reserve_notifier(void) return 0; } -module_init(init_reserve_notifier) +subsys_initcall(init_reserve_notifier); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 93e6089..41cefdf 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -329,5 +329,4 @@ static int __init mmu_notifier_init(void) { return init_srcu_struct(&srcu); } - -module_init(mmu_notifier_init); +subsys_initcall(mmu_notifier_init); diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 19121ce..f73f298 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -45,7 +45,9 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, if (!addr) return NULL; - memblock_reserve(addr, size); + if (memblock_reserve(addr, size)) + return NULL; + ptr = phys_to_virt(addr); memset(ptr, 0, size); /* @@ -114,16 +116,27 @@ static unsigned long __init __free_memory_core(phys_addr_t start, static unsigned long __init free_low_memory_core_early(void) { unsigned long count = 0; - phys_addr_t start, end, size; + phys_addr_t start, end; u64 i; for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) count += __free_memory_core(start, end); - /* free range that is used for reserved array if we allocate it */ - size = get_allocated_memblock_reserved_regions_info(&start); - if (size) - count += __free_memory_core(start, start + size); +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + { + phys_addr_t size; + + /* Free memblock.reserved array if it was allocated */ + size = get_allocated_memblock_reserved_regions_info(&start); + if (size) + count += __free_memory_core(start, start + size); + + /* Free memblock.memory array if it was allocated */ + size = get_allocated_memblock_memory_regions_info(&start); + if (size) + count += __free_memory_core(start, start + size); + } +#endif return count; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 054ff47..37b1b19 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -327,10 +327,14 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, break; }; points = oom_badness(p, NULL, nodemask, totalpages); - if (points > chosen_points) { - chosen = p; - chosen_points = points; - } + if (!points || points < chosen_points) + continue; + /* Prefer thread group leaders for display purposes */ + if (points == chosen_points && thread_group_leader(chosen)) + continue; + + chosen = p; + chosen_points = points; } if (chosen) get_task_struct(chosen); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 533e214..e3758a0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -205,7 +205,7 @@ static char * const zone_names[MAX_NR_ZONES] = { }; int min_free_kbytes = 1024; -int user_min_free_kbytes; +int user_min_free_kbytes = -1; static unsigned long __meminitdata nr_kernel_pages; static unsigned long __meminitdata nr_all_pages; @@ -295,7 +295,7 @@ static inline int bad_range(struct zone *zone, struct page *page) } #endif -static void bad_page(struct page *page) +static void bad_page(struct page *page, char *reason, unsigned long bad_flags) { static unsigned long resume; static unsigned long nr_shown; @@ -329,7 +329,7 @@ static void bad_page(struct page *page) printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", current->comm, page_to_pfn(page)); - dump_page(page); + dump_page_badflags(page, reason, bad_flags); print_modules(); dump_stack(); @@ -383,7 +383,7 @@ static int destroy_compound_page(struct page *page, unsigned long order) int bad = 0; if (unlikely(compound_order(page) != order)) { - bad_page(page); + bad_page(page, "wrong compound order", 0); bad++; } @@ -392,8 +392,11 @@ static int destroy_compound_page(struct page *page, unsigned long order) for (i = 1; i < nr_pages; i++) { struct page *p = page + i; - if (unlikely(!PageTail(p) || (p->first_page != page))) { - bad_page(page); + if (unlikely(!PageTail(p))) { + bad_page(page, "PageTail not set", 0); + bad++; + } else if (unlikely(p->first_page != page)) { + bad_page(page, "first_page not consistent", 0); bad++; } __ClearPageTail(p); @@ -506,12 +509,12 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, return 0; if (page_is_guard(buddy) && page_order(buddy) == order) { - VM_BUG_ON(page_count(buddy) != 0); + VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); return 1; } if (PageBuddy(buddy) && page_order(buddy) == order) { - VM_BUG_ON(page_count(buddy) != 0); + VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); return 1; } return 0; @@ -561,8 +564,8 @@ static inline void __free_one_page(struct page *page, page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); - VM_BUG_ON(page_idx & ((1 << order) - 1)); - VM_BUG_ON(bad_range(zone, page)); + VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); + VM_BUG_ON_PAGE(bad_range(zone, page), page); while (order < MAX_ORDER-1) { buddy_idx = __find_buddy_index(page_idx, order); @@ -618,12 +621,23 @@ out: static inline int free_pages_check(struct page *page) { - if (unlikely(page_mapcount(page) | - (page->mapping != NULL) | - (atomic_read(&page->_count) != 0) | - (page->flags & PAGE_FLAGS_CHECK_AT_FREE) | - (mem_cgroup_bad_page_check(page)))) { - bad_page(page); + char *bad_reason = NULL; + unsigned long bad_flags = 0; + + if (unlikely(page_mapcount(page))) + bad_reason = "nonzero mapcount"; + if (unlikely(page->mapping != NULL)) + bad_reason = "non-NULL mapping"; + if (unlikely(atomic_read(&page->_count) != 0)) + bad_reason = "nonzero _count"; + if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) { + bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set"; + bad_flags = PAGE_FLAGS_CHECK_AT_FREE; + } + if (unlikely(mem_cgroup_bad_page_check(page))) + bad_reason = "cgroup check failed"; + if (unlikely(bad_reason)) { + bad_page(page, bad_reason, bad_flags); return 1; } page_cpupid_reset_last(page); @@ -813,7 +827,7 @@ static inline void expand(struct zone *zone, struct page *page, area--; high--; size >>= 1; - VM_BUG_ON(bad_range(zone, &page[size])); + VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]); #ifdef CONFIG_DEBUG_PAGEALLOC if (high < debug_guardpage_minorder()) { @@ -843,12 +857,23 @@ static inline void expand(struct zone *zone, struct page *page, */ static inline int check_new_page(struct page *page) { - if (unlikely(page_mapcount(page) | - (page->mapping != NULL) | - (atomic_read(&page->_count) != 0) | - (page->flags & PAGE_FLAGS_CHECK_AT_PREP) | - (mem_cgroup_bad_page_check(page)))) { - bad_page(page); + char *bad_reason = NULL; + unsigned long bad_flags = 0; + + if (unlikely(page_mapcount(page))) + bad_reason = "nonzero mapcount"; + if (unlikely(page->mapping != NULL)) + bad_reason = "non-NULL mapping"; + if (unlikely(atomic_read(&page->_count) != 0)) + bad_reason = "nonzero _count"; + if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) { + bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; + bad_flags = PAGE_FLAGS_CHECK_AT_PREP; + } + if (unlikely(mem_cgroup_bad_page_check(page))) + bad_reason = "cgroup check failed"; + if (unlikely(bad_reason)) { + bad_page(page, bad_reason, bad_flags); return 1; } return 0; @@ -955,7 +980,7 @@ int move_freepages(struct zone *zone, for (page = start_page; page <= end_page;) { /* Make sure we are not inadvertently changing nodes */ - VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone)); + VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); if (!pfn_valid_within(page_to_pfn(page))) { page++; @@ -1404,8 +1429,8 @@ void split_page(struct page *page, unsigned int order) { int i; - VM_BUG_ON(PageCompound(page)); - VM_BUG_ON(!page_count(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); + VM_BUG_ON_PAGE(!page_count(page), page); #ifdef CONFIG_KMEMCHECK /* @@ -1552,7 +1577,7 @@ again: zone_statistics(preferred_zone, zone, gfp_flags); local_irq_restore(flags); - VM_BUG_ON(bad_range(zone, page)); + VM_BUG_ON_PAGE(bad_range(zone, page), page); if (prep_new_page(page, order, gfp_flags)) goto again; return page; @@ -5729,7 +5754,12 @@ module_init(init_per_zone_wmark_min) int min_free_kbytes_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, buffer, length, ppos); + int rc; + + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (rc) + return rc; + if (write) { user_min_free_kbytes = min_free_kbytes; setup_per_zone_wmarks(); @@ -5996,7 +6026,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); - VM_BUG_ON(!zone_spans_pfn(zone, pfn)); + VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page); for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) if (flags & value) @@ -6494,12 +6524,24 @@ static void dump_page_flags(unsigned long flags) printk(")\n"); } -void dump_page(struct page *page) +void dump_page_badflags(struct page *page, char *reason, unsigned long badflags) { printk(KERN_ALERT "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n", page, atomic_read(&page->_count), page_mapcount(page), page->mapping, page->index); dump_page_flags(page->flags); + if (reason) + pr_alert("page dumped because: %s\n", reason); + if (page->flags & badflags) { + pr_alert("bad because of flags:\n"); + dump_page_flags(page->flags & badflags); + } mem_cgroup_print_bad_page(page); } + +void dump_page(struct page *page, char *reason) +{ + dump_page_badflags(page, reason, 0); +} +EXPORT_SYMBOL_GPL(dump_page); diff --git a/mm/page_io.c b/mm/page_io.c index 8c79a47..7247be6 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -320,8 +320,8 @@ int swap_readpage(struct page *page) int ret = 0; struct swap_info_struct *sis = page_swap_info(page); - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageUptodate(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageUptodate(page), page); if (frontswap_load(page) == 0) { SetPageUptodate(page); unlock_page(page); @@ -848,9 +848,9 @@ out: static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) { if (vma->vm_flags & VM_SHARED) - return 0; + return false; - return 1; + return true; } int page_mkclean(struct page *page) @@ -894,9 +894,9 @@ void page_move_anon_rmap(struct page *page, { struct anon_vma *anon_vma = vma->anon_vma; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON(!anon_vma); - VM_BUG_ON(page->index != linear_page_index(vma, address)); + VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page); anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; @@ -995,7 +995,7 @@ void do_page_add_anon_rmap(struct page *page, if (unlikely(PageKsm(page))) return; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); /* address might be in next vma when migration races vma_adjust */ if (first) __page_set_anon_rmap(page, vma, address, exclusive); @@ -1481,7 +1481,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) .anon_lock = page_lock_anon_vma_read, }; - VM_BUG_ON(!PageHuge(page) && PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page); /* * During exec, a temporary VMA is setup and later moved. @@ -1533,7 +1533,7 @@ int try_to_munlock(struct page *page) }; - VM_BUG_ON(!PageLocked(page) || PageLRU(page)); + VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); ret = rmap_walk(page, &rwc); return ret; @@ -285,8 +285,8 @@ static int shmem_add_to_page_cache(struct page *page, { int error; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageSwapBacked(page), page); page_cache_get(page); page->mapping = mapping; @@ -491,7 +491,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, continue; if (!unfalloc || !PageUptodate(page)) { if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); + VM_BUG_ON_PAGE(PageWriteback(page), page); truncate_inode_page(mapping, page); } } @@ -568,7 +568,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, lock_page(page); if (!unfalloc || !PageUptodate(page)) { if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); + VM_BUG_ON_PAGE(PageWriteback(page), page); truncate_inode_page(mapping, page); } } @@ -160,12 +160,36 @@ static inline const char *cache_name(struct kmem_cache *s) return s->name; } +/* + * Note, we protect with RCU only the memcg_caches array, not per-memcg caches. + * That said the caller must assure the memcg's cache won't go away. Since once + * created a memcg's cache is destroyed only along with the root cache, it is + * true if we are going to allocate from the cache or hold a reference to the + * root cache by other means. Otherwise, we should hold either the slab_mutex + * or the memcg's slab_caches_mutex while calling this function and accessing + * the returned value. + */ static inline struct kmem_cache * cache_from_memcg_idx(struct kmem_cache *s, int idx) { + struct kmem_cache *cachep; + struct memcg_cache_params *params; + if (!s->memcg_params) return NULL; - return s->memcg_params->memcg_caches[idx]; + + rcu_read_lock(); + params = rcu_dereference(s->memcg_params); + cachep = params->memcg_caches[idx]; + rcu_read_unlock(); + + /* + * Make sure we will access the up-to-date value. The code updating + * memcg_caches issues a write barrier to match this (see + * memcg_register_cache()). + */ + smp_read_barrier_depends(); + return cachep; } static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) diff --git a/mm/slab_common.c b/mm/slab_common.c index 0b7bb39..8e40321 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -171,13 +171,26 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, struct kmem_cache *parent_cache) { struct kmem_cache *s = NULL; - int err = 0; + int err; get_online_cpus(); mutex_lock(&slab_mutex); - if (!kmem_cache_sanity_check(memcg, name, size) == 0) - goto out_locked; + err = kmem_cache_sanity_check(memcg, name, size); + if (err) + goto out_unlock; + + if (memcg) { + /* + * Since per-memcg caches are created asynchronously on first + * allocation (see memcg_kmem_get_cache()), several threads can + * try to create the same cache, but only one of them may + * succeed. Therefore if we get here and see the cache has + * already been created, we silently return NULL. + */ + if (cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg))) + goto out_unlock; + } /* * Some allocators will constraint the set of valid flags to a subset @@ -189,45 +202,45 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, s = __kmem_cache_alias(memcg, name, size, align, flags, ctor); if (s) - goto out_locked; + goto out_unlock; + err = -ENOMEM; s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); - if (s) { - s->object_size = s->size = size; - s->align = calculate_alignment(flags, align, size); - s->ctor = ctor; + if (!s) + goto out_unlock; - if (memcg_register_cache(memcg, s, parent_cache)) { - kmem_cache_free(kmem_cache, s); - err = -ENOMEM; - goto out_locked; - } + s->object_size = s->size = size; + s->align = calculate_alignment(flags, align, size); + s->ctor = ctor; - s->name = kstrdup(name, GFP_KERNEL); - if (!s->name) { - kmem_cache_free(kmem_cache, s); - err = -ENOMEM; - goto out_locked; - } + s->name = kstrdup(name, GFP_KERNEL); + if (!s->name) + goto out_free_cache; - err = __kmem_cache_create(s, flags); - if (!err) { - s->refcount = 1; - list_add(&s->list, &slab_caches); - memcg_cache_list_add(memcg, s); - } else { - kfree(s->name); - kmem_cache_free(kmem_cache, s); - } - } else - err = -ENOMEM; + err = memcg_alloc_cache_params(memcg, s, parent_cache); + if (err) + goto out_free_cache; + + err = __kmem_cache_create(s, flags); + if (err) + goto out_free_cache; -out_locked: + s->refcount = 1; + list_add(&s->list, &slab_caches); + memcg_register_cache(s); + +out_unlock: mutex_unlock(&slab_mutex); put_online_cpus(); - if (err) { - + /* + * There is no point in flooding logs with warnings or especially + * crashing the system if we fail to create a cache for a memcg. In + * this case we will be accounting the memcg allocation to the root + * cgroup until we succeed to create its own cache, but it isn't that + * critical. + */ + if (err && !memcg) { if (flags & SLAB_PANIC) panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", name, err); @@ -236,11 +249,15 @@ out_locked: name, err); dump_stack(); } - return NULL; } - return s; + +out_free_cache: + memcg_free_cache_params(s); + kfree(s->name); + kmem_cache_free(kmem_cache, s); + goto out_unlock; } struct kmem_cache * @@ -263,11 +280,12 @@ void kmem_cache_destroy(struct kmem_cache *s) list_del(&s->list); if (!__kmem_cache_shutdown(s)) { + memcg_unregister_cache(s); mutex_unlock(&slab_mutex); if (s->flags & SLAB_DESTROY_BY_RCU) rcu_barrier(); - memcg_release_cache(s); + memcg_free_cache_params(s); kfree(s->name); kmem_cache_free(kmem_cache, s); } else { @@ -1559,7 +1559,7 @@ static inline void *acquire_slab(struct kmem_cache *s, new.freelist = freelist; } - VM_BUG_ON(new.frozen); + VM_BUG_ON_PAGE(new.frozen, &new); new.frozen = 1; if (!__cmpxchg_double_slab(s, page, @@ -1812,7 +1812,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, set_freepointer(s, freelist, prior); new.counters = counters; new.inuse--; - VM_BUG_ON(!new.frozen); + VM_BUG_ON_PAGE(!new.frozen, &new); } while (!__cmpxchg_double_slab(s, page, prior, counters, @@ -1840,7 +1840,7 @@ redo: old.freelist = page->freelist; old.counters = page->counters; - VM_BUG_ON(!old.frozen); + VM_BUG_ON_PAGE(!old.frozen, &old); /* Determine target state of the slab */ new.counters = old.counters; @@ -1952,7 +1952,7 @@ static void unfreeze_partials(struct kmem_cache *s, old.freelist = page->freelist; old.counters = page->counters; - VM_BUG_ON(!old.frozen); + VM_BUG_ON_PAGE(!old.frozen, &old); new.counters = old.counters; new.freelist = old.freelist; @@ -2225,7 +2225,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) counters = page->counters; new.counters = counters; - VM_BUG_ON(!new.frozen); + VM_BUG_ON_PAGE(!new.frozen, &new); new.inuse = page->objects; new.frozen = freelist != NULL; @@ -2319,7 +2319,7 @@ load_freelist: * page is pointing to the page from which the objects are obtained. * That page must be frozen for per cpu allocations to work. */ - VM_BUG_ON(!c->page->frozen); + VM_BUG_ON_PAGE(!c->page->frozen, c->page); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); local_irq_restore(flags); @@ -57,7 +57,7 @@ static void __page_cache_release(struct page *page) spin_lock_irqsave(&zone->lru_lock, flags); lruvec = mem_cgroup_page_lruvec(page, zone); - VM_BUG_ON(!PageLRU(page)); + VM_BUG_ON_PAGE(!PageLRU(page), page); __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); @@ -130,8 +130,8 @@ static void put_compound_page(struct page *page) * __split_huge_page_refcount cannot race * here. */ - VM_BUG_ON(!PageHead(page_head)); - VM_BUG_ON(page_mapcount(page) != 0); + VM_BUG_ON_PAGE(!PageHead(page_head), page_head); + VM_BUG_ON_PAGE(page_mapcount(page) != 0, page); if (put_page_testzero(page_head)) { /* * If this is the tail of a slab @@ -148,7 +148,7 @@ static void put_compound_page(struct page *page) * the compound page enters the buddy * allocator. */ - VM_BUG_ON(PageSlab(page_head)); + VM_BUG_ON_PAGE(PageSlab(page_head), page_head); __put_compound_page(page_head); } return; @@ -199,7 +199,7 @@ out_put_single: __put_single_page(page); return; } - VM_BUG_ON(page_head != page->first_page); + VM_BUG_ON_PAGE(page_head != page->first_page, page); /* * We can release the refcount taken by * get_page_unless_zero() now that @@ -207,12 +207,12 @@ out_put_single: * compound_lock. */ if (put_page_testzero(page_head)) - VM_BUG_ON(1); + VM_BUG_ON_PAGE(1, page_head); /* __split_huge_page_refcount will wait now */ - VM_BUG_ON(page_mapcount(page) <= 0); + VM_BUG_ON_PAGE(page_mapcount(page) <= 0, page); atomic_dec(&page->_mapcount); - VM_BUG_ON(atomic_read(&page_head->_count) <= 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); + VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page_head); + VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { @@ -223,7 +223,7 @@ out_put_single: } } else { /* page_head is a dangling pointer */ - VM_BUG_ON(PageTail(page)); + VM_BUG_ON_PAGE(PageTail(page), page); goto out_put_single; } } @@ -264,7 +264,7 @@ bool __get_page_tail(struct page *page) * page. __split_huge_page_refcount * cannot race here. */ - VM_BUG_ON(!PageHead(page_head)); + VM_BUG_ON_PAGE(!PageHead(page_head), page_head); __get_page_tail_foll(page, true); return true; } else { @@ -604,8 +604,8 @@ EXPORT_SYMBOL(__lru_cache_add); */ void lru_cache_add(struct page *page) { - VM_BUG_ON(PageActive(page) && PageUnevictable(page)); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageLRU(page), page); __lru_cache_add(page); } @@ -846,7 +846,7 @@ void release_pages(struct page **pages, int nr, int cold) } lruvec = mem_cgroup_page_lruvec(page, zone); - VM_BUG_ON(!PageLRU(page)); + VM_BUG_ON_PAGE(!PageLRU(page), page); __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); } @@ -888,9 +888,9 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, { const int file = 0; - VM_BUG_ON(!PageHead(page)); - VM_BUG_ON(PageCompound(page_tail)); - VM_BUG_ON(PageLRU(page_tail)); + VM_BUG_ON_PAGE(!PageHead(page), page); + VM_BUG_ON_PAGE(PageCompound(page_tail), page); + VM_BUG_ON_PAGE(PageLRU(page_tail), page); VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); @@ -929,7 +929,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, int active = PageActive(page); enum lru_list lru = page_lru(page); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); add_page_to_lru_list(page, lruvec, lru); diff --git a/mm/swap_state.c b/mm/swap_state.c index e6f15f8..98e85e9 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -83,9 +83,9 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry) int error; struct address_space *address_space; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageSwapCache(page)); - VM_BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageSwapCache(page), page); + VM_BUG_ON_PAGE(!PageSwapBacked(page), page); page_cache_get(page); SetPageSwapCache(page); @@ -139,9 +139,9 @@ void __delete_from_swap_cache(struct page *page) swp_entry_t entry; struct address_space *address_space; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(!PageSwapCache(page)); - VM_BUG_ON(PageWriteback(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageSwapCache(page), page); + VM_BUG_ON_PAGE(PageWriteback(page), page); entry.val = page_private(page); address_space = swap_address_space(entry); @@ -165,8 +165,8 @@ int add_to_swap(struct page *page, struct list_head *list) swp_entry_t entry; int err; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(!PageUptodate(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageUptodate(page), page); entry = get_swap_page(); if (!entry.val) diff --git a/mm/swapfile.c b/mm/swapfile.c index 612a7c9..c6c13b05 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -616,7 +616,7 @@ scan: } } offset = si->lowest_bit; - while (++offset < scan_base) { + while (offset < scan_base) { if (!si->swap_map[offset]) { spin_lock(&si->lock); goto checks; @@ -629,6 +629,7 @@ scan: cond_resched(); latency_ration = LATENCY_LIMIT; } + offset++; } spin_lock(&si->lock); @@ -906,7 +907,7 @@ int reuse_swap_page(struct page *page) { int count; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (unlikely(PageKsm(page))) return 0; count = page_mapcount(page); @@ -926,7 +927,7 @@ int reuse_swap_page(struct page *page) */ int try_to_free_swap(struct page *page) { - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (!PageSwapCache(page)) return 0; @@ -2714,7 +2715,7 @@ struct swap_info_struct *page_swap_info(struct page *page) */ struct address_space *__page_file_mapping(struct page *page) { - VM_BUG_ON(!PageSwapCache(page)); + VM_BUG_ON_PAGE(!PageSwapCache(page), page); return page_swap_info(page)->swap_file->f_mapping; } EXPORT_SYMBOL_GPL(__page_file_mapping); @@ -2722,7 +2723,7 @@ EXPORT_SYMBOL_GPL(__page_file_mapping); pgoff_t __page_file_index(struct page *page) { swp_entry_t swap = { .val = page_private(page) }; - VM_BUG_ON(!PageSwapCache(page)); + VM_BUG_ON_PAGE(!PageSwapCache(page), page); return swp_offset(swap); } EXPORT_SYMBOL_GPL(__page_file_index); diff --git a/mm/vmscan.c b/mm/vmscan.c index eea668d..90c4075 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -281,17 +281,34 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, nr_pages_scanned, lru_pages, max_pass, delta, total_scan); - while (total_scan >= batch_size) { + /* + * Normally, we should not scan less than batch_size objects in one + * pass to avoid too frequent shrinker calls, but if the slab has less + * than batch_size objects in total and we are really tight on memory, + * we will try to reclaim all available objects, otherwise we can end + * up failing allocations although there are plenty of reclaimable + * objects spread over several slabs with usage less than the + * batch_size. + * + * We detect the "tight on memory" situations by looking at the total + * number of objects we want to scan (total_scan). If it is greater + * than the total number of objects on slab (max_pass), we must be + * scanning at high prio and therefore should try to reclaim as much as + * possible. + */ + while (total_scan >= batch_size || + total_scan >= max_pass) { unsigned long ret; + unsigned long nr_to_scan = min(batch_size, total_scan); - shrinkctl->nr_to_scan = batch_size; + shrinkctl->nr_to_scan = nr_to_scan; ret = shrinker->scan_objects(shrinker, shrinkctl); if (ret == SHRINK_STOP) break; freed += ret; - count_vm_events(SLABS_SCANNED, batch_size); - total_scan -= batch_size; + count_vm_events(SLABS_SCANNED, nr_to_scan); + total_scan -= nr_to_scan; cond_resched(); } @@ -352,16 +369,17 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl, } list_for_each_entry(shrinker, &shrinker_list, list) { - for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { - if (!node_online(shrinkctl->nid)) - continue; - - if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && - (shrinkctl->nid != 0)) - break; - + if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) { + shrinkctl->nid = 0; freed += shrink_slab_node(shrinkctl, shrinker, - nr_pages_scanned, lru_pages); + nr_pages_scanned, lru_pages); + continue; + } + + for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { + if (node_online(shrinkctl->nid)) + freed += shrink_slab_node(shrinkctl, shrinker, + nr_pages_scanned, lru_pages); } } @@ -603,7 +621,7 @@ void putback_lru_page(struct page *page) bool is_unevictable; int was_unevictable = PageUnevictable(page); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); redo: ClearPageUnevictable(page); @@ -794,8 +812,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (!trylock_page(page)) goto keep; - VM_BUG_ON(PageActive(page)); - VM_BUG_ON(page_zone(page) != zone); + VM_BUG_ON_PAGE(PageActive(page), page); + VM_BUG_ON_PAGE(page_zone(page) != zone, page); sc->nr_scanned++; @@ -1079,14 +1097,14 @@ activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ if (PageSwapCache(page) && vm_swap_full()) try_to_free_swap(page); - VM_BUG_ON(PageActive(page)); + VM_BUG_ON_PAGE(PageActive(page), page); SetPageActive(page); pgactivate++; keep_locked: unlock_page(page); keep: list_add(&page->lru, &ret_pages); - VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); + VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } free_hot_cold_page_list(&free_pages, 1); @@ -1240,7 +1258,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - VM_BUG_ON(!PageLRU(page)); + VM_BUG_ON_PAGE(!PageLRU(page), page); switch (__isolate_lru_page(page, mode)) { case 0: @@ -1295,7 +1313,7 @@ int isolate_lru_page(struct page *page) { int ret = -EBUSY; - VM_BUG_ON(!page_count(page)); + VM_BUG_ON_PAGE(!page_count(page), page); if (PageLRU(page)) { struct zone *zone = page_zone(page); @@ -1366,7 +1384,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) struct page *page = lru_to_page(page_list); int lru; - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); list_del(&page->lru); if (unlikely(!page_evictable(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1586,7 +1604,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, page = lru_to_page(list); lruvec = mem_cgroup_page_lruvec(page, zone); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); nr_pages = hpage_nr_pages(page); @@ -3701,7 +3719,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) if (page_evictable(page)) { enum lru_list lru = page_lru_base_type(page); - VM_BUG_ON(PageActive(page)); + VM_BUG_ON_PAGE(PageActive(page), page); ClearPageUnevictable(page); del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); add_page_to_lru_list(page, lruvec, lru); @@ -77,12 +77,12 @@ static u64 zswap_duplicate_entry; **********************************/ /* Enable/disable zswap (disabled by default, fixed at boot for now) */ static bool zswap_enabled __read_mostly; -module_param_named(enabled, zswap_enabled, bool, 0); +module_param_named(enabled, zswap_enabled, bool, 0444); /* Compressor to be used by zswap (fixed at boot for now) */ #define ZSWAP_COMPRESSOR_DEFAULT "lzo" static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; -module_param_named(compressor, zswap_compressor, charp, 0); +module_param_named(compressor, zswap_compressor, charp, 0444); /* The maximum percentage of memory that the compressed pool can occupy */ static unsigned int zswap_max_pool_percent = 20; diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 8a52099..e498a62 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -23,7 +23,6 @@ #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ -#define U32_MAX ((u32)~0U) #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ #define BETA_SHIFT 6 diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 3f64a66..b827a0f 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -46,31 +46,12 @@ struct iface_node { static void rbtree_destroy(struct rb_root *root) { - struct rb_node *p, *n = root->rb_node; - struct iface_node *node; - - /* Non-recursive destroy, like in ext3 */ - while (n) { - if (n->rb_left) { - n = n->rb_left; - continue; - } - if (n->rb_right) { - n = n->rb_right; - continue; - } - p = rb_parent(n); - node = rb_entry(n, struct iface_node, node); - if (!p) - *root = RB_ROOT; - else if (p->rb_left == n) - p->rb_left = NULL; - else if (p->rb_right == n) - p->rb_right = NULL; + struct iface_node *node, *next; + rbtree_postorder_for_each_entry_safe(node, next, root, node) kfree(node); - n = p; - } + + *root = RB_ROOT; } static int diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 9fb30b1..1dbd6d1 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -29,6 +29,7 @@ my $mailback = 0; my $summary_file = 0; my $show_types = 0; my $fix = 0; +my $fix_inplace = 0; my $root; my %debug; my %camelcase = (); @@ -76,6 +77,9 @@ Options: "<inputfile>.EXPERIMENTAL-checkpatch-fixes" with potential errors corrected to the preferred checkpatch style + --fix-inplace EXPERIMENTAL - may create horrible results + Is the same as --fix, but overwrites the input + file. It's your fault if there's no backup or git --ignore-perl-version override checking of perl version. expect runtime errors. -h, --help, --version display this help and exit @@ -131,6 +135,7 @@ GetOptions( 'mailback!' => \$mailback, 'summary-file!' => \$summary_file, 'fix!' => \$fix, + 'fix-inplace!' => \$fix_inplace, 'ignore-perl-version!' => \$ignore_perl_version, 'debug=s' => \%debug, 'test-only=s' => \$tst_only, @@ -140,6 +145,8 @@ GetOptions( help(0) if ($help); +$fix = 1 if ($fix_inplace); + my $exit = 0; if ($^V && $^V lt $minimum_perl_version) { @@ -1963,15 +1970,14 @@ sub process { } # Check for FSF mailing addresses. - if ($rawline =~ /You should have received a copy/ || - $rawline =~ /write to the Free Software/ || - $rawline =~ /59 Temple Place/ || - $rawline =~ /51 Franklin Street/) { + if ($rawline =~ /\bwrite to the Free/i || + $rawline =~ /\b59\s+Temple\s+Pl/i || + $rawline =~ /\b51\s+Franklin\s+St/i) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; my $msg_type = \&ERROR; $msg_type = \&CHK if ($file); &{$msg_type}("FSF_MAILING_ADDRESS", - "Do not include the paragraph about writing to the Free Software Foundation's mailing address from the sample GPL notice. The FSF has changed addresses in the past, and may do so again. Linux already includes a copy of the GPL.\n" . $herevet) + "Do not include the paragraph about writing to the Free Software Foundation's mailing address from the sample GPL notice. The FSF has changed addresses in the past, and may do so again. Linux already includes a copy of the GPL.\n" . $herevet) } # check for Kconfig help text having a real description @@ -2034,6 +2040,33 @@ sub process { "Use of $flag is deprecated, please use \`$replacement->{$flag} instead.\n" . $herecurr) if ($replacement->{$flag}); } +# check for DT compatible documentation + if (defined $root && $realfile =~ /\.dts/ && + $rawline =~ /^\+\s*compatible\s*=/) { + my @compats = $rawline =~ /\"([a-zA-Z0-9\-\,\.\+_]+)\"/g; + + foreach my $compat (@compats) { + my $compat2 = $compat; + my $dt_path = $root . "/Documentation/devicetree/bindings/"; + $compat2 =~ s/\,[a-z]*\-/\,<\.\*>\-/; + `grep -Erq "$compat|$compat2" $dt_path`; + if ( $? >> 8 ) { + WARN("UNDOCUMENTED_DT_STRING", + "DT compatible string \"$compat\" appears un-documented -- check $dt_path\n" . $herecurr); + } + + my $vendor = $compat; + my $vendor_path = $dt_path . "vendor-prefixes.txt"; + next if (! -f $vendor_path); + $vendor =~ s/^([a-zA-Z0-9]+)\,.*/$1/; + `grep -Eq "$vendor" $vendor_path`; + if ( $? >> 8 ) { + WARN("UNDOCUMENTED_DT_STRING", + "DT compatible string vendor \"$vendor\" appears un-documented -- check $vendor_path\n" . $herecurr); + } + } + } + # check we are in a valid source file if not then ignore this hunk next if ($realfile !~ /\.(h|c|s|S|pl|sh)$/); @@ -2049,16 +2082,12 @@ sub process { } # Check for user-visible strings broken across lines, which breaks the ability -# to grep for the string. Limited to strings used as parameters (those -# following an open parenthesis), which almost completely eliminates false -# positives, as well as warning only once per parameter rather than once per -# line of the string. Make an exception when the previous string ends in a -# newline (multiple lines in one string constant) or \n\t (common in inline -# assembly to indent the instruction on the following line). +# to grep for the string. Make exceptions when the previous string ends in a +# newline (multiple lines in one string constant) or '\t', '\r', ';', or '{' +# (common in inline assembly) or is a octal \123 or hexadecimal \xaf value if ($line =~ /^\+\s*"/ && $prevline =~ /"\s*$/ && - $prevline =~ /\(/ && - $prevrawline !~ /\\n(?:\\t)*"\s*$/) { + $prevrawline !~ /(?:\\(?:[ntr]|[0-7]{1,3}|x[0-9a-fA-F]{1,2})|;\s*|\{\s*)"\s*$/) { WARN("SPLIT_STRING", "quoted string split across lines\n" . $hereprev); } @@ -2115,8 +2144,10 @@ sub process { if (WARN("SPACE_BEFORE_TAB", "please, no space before tabs\n" . $herevet) && $fix) { - $fixed[$linenr - 1] =~ - s/(^\+.*) +\t/$1\t/; + while ($fixed[$linenr - 1] =~ + s/(^\+.*) {8,8}+\t/$1\t\t/) {} + while ($fixed[$linenr - 1] =~ + s/(^\+.*) +\t/$1\t/) {} } } @@ -2805,6 +2836,65 @@ sub process { } } +# Function pointer declarations +# check spacing between type, funcptr, and args +# canonical declaration is "type (*funcptr)(args...)" +# +# the $Declare variable will capture all spaces after the type +# so check it for trailing missing spaces or multiple spaces + if ($line =~ /^.\s*($Declare)\((\s*)\*(\s*)$Ident(\s*)\)(\s*)\(/) { + my $declare = $1; + my $pre_pointer_space = $2; + my $post_pointer_space = $3; + my $funcname = $4; + my $post_funcname_space = $5; + my $pre_args_space = $6; + + if ($declare !~ /\s$/) { + WARN("SPACING", + "missing space after return type\n" . $herecurr); + } + +# unnecessary space "type (*funcptr)(args...)" + elsif ($declare =~ /\s{2,}$/) { + WARN("SPACING", + "Multiple spaces after return type\n" . $herecurr); + } + +# unnecessary space "type ( *funcptr)(args...)" + if (defined $pre_pointer_space && + $pre_pointer_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space after function pointer open parenthesis\n" . $herecurr); + } + +# unnecessary space "type (* funcptr)(args...)" + if (defined $post_pointer_space && + $post_pointer_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space before function pointer name\n" . $herecurr); + } + +# unnecessary space "type (*funcptr )(args...)" + if (defined $post_funcname_space && + $post_funcname_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space after function pointer name\n" . $herecurr); + } + +# unnecessary space "type (*funcptr) (args...)" + if (defined $pre_args_space && + $pre_args_space =~ /^\s/) { + WARN("SPACING", + "Unnecessary space before function pointer arguments\n" . $herecurr); + } + + if (show_type("SPACING") && $fix) { + $fixed[$linenr - 1] =~ + s/^(.\s*$Declare)\(\s*\*\s*($Ident)\s*\)\s*\(/rtrim($1) . " " . "\(\*$2\)\("/ex; + } + } + # check for spacing round square brackets; allowed: # 1. with a type on the left -- int [] a; # 2. at the beginning of a line for slice initialisers -- [0...10] = 5, @@ -3125,7 +3215,7 @@ sub process { } # check for whitespace before a non-naked semicolon - if ($line =~ /^\+.*\S\s+;/) { + if ($line =~ /^\+.*\S\s+;\s*$/) { if (WARN("SPACING", "space prohibited before semicolon\n" . $herecurr) && $fix) { @@ -3249,6 +3339,20 @@ sub process { } } +# if statements using unnecessary parentheses - ie: if ((foo == bar)) + if ($^V && $^V ge 5.10.0 && + $line =~ /\bif\s*((?:\(\s*){2,})/) { + my $openparens = $1; + my $count = $openparens =~ tr@\(@\(@; + my $msg = ""; + if ($line =~ /\bif\s*(?:\(\s*){$count,$count}$LvalOrFunc\s*($Compare)\s*$LvalOrFunc(?:\s*\)){$count,$count}/) { + my $comp = $4; #Not $1 because of $LvalOrFunc + $msg = " - maybe == should be = ?" if ($comp eq "=="); + WARN("UNNECESSARY_PARENTHESES", + "Unnecessary parentheses$msg\n" . $herecurr); + } + } + # Return of what appears to be an errno should normally be -'ve if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) { my $name = $1; @@ -3983,6 +4087,16 @@ sub process { } } +# Check for memcpy(foo, bar, ETH_ALEN) that could be ether_addr_copy(foo, bar) + if ($^V && $^V ge 5.10.0 && + $line =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/s) { + if (WARN("PREFER_ETHER_ADDR_COPY", + "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) && + $fix) { + $fixed[$linenr - 1] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/; + } + } + # typecasts on min/max could be min_t/max_t if ($^V && $^V ge 5.10.0 && defined $stat && @@ -4117,6 +4231,12 @@ sub process { "$1 uses number as first arg, sizeof is generally wrong\n" . $herecurr); } +# check for GFP_NOWAIT use + if ($line =~ /\b__GFP_NOFAIL\b/) { + WARN("__GFP_NOFAIL", + "Use of __GFP_NOFAIL is deprecated, no new users should be added\n" . $herecurr); + } + # check for multiple semicolons if ($line =~ /;\s*;\s*$/) { if (WARN("ONE_SEMICOLON", @@ -4126,6 +4246,31 @@ sub process { } } +# check for case / default statements not preceeded by break/fallthrough/switch + if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) { + my $has_break = 0; + my $has_statement = 0; + my $count = 0; + my $prevline = $linenr; + while ($prevline > 1 && $count < 3 && !$has_break) { + $prevline--; + my $rline = $rawlines[$prevline - 1]; + my $fline = $lines[$prevline - 1]; + last if ($fline =~ /^\@\@/); + next if ($fline =~ /^\-/); + next if ($fline =~ /^.(?:\s*(?:case\s+(?:$Ident|$Constant)[\s$;]*|default):[\s$;]*)*$/); + $has_break = 1 if ($rline =~ /fall[\s_-]*(through|thru)/i); + next if ($fline =~ /^.[\s$;]*$/); + $has_statement = 1; + $count++; + $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|return\b|goto\b|continue\b)/); + } + if (!$has_break && $has_statement) { + WARN("MISSING_BREAK", + "Possible switch case/default not preceeded by break or fallthrough comment\n" . $herecurr); + } + } + # check for switch/default statements without a break; if ($^V && $^V ge 5.10.0 && defined $stat && @@ -4361,7 +4506,8 @@ sub process { hash_show_words(\%ignore_type, "Ignored"); if ($clean == 0 && $fix && "@rawlines" ne "@fixed") { - my $newfile = $filename . ".EXPERIMENTAL-checkpatch-fixes"; + my $newfile = $filename; + $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace); my $linecount = 0; my $f; diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 5e4fb14..9c3986f 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -98,6 +98,7 @@ my %VCS_cmds_git = ( "available" => '(which("git") ne "") && (-d ".git")', "find_signers_cmd" => "git log --no-color --follow --since=\$email_git_since " . + '--numstat --no-merges ' . '--format="GitCommit: %H%n' . 'GitAuthor: %an <%ae>%n' . 'GitDate: %aD%n' . @@ -106,6 +107,7 @@ my %VCS_cmds_git = ( " -- \$file", "find_commit_signers_cmd" => "git log --no-color " . + '--numstat ' . '--format="GitCommit: %H%n' . 'GitAuthor: %an <%ae>%n' . 'GitDate: %aD%n' . @@ -114,6 +116,7 @@ my %VCS_cmds_git = ( " -1 \$commit", "find_commit_author_cmd" => "git log --no-color " . + '--numstat ' . '--format="GitCommit: %H%n' . 'GitAuthor: %an <%ae>%n' . 'GitDate: %aD%n' . @@ -125,6 +128,7 @@ my %VCS_cmds_git = ( "blame_commit_pattern" => "^([0-9a-f]+) ", "author_pattern" => "^GitAuthor: (.*)", "subject_pattern" => "^GitSubject: (.*)", + "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$", ); my %VCS_cmds_hg = ( @@ -152,6 +156,7 @@ my %VCS_cmds_hg = ( "blame_commit_pattern" => "^([ 0-9a-f]+):", "author_pattern" => "^HgAuthor: (.*)", "subject_pattern" => "^HgSubject: (.*)", + "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$", ); my $conf = which_conf(".get_maintainer.conf"); @@ -1269,20 +1274,30 @@ sub extract_formatted_signatures { } sub vcs_find_signers { - my ($cmd) = @_; + my ($cmd, $file) = @_; my $commits; my @lines = (); my @signatures = (); + my @authors = (); + my @stats = (); @lines = &{$VCS_cmds{"execute_cmd"}}($cmd); my $pattern = $VCS_cmds{"commit_pattern"}; + my $author_pattern = $VCS_cmds{"author_pattern"}; + my $stat_pattern = $VCS_cmds{"stat_pattern"}; + + $stat_pattern =~ s/(\$\w+)/$1/eeg; #interpolate $stat_pattern $commits = grep(/$pattern/, @lines); # of commits + @authors = grep(/$author_pattern/, @lines); @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines); + @stats = grep(/$stat_pattern/, @lines); - return (0, @signatures) if !@signatures; +# print("stats: <@stats>\n"); + + return (0, \@signatures, \@authors, \@stats) if !@signatures; save_commits_by_author(@lines) if ($interactive); save_commits_by_signer(@lines) if ($interactive); @@ -1291,9 +1306,10 @@ sub vcs_find_signers { @signatures = grep(!/${penguin_chiefs}/i, @signatures); } + my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors); my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures); - return ($commits, @$signers_ref); + return ($commits, $signers_ref, $authors_ref, \@stats); } sub vcs_find_author { @@ -1849,7 +1865,12 @@ sub vcs_assign { sub vcs_file_signoffs { my ($file) = @_; + my $authors_ref; + my $signers_ref; + my $stats_ref; + my @authors = (); my @signers = (); + my @stats = (); my $commits; $vcs_used = vcs_exists(); @@ -1858,13 +1879,59 @@ sub vcs_file_signoffs { my $cmd = $VCS_cmds{"find_signers_cmd"}; $cmd =~ s/(\$\w+)/$1/eeg; # interpolate $cmd - ($commits, @signers) = vcs_find_signers($cmd); + ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file); + + @signers = @{$signers_ref} if defined $signers_ref; + @authors = @{$authors_ref} if defined $authors_ref; + @stats = @{$stats_ref} if defined $stats_ref; + +# print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n"); foreach my $signer (@signers) { $signer = deduplicate_email($signer); } vcs_assign("commit_signer", $commits, @signers); + vcs_assign("authored", $commits, @authors); + if ($#authors == $#stats) { + my $stat_pattern = $VCS_cmds{"stat_pattern"}; + $stat_pattern =~ s/(\$\w+)/$1/eeg; #interpolate $stat_pattern + + my $added = 0; + my $deleted = 0; + for (my $i = 0; $i <= $#stats; $i++) { + if ($stats[$i] =~ /$stat_pattern/) { + $added += $1; + $deleted += $2; + } + } + my @tmp_authors = uniq(@authors); + foreach my $author (@tmp_authors) { + $author = deduplicate_email($author); + } + @tmp_authors = uniq(@tmp_authors); + my @list_added = (); + my @list_deleted = (); + foreach my $author (@tmp_authors) { + my $auth_added = 0; + my $auth_deleted = 0; + for (my $i = 0; $i <= $#stats; $i++) { + if ($author eq deduplicate_email($authors[$i]) && + $stats[$i] =~ /$stat_pattern/) { + $auth_added += $1; + $auth_deleted += $2; + } + } + for (my $i = 0; $i < $auth_added; $i++) { + push(@list_added, $author); + } + for (my $i = 0; $i < $auth_deleted; $i++) { + push(@list_deleted, $author); + } + } + vcs_assign("added_lines", $added, @list_added); + vcs_assign("removed_lines", $deleted, @list_deleted); + } } sub vcs_file_blame { @@ -1887,6 +1954,10 @@ sub vcs_file_blame { if ($email_git_blame_signatures) { if (vcs_is_hg()) { my $commit_count; + my $commit_authors_ref; + my $commit_signers_ref; + my $stats_ref; + my @commit_authors = (); my @commit_signers = (); my $commit = join(" -r ", @commits); my $cmd; @@ -1894,19 +1965,27 @@ sub vcs_file_blame { $cmd = $VCS_cmds{"find_commit_signers_cmd"}; $cmd =~ s/(\$\w+)/$1/eeg; #substitute variables in $cmd - ($commit_count, @commit_signers) = vcs_find_signers($cmd); + ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file); + @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref; + @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref; push(@signers, @commit_signers); } else { foreach my $commit (@commits) { my $commit_count; + my $commit_authors_ref; + my $commit_signers_ref; + my $stats_ref; + my @commit_authors = (); my @commit_signers = (); my $cmd; $cmd = $VCS_cmds{"find_commit_signers_cmd"}; $cmd =~ s/(\$\w+)/$1/eeg; #substitute variables in $cmd - ($commit_count, @commit_signers) = vcs_find_signers($cmd); + ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file); + @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref; + @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref; push(@signers, @commit_signers); } diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl index 64ac238..62320f9 100644 --- a/scripts/headers_check.pl +++ b/scripts/headers_check.pl @@ -65,7 +65,11 @@ sub check_include sub check_declarations { - if ($line =~m/^(\s*extern|unsigned|char|short|int|long|void)\b/) { + # soundcard.h is what it is + if ($line =~ m/^void seqbuf_dump\(void\);/) { + return; + } + if ($line =~ m/^(\s*extern|unsigned|char|short|int|long|void)\b/) { printf STDERR "$filename:$lineno: " . "userspace cannot reference function or " . "variable defined in the kernel\n"; diff --git a/scripts/sortextable.c b/scripts/sortextable.c index 7941fbd..cc49062 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -39,6 +39,10 @@ #define EM_AARCH64 183 #endif +#ifndef EM_MICROBLAZE +#define EM_MICROBLAZE 189 +#endif + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ @@ -275,6 +279,7 @@ do_file(char const *const fname) case EM_ARCOMPACT: case EM_ARM: case EM_AARCH64: + case EM_MICROBLAZE: case EM_MIPS: break; } /* end switch */ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9f3eae2..32487ed 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -9,6 +9,7 @@ TARGETS += ptrace TARGETS += timers TARGETS += vm TARGETS += powerpc +TARGETS += user all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile new file mode 100644 index 0000000..396255b --- /dev/null +++ b/tools/testing/selftests/user/Makefile @@ -0,0 +1,13 @@ +# Makefile for user memory selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +run_tests: all + @if /sbin/modprobe test_user_copy ; then \ + rmmod test_user_copy; \ + echo "user_copy: ok"; \ + else \ + echo "user_copy: [FAIL]"; \ + exit 1; \ + fi |