From 100a8fdbf525bb11796692a713c267be6523a890 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 29 Jul 2014 11:50:48 +0100 Subject: thermal: trace: Trace temperature changes Create a new event to trace the temperature of a thermal zone. Using this event trace the temperature changes of the thermal zone every-time it is updated. Cc: Zhang Rui Cc: Eduardo Valentin Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Punit Agrawal Signed-off-by: Eduardo Valentin --- include/trace/events/thermal.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/trace/events/thermal.h (limited to 'include') diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h new file mode 100644 index 0000000..8c5ca96 --- /dev/null +++ b/include/trace/events/thermal.h @@ -0,0 +1,38 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM thermal + +#if !defined(_TRACE_THERMAL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_THERMAL_H + +#include +#include + +TRACE_EVENT(thermal_temperature, + + TP_PROTO(struct thermal_zone_device *tz), + + TP_ARGS(tz), + + TP_STRUCT__entry( + __string(thermal_zone, tz->type) + __field(int, id) + __field(int, temp_prev) + __field(int, temp) + ), + + TP_fast_assign( + __assign_str(thermal_zone, tz->type); + __entry->id = tz->id; + __entry->temp_prev = tz->last_temperature; + __entry->temp = tz->temperature; + ), + + TP_printk("thermal_zone=%s id=%d temp_prev=%d temp=%d", + __get_str(thermal_zone), __entry->id, __entry->temp_prev, + __entry->temp) +); + +#endif /* _TRACE_THERMAL_H */ + +/* This part must be outside protection */ +#include -- cgit v1.1 From 39811569e43a81417bc0ddca3d0c7658c3dcd4b0 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 29 Jul 2014 11:50:49 +0100 Subject: thermal: trace: Trace when a cooling device's state is updated Introduce and use an event to trace when a cooling device's state is updated. 
This is useful to follow the effect of governor decisions on cooling devices. Cc: Zhang Rui Cc: Eduardo Valentin Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Punit Agrawal Signed-off-by: Eduardo Valentin --- include/trace/events/thermal.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 8c5ca96..894a79e 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -32,6 +32,25 @@ TRACE_EVENT(thermal_temperature, __entry->temp) ); +TRACE_EVENT(cdev_update, + + TP_PROTO(struct thermal_cooling_device *cdev, unsigned long target), + + TP_ARGS(cdev, target), + + TP_STRUCT__entry( + __string(type, cdev->type) + __field(unsigned long, target) + ), + + TP_fast_assign( + __assign_str(type, cdev->type); + __entry->target = target; + ), + + TP_printk("type=%s target=%lu", __get_str(type), __entry->target) +); + #endif /* _TRACE_THERMAL_H */ /* This part must be outside protection */ -- cgit v1.1 From 208cd822a19e683bc890f6708786f2420e172d76 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 29 Jul 2014 11:50:50 +0100 Subject: thermal: trace: Trace when temperature is above a trip point Create a new event to trace when the temperature is above a trip point. Use the trace-point when handling non-critical and critical trip points.
Cc: Zhang Rui Cc: Eduardo Valentin Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Punit Agrawal Signed-off-by: Eduardo Valentin --- include/trace/events/thermal.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 894a79e..0f4f95d 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -51,6 +51,32 @@ TRACE_EVENT(cdev_update, TP_printk("type=%s target=%lu", __get_str(type), __entry->target) ); +TRACE_EVENT(thermal_zone_trip, + + TP_PROTO(struct thermal_zone_device *tz, int trip, + enum thermal_trip_type trip_type), + + TP_ARGS(tz, trip, trip_type), + + TP_STRUCT__entry( + __string(thermal_zone, tz->type) + __field(int, id) + __field(int, trip) + __field(enum thermal_trip_type, trip_type) + ), + + TP_fast_assign( + __assign_str(thermal_zone, tz->type); + __entry->id = tz->id; + __entry->trip = trip; + __entry->trip_type = trip_type; + ), + + TP_printk("thermal_zone=%s id=%d trip=%d trip_type=%d", + __get_str(thermal_zone), __entry->id, __entry->trip, + __entry->trip_type) +); + #endif /* _TRACE_THERMAL_H */ /* This part must be outside protection */ -- cgit v1.1 From e963bb1de415ab06693357336c1bec664753e1e2 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 1 Sep 2014 22:22:13 -0400 Subject: ext4: improve extents status tree trace point This commit improves the trace point of extents status tree. We rename trace_ext4_es_shrink_enter in ext4_es_count() because it is also used in ext4_es_scan() and we can not identify them from the result. Further this commit fixes a variable name in trace point in order to keep consistency with others. 
Cc: Andreas Dilger Cc: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Zheng Liu Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d4f70a7..849aaba 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2369,7 +2369,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit, show_extent_status(__entry->found ? __entry->status : 0)) ); -TRACE_EVENT(ext4_es_shrink_enter, +DECLARE_EVENT_CLASS(ext4__es_shrink_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt), @@ -2391,26 +2391,38 @@ TRACE_EVENT(ext4_es_shrink_enter, __entry->nr_to_scan, __entry->cache_cnt) ); -TRACE_EVENT(ext4_es_shrink_exit, - TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt), +DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count, + TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), + + TP_ARGS(sb, nr_to_scan, cache_cnt) +); + +DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter, + TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), + + TP_ARGS(sb, nr_to_scan, cache_cnt) +); + +TRACE_EVENT(ext4_es_shrink_scan_exit, + TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt), - TP_ARGS(sb, shrunk_nr, cache_cnt), + TP_ARGS(sb, nr_shrunk, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) - __field( int, shrunk_nr ) + __field( int, nr_shrunk ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; - __entry->shrunk_nr = shrunk_nr; + __entry->nr_shrunk = nr_shrunk; __entry->cache_cnt = cache_cnt; ), - TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d", + TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->shrunk_nr, __entry->cache_cnt) + __entry->nr_shrunk, __entry->cache_cnt) ); TRACE_EVENT(ext4_collapse_range, -- cgit v1.1 From 
eb68d0e2fc5a4e5c06324ea5f485fccbae626d05 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 1 Sep 2014 22:26:49 -0400 Subject: ext4: track extent status tree shrinker delay statistics This commit adds some statistics in extent status tree shrinker. The purpose of adding these is that we want to collect more details when we encounter a stall caused by extent status tree shrinker. Here we count the following statistics: stats: the number of all objects on all extent status trees the number of reclaimable objects on lru list cache hits/misses the last sorted interval the number of inodes on lru list average: scan time for shrinking some objects the number of shrunk objects maximum: the inode that has max nr. of objects on lru list the maximum scan time for shrinking some objects The output looks like below: $ cat /proc/fs/ext4/sda1/es_shrinker_info stats: 28228 objects 6341 reclaimable objects 5281/631 cache hits/misses 586 ms last sorted interval 250 inodes on lru list average: 153 us scan time 128 shrunk objects maximum: 255 inode (255 objects, 198 reclaimable) 125723 us max scan time If the lru list has never been sorted, the following line will not be printed: 586ms last sorted interval If there is an empty lru list, the following lines also will not be printed: 250 inodes on lru list ... maximum: 255 inode (255 objects, 198 reclaimable) 0 us max scan time Meanwhile in this commit a new trace point is defined to print some details in __ext4_es_shrink().
Cc: Andreas Dilger Cc: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Zheng Liu Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 849aaba..ff4bd1b 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2450,6 +2450,37 @@ TRACE_EVENT(ext4_collapse_range, __entry->offset, __entry->len) ); +TRACE_EVENT(ext4_es_shrink, + TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, + int skip_precached, int nr_skipped, int retried), + + TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, nr_shrunk ) + __field( unsigned long long, scan_time ) + __field( int, skip_precached ) + __field( int, nr_skipped ) + __field( int, retried ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nr_shrunk = nr_shrunk; + __entry->scan_time = div_u64(scan_time, 1000); + __entry->skip_precached = skip_precached; + __entry->nr_skipped = nr_skipped; + __entry->retried = retried; + ), + + TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " + "nr_skipped %d retried %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, + __entry->scan_time, __entry->skip_precached, + __entry->nr_skipped, __entry->retried) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.1 From 3b5e6454aaf6b4439b19400d8365e2ec2d24e411 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 4 Sep 2014 22:04:42 -0400 Subject: fs/buffer.c: support buffer cache allocations with gfp modifiers A buffer cache is allocated from movable area because it is referred for a while and released soon. But some filesystems are taking buffer cache for a long time and it can disturb page migration. New APIs are introduced to allocate buffer cache with user specific flag. 
*_gfp APIs are for users who want to set the page allocation flags for page cache allocation, and *_unmovable APIs are for users who want to allocate page cache from the non-movable area. Signed-off-by: Gioh Kim Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- include/linux/buffer_head.h | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 324329c..73b4522 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk(struct block_device *bdev, sector_t block, - unsigned size); +struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size); +struct buffer_head *__bread_gfp(struct block_device *, + sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh) static inline struct buffer_head * sb_bread(struct super_block *sb, sector_t block) { - return __bread(sb->s_bdev, block, sb->s_blocksize); + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); +} + +static inline struct buffer_head * +sb_bread_unmovable(struct super_block *sb, sector_t block) +{ + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline void @@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb,
sector_t block) static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { - return __getblk(sb->s_bdev, block, sb->s_blocksize); + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * @@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, 0); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); +} + +/** + * __bread() - reads a specified block and returns the bh + * @bdev: the block_device to read from + * @block: number of block + * @size: size (in bytes) to read + * + * Reads a specified block, and returns buffer head that contains it. + * The page cache is allocated from movable area so that it can be migrated. + * It returns NULL if the block was unreadable. + */ +static inline struct buffer_head * +__bread(struct block_device *bdev, sector_t block, unsigned size) +{ + return __bread_gfp(bdev, block, size, __GFP_MOVABLE); +} + extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ -- cgit v1.1 From 047133066e6c2549403fe5a2d619f47ba4212ef5 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:22 -0700 Subject: leds: Reorder include directives Reorder include directives so that they are arranged in alphabetical order. 
Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index e436864..4be2d76 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,8 +13,8 @@ #define __LINUX_LEDS_H_INCLUDED #include -#include #include +#include #include #include -- cgit v1.1 From d8082827d8a214343b761f2c4554d2a7d1573d63 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:23 -0700 Subject: leds: make brightness type consistent across whole subsystem Documentations states that brightness units type is enum led_brightness and this is the type used by the led API functions. Adjust the type of brightness variables in the struct led_classdev accordingly. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 4be2d76..f2e1cbc 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -31,8 +31,8 @@ enum led_brightness { struct led_classdev { const char *name; - int brightness; - int max_brightness; + enum led_brightness brightness; + enum led_brightness max_brightness; int flags; /* Lower 16 bits reflect status */ -- cgit v1.1 From 3ef7de5304edf60d0b8674dd7cdacc104e15a93c Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 20 Aug 2014 06:41:55 -0700 Subject: leds: Improve and export led_update_brightness led_update_brightness helper function used to be exploited only locally in the led-class.c module, where its result was being passed to the brightness_show sysfs callback. With the introduction of v4l2-flash subdevice the same functionality becomes required for reading current brightness from a LED device. 
This patch adds checking of the return value of the brightness_get callback and moves the led_update_brightness() function to the LED subsystem public API. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index f2e1cbc..a57611d 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, */ extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); +/** + * led_update_brightness - update LED brightness + * @led_cdev: the LED to query + * + * Get an LED's current brightness and update led_cdev->brightness + * member with the obtained value. + * + * Returns: 0 on success or negative error value on failure + */ +extern int led_update_brightness(struct led_classdev *led_cdev); /* * LED Triggers -- cgit v1.1 From 50849db32a9f529235a84bcc84a6b8e631b1d0ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 Sep 2014 00:58:12 -0400 Subject: jbd2: simplify calling convention around __jbd2_journal_clean_checkpoint_list __jbd2_journal_clean_checkpoint_list() returns the number of buffers it freed but no one was using the value, so just stop doing that. This also allows for simplifying the calling convention for journal_clean_once_cp_list().
Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0dae71e..704b9a5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1042,7 +1042,7 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ -int __jbd2_journal_clean_checkpoint_list(journal_t *journal); +void __jbd2_journal_clean_checkpoint_list(journal_t *journal); int __jbd2_journal_remove_checkpoint(struct journal_head *); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); -- cgit v1.1 From 7990da71ebfa887ae6fe4464ab0d99ddeb8efacc Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 3 Sep 2014 17:49:32 +0200 Subject: PM / QoS: Add PM_QOS_MEMORY_BANDWIDTH class Also adds a class type PM_QOS_SUM that aggregates the values by summing them. It can be used by memory controllers to calculate the optimum clock frequency based on the bandwidth needs of the different memory clients. Signed-off-by: Tomeu Vizoso Acked-by: Pavel Machek Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm_qos.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 9ab4bf7..636e828 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -15,6 +15,7 @@ enum { PM_QOS_CPU_DMA_LATENCY, PM_QOS_NETWORK_LATENCY, PM_QOS_NETWORK_THROUGHPUT, + PM_QOS_MEMORY_BANDWIDTH, /* insert new class ID */ PM_QOS_NUM_CLASSES, @@ -32,6 +33,7 @@ enum pm_qos_flags_status { #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 +#define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) @@ -69,7 +71,8 @@ struct dev_pm_qos_request { enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ - PM_QOS_MIN /* return the smallest value */ + PM_QOS_MIN, /* return the smallest value */ + PM_QOS_SUM /* return the sum */ }; /* -- cgit v1.1 From 33940d09937276cd3c81f2874faf43e37c2db0e2 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Tue, 16 Sep 2014 16:23:12 -0400 Subject: target: encapsulate smp_mb__after_atomic() The target code has a rather generous helping of smp_mb__after_atomic() throughout the code base. Most atomic operations were followed by one and none were preceded by smp_mb__before_atomic(), nor accompanied by a comment explaining the need for a barrier. Instead of trying to prove for every case whether or not it is needed, this patch introduces atomic_inc_mb() and atomic_dec_mb(), which explicitly include the memory barriers before and after the atomic operation. For now they are defined in a target header, although they could be of general use. Most of the existing atomic/mb combinations were replaced by the new helpers. 
In a few cases the atomic was sandwiched in spin_lock/spin_unlock and I simply removed the barrier. I suspect that in most cases the correct conversion would have been to drop the barrier. I also suspect that a few cases exist where a) the barrier was necessary and b) a second barrier before the atomic would have been necessary and got added by this patch. Signed-off-by: Joern Engel Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 9ec9864..b106240 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -903,4 +903,18 @@ struct se_wwn { struct config_group fabric_stat_group; }; +static inline void atomic_inc_mb(atomic_t *v) +{ + smp_mb__before_atomic(); + atomic_inc(v); + smp_mb__after_atomic(); +} + +static inline void atomic_dec_mb(atomic_t *v) +{ + smp_mb__before_atomic(); + atomic_dec(v); + smp_mb__after_atomic(); +} + #endif /* TARGET_CORE_BASE_H */ -- cgit v1.1 From 90a8020278c1598fafd071736a0846b38510309c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Oct 2014 21:49:18 -0400 Subject: vfs: fix data corruption when blocksize < pagesize for mmaped data ->page_mkwrite() is used by filesystems to allocate blocks under a page which is becoming writeably mmapped in some process' address space. This allows a filesystem to return a page fault if there is not enough space available, user exceeds quota or similar problem happens, rather than silently discarding data later when writepage is called. However VFS fails to call ->page_mkwrite() in all the cases where filesystems need it when blocksize < pagesize. 
For example when blocksize = 1024, pagesize = 4096 the following is problematic: ftruncate(fd, 0); pwrite(fd, buf, 1024, 0); map = mmap(NULL, 1024, PROT_WRITE, MAP_SHARED, fd, 0); map[0] = 'a'; ----> page_mkwrite() for index 0 is called ftruncate(fd, 10000); /* or even pwrite(fd, buf, 1, 10000) */ mremap(map, 1024, 10000, 0); map[4095] = 'a'; ----> no page_mkwrite() called At the moment ->page_mkwrite() is called, filesystem can allocate only one block for the page because i_size == 1024. Otherwise it would create blocks beyond i_size which is generally undesirable. But later at ->writepage() time, we also need to store data at offset 4095 but we don't have block allocated for it. This patch introduces a helper function filesystems can use to have ->page_mkwrite() called at all the necessary moments. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc8..5005464 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1155,6 +1155,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); +void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); -- cgit v1.1 From f14bb039a4e8206439d3e9abd92bc76bd142f243 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Oct 2014 16:07:03 -0700 Subject: uio: Export definition of struct uio_device In order to prevent a O(n) search of the filesystem to link up its uio node with its target configuration, TCMU needs to know the minor number that UIO 
assigned. Expose the definition of this struct so TCMU can access this field. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/linux/uio_driver.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 1ad4724..baa8171 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -63,7 +63,17 @@ struct uio_port { #define MAX_UIO_PORT_REGIONS 5 -struct uio_device; +struct uio_device { + struct module *owner; + struct device *dev; + int minor; + atomic_t event; + struct fasync_struct *async_queue; + wait_queue_head_t wait; + struct uio_info *info; + struct kobject *map_dir; + struct kobject *portio_dir; +}; /** * struct uio_info - UIO device capabilities -- cgit v1.1 From 5a17dae422d7de4b776a9753cd4673a343a25b4b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 5 Aug 2014 11:52:11 +0100 Subject: efi: Add efi= parameter parsing to the EFI boot stub We need a way to customize the behaviour of the EFI boot stub, in particular, we need a way to disable the "chunking" workaround, used when reading files from the EFI System Partition. One of my machines doesn't cope well when reading files in 1MB chunks to a buffer above the 4GB mark - it appears that the "chunking" bug workaround triggers another firmware bug. This was only discovered with commit 4bf7111f5016 ("x86/efi: Support initrd loaded above 4G"), and that commit is perfectly valid. The symptom I observed was a corrupt initrd rather than any kind of crash. efi= is now used to specify EFI parameters in two very different execution environments, the EFI boot stub and during kernel boot. There is also a slight performance optimization by enabling efi=nochunk, but that's offset by the fact that you're more likely to run into firmware issues, at least on x86. This is the rationale behind leaving the workaround enabled by default. 
Also provide some documentation for EFI_READ_CHUNK_SIZE and why we're using the current value of 1MB. Tested-by: Ard Biesheuvel Cc: Roy Franz Cc: Maarten Lankhorst Cc: Leif Lindholm Cc: Borislav Petkov Signed-off-by: Matt Fleming --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 45cb4ff..518779f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1227,4 +1227,6 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, unsigned long *load_addr, unsigned long *load_size); +efi_status_t efi_parse_options(char *cmdline); + #endif /* _LINUX_EFI_H */ -- cgit v1.1 From b2e0a54a1296a91b800f316df7bef7d1905e4fd0 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 14 Aug 2014 17:15:26 +0800 Subject: efi: Move noefi early param code out of x86 arch code noefi param can be used for arches other than X86 later, thus move it out of x86 platform code. Signed-off-by: Dave Young Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 518779f..4812ed0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1229,4 +1229,5 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, efi_status_t efi_parse_options(char *cmdline); +bool efi_runtime_disabled(void); #endif /* _LINUX_EFI_H */ -- cgit v1.1 From 6ccc72b87b83ece31c2a75bbe07f440b0378f7a9 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 14 Aug 2014 17:15:27 +0800 Subject: lib: Add a generic cmdline parse function parse_option_str There should be a generic function to parse params like a=b,c Adding parse_option_str in lib/cmdline.c which will return true if there's specified option set in the params. 
Also updated efi=old_map parsing code to use the new function Signed-off-by: Dave Young Signed-off-by: Matt Fleming --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 95624be..f66427e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -407,6 +407,7 @@ int vsscanf(const char *, const char *, va_list); extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); +extern bool parse_option_str(const char *str, const char *option); extern int core_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); -- cgit v1.1 From 9c97e0bdd4b4ae44577a1b1ec949e782084e9a78 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 3 Sep 2014 13:32:19 +0200 Subject: efi: Add macro for EFI_MEMORY_UCE memory attribute Add the following macro from the UEFI spec, for completeness: EFI_MEMORY_UCE Memory cacheability attribute: The memory region supports being configured as not cacheable, exported, and supports the "fetch and add" semaphore mechanism. 
Signed-off-by: Laszlo Ersek Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4812ed0..7464032 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -92,6 +92,7 @@ typedef struct { #define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */ #define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */ #define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */ +#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */ #define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ #define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ -- cgit v1.1 From 98d2a6ca14520904a47c46258d3bad02ffcd3f96 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 3 Sep 2014 13:32:20 +0200 Subject: efi: Introduce efi_md_typeattr_format() At the moment, there are three architectures debug-printing the EFI memory map at initialization: x86, ia64, and arm64. They all use different format strings, plus the EFI memory type and the EFI memory attributes are similarly hard to decode for a human reader. Introduce a helper __init function that formats the memory type and the memory attributes in a unified way, to a user-provided character buffer. The array "memory_type_name" is copied from the arm64 code, temporarily duplicating it. The (otherwise optional) braces around each string literal in the initializer list are dropped in order to match the kernel coding style more closely. The element size is tightened from 32 to 20 bytes (maximum actual string length + 1) so that we can derive the field width from the element size. 
Signed-off-by: Laszlo Ersek Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel [ Dropped useless 'register' keyword, which compiler will ignore ] Signed-off-by: Matt Fleming --- include/linux/efi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7464032..78b29b1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -887,6 +887,13 @@ extern bool efi_poweroff_required(void); (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ (md) = (void *)(md) + (m)->desc_size) +/* + * Format an EFI memory descriptor's type and attributes to a user-provided + * character buffer, as per snprintf(), and return the buffer. + */ +char * __init efi_md_typeattr_format(char *buf, size_t size, + const efi_memory_desc_t *md); + /** * efi_range_is_wc - check the WC bit on an address range * @start: starting kvirt address -- cgit v1.1 From 6d80dba1c9fe4316ef626980102b92fa30c7845a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 30 Sep 2014 21:58:52 +0100 Subject: efi: Provide a non-blocking SetVariable() operation There are some circumstances that call for trying to write an EFI variable in a non-blocking way. One such scenario is when writing pstore data in efi_pstore_write() via the pstore_dump() kdump callback. Now that we have an EFI runtime spinlock we need a way of aborting if there is contention instead of spinning, since when writing pstore data from the kdump callback, the runtime lock may already be held by the CPU that's running the callback if we crashed in the middle of an EFI variable operation. The situation is sufficiently special that a new EFI variable operation is warranted. Introduce ->set_variable_nonblocking() for this use case. It is an optional EFI backend operation, and need only be implemented by those backends that usually acquire locks to serialize access to EFI variables, as is the case for virt_efi_set_variable() where we now grab the EFI runtime spinlock. 
Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Ard Biesheuvel Cc: Matthew Garrett Signed-off-by: Matt Fleming --- include/linux/efi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 78b29b1..0949f9c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -503,6 +503,10 @@ typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); +typedef efi_status_t +efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data); + typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count); typedef void efi_reset_system_t (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data); @@ -822,6 +826,7 @@ extern struct efi { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; + efi_set_variable_nonblocking_t *set_variable_nonblocking; efi_query_variable_info_t *query_variable_info; efi_update_capsule_t *update_capsule; efi_query_capsule_caps_t *query_capsule_caps; @@ -1042,6 +1047,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; + efi_set_variable_nonblocking_t *set_variable_nonblocking; efi_query_variable_store_t *query_variable_store; }; -- cgit v1.1 From 7c9e7a6fe11c8dc5b3b9d0e889dde73347247584 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Oct 2014 16:07:05 -0700 Subject: target: Add a user-passthrough backstore Add a LIO storage engine that presents commands to userspace for execution. This would allow more complex backstores to be implemented out-of-kernel, and also make experimentation a-la FUSE (but at the SCSI level -- "SUSE"?) possible. 
It uses a mmap()able UIO device per LUN to share a command ring and data area. The commands are raw SCSI CDBs and iovs for in/out data. The command ring is also reused for returning scsi command status and optional sense data. This implementation is based on Shaohua Li's earlier version but heavily modified. Differences include: * Shared memory allocated by kernel, not locked-down user pages * Single ring for command request and response * Offsets instead of embedded pointers * Generic SCSI CDB passthrough instead of per-cmd specialization in ring format. * Uses UIO device instead of anon_file passed in mailbox. * Optional in-kernel handling of some commands. The main reason for these differences is to permit greater resiliency if the user process dies or hangs. Things not yet implemented (on purpose): * Zero copy. The data area is flexible enough to allow page flipping or backend-allocated pages to be used by fabrics, but it's not clear these are performance wins. Can come later. * Out-of-order command completion by userspace. Possible to add by just allowing userspace to change cmd_id in rsp cmd entries, but currently not supported. * No locks between kernel cmd submission and completion routines. Sounds like it's possible, but this can come later. * Sparse allocation of mmaped area. Current code vmallocs the whole thing. If the mapped area was larger and not fully mapped then the driver would have more freedom to change cmd and data area sizes based on demand. Current code open issues: * The use of idrs may be overkill -- we maybe can replace them with a simple counter to generate cmd_ids, and a hash table to get a cmd_id's associated pointer. * Use of a free-running counter for cmd ring instead of explicit modulo math. This would require power-of-2 cmd ring size. 
(Add kconfig depends NET - Randy) Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/target_core_user.h | 142 ++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 include/uapi/linux/target_core_user.h (limited to 'include') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index be88166..6ebd0d1 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -371,6 +371,7 @@ header-y += swab.h header-y += synclink.h header-y += sysctl.h header-y += sysinfo.h +header-y += target_core_user.h header-y += taskstats.h header-y += tcp.h header-y += tcp_metrics.h diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h new file mode 100644 index 0000000..7dcfbe6 --- /dev/null +++ b/include/uapi/linux/target_core_user.h @@ -0,0 +1,142 @@ +#ifndef __TARGET_CORE_USER_H +#define __TARGET_CORE_USER_H + +/* This header will be used by application too */ + +#include <linux/types.h> +#include <linux/uio.h> + +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +#define TCMU_VERSION "1.0" + +/* + * Ring Design + * ----------- + * + * The mmaped area is divided into three parts: + * 1) The mailbox (struct tcmu_mailbox, below) + * 2) The command ring + * 3) Everything beyond the command ring (data) + * + * The mailbox tells userspace the offset of the command ring from the + * start of the shared memory region, and how big the command ring is. + * + * The kernel passes SCSI commands to userspace by putting a struct + * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking + * userspace via uio's interrupt mechanism. + * + * tcmu_cmd_entry contains a header. If the header type is PAD, + * userspace should skip hdr->length bytes (mod cmdr_size) to find the + * next cmd_entry. 
+ * + * Otherwise, the entry will contain offsets into the mmaped area that + * contain the cdb and data buffers -- the latter accessible via the + * iov array. iov addresses are also offsets into the shared area. + * + * When userspace is completed handling the command, set + * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate, + * and also set mailbox->cmd_tail equal to the old cmd_tail plus + * hdr->length, mod cmdr_size. If cmd_tail doesn't equal cmd_head, it + * should process the next packet the same way, and so on. + */ + +#define TCMU_MAILBOX_VERSION 1 +#define ALIGN_SIZE 64 /* Should be enough for most CPUs */ + +struct tcmu_mailbox { + __u16 version; + __u16 flags; + __u32 cmdr_off; + __u32 cmdr_size; + + __u32 cmd_head; + + /* Updated by user. On its own cacheline */ + __u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE))); + +} __packed; + +enum tcmu_opcode { + TCMU_OP_PAD = 0, + TCMU_OP_CMD, +}; + +/* + * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode. 
+ */ +struct tcmu_cmd_entry_hdr { + __u32 len_op; +} __packed; + +#define TCMU_OP_MASK 0x7 + +static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr) +{ + return hdr->len_op & TCMU_OP_MASK; +} + +static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op) +{ + hdr->len_op &= ~TCMU_OP_MASK; + hdr->len_op |= (op & TCMU_OP_MASK); +} + +static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr) +{ + return hdr->len_op & ~TCMU_OP_MASK; +} + +static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len) +{ + hdr->len_op &= TCMU_OP_MASK; + hdr->len_op |= len; +} + +/* Currently the same as SCSI_SENSE_BUFFERSIZE */ +#define TCMU_SENSE_BUFFERSIZE 96 + +struct tcmu_cmd_entry { + struct tcmu_cmd_entry_hdr hdr; + + uint16_t cmd_id; + uint16_t __pad1; + + union { + struct { + uint64_t cdb_off; + uint64_t iov_cnt; + struct iovec iov[0]; + } req; + struct { + uint8_t scsi_status; + uint8_t __pad1; + uint16_t __pad2; + uint32_t __pad3; + char sense_buffer[TCMU_SENSE_BUFFERSIZE]; + } rsp; + }; + +} __packed; + +#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t) + +enum tcmu_genl_cmd { + TCMU_CMD_UNSPEC, + TCMU_CMD_ADDED_DEVICE, + TCMU_CMD_REMOVED_DEVICE, + __TCMU_CMD_MAX, +}; +#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1) + +enum tcmu_genl_attr { + TCMU_ATTR_UNSPEC, + TCMU_ATTR_DEVICE, + TCMU_ATTR_MINOR, + __TCMU_ATTR_MAX, +}; +#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1) + +#endif -- cgit v1.1 From 92404e609a2dffc55a9a22540ed48b6f0edc9c59 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 4 Oct 2014 01:06:08 +0000 Subject: target: Add force_pr_aptpl device attribute This patch adds a force_pr_aptpl device attribute used to force SPC-3 PR Activate Persistence across Target Power Loss (APTPL) operation. This makes PR metadata write-out occur during state change regardless if new PERSISTENT_RESERVE_OUT CDBs have their APTPL feature bit set. 
This is useful during H/A failover in active/passive setups where all PR state is being re-created on a different node, driven by configfs backend device + export layout and pre-loaded $DEV/pr/res_aptpl_metadata. Cc: Mike Christie Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index b106240..23c518a 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -108,6 +108,8 @@ #define DA_EMULATE_ALUA 0 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ #define DA_ENFORCE_PR_ISIDS 1 +/* Force SPC-3 PR Activate Persistence across Target Power Loss */ +#define DA_FORCE_PR_APTPL 0 #define DA_STATUS_MAX_SECTORS_MIN 16 #define DA_STATUS_MAX_SECTORS_MAX 8192 /* By default don't report non-rotating (solid state) medium */ @@ -680,6 +682,7 @@ struct se_dev_attrib { enum target_prot_type pi_prot_type; enum target_prot_type hw_pi_prot_type; int enforce_pr_isids; + int force_pr_aptpl; int is_nonrot; int emulate_rest_reord; u32 hw_block_size; -- cgit v1.1 From f2fc42b6ac31f4d808da7a9da460dd433a71e976 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Thu, 12 Jun 2014 22:30:34 +0530 Subject: mailbox: rename pl320-ipc specific mailbox.h The patch 30058677 "ARM / highbank: add support for pl320 IPC" added a pl320 IPC specific header file as a generic mailbox.h. This file has been renamed appropriately to allow the introduction of the generic mailbox API framework. Acked-by: Mark Langsdorf Cc: Rafael J. 
Wysocki Signed-off-by: Suman Anna Reviewed-by: Mark Brown Acked-by: Arnd Bergmann --- include/linux/mailbox.h | 17 ----------------- include/linux/pl320-ipc.h | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) delete mode 100644 include/linux/mailbox.h create mode 100644 include/linux/pl320-ipc.h (limited to 'include') diff --git a/include/linux/mailbox.h b/include/linux/mailbox.h deleted file mode 100644 index 5161f63..0000000 --- a/include/linux/mailbox.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -int pl320_ipc_transmit(u32 *data); -int pl320_ipc_register_notifier(struct notifier_block *nb); -int pl320_ipc_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/pl320-ipc.h b/include/linux/pl320-ipc.h new file mode 100644 index 0000000..5161f63 --- /dev/null +++ b/include/linux/pl320-ipc.h @@ -0,0 +1,17 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +int pl320_ipc_transmit(u32 *data); +int pl320_ipc_register_notifier(struct notifier_block *nb); +int pl320_ipc_unregister_notifier(struct notifier_block *nb); -- cgit v1.1 From 2b6d83e2b8b7de82331a6a1dcd64b51020a6031c Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Thu, 12 Jun 2014 22:31:19 +0530 Subject: mailbox: Introduce framework for mailbox Introduce common framework for client/protocol drivers and controller drivers of Inter-Processor-Communication (IPC). Client driver developers should have a look at include/linux/mailbox_client.h to understand the part of the API exposed to client drivers. Similarly controller driver developers should have a look at include/linux/mailbox_controller.h Reviewed-by: Mark Brown Signed-off-by: Jassi Brar --- include/linux/mailbox_client.h | 46 +++++++++++++ include/linux/mailbox_controller.h | 133 +++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 include/linux/mailbox_client.h create mode 100644 include/linux/mailbox_controller.h (limited to 'include') diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h new file mode 100644 index 0000000..307d9ca --- /dev/null +++ b/include/linux/mailbox_client.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2013-2014 Linaro Ltd. + * Author: Jassi Brar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MAILBOX_CLIENT_H +#define __MAILBOX_CLIENT_H + +#include <linux/of.h> +#include <linux/device.h> + +struct mbox_chan; + +/** + * struct mbox_client - User of a mailbox + * @dev: The client device + * @tx_block: If the mbox_send_message should block until data is + * transmitted. + * @tx_tout: Max block period in ms before TX is assumed failure + * @knows_txdone: If the client could run the TX state machine. Usually + * if the client receives some ACK packet for transmission. 
+ * Unused if the controller already has TX_Done/RTR IRQ. + * @rx_callback: Atomic callback to provide client the data received + * @tx_done: Atomic callback to tell client of data transmission + */ +struct mbox_client { + struct device *dev; + bool tx_block; + unsigned long tx_tout; + bool knows_txdone; + + void (*rx_callback)(struct mbox_client *cl, void *mssg); + void (*tx_done)(struct mbox_client *cl, void *mssg, int r); +}; + +struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index); +int mbox_send_message(struct mbox_chan *chan, void *mssg); +void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */ +bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */ +void mbox_free_channel(struct mbox_chan *chan); /* may sleep */ + +#endif /* __MAILBOX_CLIENT_H */ diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h new file mode 100644 index 0000000..d4cf96f --- /dev/null +++ b/include/linux/mailbox_controller.h @@ -0,0 +1,133 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MAILBOX_CONTROLLER_H +#define __MAILBOX_CONTROLLER_H + +#include <linux/of.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/device.h> +#include <linux/completion.h> + +struct mbox_chan; + +/** + * struct mbox_chan_ops - methods to control mailbox channels + * @send_data: The API asks the MBOX controller driver, in atomic + * context try to transmit a message on the bus. Returns 0 if + * data is accepted for transmission, -EBUSY while rejecting + * if the remote hasn't yet read the last data sent. Actual + * transmission of data is reported by the controller via + * mbox_chan_txdone (if it has some TX ACK irq). It must not + * sleep. + * @startup: Called when a client requests the chan. The controller + * could ask clients for additional parameters of communication + * to be provided via client's chan_data. 
This call may + * block. After this call the Controller must forward any + * data received on the chan by calling mbox_chan_received_data. + * The controller may do stuff that need to sleep. + * @shutdown: Called when a client relinquishes control of a chan. + * This call may block too. The controller must not forward + * any received data anymore. + * The controller may do stuff that need to sleep. + * @last_tx_done: If the controller sets 'txdone_poll', the API calls + * this to poll status of last TX. The controller must + * give priority to IRQ method over polling and never + * set both txdone_poll and txdone_irq. Only in polling + * mode 'send_data' is expected to return -EBUSY. + * The controller may do stuff that need to sleep/block. + * Used only if txdone_poll:=true && txdone_irq:=false + * @peek_data: Atomic check for any received data. Return true if controller + * has some data to push to the client. False otherwise. + */ +struct mbox_chan_ops { + int (*send_data)(struct mbox_chan *chan, void *data); + int (*startup)(struct mbox_chan *chan); + void (*shutdown)(struct mbox_chan *chan); + bool (*last_tx_done)(struct mbox_chan *chan); + bool (*peek_data)(struct mbox_chan *chan); +}; + +/** + * struct mbox_controller - Controller of a class of communication channels + * @dev: Device backing this controller + * @ops: Operators that work on each communication chan + * @chans: Array of channels + * @num_chans: Number of channels in the 'chans' array. + * @txdone_irq: Indicates if the controller can report to API when + * the last transmitted data was read by the remote. + * Eg, if it has some TX ACK irq. + * @txdone_poll: If the controller can read but not report the TX + * done. Ex, some register shows the TX status but + * no interrupt rises. Ignored if 'txdone_irq' is set. 
+ * @txpoll_period: If 'txdone_poll' is in effect, the API polls for + * last TX's status after these many millisecs + * @of_xlate: Controller driver specific mapping of channel via DT + * @poll: API private. Used to poll for TXDONE on all channels. + * @node: API private. To hook into list of controllers. + */ +struct mbox_controller { + struct device *dev; + struct mbox_chan_ops *ops; + struct mbox_chan *chans; + int num_chans; + bool txdone_irq; + bool txdone_poll; + unsigned txpoll_period; + struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, + const struct of_phandle_args *sp); + /* Internal to API */ + struct timer_list poll; + struct list_head node; +}; + +/* + * The length of circular buffer for queuing messages from a client. + * 'msg_count' tracks the number of buffered messages while 'msg_free' + * is the index where the next message would be buffered. + * We shouldn't need it too big because every transfer is interrupt + * triggered and if we have lots of data to transfer, the interrupt + * latencies are going to be the bottleneck, not the buffer length. + * Besides, mbox_send_message could be called from atomic context and + * the client could also queue another message from the notifier 'tx_done' + * of the last transfer done. + * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN" + * print, it needs to be taken from config option or somesuch. + */ +#define MBOX_TX_QUEUE_LEN 20 + +/** + * struct mbox_chan - s/w representation of a communication chan + * @mbox: Pointer to the parent/provider of this channel + * @txdone_method: Way to detect TXDone chosen by the API + * @cl: Pointer to the current owner of this channel + * @tx_complete: Transmission completion + * @active_req: Currently active request hook + * @msg_count: No. 
of mssg currently queued + * @msg_free: Index of next available mssg slot + * @msg_data: Hook for data packet + * @lock: Serialise access to the channel + * @con_priv: Hook for controller driver to attach private data + */ +struct mbox_chan { + struct mbox_controller *mbox; + unsigned txdone_method; + struct mbox_client *cl; + struct completion tx_complete; + void *active_req; + unsigned msg_count, msg_free; + void *msg_data[MBOX_TX_QUEUE_LEN]; + spinlock_t lock; /* Serialise access to the channel */ + void *con_priv; +}; + +int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */ +void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */ +void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */ +void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */ + +#endif /* __MAILBOX_CONTROLLER_H */ -- cgit v1.1 From 083bf668cb70e47b84db64856606e94beac87f01 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 14 Mar 2014 14:06:25 +0800 Subject: ACPI: make acpi_create_platform_device() an external API Signed-off-by: Zhang Rui --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 807cbc4..2c24c2c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -432,6 +432,7 @@ static inline bool acpi_driver_match_device(struct device *dev, int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); +struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_PTR(_ptr) (_ptr) #else /* !CONFIG_ACPI */ -- cgit v1.1 From 2bb3a2bf9939f3361e25045f4ef7b136b864c3b8 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Tue, 19 Nov 2013 15:43:52 +0800 Subject: ACPI / fan: use acpi_device_xxx_power instead of acpi_bus equivelant When we have the acpi_device pointer, there is no need to pass the device's handle to the acpi_bus_xxx_power 
functions to get/set/update the device's power state, instead, use the acpi_device_xxx_power functions directly. To make this happen for fan module, export acpi_device_update_power. Signed-off-by: Aaron Lu Signed-off-by: Zhang Rui --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index bcfd808..6ca3281 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -435,6 +435,7 @@ int acpi_device_set_power(struct acpi_device *device, int state); int acpi_bus_init_power(struct acpi_device *device); int acpi_device_fix_up_power(struct acpi_device *device); int acpi_bus_update_power(acpi_handle handle, int *state_p); +int acpi_device_update_power(struct acpi_device *device, int *state_p); bool acpi_bus_power_manageable(acpi_handle handle); #ifdef CONFIG_PM -- cgit v1.1 From 7b83fd9d91a411158f72d36958103c708c3b5a86 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Tue, 25 Mar 2014 10:40:09 +0800 Subject: Thermal: move the KELVIN_TO_MILLICELSIUS macro to thermal.h This macro can be used by other component so move it to a common header, but in a slightly different way: define two macros, one macro with an offset and the other doesn't. Signed-off-by: Aaron Lu Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 0305cde..79ce6b9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -44,6 +44,8 @@ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? 
\ ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) +#define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100) +#define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732) /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" -- cgit v1.1 From 77e337c6e23e3b9d22e09ffec202a80f755a54c2 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 3 Sep 2014 15:13:02 +0800 Subject: Thermal: introduce INT3402 thermal driver ACPI INT3402 device object could report temperature for the memory module. To expose such information to user space, a thermal zone device is registered for it so that the thermal sysfs interface can expose such information for userspace to use. Signed-off-by: Aaron Lu Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 79ce6b9..ef90838 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -46,6 +46,8 @@ #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100) #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732) +#define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off)) +#define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732) /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" -- cgit v1.1 From 174e964ec224c3c591b83a6b5f0984d905d3678f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 9 Oct 2014 12:43:27 -0700 Subject: regulator: Include err.h from consumer.h to fix build failure sh:sh2007_defconfig fails to build with the following error: In file included from include/linux/regulator/machine.h:18:0, from arch/sh/boards/board-sh2007.c:10: include/linux/regulator/consumer.h: In function 
'regulator_get_optional': include/linux/regulator/consumer.h:271:2: error: implicit declaration of function 'ERR_PTR' include/linux/err.h: At top level: include/linux/err.h:23:35: error: conflicting types for 'ERR_PTR' include/linux/regulator/consumer.h:271:9: note: previous implicit declaration of 'ERR_PTR' was here Since consumer.h uses ERR_PTR, it should include err.h. Signed-off-by: Guenter Roeck Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d347c80..f540b14 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -35,6 +35,8 @@ #ifndef __LINUX_REGULATOR_CONSUMER_H_ #define __LINUX_REGULATOR_CONSUMER_H_ +#include <linux/err.h> + struct device; struct notifier_block; struct regmap; -- cgit v1.1 From 7210e4e38f945dfa173c4a4e59ad827c9ecad541 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Oct 2014 19:50:22 +0200 Subject: netfilter: nf_tables: restrict nat/masq expressions to nat chain type This adds the missing validation code to avoid the use of nat/masq from non-nat chains. The validation assumes two possible configuration scenarios: 1) Use of nat from base chain that is not of nat type. Reject this configuration from the nft_*_init() path of the expression. 2) Use of nat from non-base chain. In this case, we have to wait until the non-base chain is referenced by at least one base chain via jump/goto. This is resolved from the nft_*_validate() path which is called from nf_tables_check_loops(). The user gets an -EOPNOTSUPP in both cases. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ include/net/netfilter/nft_masq.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d72923..845c596 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -530,6 +530,9 @@ enum nft_chain_type { NFT_CHAIN_T_MAX }; +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_type type); + struct nft_stats { u64 bytes; u64 pkts; diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h index c72729f..e2a518b 100644 --- a/include/net/netfilter/nft_masq.h +++ b/include/net/netfilter/nft_masq.h @@ -13,4 +13,7 @@ int nft_masq_init(const struct nft_ctx *ctx, int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + #endif /* _NFT_MASQ_H_ */ -- cgit v1.1 From d4c5efdb97773f59a2b711754ca0953f24516739 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 26 Aug 2014 23:16:35 -0400 Subject: random: add and use memzero_explicit() for clearing data zatimend has reported that in his environment (3.16/gcc4.8.3/corei7) memset() calls which clear out sensitive data in extract_{buf,entropy, entropy_user}() in random driver are being optimized away by gcc. Add a helper memzero_explicit() (similarly as explicit_bzero() variants) that can be used in such cases where a variable with sensitive data is being cleared out in the end. Other use cases might also be in crypto code. [ I have put this into lib/string.c though, as it's always built-in and doesn't need any dependencies then. 
] Fixes kernel bugzilla: 82041 Reported-by: zatimend@hotmail.co.uk Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Cc: Alexey Dobriyan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/string.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index d36977e..3b42b37 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -132,7 +132,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, - const void *from, size_t available); + const void *from, size_t available); /** * strstarts - does @str start with @prefix? @@ -144,7 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix) return strncmp(str, prefix, strlen(prefix)) == 0; } -extern size_t memweight(const void *ptr, size_t bytes); +size_t memweight(const void *ptr, size_t bytes); +void memzero_explicit(void *s, size_t count); /** * kbasename - return the last part of a pathname. -- cgit v1.1 From 70f3ce0510afdad7cbaf27ab7ab961377205c782 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 29 Sep 2014 11:47:54 +0200 Subject: mtd: spi-nor: make spi_nor_scan() take a chip type name, not spi_device_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers currently call spi_nor_match_id() and then spi_nor_scan(). This adds a dependency on struct spi_device_id which we want to avoid. Make spi_nor_scan() do it for them. 
Signed-off-by: Ben Hutchings Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 9e6294f..a5a7a08 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -187,32 +187,18 @@ struct spi_nor { /** * spi_nor_scan() - scan the SPI NOR * @nor: the spi_nor structure - * @id: the spi_device_id provided by the driver + * @name: the chip type name * @mode: the read mode supported by the driver * * The drivers can use this fuction to scan the SPI NOR. * In the scanning, it will try to get all the necessary information to * fill the mtd_info{} and the spi_nor{}. * - * The board may assigns a spi_device_id with @id which be used to compared with - * the spi_device_id detected by the scanning. + * The chip type name can be provided through the @name parameter. * * Return: 0 for success, others for failure. */ -int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, - enum read_mode mode); +int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); extern const struct spi_device_id spi_nor_ids[]; -/** - * spi_nor_match_id() - find the spi_device_id by the name - * @name: the name of the spi_device_id - * - * The drivers use this function to find the spi_device_id - * specified by the @name. - * - * Return: returns the right spi_device_id pointer on success, - * and returns NULL on failure. - */ -const struct spi_device_id *spi_nor_match_id(char *name); - #endif -- cgit v1.1 From aa281ac631008b9c18c405c8880007789f659c7d Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 19 Oct 2014 19:38:58 +0300 Subject: Boaz Harrosh - Fix broken email address I no longer have access to the Panasas email. So change to an email that can always reach me. 
Signed-off-by: Boaz Harrosh --- include/linux/pnfs_osd_xdr.h | 2 +- include/scsi/osd_initiator.h | 2 +- include/scsi/osd_ore.h | 2 +- include/scsi/osd_protocol.h | 4 ++-- include/scsi/osd_sec.h | 2 +- include/scsi/osd_sense.h | 2 +- include/scsi/osd_types.h | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h index fe25876..17d7d0d 100644 --- a/include/linux/pnfs_osd_xdr.h +++ b/include/linux/pnfs_osd_xdr.h @@ -5,7 +5,7 @@ * All rights reserved. * * Benny Halevy - * Boaz Harrosh + * Boaz Harrosh * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h index b2e85fd..a09cca8 100644 --- a/include/scsi/osd_initiator.h +++ b/include/scsi/osd_initiator.h @@ -4,7 +4,7 @@ * Copyright (C) 2008 Panasas Inc. All rights reserved. * * Authors: - * Boaz Harrosh + * Boaz Harrosh * Benny Halevy * * This program is free software; you can redistribute it and/or modify diff --git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h index 6ca3265..7a8d2cd 100644 --- a/include/scsi/osd_ore.h +++ b/include/scsi/osd_ore.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2011 - * Boaz Harrosh + * Boaz Harrosh * * Public Declarations of the ORE API * diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h index a2594af..e0ca835 100644 --- a/include/scsi/osd_protocol.h +++ b/include/scsi/osd_protocol.h @@ -4,7 +4,7 @@ * Copyright (C) 2008 Panasas Inc. All rights reserved. 
* * Authors: - * Boaz Harrosh + * Boaz Harrosh * Benny Halevy * * This program is free software; you can redistribute it and/or modify @@ -496,7 +496,7 @@ struct osd_timestamp { */ struct osd_key_identifier { - u8 id[7]; /* if you know why 7 please email bharrosh@panasas.com */ + u8 id[7]; /* if you know why 7 please email ooo@electrozaur.com */ } __packed; /* for osd_capability.format */ diff --git a/include/scsi/osd_sec.h b/include/scsi/osd_sec.h index f96151c..7abeb0f 100644 --- a/include/scsi/osd_sec.h +++ b/include/scsi/osd_sec.h @@ -4,7 +4,7 @@ * Copyright (C) 2008 Panasas Inc. All rights reserved. * * Authors: - * Boaz Harrosh + * Boaz Harrosh * Benny Halevy * * This program is free software; you can redistribute it and/or modify diff --git a/include/scsi/osd_sense.h b/include/scsi/osd_sense.h index 91db543..d52aa93 100644 --- a/include/scsi/osd_sense.h +++ b/include/scsi/osd_sense.h @@ -4,7 +4,7 @@ * Copyright (C) 2008 Panasas Inc. All rights reserved. * * Authors: - * Boaz Harrosh + * Boaz Harrosh * Benny Halevy * * This program is free software; you can redistribute it and/or modify diff --git a/include/scsi/osd_types.h b/include/scsi/osd_types.h index bd0be7e..48e8a16 100644 --- a/include/scsi/osd_types.h +++ b/include/scsi/osd_types.h @@ -4,7 +4,7 @@ * Copyright (C) 2008 Panasas Inc. All rights reserved. * * Authors: - * Boaz Harrosh + * Boaz Harrosh * Benny Halevy * * This program is free software; you can redistribute it and/or modify -- cgit v1.1 From 4846e3784585173f48e267b76f968bcb4a12d3b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 9 Sep 2014 22:18:31 +0200 Subject: watchdog: simplify definitions of WATCHDOG_NOWAYOUT(_INIT_STATUS)? 
Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 2a3038e..395b70e 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -97,13 +97,8 @@ struct watchdog_device { #define WDOG_UNREGISTERED 4 /* Has the device been unregistered */ }; -#ifdef CONFIG_WATCHDOG_NOWAYOUT -#define WATCHDOG_NOWAYOUT 1 -#define WATCHDOG_NOWAYOUT_INIT_STATUS (1 << WDOG_NO_WAY_OUT) -#else -#define WATCHDOG_NOWAYOUT 0 -#define WATCHDOG_NOWAYOUT_INIT_STATUS 0 -#endif +#define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) +#define WATCHDOG_NOWAYOUT_INIT_STATUS (WATCHDOG_NOWAYOUT << WDOG_NO_WAY_OUT) /* Use the following function to check whether or not the watchdog is active */ static inline bool watchdog_active(struct watchdog_device *wdd) -- cgit v1.1 From f974008f07a62171a9dede08250c9a35c2b2b986 Mon Sep 17 00:00:00 2001 From: Olivier Gay Date: Sat, 18 Oct 2014 01:53:39 +0200 Subject: HID: add keyboard input assist hid usages Add keyboard input assist controls usages from approved hid usage table request HUTRR42: http://www.usb.org/developers/hidpage/HUTRR42c.pdf Signed-off-by: Olivier Gay Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/uapi/linux/input.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 1874ebe..a1d7e93 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -739,6 +739,13 @@ struct input_keymap_entry { #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ +#define KEY_KBDINPUTASSIST_PREV 0x260 +#define KEY_KBDINPUTASSIST_NEXT 0x261 +#define KEY_KBDINPUTASSIST_PREVGROUP 0x262 +#define KEY_KBDINPUTASSIST_NEXTGROUP 0x263
+#define KEY_KBDINPUTASSIST_ACCEPT 0x264 +#define KEY_KBDINPUTASSIST_CANCEL 0x265 + #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 -- cgit v1.1 From 5f8b35b6330db14d15fb385cc7b2ccca53dc323e Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 10 Oct 2014 10:39:24 +0800 Subject: ACPICA: Add string for _DDN method name. The _DDN method will be used internally. Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/acnames.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h index f97804b..7461327 100644 --- a/include/acpi/acnames.h +++ b/include/acpi/acnames.h @@ -52,6 +52,7 @@ #define METHOD_NAME__CBA "_CBA" #define METHOD_NAME__CID "_CID" #define METHOD_NAME__CRS "_CRS" +#define METHOD_NAME__DDN "_DDN" #define METHOD_NAME__HID "_HID" #define METHOD_NAME__INI "_INI" #define METHOD_NAME__PLD "_PLD" -- cgit v1.1 From a08f813e58169a8edd01e13d73f60d8561f3ecea Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 10 Oct 2014 10:39:57 +0800 Subject: ACPICA: Events: Reduce source code difference for the ACPI_EVENT_FLAG_HANDLE support. This patch is a partial linuxized result of the following ACPICA commit: ACPICA commit: a73b66c6aa1846d055bb6390d9c9b9902f7d804d Subject: Add "has handler" flag to event/gpe status interfaces. This change adds a new flag, ACPI_EVENT_FLAGS_HAS_HANDLER to the acpi_get_event_status and acpi_get_gpe_status external interfaces. It is set if the event/gpe currently has a handler associated with it. This commit back ports ACPI_EVENT_FLAG_HANDLE from Linux upstream to ACPICA, the flag along with its support code currently can only be found in the Linux upstream and is used by the ACPI sysfs GPE interfaces and the ACPI bus scanning support. Link: https://github.com/acpica/acpica/commit/a73b66c6 Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actypes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index ac03ec8..857830d 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -721,7 +721,7 @@ typedef u32 acpi_event_type; * | | | +--- Enabled for wake? * | | +----- Set? * | +------- Has a handler? - * +----------- + * +------------- */ typedef u32 acpi_event_status; @@ -729,7 +729,7 @@ typedef u32 acpi_event_status; #define ACPI_EVENT_FLAG_ENABLED (acpi_event_status) 0x01 #define ACPI_EVENT_FLAG_WAKE_ENABLED (acpi_event_status) 0x02 #define ACPI_EVENT_FLAG_SET (acpi_event_status) 0x04 -#define ACPI_EVENT_FLAG_HANDLE (acpi_event_status) 0x08 +#define ACPI_EVENT_FLAG_HANDLE (acpi_event_status) 0x08 /* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */ -- cgit v1.1 From 2f8572344e65296d13c1a771cacfea60916d91dc Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 10 Oct 2014 10:40:05 +0800 Subject: ACPICA: Events: Reduce source code difference for the ACPI_EVENT_FLAG_HANDLE renaming. This patch is partial linuxized result of the following ACPICA commit: ACPICA commit: a73b66c6aa1846d055bb6390d9c9b9902f7d804d Subject: Add "has handler" flag to event/gpe status interfaces. This change adds a new flag, ACPI_EVENT_FLAGS_HAS_HANDLER to the acpi_get_event_status and acpi_get_gpe_status external interfaces. It is set if the event/gpe currently has a handler associated with it. This patch contains the code to rename ACPI_EVENT_FLAG_HANDLE to ACPI_EVENT_FLAG_HAS_HANDLER, and the corresponding updates of its usages. Link: https://github.com/acpica/acpica/commit/a73b66c6 Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 857830d..7000e66 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -729,7 +729,7 @@ typedef u32 acpi_event_status; #define ACPI_EVENT_FLAG_ENABLED (acpi_event_status) 0x01 #define ACPI_EVENT_FLAG_WAKE_ENABLED (acpi_event_status) 0x02 #define ACPI_EVENT_FLAG_SET (acpi_event_status) 0x04 -#define ACPI_EVENT_FLAG_HANDLE (acpi_event_status) 0x08 +#define ACPI_EVENT_FLAG_HAS_HANDLER (acpi_event_status) 0x08 /* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */ -- cgit v1.1 From 9fc3d1d09cf7f81d5775712dc64c3db4862ee59d Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 10 Oct 2014 10:40:28 +0800 Subject: ACPICA: Update version to 20140926. Version 20140926. Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 9fc1d71..ab2acf6 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20140828 +#define ACPI_CA_VERSION 0x20140926 #include #include -- cgit v1.1 From 51315cdfa0521fff3059cec5fb8ffecc7f37cba7 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 19 Oct 2014 11:30:27 +0200 Subject: cpufreq: allow driver-specific data This commit extends the cpufreq_driver structure with an additional 'void *driver_data' field that can be filled by the ->probe() function of a cpufreq driver to pass additional custom information to the driver itself. 
A new function called cpufreq_get_driver_data() is added to allow a cpufreq driver to retrieve those driver data, since they are typically needed from a cpufreq_policy->init() callback, which does not have access to the cpufreq_driver structure. This function call is similar to the existing cpufreq_get_current_driver() function call. Signed-off-by: Thomas Petazzoni Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 138336b..503b085 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -219,6 +219,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { char name[CPUFREQ_NAME_LEN]; u8 flags; + void *driver_data; /* needed by all drivers */ int (*init) (struct cpufreq_policy *policy); @@ -312,6 +313,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); const char *cpufreq_get_current_driver(void); +void *cpufreq_get_driver_data(void); static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) -- cgit v1.1 From 34e5a5273d6aa0ee8836bd5d6111b135ffae6931 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 19 Oct 2014 11:30:28 +0200 Subject: cpufreq: cpufreq-dt: extend with platform_data This commit extends the cpufreq-dt driver to take a platform_data structure. This structure is for now used to tell the cpufreq-dt driver the layout of the clocks on the platform, i.e. whether all CPUs share the same clock or whether each CPU has a separate clock. Signed-off-by: Thomas Petazzoni Acked-by: Viresh Kumar Signed-off-by: Rafael J.
Wysocki --- include/linux/cpufreq-dt.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/cpufreq-dt.h (limited to 'include') diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h new file mode 100644 index 0000000..0414009 --- /dev/null +++ b/include/linux/cpufreq-dt.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014 Marvell + * Thomas Petazzoni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CPUFREQ_DT_H__ +#define __CPUFREQ_DT_H__ + +struct cpufreq_dt_platform_data { + /* + * True when each CPU has its own clock to control its + * frequency, false when all CPUs are controlled by a single + * clock. + */ + bool independent_clocks; +}; + +#endif /* __CPUFREQ_DT_H__ */ -- cgit v1.1 From a5b7616c55e188fe3d6ef686bef402d4703ecb62 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 30 Sep 2014 03:14:55 +0100 Subject: mtd: m25p80,spi-nor: Fix module aliases for m25p80 m25p80's device ID table is now spi_nor_ids, defined in spi-nor. The MODULE_DEVICE_TABLE() macro doesn't work with extern definitions, but its use was also removed at the same time. Now if m25p80 is built as a module it doesn't get the necessary aliases to be loaded automatically. A clean solution to this will involve defining the list of device IDs in spi-nor.h and removing struct spi_device_id from the spi-nor API, but this is quite a large change. As a quick fix suitable for stable, copy the device IDs back into m25p80. 
Fixes: 03e296f613af ("mtd: m25p80: use the SPI nor framework") Cc: # 3.16.x: 32f1b7c8352f: mtd: move support for struct flash_platform_data into m25p80 Cc: # 3.16.x: 90e55b3812a1: mtd: m25p80: get rid of spi_get_device_id Cc: # 3.16.x: 70f3ce0510af: mtd: spi-nor: make spi_nor_scan() take a chip type name, not spi_device_id Cc: # 3.16.x Signed-off-by: Ben Hutchings Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index a5a7a08..046a0a2 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -199,6 +199,5 @@ struct spi_nor { * Return: 0 for success, others for failure. */ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); -extern const struct spi_device_id spi_nor_ids[]; #endif -- cgit v1.1 From 5695be142e203167e3cb515ef86a88424f3524eb Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 20 Oct 2014 18:12:32 +0200 Subject: OOM, PM: OOM killed task shouldn't escape PM suspend PM freezer relies on having all tasks frozen by the time devices are getting frozen so that no task will touch them while they are getting frozen. But OOM killer is allowed to kill an already frozen task in order to handle OOM situation. In order to protect from late wake ups OOM killer is disabled after all tasks are frozen. This, however, still keeps a window open when a killed task didn't manage to die by the time freeze_processes finishes. Reduce the race window by checking all tasks after OOM killer has been disabled. This is still not race free completely unfortunately because oom_killer_disable cannot stop an already ongoing OOM killer so a task might still wake up from the fridge and get killed without freeze_processes noticing. Full synchronization of OOM and freezer is, however, too heavy weight for this highly unlikely case.
Introduce and check oom_kills counter which gets incremented early when the allocator enters __alloc_pages_may_oom path and only check all the tasks if the counter changes during the freezing attempt. The counter is updated so early to reduce the race window since allocator checked oom_killer_disabled which is set by PM-freezing code. A false positive will push the PM-freezer into a slow path but that is not a big deal. Changes since v1 - push the re-check loop out of freeze_processes into check_frozen_processes and invert the condition to make the code more readable as per Rafael Fixes: f660daac474c6f (oom: thaw threads if oom killed thread is frozen before deferring) Cc: 3.2+ # 3.2+ Signed-off-by: Michal Hocko Signed-off-by: Rafael J. Wysocki --- include/linux/oom.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/oom.h b/include/linux/oom.h index 647395a..e8d6e10 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p) extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); + +extern int oom_kills_count(void); +extern void note_oom_kill(void); extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int points, unsigned long totalpages, struct mem_cgroup *memcg, nodemask_t *nodemask, -- cgit v1.1 From 9e8beeb79ded25c5c1986f80fb8a7f6815345d5a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 18:58:48 -0600 Subject: audit: Remove "weak" from audit_classify_compat_syscall() declaration There's only one audit_classify_compat_syscall() definition, so it doesn't need to be weak. Remove the "weak" attribute from the audit_classify_compat_syscall() declaration. 
Signed-off-by: Bjorn Helgaas Acked-by: Richard Guy Briggs CC: AKASHI Takahiro --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 36dffec..e58fe7d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -90,7 +90,7 @@ extern unsigned compat_dir_class[]; extern unsigned compat_chattr_class[]; extern unsigned compat_signal_class[]; -extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall); +extern int audit_classify_compat_syscall(int abi, unsigned syscall); /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ -- cgit v1.1 From 96a2adbc6f501996418da9f7afe39bf0e4d006a9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 18:59:09 -0600 Subject: clocksource: Remove "weak" from clocksource_default_clock() declaration kernel/time/jiffies.c provides a default clocksource_default_clock() definition explicitly marked "weak". arch/s390 provides its own definition intended to override the default, but the "weak" attribute on the declaration applied to the s390 definition as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the clocksource_default_clock() declaration so we always prefer a non-weak definition over the weak one, independent of link order. 
Fixes: f1b82746c1e9 ("clocksource: Cleanup clocksource selection") Signed-off-by: Bjorn Helgaas Acked-by: John Stultz Acked-by: Ingo Molnar CC: Daniel Lezcano CC: Martin Schwidefsky --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 653f0e2..abcafaa 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -287,7 +287,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); -extern struct clocksource * __init __weak clocksource_default_clock(void); +extern struct clocksource * __init clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); extern u64 -- cgit v1.1 From 5ab03ac5aaa1f032e071f1b3dc433b7839359c03 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 18:59:41 -0600 Subject: vmcore: Remove "weak" from function declarations For the following functions: elfcorehdr_alloc() elfcorehdr_free() elfcorehdr_read() elfcorehdr_read_notes() remap_oldmem_pfn_range() fs/proc/vmcore.c provides default definitions explicitly marked "weak". arch/s390 provides its own definitions intended to override the default ones, but the "weak" attribute on the declarations applied to the s390 definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declarations so we always prefer a non-weak definition over the weak one, independent of link order. 
Fixes: be8a8d069e50 ("vmcore: introduce ELF header in new memory feature") Fixes: 9cb218131de1 ("vmcore: introduce remap_oldmem_pfn_range()") Signed-off-by: Bjorn Helgaas Acked-by: Andrew Morton Acked-by: Vivek Goyal CC: Michael Holzheu --- include/linux/crash_dump.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 72ab536..3849fce 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -14,14 +14,13 @@ extern unsigned long long elfcorehdr_addr; extern unsigned long long elfcorehdr_size; -extern int __weak elfcorehdr_alloc(unsigned long long *addr, - unsigned long long *size); -extern void __weak elfcorehdr_free(unsigned long long addr); -extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos); -extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); -extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, - unsigned long from, unsigned long pfn, - unsigned long size, pgprot_t prot); +extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); +extern void elfcorehdr_free(unsigned long long addr); +extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos); +extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); +extern int remap_oldmem_pfn_range(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot); extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); -- cgit v1.1 From 107bcc6d566cb40184068d888637f9aefe6252dd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 19:00:25 -0600 Subject: kgdb: Remove "weak" from kgdb_arch_pc() declaration kernel/debug/debug_core.c provides a default kgdb_arch_pc() definition explicitly marked "weak". 
Several architectures provide their own definitions intended to override the default, but the "weak" attribute on the declaration applied to the arch definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declaration so we always prefer a non-weak definition over the weak one, independent of link order. Fixes: 688b744d8bc8 ("kgdb: fix signedness mixmatches, add statics, add declaration to header") Tested-by: Vineet Gupta # for ARC build Signed-off-by: Bjorn Helgaas Reviewed-by: Harvey Harrison --- include/linux/kgdb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 6b06d37..e465bb1 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -283,7 +283,7 @@ struct kgdb_io { extern struct kgdb_arch arch_kgdb_ops; -extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs); +extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs); #ifdef CONFIG_SERIAL_KGDB_NMI extern int kgdb_register_nmi_console(void); -- cgit v1.1 From e0a8400c6923a163265d52798cdd4c33f3f8ab5a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 19:00:47 -0600 Subject: memory-hotplug: Remove "weak" from memory_block_size_bytes() declaration drivers/base/memory.c provides a default memory_block_size_bytes() definition explicitly marked "weak". Several architectures provide their own definitions intended to override the default, but the "weak" attribute on the declaration applied to the arch definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declaration so we always prefer a non-weak definition over the weak one, independent of link order. 
Fixes: 41f107266b19 ("drivers: base: Add prototype declaration to the header file") Signed-off-by: Bjorn Helgaas Acked-by: Andrew Morton CC: Rashika Kheria CC: Nathan Fontenot CC: Anton Blanchard CC: Heiko Carstens CC: Yinghai Lu --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index bb7384e..8b8d8d1 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -35,7 +35,7 @@ struct memory_block { }; int arch_get_memory_phys_device(unsigned long start_pfn); -unsigned long __weak memory_block_size_bytes(void); +unsigned long memory_block_size_bytes(void); /* These states are exposed to userspace as text strings in sysfs */ #define MEM_ONLINE (1<<0) /* exposed to userspace */ -- cgit v1.1 From 271a9c35158910496f6fc3a635c2ed85df6be3d9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 19:01:03 -0600 Subject: uprobes: Remove "weak" from function declarations For the following interfaces: set_swbp() set_orig_insn() is_swbp_insn() is_trap_insn() uprobe_get_swbp_addr() arch_uprobe_ignore() arch_uprobe_copy_ixol() kernel/events/uprobes.c provides default definitions explicitly marked "weak". Some architectures provide their own definitions intended to override the defaults, but the "weak" attribute on the declarations applied to the arch definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declarations so we always prefer a non-weak definition over the weak one, independent of link order. Signed-off-by: Bjorn Helgaas Acked-by: Ingo Molnar Acked-by: Srikar Dronamraju CC: Victor Kamensky CC: Oleg Nesterov CC: David A. 
Long CC: Ananth N Mavinakayanahalli --- include/linux/uprobes.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4f844c6..60beb5d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -98,11 +98,11 @@ struct uprobes_state { struct xol_area *xol_area; }; -extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); -extern bool __weak is_trap_insn(uprobe_opcode_t *insn); -extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); +extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); +extern bool is_swbp_insn(uprobe_opcode_t *insn); +extern bool is_trap_insn(uprobe_opcode_t *insn); +extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); @@ -128,8 +128,8 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); -extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); -extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, +extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); 
+extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); #else /* !CONFIG_UPROBES */ struct uprobes_state { -- cgit v1.1 From 4aa7c6346be395bdf776f82bbb2e3e2bc60bdd2b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: add i_op->dentry_open() Add a new inode operation i_op->dentry_open(). This is for stacked filesystems that want to return a struct file from a different filesystem. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index a957d43..5cf7f67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1528,6 +1528,9 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); + + /* WARNING: probably going away soon, do not use! */ + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2040,6 +2043,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); +extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.1 From 1c118596a7682912106c80007102ce0184c77780 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: export do_splice_direct() to modules Export do_splice_direct() to modules. Needed by overlay filesystem. 
Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5cf7f67..10ed65b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2456,6 +2456,9 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); +extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); + extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -- cgit v1.1 From bd5d08569cc379f8366663a61558a9ce17c2e460 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: export __inode_permission() to modules We need to be able to check inode permissions (but not filesystem implied permissions) for stackable filesystems. Expose this interface for overlayfs. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 10ed65b..5419df7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2257,6 +2257,7 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); +extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); static inline bool execute_ok(struct inode *inode) -- cgit v1.1 From c771d683a62e5d36bc46036f5c07f4f5bb7dda61 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: introduce clone_private_mount() Overlayfs needs a private clone of the mount, so create a function for this and export to modules. 
Signed-off-by: Miklos Szeredi --- include/linux/mount.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 9262e4b..c2c561d 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -81,6 +81,9 @@ extern struct vfsmount *mntget(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); +struct path; +extern struct vfsmount *clone_private_mount(struct path *path); + struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, -- cgit v1.1 From cbdf35bcb833bfd00f0925d7a9a33a21f41ea582 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: export check_sticky() It's already duplicated in btrfs and about to be used in overlayfs too. Move the sticky bit check to an inline helper and call the out-of-line helper only in the unlikely case of the sticky bit being set.
Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5419df7..55cc0a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2259,6 +2259,7 @@ extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); +extern int __check_sticky(struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) { @@ -2745,6 +2746,14 @@ static inline int is_sxid(umode_t mode) return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); } +static inline int check_sticky(struct inode *dir, struct inode *inode) +{ + if (!(dir->i_mode & S_ISVTX)) + return 0; + + return __check_sticky(dir, inode); +} + static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) -- cgit v1.1 From 787fb6bc9682ec7c05fb5d9561b57100fbc1cc41 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: add whiteout support Whiteout isn't actually a new file type, but is represented as a char device (Linus's idea) with 0/0 device number. This has several advantages compared to introducing a new whiteout file type: - no userspace API changes (e.g. 
trivial to make backups of upper layer filesystem, without losing whiteouts) - no fs image format changes (you can boot an old kernel/fsck without whiteout support and things won't break) - implementation is trivial Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 55cc0a3..69118b3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -223,6 +223,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_TIMES_SET (1 << 16) /* + * Whiteout is represented by a char device. The following constants define the + * mode and device number to use. + */ +#define WHITEOUT_MODE 0 +#define WHITEOUT_DEV 0 + +/* * This is the Inode Attributes structure, used for notify_change(). It * uses the above definitions as flags, to know which values have changed. * Also, in this manner, a Filesystem can look at only the values it cares @@ -1398,6 +1405,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); +extern int vfs_whiteout(struct inode *, struct dentry *); /* * VFS dentry helper functions. @@ -1628,6 +1636,9 @@ struct super_operations { #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) +#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ + (inode)->i_rdev == WHITEOUT_DEV) + /* * Inode state bits. Protected by inode->i_lock * -- cgit v1.1 From 0d7a855526dd672e114aff2ac22b60fc6f155b08 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:37 +0200 Subject: vfs: add RENAME_WHITEOUT This adds a new RENAME_WHITEOUT flag. 
This flag makes rename() create a whiteout of source. The whiteout creation is atomic relative to the rename. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index ca1a11b..3735fa0 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -37,6 +37,7 @@ #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ struct fstrim_range { __u64 start; -- cgit v1.1 From 69c433ed2ecd2d3264efd7afec4439524b319121 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:39 +0200 Subject: fs: limit filesystem stacking depth Add a simple read-only counter to super_block that indicates how deep this is in the stack of filesystems. Previously ecryptfs was the only stackable filesystem and it explicitly disallowed multiple layers of itself. Overlayfs, however, can be stacked recursively and also may be stacked on top of ecryptfs or vice versa. To limit the kernel stack usage we must limit the depth of the filesystem stack. Initially the limit is set to 2. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 69118b3..4e41a4a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -261,6 +261,12 @@ struct iattr { */ #include +/* + * Maximum number of layers of fs stack. 
Needs to be limited to + * prevent kernel stack overflow + */ +#define FILESYSTEM_MAX_STACK_DEPTH 2 + /** * enum positive_aop_returns - aop return codes with specific semantics * @@ -1273,6 +1279,11 @@ struct super_block { struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct rcu_head rcu; + + /* + * Indicates how deep in a filesystem stack this SB is + */ + int s_stack_depth; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.1 From 607ec6a5abb637642e5b8199828f39ceae83cfb7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 24 Oct 2014 08:57:01 -0200 Subject: Revert "[media] v4l2-dv-timings: fix a sparse warning" Sparse got a fix for that. Also, it is suspected that reverting this patch might cause compilation breakages on userspace. So, revert it. This reverts commit 5c2cacc1028917168b0f7650008dceaa6f7e3fe2. Requested-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-dv-timings.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index 6a0764c..6c8f159 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -21,8 +21,17 @@ #ifndef _V4L2_DV_TIMINGS_H #define _V4L2_DV_TIMINGS_H +#if __GNUC__ < 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ < 6)) +/* Sadly gcc versions older than 4.6 have a bug in how they initialize + anonymous unions where they require additional curly brackets. + This violates the C1x standard. This workaround adds the curly brackets + if needed. */ #define V4L2_INIT_BT_TIMINGS(_width, args...) \ { .bt = { _width , ## args } } +#else +#define V4L2_INIT_BT_TIMINGS(_width, args...) \ + .bt = { _width , ## args } +#endif /* CEA-861-E timings (i.e. 
standard HDTV timings) */ -- cgit v1.1 From 571ee1b6859869a09ed718d390aac2b9414646a2 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 9 Oct 2014 18:30:08 +0800 Subject: kvm: vfio: fix unregister kvm_device_ops of vfio After commit 80ce163 (KVM: VFIO: register kvm_device_ops dynamically), kvm_device_ops of vfio can be registered dynamically. Commit 3c3c29fd (kvm-vfio: do not use module_init) move the dynamic register invoked by kvm_init in order to fix broke unloading of the kvm module. However, kvm_device_ops of vfio is unregistered after rmmod kvm-intel module which lead to device type collision detection warning after kvm-intel module reinsmod. WARNING: CPU: 1 PID: 10358 at /root/cathy/kvm/arch/x86/kvm/../../../virt/kvm/kvm_main.c:3289 kvm_init+0x234/0x282 [kvm]() Modules linked in: kvm_intel(O+) kvm(O) nfsv3 nfs_acl auth_rpcgss oid_registry nfsv4 dns_resolver nfs fscache lockd sunrpc pci_stub bridge stp llc autofs4 8021q cpufreq_ondemand ipv6 joydev microcode pcspkr igb i2c_algo_bit ehci_pci ehci_hcd e1000e i2c_i801 ixgbe ptp pps_core hwmon mdio tpm_tis tpm ipmi_si ipmi_msghandler acpi_cpufreq isci libsas scsi_transport_sas button dm_mirror dm_region_hash dm_log dm_mod [last unloaded: kvm_intel] CPU: 1 PID: 10358 Comm: insmod Tainted: G W O 3.17.0-rc1 #2 Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013 0000000000000cd9 ffff880ff08cfd18 ffffffff814a61d9 0000000000000cd9 0000000000000000 ffff880ff08cfd58 ffffffff810417b7 ffff880ff08cfd48 ffffffffa045bcac ffffffffa049c420 0000000000000040 00000000000000ff Call Trace: [] dump_stack+0x49/0x60 [] warn_slowpath_common+0x7c/0x96 [] ? kvm_init+0x234/0x282 [kvm] [] warn_slowpath_null+0x15/0x17 [] kvm_init+0x234/0x282 [kvm] [] vmx_init+0x1bf/0x42a [kvm_intel] [] ? vmx_check_processor_compat+0x64/0x64 [kvm_intel] [] do_one_initcall+0xe3/0x170 [] ? __vunmap+0xad/0xb8 [] do_init_module+0x2b/0x174 [] load_module+0x43e/0x569 [] ? do_init_module+0x174/0x174 [] ? 
copy_module_from_user+0x39/0x82 [] ? module_sect_show+0x20/0x20 [] SyS_init_module+0x54/0x81 [] system_call_fastpath+0x16/0x1b ---[ end trace 0626f4a3ddea56f3 ]--- The bug can be reproduced by: rmmod kvm_intel.ko insmod kvm_intel.ko without rmmod/insmod kvm.ko This patch fixes the bug by unregistering kvm_device_ops of vfio when the kvm-intel module is removed. Reported-by: Liu Rongrong Fixes: 3c3c29fd0d7cddc32862c350d0700ce69953e3bd Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 28be31f..ea53b04 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); +void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -- cgit v1.1 From a1fc198046181304d28a018dba048b718f2d7ce4 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 14 Oct 2014 20:41:49 +0300 Subject: ARM: i.MX6: Fix "emi" clock name typo Fix a typo error, the "emi" names refer to the eim clocks. The change fixes typo in EIM and EIM_SLOW pre-output dividers and selectors clock names. Notably EIM_SLOW clock itself is named correctly. 
Signed-off-by: Steve Longerbeam [vladimir_zapolskiy@mentor.com: ported to v3.17] Signed-off-by: Vladimir Zapolskiy Cc: Sascha Hauer Signed-off-by: Shawn Guo --- include/dt-bindings/clock/imx6qdl-clock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/imx6qdl-clock.h b/include/dt-bindings/clock/imx6qdl-clock.h index ddaef86..b690cdb 100644 --- a/include/dt-bindings/clock/imx6qdl-clock.h +++ b/include/dt-bindings/clock/imx6qdl-clock.h @@ -62,8 +62,8 @@ #define IMX6QDL_CLK_USDHC3_SEL 50 #define IMX6QDL_CLK_USDHC4_SEL 51 #define IMX6QDL_CLK_ENFC_SEL 52 -#define IMX6QDL_CLK_EMI_SEL 53 -#define IMX6QDL_CLK_EMI_SLOW_SEL 54 +#define IMX6QDL_CLK_EIM_SEL 53 +#define IMX6QDL_CLK_EIM_SLOW_SEL 54 #define IMX6QDL_CLK_VDO_AXI_SEL 55 #define IMX6QDL_CLK_VPU_AXI_SEL 56 #define IMX6QDL_CLK_CKO1_SEL 57 @@ -106,8 +106,8 @@ #define IMX6QDL_CLK_USDHC4_PODF 94 #define IMX6QDL_CLK_ENFC_PRED 95 #define IMX6QDL_CLK_ENFC_PODF 96 -#define IMX6QDL_CLK_EMI_PODF 97 -#define IMX6QDL_CLK_EMI_SLOW_PODF 98 +#define IMX6QDL_CLK_EIM_PODF 97 +#define IMX6QDL_CLK_EIM_SLOW_PODF 98 #define IMX6QDL_CLK_VPU_AXI_PODF 99 #define IMX6QDL_CLK_CKO1_PODF 100 #define IMX6QDL_CLK_AXI 101 -- cgit v1.1 From dda02fd6278d9e995850b3c1dba484f17cbe4de4 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Fri, 24 Oct 2014 17:47:57 +0800 Subject: mm, cma: make parameters order consistent in func declaration and definition In the current code, the base and size parameters order is not consistent in functions declaration and definition. If someone calls these functions according to the declaration parameters order in cma.h, he will run into some bug and it's hard to find the reason. This patch makes the parameters order consistent in functions declaration and definition. 
Signed-off-by: Weijie Yang Acked-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski --- include/linux/cma.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cma.h b/include/linux/cma.h index 0430ed0..a93438b 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -18,12 +18,12 @@ struct cma; extern phys_addr_t cma_get_base(struct cma *cma); extern unsigned long cma_get_size(struct cma *cma); -extern int __init cma_declare_contiguous(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, +extern int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, struct cma **res_cma); -extern int cma_init_reserved_mem(phys_addr_t size, - phys_addr_t base, int order_per_bit, +extern int cma_init_reserved_mem(phys_addr_t base, + phys_addr_t size, int order_per_bit, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); extern bool cma_release(struct cma *cma, struct page *pages, int count); -- cgit v1.1 From a69d82b9bdf1e53e94423048e8bda8c5f5a3dd4e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 7 Oct 2014 17:47:36 +0200 Subject: power_supply: Add no_thermal property to prevent recursive get_temp calls Add a 'no_thermal' property to the power supply class. If true then thermal zone won't be created for this power supply in power_supply_register(). Power supply drivers may want to set it if they support POWER_SUPPLY_PROP_TEMP and they are forwarding this get property call to other thermal zone. If they won't set it lockdep may report false positive deadlock for thermal zone's mutex because of nested calls to thermal_zone_get_temp(). First is the call to thermal_zone_get_temp() of the driver's thermal zone. Thermal core gets POWER_SUPPLY_PROP_TEMP property from this driver. 
The driver then calls other thermal zone thermal_zone_get_temp() and returns result. Example of such driver is charger manager. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 3ed0496..096dbce 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -200,6 +200,12 @@ struct power_supply { void (*external_power_changed)(struct power_supply *psy); void (*set_charged)(struct power_supply *psy); + /* + * Set if thermal zone should not be created for this power supply. + * For example for virtual supplies forwarding calls to actual + * sensors or other supplies. + */ + bool no_thermal; /* For APM emulation, think legacy userspace. */ int use_for_apm; -- cgit v1.1 From bdbe81445407644492b9ac69a24d35e3202d773b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 13 Oct 2014 15:34:30 +0200 Subject: power: charger-manager: Fix accessing invalidated power supply after fuel gauge unbind The charger manager obtained reference to fuel gauge power supply in probe with power_supply_get_by_name() for later usage. However if fuel gauge driver was removed and re-added then this reference would point to old power supply (from driver which was removed). This lead to accessing old (and probably invalid) memory which could be observed with: $ echo "12-0036" > /sys/bus/i2c/drivers/max17042/unbind $ echo "12-0036" > /sys/bus/i2c/drivers/max17042/bind $ cat /sys/devices/virtual/power_supply/battery/capacity [ 240.480084] INFO: task cat:1393 blocked for more than 120 seconds. [ 240.484799] Not tainted 3.17.0-next-20141007-00028-ge60b6dd79570 #203 [ 240.491782] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
[ 240.499589] cat D c0469530 0 1393 1 0x00000000 [ 240.505947] [] (__schedule) from [] (schedule_preempt_disabled+0x14/0x20) [ 240.514449] [] (schedule_preempt_disabled) from [] (mutex_lock_nested+0x1bc/0x458) [ 240.523736] [] (mutex_lock_nested) from [] (regmap_read+0x30/0x60) [ 240.531647] [] (regmap_read) from [] (max17042_get_property+0x2e8/0x350) [ 240.540055] [] (max17042_get_property) from [] (charger_get_property+0x264/0x348) [ 240.549252] [] (charger_get_property) from [] (power_supply_show_property+0x48/0x1e0) [ 240.558808] [] (power_supply_show_property) from [] (dev_attr_show+0x1c/0x48) [ 240.567664] [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x84/0x104) [ 240.575814] [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x24/0x28) [ 240.584061] [] (kernfs_seq_show) from [] (seq_read+0x1b0/0x484) [ 240.591702] [] (seq_read) from [] (vfs_read+0x88/0x144) [ 240.598640] [] (vfs_read) from [] (SyS_read+0x40/0x8c) [ 240.605507] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x48) [ 240.612952] 4 locks held by cat/1393: [ 240.616589] #0: (&p->lock){+.+.+.}, at: [] seq_read+0x30/0x484 [ 240.623414] #1: (&of->mutex){+.+.+.}, at: [] kernfs_seq_start+0x1c/0x8c [ 240.631086] #2: (s_active#31){++++.+}, at: [] kernfs_seq_start+0x24/0x8c [ 240.638777] #3: (&map->mutex){+.+...}, at: [] regmap_read+0x30/0x60 The charger-manager should get reference to fuel gauge power supply on each use of get_property callback. The thermal zone 'tzd' field of power supply should not be used because of the same reason. Additionally this change solves also the issue with nested thermal_zone_get_temp() calls and related false lockdep positive for deadlock for thermal zone's mutex [1]. When fuel gauge is used as source of temperature then the charger manager forwards its get_temp calls to fuel gauge thermal zone. So actually different mutexes are used (one for charger manager thermal zone and second for fuel gauge thermal zone) but for lockdep this is one class of mutex. 
The recursion is removed by retrieving temperature through power supply's get_property(). In case external thermal zone is used ('cm-thermal-zone' property is present in DTS) the recursion does not exist. Charger manager simply exports POWER_SUPPLY_PROP_TEMP_AMBIENT property (instead of POWER_SUPPLY_PROP_TEMP) thus no thermal zone is created for this power supply. [1] https://lkml.org/lkml/2014/10/6/309 Signed-off-by: Krzysztof Kozlowski Cc: Fixes: 3bb3dbbd56ea ("power_supply: Add initial Charger-Manager driver") Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index 07e7945..5d90d32 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -253,7 +253,6 @@ struct charger_manager { struct device *dev; struct charger_desc *desc; - struct power_supply *fuel_gauge; struct power_supply **charger_stat; #ifdef CONFIG_THERMAL -- cgit v1.1 From cdaf3e15385d3232b52287e50692506f8fd01a09 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 13 Oct 2014 15:34:31 +0200 Subject: power: charger-manager: Fix accessing invalidated power supply after charger unbind The charger manager obtained in probe references to power supplies for all chargers with power_supply_get_by_name() for later usage. However if such charger driver was removed then this reference would point to old power supply (from driver which was removed). This lead to accessing invalid memory which could be observed with: $ echo "max77693-charger" > /sys/bus/platform/drivers/max77693-charger/unbind $ grep . /sys/devices/virtual/power_supply/battery/charger.0/* $ grep . 
/sys/devices/virtual/power_supply/battery/* [ 15.339817] Unable to handle kernel paging request at virtual address 0001c12c [ 15.346187] pgd = edd08000 [ 15.348814] [0001c12c] *pgd=6dce2831, *pte=00000000, *ppte=00000000 [ 15.355075] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM [ 15.360967] Modules linked in: [ 15.364010] CPU: 2 PID: 1388 Comm: grep Not tainted 3.17.0-next-20141007-00027-ga95e761db1b0 #245 [ 15.372859] task: ee03ad00 ti: edcf6000 task.ti: edcf6000 [ 15.378241] PC is at 0x1c12c [ 15.381113] LR is at is_ext_pwr_online+0x30/0x6c [ 15.385706] pc : [<0001c12c>] lr : [] psr: a0000013 [ 15.385706] sp : edcf7e88 ip : 00000000 fp : 00000000 [ 15.397161] r10: eeb02c08 r9 : c04b1f84 r8 : eeb02c00 [ 15.402369] r7 : edc69a10 r6 : eea6ac10 r5 : eea6ac10 r4 : 00000004 [ 15.408878] r3 : 0001c12c r2 : edcf7e8c r1 : 00000004 r0 : ee914418 [ 15.415390] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user [ 15.422506] Control: 10c5387d Table: 6dd0804a DAC: 00000015 [ 15.428236] Process grep (pid: 1388, stack limit = 0xedcf6240) [ 15.434050] Stack: (0xedcf7e88 to 0xedcf8000) [ 15.438395] 7e80: ee03ad00 00000000 edcf7f80 eea6aca8 edcf7ec4 c033b7b0 [ 15.446554] 7ea0: 00000001 ee1cc3f0 00000004 c06e1e44 eebdc000 c06e1e44 eeb02c00 c0337144 [ 15.454713] 7ec0: ee2dac68 c005cffc ee1cc3c0 c06e1e44 00000fff 00001000 eebdc000 c0278ca8 [ 15.462872] 7ee0: c0278c8c ee1cc3c0 eeb7ce00 c014422c edcf7f20 00008000 ee1cc3c0 ee9a48c0 [ 15.471030] 7f00: 00000001 00000001 edcf7f80 c0142d94 c0142d70 c01060f4 00021000 ee1cc3f0 [ 15.479190] 7f20: 00000000 00000000 c06a2150 eebdc000 2e7ec000 ee9a48c0 00008000 00021000 [ 15.487349] 7f40: edcf7f80 00008000 edcf6000 00021000 00021000 c00e39a4 00000000 ee9a48c0 [ 15.495508] 7f60: 00004000 00000000 00000000 ee9a48c0 ee9a48c0 00008000 00021000 c00e3aa0 [ 15.503668] 7f80: 00000000 00000000 0001f2e0 0001f2e0 00021000 00001000 00000003 c000f364 [ 15.511826] 7fa0: 00000000 c000f1a0 0001f2e0 00021000 00000003 00021000 00008000 00000000 [ 
15.519986] 7fc0: 0001f2e0 00021000 00001000 00000003 00000001 000205e8 00000000 00021000 [ 15.528145] 7fe0: 00008000 bebbe910 0000a7ad b6edc49c 60000010 00000003 aaaaaaaa aaaaaaaa [ 15.536320] [] (is_ext_pwr_online) from [] (charger_get_property+0x170/0x314) [ 15.545164] [] (charger_get_property) from [] (power_supply_show_property+0x48/0x20c) [ 15.554719] [] (power_supply_show_property) from [] (dev_attr_show+0x1c/0x48) [ 15.563577] [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x84/0x104) [ 15.571725] [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x24/0x28) [ 15.579973] [] (kernfs_seq_show) from [] (seq_read+0x1b0/0x484) [ 15.587614] [] (seq_read) from [] (vfs_read+0x88/0x144) [ 15.594552] [] (vfs_read) from [] (SyS_read+0x40/0x8c) [ 15.601417] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x48) [ 15.608877] Code: bad PC value [ 15.611991] ---[ end trace a88fcc95208db283 ]--- The charger-manager should get reference to charger power supply on each use of get_property callback. Signed-off-by: Krzysztof Kozlowski Cc: Fixes: 3bb3dbbd56ea ("power_supply: Add initial Charger-Manager driver") Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index 5d90d32..e97fc65 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -253,8 +253,6 @@ struct charger_manager { struct device *dev; struct charger_desc *desc; - struct power_supply **charger_stat; - #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd_batt; #endif -- cgit v1.1 From e999dbc254044e8d2a5818d92d205f65bae28f37 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Oct 2014 17:13:57 +0200 Subject: Revert "block: all blk-mq requests are tagged" This reverts commit fb3ccb5da71273e7f0d50b50bc879e50cedd60e7. SCSI-2/SPI actually needs the tagged/untagged flag in the request to work properly. 
Revert this patch and add a follow on to set it in the right place. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Acked-by: Jens Axboe Reported-by: Meelis Roos Tested-by: Meelis Roos Cc: stable@vger.kernel.org --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0207a78..51d0dc2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1136,8 +1136,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) /* * tag stuff */ -#define blk_rq_tagged(rq) \ - ((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED)) +#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) extern int blk_queue_start_tag(struct request_queue *, struct request *); extern struct request *blk_queue_find_tag(struct request_queue *, int); extern void blk_queue_end_tag(struct request_queue *, struct request *); -- cgit v1.1 From b1dd2aac4cc0892b82ec60232ed37e3b0af776cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Oct 2014 17:13:58 +0200 Subject: scsi: set REQ_QUEUE for the blk-mq case To generate the right SPI tag messages we need to properly set QUEUE_FLAG_QUEUED in the request_queue and mirror it to the request. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Acked-by: Jens Axboe Reported-by: Meelis Roos Tested-by: Meelis Roos Cc: stable@vger.kernel.org --- include/scsi/scsi_tcq.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index e645835..56ed843 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -67,8 +67,9 @@ static inline void scsi_activate_tcq(struct scsi_device *sdev, int depth) if (!sdev->tagged_supported) return; - if (!shost_use_blk_mq(sdev->host) && - !blk_queue_tagged(sdev->request_queue)) + if (shost_use_blk_mq(sdev->host)) + queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, sdev->request_queue); + else if (!blk_queue_tagged(sdev->request_queue)) blk_queue_init_tags(sdev->request_queue, depth, sdev->host->bqt); @@ -81,8 +82,7 @@ static inline void scsi_activate_tcq(struct scsi_device *sdev, int depth) **/ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) { - if (!shost_use_blk_mq(sdev->host) && - blk_queue_tagged(sdev->request_queue)) + if (blk_queue_tagged(sdev->request_queue)) blk_queue_free_tags(sdev->request_queue); scsi_adjust_queue_depth(sdev, 0, depth); } -- cgit v1.1 From fcd964dda5ece2fa77f78f843bc3455348787282 Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Tue, 7 Oct 2014 17:29:07 +0800 Subject: sched: Update comments for CLONE_NEWNS Signed-off-by: Chen Hanxiao Acked-by: Serge E. 
Hallyn Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/1412674147-8941-1-git-send-email-chenhanxiao@cn.fujitsu.com Signed-off-by: Ingo Molnar --- include/uapi/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 34f9d73..b932be9 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -13,7 +13,7 @@ #define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ #define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ #define CLONE_THREAD 0x00010000 /* Same thread group? */ -#define CLONE_NEWNS 0x00020000 /* New namespace group? */ +#define CLONE_NEWNS 0x00020000 /* New mount namespace group */ #define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */ #define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */ #define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */ -- cgit v1.1 From b438b1ab35507bbccc28d13f0b8286ffcf24019d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Oct 2014 22:16:36 -0700 Subject: perf: Fix typos in sample code in the perf_event.h header struct perf_event_mmap_page has members called "index" and "cap_user_rdpmc". Spell them correctly in the examples. 
Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/320ba26391a8123cc16e5f02d24d34bd404332fd.1412313343.git.luto@amacapital.net Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9269de2..9d84540 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -364,7 +364,7 @@ struct perf_event_mmap_page { /* * Bits needed to read the hw events in user-space. * - * u32 seq, time_mult, time_shift, idx, width; + * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; * u64 cyc, time_offset; * s64 pmc = 0; @@ -383,11 +383,11 @@ struct perf_event_mmap_page { * time_shift = pc->time_shift; * } * - * idx = pc->index; + * index = pc->index; * count = pc->offset; - * if (pc->cap_usr_rdpmc && idx) { + * if (pc->cap_user_rdpmc && index) { * width = pc->pmc_width; - * pmc = rdpmc(idx - 1); + * pmc = rdpmc(index - 1); * } * * barrier(); @@ -415,7 +415,7 @@ struct perf_event_mmap_page { }; /* - * If cap_usr_rdpmc this field provides the bit-width of the value + * If cap_user_rdpmc this field provides the bit-width of the value * read using the rdpmc() or equivalent instruction. This can be used * to sign extend the result like: * @@ -439,10 +439,10 @@ struct perf_event_mmap_page { * * Where time_offset,time_mult,time_shift and cyc are read in the * seqcount loop described above. 
This delta can then be added to - * enabled and possible running (if idx), improving the scaling: + * enabled and possible running (if index), improving the scaling: * * enabled += delta; - * if (idx) + * if (index) * running += delta; * * quot = count / running; -- cgit v1.1 From 5631b8fba640a4ab2f8a954f63a603fa34eda96b Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Sat, 25 Oct 2014 15:09:42 -0700 Subject: compiler/gcc4+: Remove inaccurate comment about 'asm goto' miscompiles The bug referenced by the comment in this commit was not completely fixed in GCC 4.8.2, as I mentioned in a thread back in February: https://lkml.org/lkml/2014/2/12/797 The conclusion at that time was to make the quirk unconditional until the bug could be found and fixed in GCC. Unfortunately, when I submitted the patch (commit a9f18034) I left a comment in that claimed the bug was fixed in GCC 4.8.2+. This comment is inaccurate, and should be removed. Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar Cc: Jakub Jelinek Cc: Richard Henderson Cc: Linus Torvalds Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1414274982-14040-1-git-send-email-steven@uplinklabs.net Cc: Ingo Molnar --- include/linux/compiler-gcc4.h | 1 - include/linux/compiler-gcc5.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 2507fd2..d1a5582 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -71,7 +71,6 @@ * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 * * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * Fixed in GCC 4.8.2 and later versions. * * (asm goto is automatically volatile - the naming reflects this.) 
*/ diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h index cdd1cc2..c8c5659 100644 --- a/include/linux/compiler-gcc5.h +++ b/include/linux/compiler-gcc5.h @@ -53,7 +53,6 @@ * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 * * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * Fixed in GCC 4.8.2 and later versions. * * (asm goto is automatically volatile - the naming reflects this.) */ -- cgit v1.1 From 8c3e434769b1707fd2d24de5a2eb25fedc634c4a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 26 Oct 2014 15:18:42 -0400 Subject: drm/radeon: remove invalid pci id 0x4c6e is a secondary device id so should not be used by the driver. Noticed-by: Mark Kettenis Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- include/drm/drm_pciids.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index e973540..2dd405c 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -74,7 +74,6 @@ {0x1002, 0x4C64, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \ {0x1002, 0x4C66, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \ {0x1002, 0x4C67, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \ - {0x1002, 0x4C6E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV280|RADEON_IS_MOBILITY}, \ {0x1002, 0x4E44, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ {0x1002, 0x4E45, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ {0x1002, 0x4E46, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ -- cgit v1.1 From d7e29933969e5ca7c112ce1368a07911f4485dc2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Oct 2014 09:15:54 -0700 Subject: rcu: Make rcu_barrier() understand about missing rcuo kthreads Commit 35ce7f29a44a (rcu: Create rcuo kthreads only for onlined CPUs) avoids creating rcuo kthreads for CPUs that never come online. 
This fixes a bug in many instances of firmware: Instead of lying about their age, these systems instead lie about the number of CPUs that they have. Before commit 35ce7f29a44a, this could result in huge numbers of useless rcuo kthreads being created. It appears that experience indicates that I should have told the people suffering from this problem to fix their broken firmware, but I instead produced what turned out to be a partial fix. The missing piece supplied by this commit makes sure that rcu_barrier() knows not to post callbacks for no-CBs CPUs that have not yet come online, because otherwise rcu_barrier() will hang on systems having firmware that lies about the number of CPUs. It is tempting to simply have rcu_barrier() refuse to post a callback on any no-CBs CPU that does not have an rcuo kthread. This unfortunately does not work because rcu_barrier() is required to wait for all pending callbacks. It is therefore required to wait even for those callbacks that cannot possibly be invoked. Even if doing so hangs the system. Given that posting a callback to a no-CBs CPU that does not yet have an rcuo kthread can hang rcu_barrier(), it is tempting to report an error in this case. Unfortunately, this will result in false positives at boot time, when it is perfectly legal to post callbacks to the boot CPU before the scheduler has started, in other words, before it is legal to invoke rcu_barrier(). So this commit instead has rcu_barrier() avoid posting callbacks to CPUs having neither rcuo kthread nor pending callbacks, and has it complain bitterly if it finds CPUs having no rcuo kthread but some pending callbacks. And when rcu_barrier() does find CPUs having no rcuo kthread but pending callbacks, as noted earlier, it has no choice but to hang indefinitely. Reported-by: Yanko Kaneti Reported-by: Jay Vosburgh Reported-by: Meelis Roos Reported-by: Eric B Munson Signed-off-by: Paul E. 
McKenney Tested-by: Eric B Munson Tested-by: Jay Vosburgh Tested-by: Yanko Kaneti Tested-by: Kevin Fenzi Tested-by: Meelis Roos --- include/trace/events/rcu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 9b56f37..e335e7d 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -660,18 +660,18 @@ TRACE_EVENT(rcu_torture_read, /* * Tracepoint for _rcu_barrier() execution. The string "s" describes * the _rcu_barrier phase: - * "Begin": rcu_barrier_callback() started. - * "Check": rcu_barrier_callback() checking for piggybacking. - * "EarlyExit": rcu_barrier_callback() piggybacked, thus early exit. - * "Inc1": rcu_barrier_callback() piggyback check counter incremented. - * "Offline": rcu_barrier_callback() found offline CPU - * "OnlineNoCB": rcu_barrier_callback() found online no-CBs CPU. - * "OnlineQ": rcu_barrier_callback() found online CPU with callbacks. - * "OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks. + * "Begin": _rcu_barrier() started. + * "Check": _rcu_barrier() checking for piggybacking. + * "EarlyExit": _rcu_barrier() piggybacked, thus early exit. + * "Inc1": _rcu_barrier() piggyback check counter incremented. + * "OfflineNoCB": _rcu_barrier() found callback on never-online CPU + * "OnlineNoCB": _rcu_barrier() found online no-CBs CPU. + * "OnlineQ": _rcu_barrier() found online CPU with callbacks. + * "OnlineNQ": _rcu_barrier() found online CPU, no callbacks. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "LastCB": An rcu_barrier_callback() invoked the last callback. - * "Inc2": rcu_barrier_callback() piggyback check counter incremented. + * "Inc2": _rcu_barrier() piggyback check counter incremented. 
* The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument * is the count of remaining callbacks, and "done" is the piggybacking count. */ -- cgit v1.1 From ebcf34f3d4be11f994340aff629f3c17171a4f65 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 26 Oct 2014 19:14:06 -0700 Subject: skbuff.h: fix kernel-doc warning for headers_end Fix kernel-doc warning in <linux/skbuff.h> by making both headers_start and headers_end private fields. Warning(..//include/linux/skbuff.h:654): No description found for parameter 'headers_end[0]' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a59d934..5884f95 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -557,7 +557,9 @@ struct sk_buff { /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() */ + /* private: */ __u32 headers_start[0]; + /* public: */ /* if you move pkt_type around you also must adapt those constants */ #ifdef __BIG_ENDIAN_BITFIELD @@ -642,7 +644,9 @@ struct sk_buff { __u16 network_header; __u16 mac_header; + /* private: */ __u32 headers_end[0]; + /* public: */ /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; -- cgit v1.1 From 1efed2d06c703489342ab6af2951683e07509c99 Mon Sep 17 00:00:00 2001 From: Olivier Blin Date: Fri, 24 Oct 2014 19:43:00 +0200 Subject: usbnet: add a callback for set_rx_mode To delegate promiscuous mode and multicast filtering to the subdriver. Signed-off-by: Olivier Blin Signed-off-by: David S. 
Miller --- include/linux/usb/usbnet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 26088fe..d9a4905 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -78,6 +78,7 @@ struct usbnet { # define EVENT_NO_RUNTIME_PM 9 # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 +# define EVENT_SET_RX_MODE 12 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -159,6 +160,9 @@ struct driver_info { /* called by minidriver when receiving indication */ void (*indication)(struct usbnet *dev, void *ind, int indlen); + /* rx mode change (device changes address list filtering) */ + void (*set_rx_mode)(struct usbnet *dev); + /* for new devices, use the descriptor-reading code instead */ int in; /* rx endpoint */ int out; /* tx endpoint */ -- cgit v1.1 From 54ef6df3f3f1353d99c80c437259d317b2cd1cbd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Oct 2014 21:11:27 -0700 Subject: rcu: Provide counterpart to rcu_dereference() for non-RCU situations Although rcu_dereference() and friends can be used in situations where object lifetimes are being managed by something other than RCU, the resulting sparse and lockdep-RCU noise can be annoying. This commit therefore supplies a lockless_dereference(), which provides the protection for dereferences without the RCU-related debugging noise. Reported-by: Al Viro Signed-off-by: Paul E. 
McKenney Signed-off-by: Al Viro --- include/linux/rcupdate.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a4a819f..53ff1a7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -617,6 +617,21 @@ static inline void rcu_preempt_sleep_check(void) #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. + */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + +/** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) -- cgit v1.1 From d1b72cc6d8cb766c802fdc70a5edc2f0ba8a2b57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 27 Oct 2014 15:42:01 +0100 Subject: overlayfs: fix lockdep misannotation In an overlay directory that shadows an empty lower directory, say /mnt/a/empty102, do: touch /mnt/a/empty102/x unlink /mnt/a/empty102/x rmdir /mnt/a/empty102 It's actually harmless, but needs another level of nesting between I_MUTEX_CHILD and I_MUTEX_NORMAL. 
Signed-off-by: Miklos Szeredi Tested-by: David Howells Signed-off-by: Al Viro --- include/linux/fs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e41a4a..0103626 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,11 +639,13 @@ static inline int inode_unhashed(struct inode *inode) * 2: child/target * 3: xattr * 4: second non-directory - * The last is for certain operations (such as rename) which lock two + * 5: second parent (when locking independent directories in rename) + * + * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two * non-directories at once. * * The locking order between these classes is - * parent -> child -> normal -> xattr -> second non-directory + * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { @@ -651,7 +653,8 @@ enum inode_i_mutex_lock_class I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, - I_MUTEX_NONDIR2 + I_MUTEX_NONDIR2, + I_MUTEX_PARENT2, }; void lock_two_nondirectories(struct inode *, struct inode*); -- cgit v1.1 From cb1a5ab6ece7a37da4ac98ee26b0475b7c3ea79e Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 28 Oct 2014 20:27:43 -0600 Subject: block: Fix merge logic when CONFIG_BLK_DEV_INTEGRITY is not defined Commit 4eaf99beadce switched to returning bool and as a result reversed the logic of the integrity merge checks. However, the empty stubs used when the block integrity code is compiled out were still returning 0. Make these stubs return "true". Signed-off-by: Martin K. Petersen Reported-by: Michael L. Semon Tested-by: Michael L. 
Semon Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0207a78..6cbee83 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1583,13 +1583,13 @@ static inline bool blk_integrity_merge_rq(struct request_queue *rq, struct request *r1, struct request *r2) { - return 0; + return true; } static inline bool blk_integrity_merge_bio(struct request_queue *rq, struct request *r, struct bio *b) { - return 0; + return true; } static inline bool blk_integrity_is_initialized(struct gendisk *g) { -- cgit v1.1 From 47f29df7db78ee4fcdb104cf36918d987ddd0278 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 29 Oct 2014 14:50:29 -0700 Subject: drivers: of: add return value to of_reserved_mem_device_init() Driver calling of_reserved_mem_device_init() might be interested if the initialization has been successful or not, so add support for returning error code. This fixes a build warning caused by commit 7bfa5ab6fa1b ("drivers: dma-coherent: add initialization from device tree"), which has been merged without this change and without fixing function return value. 
Fixes: 7bfa5ab6fa1b1 ("drivers: dma-coherent: add initialization from device tree") Signed-off-by: Marek Szyprowski Acked-by: Arnd Bergmann Cc: Michal Nazarewicz Cc: Grant Likely Cc: Laura Abbott Cc: Josh Cartwright Cc: Joonsoo Kim Cc: Kyungmin Park Cc: Russell King Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/of_reserved_mem.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index 5b5efae..ad2f670 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -16,7 +16,7 @@ struct reserved_mem { }; struct reserved_mem_ops { - void (*device_init)(struct reserved_mem *rmem, + int (*device_init)(struct reserved_mem *rmem, struct device *dev); void (*device_release)(struct reserved_mem *rmem, struct device *dev); @@ -28,14 +28,17 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) #ifdef CONFIG_OF_RESERVED_MEM -void of_reserved_mem_device_init(struct device *dev); +int of_reserved_mem_device_init(struct device *dev); void of_reserved_mem_device_release(struct device *dev); void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); #else -static inline void of_reserved_mem_device_init(struct device *dev) { } +static inline int of_reserved_mem_device_init(struct device *dev) +{ + return -ENOSYS; +} static inline void of_reserved_mem_device_release(struct device *pdev) { } static inline void fdt_init_reserved_mem(void) { } -- cgit v1.1 From 6d50e60cd2edb5a57154db5a6f64eef5aa59b751 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 29 Oct 2014 14:50:31 -0700 Subject: mm, thp: fix collapsing of hugepages on madvise If an anonymous mapping is not allowed to fault thp memory and then madvise(MADV_HUGEPAGE) is used 
after fault, khugepaged will never collapse this memory into thp memory. This occurs because the madvise(2) handler for thp, hugepage_madvise(), clears VM_NOHUGEPAGE on the stack and it isn't stored in vma->vm_flags until the final action of madvise_behavior(). This causes the khugepaged_enter_vma_merge() to be a no-op in hugepage_madvise() when the vma had previously had VM_NOHUGEPAGE set. Fix this by passing the correct vma flags to the khugepaged mm slot handler. There's no chance khugepaged can run on this vma until after madvise_behavior() returns since we hold mm->mmap_sem. It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags in hugepage_madvise(), but I didn't want to introduce special case behavior into madvise_behavior(). I think it's best to just let it always set vma->vm_flags itself. Signed-off-by: David Rientjes Reported-by: Suleiman Souhlal Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/khugepaged.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 6b394f0..eeb3079 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -6,7 +6,8 @@ #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); -extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma); +extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma, + unsigned long vm_flags); #define khugepaged_enabled() \ (transparent_hugepage_flags & \ @@ -35,13 +36,13 @@ static inline void khugepaged_exit(struct mm_struct *mm) __khugepaged_exit(mm); } -static inline int khugepaged_enter(struct vm_area_struct *vma) +static inline int khugepaged_enter(struct vm_area_struct *vma, + unsigned long vm_flags) { if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if 
((khugepaged_always() || - (khugepaged_req_madv() && - vma->vm_flags & VM_HUGEPAGE)) && - !(vma->vm_flags & VM_NOHUGEPAGE)) + (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) && + !(vm_flags & VM_NOHUGEPAGE)) if (__khugepaged_enter(vma->vm_mm)) return -ENOMEM; return 0; @@ -54,11 +55,13 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) static inline void khugepaged_exit(struct mm_struct *mm) { } -static inline int khugepaged_enter(struct vm_area_struct *vma) +static inline int khugepaged_enter(struct vm_area_struct *vma, + unsigned long vm_flags) { return 0; } -static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma) +static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma, + unsigned long vm_flags) { return 0; } -- cgit v1.1 From 3a3c02ecf7f2852f122d6d16fb9b3d9cb0c6f201 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 29 Oct 2014 14:50:46 -0700 Subject: mm: page-writeback: inline account_page_dirtied() into single caller A follow-up patch would have changed the call signature. To save the trouble, just fold it instead. 
Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [3.17.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 27eb1bf..b464611 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1235,7 +1235,6 @@ int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); void account_page_dirtied(struct page *page, struct address_space *mapping); -void account_page_writeback(struct page *page); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); -- cgit v1.1 From d7365e783edb858279be1d03f61bc8d5d3383d90 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 29 Oct 2014 14:50:48 -0700 Subject: mm: memcontrol: fix missed end-writeback page accounting Commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") changed page migration to uncharge the old page right away. The page is locked, unmapped, truncated, and off the LRU, but it could race with writeback ending, which then doesn't unaccount the page properly: test_clear_page_writeback() migration wait_on_page_writeback() TestClearPageWriteback() mem_cgroup_migrate() clear PCG_USED mem_cgroup_update_page_stat() if (PageCgroupUsed(pc)) decrease memcg pages under writeback release pc->mem_cgroup->move_lock The per-page statistics interface is heavily optimized to avoid a function call and a lookup_page_cgroup() in the file unmap fast path, which means it doesn't verify whether a page is still charged before clearing PageWriteback() and it has to do it in the stat update later. Rework it so that it looks up the page's memcg once at the beginning of the transaction and then uses it throughout. 
The charge will be verified before clearing PageWriteback() and migration can't uncharge the page as long as that is still set. The RCU lock will protect the memcg past uncharge. As far as losing the optimization goes, the following test results are from a microbenchmark that maps, faults, and unmaps a 4GB sparse file three times in a nested fashion, so that there are two negative passes that don't account but still go through the new transaction overhead. There is no actual difference: old: 33.195102545 seconds time elapsed ( +- 0.01% ) new: 33.199231369 seconds time elapsed ( +- 0.03% ) The time spent in page_remove_rmap()'s callees still adds up to the same, but the time spent in the function itself seems reduced: # Children Self Command Shared Object Symbol old: 0.12% 0.11% filemapstress [kernel.kallsyms] [k] page_remove_rmap new: 0.12% 0.08% filemapstress [kernel.kallsyms] [k] page_remove_rmap Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [3.17.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 58 ++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 19df5d8..6b75640 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -139,48 +139,23 @@ static inline bool mem_cgroup_disabled(void) return false; } -void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked, - unsigned long *flags); - -extern atomic_t memcg_moving; - -static inline void mem_cgroup_begin_update_page_stat(struct page *page, - bool *locked, unsigned long *flags) -{ - if (mem_cgroup_disabled()) - return; - rcu_read_lock(); - *locked = false; - if (atomic_read(&memcg_moving)) - __mem_cgroup_begin_update_page_stat(page, locked, flags); -} - -void __mem_cgroup_end_update_page_stat(struct page *page, - unsigned long *flags); -static inline void 
mem_cgroup_end_update_page_stat(struct page *page, - bool *locked, unsigned long *flags) -{ - if (mem_cgroup_disabled()) - return; - if (*locked) - __mem_cgroup_end_update_page_stat(page, flags); - rcu_read_unlock(); -} - -void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, - int val); - -static inline void mem_cgroup_inc_page_stat(struct page *page, +struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked, + unsigned long *flags); +void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked, + unsigned long flags); +void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val); + +static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(page, idx, 1); + mem_cgroup_update_page_stat(memcg, idx, 1); } -static inline void mem_cgroup_dec_page_stat(struct page *page, +static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(page, idx, -1); + mem_cgroup_update_page_stat(memcg, idx, -1); } unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, @@ -315,13 +290,14 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_begin_update_page_stat(struct page *page, +static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked, unsigned long *flags) { + return NULL; } -static inline void mem_cgroup_end_update_page_stat(struct page *page, - bool *locked, unsigned long *flags) +static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, + bool locked, unsigned long flags) { } @@ -343,12 +319,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } -static inline void mem_cgroup_inc_page_stat(struct page *page, +static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum 
mem_cgroup_stat_index idx) { } -static inline void mem_cgroup_dec_page_stat(struct page *page, +static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { } -- cgit v1.1 From 39bb5e62867de82b269b07df900165029b928359 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Oct 2014 10:32:34 -0700 Subject: net: skb_fclone_busy() needs to detect orphaned skb Some drivers are unable to perform TX completions in a bound time. They instead call skb_orphan() Problem is skb_fclone_busy() has to detect this case, otherwise we block TCP retransmits and can freeze unlucky tcp sessions on mostly idle hosts. Signed-off-by: Eric Dumazet Fixes: 1f3279ae0c13 ("tcp: avoid retransmits of TCP packets hanging in host queues") Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5884f95..6c8b6f6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -799,15 +799,19 @@ struct sk_buff_fclones { * @skb: buffer * * Returns true is skb is a fast clone, and its clone is not freed. + * Some drivers call skb_orphan() in their ndo_start_xmit(), + * so we also check that this didnt happen. 
*/ -static inline bool skb_fclone_busy(const struct sk_buff *skb) +static inline bool skb_fclone_busy(const struct sock *sk, + const struct sk_buff *skb) { const struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - fclones->skb2.fclone == SKB_FCLONE_CLONE; + fclones->skb2.fclone == SKB_FCLONE_CLONE && + fclones->skb2.sk == sk; } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, -- cgit v1.1 From 5188cd44c55db3e92cd9e77a40b5baa7ed4340f7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 30 Oct 2014 18:27:17 +0000 Subject: drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets UFO is now disabled on all drivers that work with virtio net headers, but userland may try to send UFO/IPv6 packets anyway. Instead of sending with ID=0, we should select identifiers on their behalf (as we used to). Signed-off-by: Ben Hutchings Fixes: 916e4cf46d02 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data") Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 97f4720..4292929 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,6 +671,8 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } +void ipv6_proxy_select_ident(struct sk_buff *skb); + int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, -- cgit v1.1 From b2de525f095708b2adbadaec3f1e4017a23d1e09 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 29 Sep 2014 10:21:10 -0400 Subject: Return short read or 0 at end of a raw device, not EIO Author: David Jeffery Changes to the basic direct I/O code have broken the raw driver when reading to the end of a raw device. 
Instead of returning a short read for a read that extends partially beyond the device's end or 0 when at the end of the device, these reads now return EIO. The raw driver needs the same end of device handling as was added for normal block devices. Using blkdev_read_iter, which has the needed size checks, prevents the EIO conditions at the end of the device. Signed-off-by: David Jeffery Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0103626..9ab779e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2469,6 +2469,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); /* fs/block_dev.c */ +extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync); -- cgit v1.1 From 052b9498eea532deb5de75277a53f6e0623215dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:24:57 +0200 Subject: netfilter: nf_reject_ipv4: split nf_send_reset() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_iphdr_put(): to build the IPv4 header. * nf_reject_ip_tcphdr_put(): to build the TCP header. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index e842719..03e928a 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -1,6 +1,8 @@ #ifndef _IPV4_NF_REJECT_H #define _IPV4_NF_REJECT_H +#include +#include #include static inline void nf_send_unreach(struct sk_buff *skb_in, int code) @@ -10,4 +12,12 @@ static inline void nf_send_unreach(struct sk_buff *skb_in, int code) void nf_send_reset(struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl); +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth); + #endif /* _IPV4_NF_REJECT_H */ -- cgit v1.1 From 8bfcdf6671b1c8006c52c3eaf9fd1b5dfcf41c3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Oct 2014 12:35:54 +0100 Subject: netfilter: nf_reject_ipv6: split nf_send_reset6() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip6_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_ip6hdr_put(): to build the IPv6 header. * nf_reject_ip6_tcphdr_put(): to build the TCP header. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_reject.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 48e1881..23216d4 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -15,4 +15,14 @@ nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit); +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen); + #endif /* _IPV6_NF_REJECT_H */ -- cgit v1.1 From 7071cf7fc435ab84df872613f613a9a055964fc1 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 2 Nov 2014 11:31:41 -0800 Subject: uapi: add missing network related headers to kbuild The makefile for sanitizing kernel headers uses the kbuild file to determine which files to do. Several networking related headers were missing. Without these headers iproute2 build would break. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/uapi/linux/Kbuild | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index b70237e..4c94f31 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -125,6 +125,7 @@ header-y += filter.h header-y += firewire-cdev.h header-y += firewire-constants.h header-y += flat.h +header-y += fou.h header-y += fs.h header-y += fsl_hypervisor.h header-y += fuse.h @@ -141,6 +142,7 @@ header-y += hid.h header-y += hiddev.h header-y += hidraw.h header-y += hpet.h +header-y += hsr_netlink.h header-y += hyperv.h header-y += hysdn_if.h header-y += i2c-dev.h @@ -251,6 +253,7 @@ header-y += mii.h header-y += minix_fs.h header-y += mman.h header-y += mmtimer.h +header-y += mpls.h header-y += mqueue.h header-y += mroute.h header-y += mroute6.h @@ -424,6 +427,7 @@ header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h header-y += virtio_rng.h +header-y += vm_sockets.h header-y += vt.h header-y += wait.h header-y += wanrouter.h -- cgit v1.1 From c72c553249bb73705f594e292a8f8750027fbcbe Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 27 Oct 2014 17:40:44 +0100 Subject: ARM: imx: clk-vf610: define PLL's clock tree So far, the required PLL's (PLL1/PLL2/PLL5) have been initialized by boot loader and the kernel code defined fixed rates according to those default configurations. Beginning with the USB PLL7 the code started to initialize the PLL's itself (using imx_clk_pllv3). However, since commit dc4805c2e78ba5a22ea1632f3e3e4ee601a1743b (ARM: imx: remove ENABLE and BYPASS bits from clk-pllv3 driver) imx_clk_pllv3 no longer takes care of the ENABLE and BYPASS bits, hence the USB PLL were not configured correctly anymore. This patch not only fixes those USB PLL's, but also makes use of the imx_clk_pllv3 for all PLL's and aligns the code with the PLL support of the i.MX6 series.
Signed-off-by: Stefan Agner Signed-off-by: Shawn Guo --- include/dt-bindings/clock/vf610-clock.h | 39 ++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/vf610-clock.h b/include/dt-bindings/clock/vf610-clock.h index d6b56b2..801c0ac 100644 --- a/include/dt-bindings/clock/vf610-clock.h +++ b/include/dt-bindings/clock/vf610-clock.h @@ -21,24 +21,24 @@ #define VF610_CLK_FASK_CLK_SEL 8 #define VF610_CLK_AUDIO_EXT 9 #define VF610_CLK_ENET_EXT 10 -#define VF610_CLK_PLL1_MAIN 11 +#define VF610_CLK_PLL1_SYS 11 #define VF610_CLK_PLL1_PFD1 12 #define VF610_CLK_PLL1_PFD2 13 #define VF610_CLK_PLL1_PFD3 14 #define VF610_CLK_PLL1_PFD4 15 -#define VF610_CLK_PLL2_MAIN 16 +#define VF610_CLK_PLL2_BUS 16 #define VF610_CLK_PLL2_PFD1 17 #define VF610_CLK_PLL2_PFD2 18 #define VF610_CLK_PLL2_PFD3 19 #define VF610_CLK_PLL2_PFD4 20 -#define VF610_CLK_PLL3_MAIN 21 +#define VF610_CLK_PLL3_USB_OTG 21 #define VF610_CLK_PLL3_PFD1 22 #define VF610_CLK_PLL3_PFD2 23 #define VF610_CLK_PLL3_PFD3 24 #define VF610_CLK_PLL3_PFD4 25 -#define VF610_CLK_PLL4_MAIN 26 -#define VF610_CLK_PLL5_MAIN 27 -#define VF610_CLK_PLL6_MAIN 28 +#define VF610_CLK_PLL4_AUDIO 26 +#define VF610_CLK_PLL5_ENET 27 +#define VF610_CLK_PLL6_VIDEO 28 #define VF610_CLK_PLL3_MAIN_DIV 29 #define VF610_CLK_PLL4_MAIN_DIV 30 #define VF610_CLK_PLL6_MAIN_DIV 31 @@ -166,9 +166,32 @@ #define VF610_CLK_DMAMUX3 153 #define VF610_CLK_FLEXCAN0_EN 154 #define VF610_CLK_FLEXCAN1_EN 155 -#define VF610_CLK_PLL7_MAIN 156 +#define VF610_CLK_PLL7_USB_HOST 156 #define VF610_CLK_USBPHY0 157 #define VF610_CLK_USBPHY1 158 -#define VF610_CLK_END 159 +#define VF610_CLK_LVDS1_IN 159 +#define VF610_CLK_ANACLK1 160 +#define VF610_CLK_PLL1_BYPASS_SRC 161 +#define VF610_CLK_PLL2_BYPASS_SRC 162 +#define VF610_CLK_PLL3_BYPASS_SRC 163 +#define VF610_CLK_PLL4_BYPASS_SRC 164 +#define VF610_CLK_PLL5_BYPASS_SRC 165 +#define VF610_CLK_PLL6_BYPASS_SRC 166 +#define 
VF610_CLK_PLL7_BYPASS_SRC 167 +#define VF610_CLK_PLL1 168 +#define VF610_CLK_PLL2 169 +#define VF610_CLK_PLL3 170 +#define VF610_CLK_PLL4 171 +#define VF610_CLK_PLL5 172 +#define VF610_CLK_PLL6 173 +#define VF610_CLK_PLL7 174 +#define VF610_PLL1_BYPASS 175 +#define VF610_PLL2_BYPASS 176 +#define VF610_PLL3_BYPASS 177 +#define VF610_PLL4_BYPASS 178 +#define VF610_PLL5_BYPASS 179 +#define VF610_PLL6_BYPASS 180 +#define VF610_PLL7_BYPASS 181 +#define VF610_CLK_END 182 #endif /* __DT_BINDINGS_CLOCK_VF610_H */ -- cgit v1.1 From a87fa1d81a9fb5e9adca9820e16008c40ad09f33 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 3 Nov 2014 15:15:35 +0000 Subject: of: Fix overflow bug in string property parsing functions The string property read helpers will run off the end of the buffer if they are handed a malformed string property. Rework the parsers to make sure that doesn't happen. At the same time add new test cases to make sure the functions behave themselves. The original implementations of of_property_read_string_index() and of_property_count_strings() both open-coded the same block of parsing code, each with its own subtly different bugs. The fix here merges functions into a single helper and makes the original functions static inline wrappers around the helper. One non-bugfix aspect of this patch is the addition of a new wrapper, of_property_read_string_array(). The new wrapper is needed by the device_properties feature that Rafael is working on and planning to merge for v3.19. The implementation is identical both with and without the new static inline wrapper, so it just got left in to reduce the churn on the header file. Signed-off-by: Grant Likely Cc: Rafael J.
Wysocki Cc: Mika Westerberg Cc: Rob Herring Cc: Arnd Bergmann Cc: Darren Hart Cc: # v3.3+: Drop selftest hunks that don't apply --- include/linux/of.h | 84 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 6545e7a..29f0adc 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -267,14 +267,12 @@ extern int of_property_read_u64(const struct device_node *np, extern int of_property_read_string(struct device_node *np, const char *propname, const char **out_string); -extern int of_property_read_string_index(struct device_node *np, - const char *propname, - int index, const char **output); extern int of_property_match_string(struct device_node *np, const char *propname, const char *string); -extern int of_property_count_strings(struct device_node *np, - const char *propname); +extern int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_is_available(const struct device_node *device); @@ -486,15 +484,9 @@ static inline int of_property_read_string(struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string_index(struct device_node *np, - const char *propname, int index, - const char **out_string) -{ - return -ENOSYS; -} - -static inline int of_property_count_strings(struct device_node *np, - const char *propname) +static inline int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index) { return -ENOSYS; } @@ -668,6 +660,70 @@ static inline int of_property_count_u64_elems(const struct device_node *np, } /** + * of_property_read_string_array() - Read an array of strings from a multiple + * strings property. 
+ * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_strs: output array of string pointers. + * @sz: number of array elements to read. + * + * Search for a property in a device tree node and retrieve a list of + * terminated string values (pointer to data, not a copy) in that property. + * + * If @out_strs is NULL, the number of strings in the property is returned. + */ +static inline int of_property_read_string_array(struct device_node *np, + const char *propname, const char **out_strs, + size_t sz) +{ + return of_property_read_string_helper(np, propname, out_strs, sz, 0); +} + +/** + * of_property_count_strings() - Find and return the number of strings from a + * multiple strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device tree node and retrieve the number of null + * terminated string contain in it. Returns the number of strings on + * success, -EINVAL if the property does not exist, -ENODATA if property + * does not have a value, and -EILSEQ if the string is not null-terminated + * within the length of the property data. + */ +static inline int of_property_count_strings(struct device_node *np, + const char *propname) +{ + return of_property_read_string_helper(np, propname, NULL, 0, 0); +} + +/** + * of_property_read_string_index() - Find and read a string from a multiple + * strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @index: index of the string in the list of strings + * @out_string: pointer to null terminated return string, modified only if + * return value is 0. + * + * Search for a property in a device tree node and retrieve a null + * terminated string value (pointer to data, not a copy) in the list of strings + * contained in that property. 
+ * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if + * property does not have a value, and -EILSEQ if the string is not + * null-terminated within the length of the property data. + * + * The out_string pointer is modified only if a valid string can be decoded. + */ +static inline int of_property_read_string_index(struct device_node *np, + const char *propname, + int index, const char **output) +{ + int rc = of_property_read_string_helper(np, propname, output, 1, index); + return rc < 0 ? rc : 0; +} + +/** * of_property_read_bool - Findfrom a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. -- cgit v1.1 From 315786ebbf4ad6552b6fd8e0e7b2ea220fcbfdbd Mon Sep 17 00:00:00 2001 From: Olav Haugan Date: Sat, 25 Oct 2014 09:55:16 -0700 Subject: iommu: Add iommu_map_sg() function Mapping and unmapping are more often than not in the critical path. map_sg allows IOMMU driver implementations to optimize the process of mapping buffers into the IOMMU page tables. Instead of mapping a buffer one page at a time and requiring potentially expensive TLB operations for each page, this function allows the driver to map all pages in one go and defer TLB maintenance until after all pages have been mapped. Additionally, the mapping operation would be faster in general since clients does not have to keep calling map API over and over again for each physically contiguous chunk of memory that needs to be mapped to a virtually contiguous region. 
Signed-off-by: Olav Haugan Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e6a7c9f..b29a598 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #define IOMMU_READ (1 << 0) @@ -97,6 +98,8 @@ enum iommu_attr { * @detach_dev: detach device from an iommu domain * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain + * @map_sg: map a scatter-gather list of physically contiguous memory chunks + * to an iommu domain * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping * @remove_device: remove device from iommu grouping @@ -114,6 +117,8 @@ struct iommu_ops { phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); + size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); @@ -156,6 +161,9 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); +extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg,unsigned int nents, + int prot); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -241,6 +249,13 @@ static inline int report_iommu_fault(struct iommu_domain *domain, return ret; } +static inline 
size_t iommu_map_sg(struct iommu_domain *domain, + unsigned long iova, struct scatterlist *sg, + unsigned int nents, int prot) +{ + return domain->ops->map_sg(domain, iova, sg, nents, prot); +} + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -293,6 +308,13 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } +static inline size_t iommu_map_sg(struct iommu_domain *domain, + unsigned long iova, struct scatterlist *sg, + unsigned int nents, int prot) +{ + return -ENODEV; +} + static inline int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot) -- cgit v1.1 From 32f638fc11db0526c706454d9ab4339d55ac89f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 30 Oct 2014 10:17:25 -0600 Subject: PCI: Don't oops on virtual buses in acpi_pci_get_bridge_handle() acpi_pci_get_bridge_handle() returns the ACPI handle for the bridge device (either a host bridge or a PCI-to-PCI bridge) leading to a PCI bus. But SR-IOV virtual functions can be on a virtual bus with no bridge leading to it. Return a NULL acpi_handle in this case instead of trying to dereference the NULL pointer to the bridge. This fixes a NULL pointer dereference oops in pci_get_hp_params() when adding SR-IOV VF devices on virtual buses. 
[bhelgaas: changelog, add comment in code] Fixes: 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration") Link: https://bugzilla.kernel.org/show_bug.cgi?id=87591 Reported-by: Chao Zhou Reported-by: Joerg Roedel Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- include/linux/pci-acpi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 64dacb7..24c7728 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -41,8 +41,13 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) if (pci_is_root_bus(pbus)) dev = pbus->bridge; - else + else { + /* If pbus is a virtual bus, there is no bridge to it */ + if (!pbus->self) + return NULL; + dev = &pbus->self->dev; + } return ACPI_HANDLE(dev); } -- cgit v1.1 From 9cdb5dbf79f4e8f43e19ab7f4ec9ed74c146f0af Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 5 Nov 2014 21:44:27 +0100 Subject: include/linux/socket.h: Fix comment File descriptors are always closed on exit :-) Signed-off-by: Rasmus Villemoes Signed-off-by: David S. 
Miller --- include/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index ec538fc2..bb9b836 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -256,7 +256,7 @@ struct ucred { #define MSG_EOF MSG_FIN #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ -#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through SCM_RIGHTS */ #if defined(CONFIG_COMPAT) -- cgit v1.1 From 66f1c44887ba4f47d617f8ae21cf8e04e1892bd7 Mon Sep 17 00:00:00 2001 From: Gregory Fong Date: Tue, 4 Nov 2014 11:21:21 -0800 Subject: bridge: include in6.h in if_bridge.h for struct in6_addr if_bridge.h uses struct in6_addr ip6, but wasn't including the in6.h header. Thomas Backlund originally sent a patch to do this, but this revealed a redefinition issue: https://lkml.org/lkml/2013/1/13/116 The redefinition issue should have been fixed by the following Linux commits: ee262ad827f89e2dc7851ec2986953b5b125c6bc inet: defines IPPROTO_* needed for module alias generation cfd280c91253cc28e4919e349fa7a813b63e71e8 net: sync some IP headers with glibc and the following glibc commit: 6c82a2f8d7c8e21e39237225c819f182ae438db3 Coordinate IPv6 definitions for Linux and glibc so actually include the header now. Reported-by: Colin Guthrie Reported-by: Christiaan Welvaart Reported-by: Thomas Backlund Cc: Florian Fainelli Cc: Cong Wang Cc: David Miller Signed-off-by: Gregory Fong Acked-by: Cong Wang Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 39f621a..da17e45 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -15,6 +15,7 @@ #include #include +#include #define SYSFS_BRIDGE_ATTR "bridge" #define SYSFS_BRIDGE_FDB "brforward" -- cgit v1.1 From 5816c3dafb6c63fd5c7b9f3f707c8565811d9916 Mon Sep 17 00:00:00 2001 From: Ryo Munakata Date: Wed, 5 Nov 2014 23:45:58 +0900 Subject: net/9p: remove a comment about pref member which doesn't exist Signed-off-by: Ryo Munakata Signed-off-by: David S. Miller --- include/net/9p/transport.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index d9fa68f..2a25dec 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -34,7 +34,6 @@ * @list: used to maintain a list of currently available transports * @name: the human-readable name of the transport * @maxsize: transport provided maximum packet size - * @pref: Preferences of this transport * @def: set if this transport should be considered the default * @create: member function to create a new connection on this transport * @close: member function to discard a connection on this transport -- cgit v1.1 From c16561e8df7a64764ef61f02221e98273add325a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 6 Nov 2014 00:37:08 +0100 Subject: PM / Domains: Change prototype for the attach and detach callbacks Convert the prototypes to return an int in order to support error handling in these callbacks. Also, as suggested by Dmitry Torokhov, pass the domain pointer for use inside the callbacks, and so that they match the existing power_on/power_off callbacks which currently take the domain pointer. 
Acked-by: Dmitry Torokhov Acked-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson [ khilman: added domain as parameter to callbacks, as suggested by Dmitry ] Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 73e938b..b3ed776 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -72,8 +72,10 @@ struct generic_pm_domain { bool max_off_time_changed; bool cached_power_down_ok; struct gpd_cpuidle_data *cpuidle_data; - void (*attach_dev)(struct device *dev); - void (*detach_dev)(struct device *dev); + int (*attach_dev)(struct generic_pm_domain *domain, + struct device *dev); + void (*detach_dev)(struct generic_pm_domain *domain, + struct device *dev); }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) -- cgit v1.1 From c0acb8144bd6d8d88aee1dab33364b7353e9a903 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Oct 2014 12:48:35 +0200 Subject: mfd: max77693: Fix always masked MUIC interrupts All interrupts coming from MUIC were ignored because the interrupt source register was masked. The Maxim 77693 has an "interrupt source" - a separate register and interrupts which give information about PMIC block triggering the individual interrupt (charger, topsys, MUIC, flash LED). By default bootloader could initialize this register to "mask all" value. In such case (observed on Trats2 board) MUIC interrupts won't be generated regardless of their mask status. Regmap irq chip was unmasking individual MUIC interrupts but the source was masked. Before introducing regmap irq chip this interrupt source was unmasked, read and acked. Reading and acking is not necessary but unmasking is.
Fixes: 342d669c1ee4 ("mfd: max77693: Handle IRQs using regmap") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index fc17d56..582e67f 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -330,6 +330,13 @@ enum max77693_irq_source { MAX77693_IRQ_GROUP_NR, }; +#define SRC_IRQ_CHARGER BIT(0) +#define SRC_IRQ_TOP BIT(1) +#define SRC_IRQ_FLASH BIT(2) +#define SRC_IRQ_MUIC BIT(3) +#define SRC_IRQ_ALL (SRC_IRQ_CHARGER | SRC_IRQ_TOP \ + | SRC_IRQ_FLASH | SRC_IRQ_MUIC) + #define LED_IRQ_FLED2_OPEN BIT(0) #define LED_IRQ_FLED2_SHORT BIT(1) #define LED_IRQ_FLED1_OPEN BIT(2) -- cgit v1.1 From cfdf1e1ba5bf55e095cf4bcaa9585c4759f239e8 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 10 Nov 2014 11:45:13 -0800 Subject: udptunnel: Add SKB_GSO_UDP_TUNNEL during gro_complete. When doing GRO processing for UDP tunnels, we never add SKB_GSO_UDP_TUNNEL to gso_type - only the type of the inner protocol is added (such as SKB_GSO_TCPV4). The result is that if the packet is later resegmented we will do GSO but not treat it as a tunnel. This results in UDP fragmentation of the outer header instead of (i.e.) TCP segmentation of the inner header as was originally on the wire. Signed-off-by: Jesse Gross Signed-off-by: David S. 
Miller --- include/net/udp_tunnel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index a47790b..2a50a70 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -100,6 +100,15 @@ static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb, return iptunnel_handle_offloads(skb, udp_csum, type); } +static inline void udp_tunnel_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + nhoff - sizeof(struct udphdr)); + skb_shinfo(skb)->gso_type |= uh->check ? + SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; +} + static inline void udp_tunnel_encap_enable(struct socket *sock) { #if IS_ENABLED(CONFIG_IPV6) -- cgit v1.1 From e30f53aad2202b5526c40c36d8eeac8bf290bde5 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 10 Nov 2014 19:46:34 +0100 Subject: tracing: Do not busy wait in buffer splice On a !PREEMPT kernel, attempting to use trace-cmd results in a soft lockup: # trace-cmd record -e raw_syscalls:* -F false NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61] ... Call Trace: [] ? __wake_up_common+0x90/0x90 [] wait_on_pipe+0x35/0x40 [] tracing_buffers_splice_read+0x2e3/0x3c0 [] ? tracing_stats_read+0x2a0/0x2a0 [] ? _raw_spin_unlock+0x2b/0x40 [] ? do_read_fault+0x21b/0x290 [] ? handle_mm_fault+0x2ba/0xbd0 [] ? trace_event_buffer_lock_reserve+0x40/0x80 [] ? trace_buffer_lock_reserve+0x22/0x60 [] ? trace_event_buffer_lock_reserve+0x40/0x80 [] do_splice_to+0x6d/0x90 [] SyS_splice+0x7c1/0x800 [] tracesys_phase2+0xd3/0xd8 The problem is this: tracing_buffers_splice_read() calls ring_buffer_wait() to wait for data in the ring buffers. The buffers are not empty so ring_buffer_wait() returns immediately. But tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1, meaning it only wants to read a full page. 
When the full page is not available, tracing_buffers_splice_read() tries to wait again with ring_buffer_wait(), which again returns immediately, and so on. Fix this by adding a "full" argument to ring_buffer_wait() which will make ring_buffer_wait() wait until the writer has left the reader's page, i.e. until full-page reads will succeed. Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@rab.in Cc: stable@vger.kernel.org # 3.16+ Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function") Signed-off-by: Rabin Vincent Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 49a4d6f..e2c13cd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct ring_buffer *buffer, int cpu); +int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full); int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); -- cgit v1.1 From 73b3a6657a88ef5348a0d69c9a8107d6f01ae862 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 3 Nov 2014 12:09:52 +0200 Subject: pinctrl: dra: dt-bindings: Fix output pull up/down For PIN_OUTPUT_PULLUP and PIN_OUTPUT_PULLDOWN we must not set the PULL_DIS bit which disables the PULLs. PULL_ENA is a 0 and using it in an OR operation is a NOP, so don't use it in the PIN_OUTPUT_PULLUP/DOWN macros. 
Fixes: 23d9cec07c58 ("pinctrl: dra: dt-bindings: Fix pull enable/disable") Signed-off-by: Roger Quadros Acked-by: Nishanth Menon Signed-off-by: Tony Lindgren --- include/dt-bindings/pinctrl/dra.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/dra.h b/include/dt-bindings/pinctrl/dra.h index 3d33794..7448edf 100644 --- a/include/dt-bindings/pinctrl/dra.h +++ b/include/dt-bindings/pinctrl/dra.h @@ -40,8 +40,8 @@ /* Active pin states */ #define PIN_OUTPUT (0 | PULL_DIS) -#define PIN_OUTPUT_PULLUP (PIN_OUTPUT | PULL_ENA | PULL_UP) -#define PIN_OUTPUT_PULLDOWN (PIN_OUTPUT | PULL_ENA) +#define PIN_OUTPUT_PULLUP (PULL_UP) +#define PIN_OUTPUT_PULLDOWN (0) #define PIN_INPUT (INPUT_EN | PULL_DIS) #define PIN_INPUT_SLEW (INPUT_EN | SLEWCONTROL) #define PIN_INPUT_PULLUP (PULL_ENA | INPUT_EN | PULL_UP) -- cgit v1.1 From 67732cd34382066ae5df313b6dad65ab14b9735f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 11 Nov 2014 11:07:08 +0100 Subject: PM / Domains: Fix initial default state of the need_restore flag The initial state of the device's need_restore flag shouldn't depend on the current state of the PM domain. For example it should be perfectly valid to attach an inactive device to a powered PM domain. The pm_genpd_dev_need_restore() API allows us to update the need_restore flag to somewhat cope with such scenarios. Typically that should have been done from drivers/buses ->probe() since it's those that put the requirements on the value of the need_restore flag. Until recently, the Exynos SOCs were the only user of the pm_genpd_dev_need_restore() API, though invoking it from a centralized location while adding devices to their PM domains. Since Exynos has now switched to the generic OF-based PM domain look-up, it's no longer possible to invoke the API from a centralized location. The reason is because devices are now added to their PM domains during the probe sequence.
Commit "ARM: exynos: Move to generic PM domain DT bindings" did the switch for Exynos to the generic OF-based PM domain look-up, but it also removed the call to pm_genpd_dev_need_restore(). This caused a regression for some of the Exynos drivers. To handle things more properly in the generic PM domain, let's change the default initial value of the need_restore flag to reflect that the state is unknown. As soon as some of the runtime PM callbacks get invoked, update the initial value accordingly. Moreover, since the generic PM domain is verifying that all devices are both runtime PM enabled and suspended, using pm_runtime_suspended() while pm_genpd_poweroff() is invoked from the scheduled work, we can be sure that the PM domain won't be powering off while having active devices. Do note that, the generic PM domain can still only know about active devices which have been activated through invoking its runtime PM resume callback. In other words, buses/drivers using pm_runtime_set_active() during ->probe() will still suffer from a race condition, potentially probing a device without having its PM domain being powered. That issue will have to be solved using a different approach. This is a log from the boot regression for Exynos5, which is being fixed in this patch.
------------[ cut here ]------------ WARNING: CPU: 0 PID: 308 at ../drivers/clk/clk.c:851 clk_disable+0x24/0x30() Modules linked in: CPU: 0 PID: 308 Comm: kworker/0:1 Not tainted 3.18.0-rc3-00569-gbd9449f-dirty #10 Workqueue: pm pm_runtime_work [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x70/0xbc) [] (dump_stack) from [] (warn_slowpath_common+0x64/0x88) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (clk_disable+0x24/0x30) [] (clk_disable) from [] (gsc_runtime_suspend+0x128/0x160) [] (gsc_runtime_suspend) from [] (pm_generic_runtime_suspend+0x2c/0x38) [] (pm_generic_runtime_suspend) from [] (pm_genpd_default_save_state+0x2c/0x8c) [] (pm_genpd_default_save_state) from [] (pm_genpd_poweroff+0x224/0x3ec) [] (pm_genpd_poweroff) from [] (pm_genpd_runtime_suspend+0x9c/0xcc) [] (pm_genpd_runtime_suspend) from [] (__rpm_callback+0x2c/0x60) [] (__rpm_callback) from [] (rpm_callback+0x20/0x74) [] (rpm_callback) from [] (rpm_suspend+0xd4/0x43c) [] (rpm_suspend) from [] (pm_runtime_work+0x80/0x90) [] (pm_runtime_work) from [] (process_one_work+0x12c/0x314) [] (process_one_work) from [] (worker_thread+0x3c/0x4b0) [] (worker_thread) from [] (kthread+0xcc/0xe8) [] (kthread) from [] (ret_from_fork+0x14/0x3c) ---[ end trace 40cd58bcd6988f12 ]--- Fixes: a4a8c2c4962bb655 (ARM: exynos: Move to generic PM domain DT bindings) Reported-and-tested-by: Sylwester Nawrocki Reviewed-by: Sylwester Nawrocki Reviewed-by: Kevin Hilman Signed-off-by: Ulf Hansson Signed-off-by: Rafael J.
Wysocki --- include/linux/pm_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b3ed776..2e0e06d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -106,7 +106,7 @@ struct generic_pm_domain_data { struct notifier_block nb; struct mutex lock; unsigned int refcount; - bool need_restore; + int need_restore; }; #ifdef CONFIG_PM_GENERIC_DOMAINS -- cgit v1.1 From 8c393f9a721c30a030049a680e1bf896669bb279 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 5 Nov 2014 22:36:50 +0800 Subject: nfs: fix pnfs direct write memory leak For pNFS direct writes, layout driver may dynamically allocate ds_cinfo.buckets. So we need to take care to free them when freeing dreq. Ideally this needs to be done inside layout driver where ds_cinfo.buckets are allocated. But buckets are attached to dreq and reused across LD IO iterations. So I feel it's OK to free them in the generic layer. 
Cc: stable@vger.kernel.org [v3.4+] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 983876f..47ebb4f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1224,11 +1224,22 @@ struct nfs41_free_stateid_res { unsigned int status; }; +static inline void +nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) +{ + kfree(cinfo->buckets); +} + #else struct pnfs_ds_commit_info { }; +static inline void +nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) +{ +} + #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 -- cgit v1.1 From ad53f92eb416d81e469fa8ea57153e59455e7175 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:11 -0800 Subject: mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype Before describing bugs itself, I first explain definition of freepage. 1. pages on buddy list are counted as freepage. 2. pages on isolate migratetype buddy list are *not* counted as freepage. 3. pages on cma buddy list are counted as CMA freepage, too. Now, I describe problems and related patch. Patch 1: There are race conditions on getting pageblock migratetype that result in misplacement of freepages on buddy list, incorrect freepage count and un-availability of freepage. Patch 2: Freepages on pcp list could have stale cached information to determine migratetype of buddy list to go. This causes misplacement of freepages on buddy list and incorrect freepage count. Patch 4: Merging between freepages on different migratetype of pageblocks will cause freepages accounting problem. This patch fixes it. Without patchset [3], above problem doesn't happen on my CMA allocation test, because CMA reserved pages aren't used at all. So there is no chance for above race.
With patchset [3], I did simple CMA allocation test and get below result: - Virtual machine, 4 cpus, 1024 MB memory, 256 MB CMA reservation - run kernel build (make -j16) on background - 30 times CMA allocation(8MB * 30 = 240MB) attempts in 5 sec interval - Result: more than 5000 freepage count are missed With patchset [3] and this patchset, I found that no freepage count are missed so that I conclude that problems are solved. On my simple memory offlining test, these problems also occur on that environment, too. This patch (of 4): There are two paths to reach core free function of buddy allocator, __free_one_page(), one is free_one_page()->__free_one_page() and the other is free_hot_cold_page()->free_pcppages_bulk()->__free_one_page(). Each path has a race condition causing serious problems. At first, this patch is focused on first type of freepath. And then, following patch will solve the problem in second type of freepath. In the first type of freepath, we got migratetype of freeing page without holding the zone lock, so it could be racy. There are two cases of this race. 1. pages are added to isolate buddy list after restoring original migratetype CPU1 CPU2 get migratetype => return MIGRATE_ISOLATE call free_one_page() with MIGRATE_ISOLATE grab the zone lock unisolate pageblock release the zone lock grab the zone lock call __free_one_page() with MIGRATE_ISOLATE freepage go into isolate buddy list, although pageblock is already unisolated This may cause two problems. One is that we can't use this page anymore until next isolation attempt of this pageblock, because freepage is on isolate buddy list. The other is that freepage accounting could be wrong due to merging between different buddy list. Freepages on isolate buddy list aren't counted as freepage, but ones on normal buddy list are counted as freepage.
If merge happens, buddy freepage on normal buddy list is inevitably moved to isolate buddy list without any consideration of freepage accounting so it could be incorrect. 2. pages are added to normal buddy list while pageblock is isolated. It is similar with above case. This also may cause two problems. One is that we can't keep these freepages from being allocated. Although this pageblock is isolated, freepage would be added to normal buddy list so that it could be allocated without any restriction. And the other problem is same as case 1, that is, incorrect freepage accounting. This race condition would be prevented by checking migratetype again with holding the zone lock. Because it is somewhat heavy operation and it isn't needed in common case, we want to avoid rechecking as much as possible. So this patch introduces new variable, nr_isolate_pageblock in struct zone to check if there is isolated pageblock. With this, we can avoid re-checking migratetype in common case and do it only if there is isolated pageblock or migratetype is MIGRATE_ISOLATE. This solves above mentioned problems. Changes from v3: Add one more check in free_one_page() that checks whether migratetype is MIGRATE_ISOLATE or not. Without this, abovementioned case 1 could happen. Signed-off-by: Joonsoo Kim Acked-by: Minchan Kim Acked-by: Michal Nazarewicz Acked-by: Vlastimil Babka Cc: "Kirill A.
Shutemov" Cc: Mel Gorman Cc: Johannes Weiner Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Tang Chen Cc: Naoya Horiguchi Cc: Bartlomiej Zolnierkiewicz Cc: Wen Congyang Cc: Marek Szyprowski Cc: Laura Abbott Cc: Heesub Shin Cc: "Aneesh Kumar K.V" Cc: Ritesh Harjani Cc: Gioh Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 9 +++++++++ include/linux/page-isolation.h | 8 ++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 48bf12e..ffe66e3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -431,6 +431,15 @@ struct zone { */ int nr_migrate_reserve_block; +#ifdef CONFIG_MEMORY_ISOLATION + /* + * Number of isolated pageblock. It is used to solve incorrect + * freepage counting problem due to racy retrieving migratetype + * of pageblock. Protected by zone->lock. + */ + unsigned long nr_isolate_pageblock; +#endif + #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 3fff8e7..2dc1e16 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -2,6 +2,10 @@ #define __LINUX_PAGEISOLATION_H #ifdef CONFIG_MEMORY_ISOLATION +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return zone->nr_isolate_pageblock; +} static inline bool is_migrate_isolate_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; @@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype) return migratetype == MIGRATE_ISOLATE; } #else +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return false; +} static inline bool is_migrate_isolate_page(struct page *page) { return false; -- cgit v1.1 From f784a3f19613901ca4539a5b0eed3bdc700e6ee7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 13 Nov 2014 15:19:39 -0800 Subject: mem-hotplug: reset 
node managed pages when hot-adding a new pgdat In free_area_init_core(), zone->managed_pages is set to an approximate value for lowmem, and will be adjusted when the bootmem allocator frees pages into the buddy system. But free_area_init_core() is also called by hotadd_new_pgdat() when hot-adding memory. As a result, zone->managed_pages of the newly added node's pgdat is set to an approximate value in the very beginning. Even if the memory on that node has not been onlined, /sys/devices/system/node/nodeXXX/meminfo has wrong value: hot-add node2 (memory not onlined) cat /sys/devices/system/node/node2/meminfo Node 2 MemTotal: 33554432 kB Node 2 MemFree: 0 kB Node 2 MemUsed: 33554432 kB Node 2 Active: 0 kB This patch fixes this problem by resetting node managed pages to 0 after hot-adding a new node. 1. Move reset_managed_pages_done from reset_node_managed_pages() to reset_all_zones_managed_pages() 2. Make reset_node_managed_pages() non-static 3. Call reset_node_managed_pages() in hotadd_new_pgdat() after pgdat is initialized Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu Cc: [3.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 4e2bd4c..0995c2d 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); extern unsigned long free_all_bootmem(void); +extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); extern void free_bootmem_node(pg_data_t *pgdat, -- cgit v1.1 From a720b41c41f5a7e4c51558cf087882c57331581f Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 13 Oct 2014 14:06:16 +0100 Subject: iommu/arm-smmu: change IOMMU_EXEC to IOMMU_NOEXEC Exposing the XN flag of the SMMU driver
as IOMMU_NOEXEC instead of IOMMU_EXEC makes it enforceable, since for IOMMUs that don't support the XN flag pages will always be executable. Signed-off-by: Antonios Motakis Acked-by: Joerg Roedel Signed-off-by: Will Deacon --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e6a7c9f..f47383a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -27,7 +27,7 @@ #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) #define IOMMU_CACHE (1 << 2) /* DMA cache coherency */ -#define IOMMU_EXEC (1 << 3) +#define IOMMU_NOEXEC (1 << 3) struct iommu_ops; struct iommu_group; -- cgit v1.1 From c49866493b1ffb7c0a7963a1e3c0094e78760184 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 13 Oct 2014 14:06:17 +0100 Subject: iommu: add capability IOMMU_CAP_NOEXEC Some IOMMUs accept an IOMMU_NOEXEC protection flag in addition to IOMMU_READ and IOMMU_WRITE. Expose this as an IOMMU capability. Signed-off-by: Antonios Motakis Acked-by: Joerg Roedel Signed-off-by: Will Deacon --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f47383a..e438b30 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -61,6 +61,7 @@ enum iommu_cap { IOMMU_CAP_CACHE_COHERENCY, /* IOMMU can enforce cache coherent DMA transactions */ IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ + IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ }; /* -- cgit v1.1 From c2a0b538d2c778aef7bf2fbe7973229192c9a392 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sun, 9 Nov 2014 22:47:56 +0800 Subject: iommu/vt-d: Introduce helper function dmar_walk_resources() Introduce helper function dmar_walk_resources to walk resource entries in DMAR table and ACPI buffer object returned by ACPI _DSM method for IOMMU hot-plug. 
Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 593fff9..495df5e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -121,22 +121,21 @@ extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); +static inline int dmar_res_noop(struct acpi_dmar_header *hdr, void *arg) +{ + return 0; +} + #ifdef CONFIG_INTEL_IOMMU extern int iommu_detected, no_iommu; extern int intel_iommu_init(void); -extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); -extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); +extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg); +extern int dmar_parse_one_atsr(struct acpi_dmar_header *header, void *arg); extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); #else /* !CONFIG_INTEL_IOMMU: */ static inline int intel_iommu_init(void) { return -ENODEV; } -static inline int dmar_parse_one_rmrr(struct acpi_dmar_header *header) -{ - return 0; -} -static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header) -{ - return 0; -} +#define dmar_parse_one_rmrr dmar_res_noop +#define dmar_parse_one_atsr dmar_res_noop static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { return 0; -- cgit v1.1 From 78d8e7046111425bb688cddc4303d79cb0f0d281 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sun, 9 Nov 2014 22:47:57 +0800 Subject: iommu/vt-d: Dynamically allocate and free seq_id for DMAR units Introduce functions to support dynamic IOMMU seq_id allocating and releasing, which will be used to support DMAR hotplug. Also rename IOMMU_UNITS_SUPPORTED as DMAR_UNITS_SUPPORTED. 
Signed-off-by: Jiang Liu Reviewed-by: Yijing Wang Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 495df5e..725204f 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -30,6 +30,12 @@ struct acpi_dmar_header; +#ifdef CONFIG_X86 +# define DMAR_UNITS_SUPPORTED MAX_IO_APICS +#else +# define DMAR_UNITS_SUPPORTED 64 +#endif + /* DMAR Flags */ #define DMAR_INTR_REMAP 0x1 #define DMAR_X2APIC_OPT_OUT 0x2 -- cgit v1.1 From 6b1972493a84f8fe13ff9d202745590f6c53d670 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sun, 9 Nov 2014 22:47:58 +0800 Subject: iommu/vt-d: Implement DMAR unit hotplug framework On Intel platforms, an IO Hub (PCI/PCIe host bridge) may contain DMAR units, so we need to support DMAR hotplug when supporting PCI host bridge hotplug on Intel platforms. According to Section 8.8 "Remapping Hardware Unit Hot Plug" in "Intel Virtualization Technology for Directed IO Architecture Specification Rev 2.2", ACPI BIOS should implement ACPI _DSM method under the ACPI object for the PCI host bridge to support DMAR hotplug. This patch introduces interfaces to parse ACPI _DSM method for DMAR unit hotplug. It also implements state machines for DMAR unit hot-addition and hot-removal. The PCI host bridge hotplug driver should call dmar_hotplug_hotplug() before scanning PCI devices connected for hot-addition and after destroying all PCI devices for hot-removal. 
Signed-off-by: Jiang Liu Reviewed-by: Yijing Wang Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 725204f..3062495 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -126,6 +126,8 @@ extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, /* Intel IOMMU detection */ extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); +extern int dmar_device_add(acpi_handle handle); +extern int dmar_device_remove(acpi_handle handle); static inline int dmar_res_noop(struct acpi_dmar_header *hdr, void *arg) { @@ -137,17 +139,48 @@ extern int iommu_detected, no_iommu; extern int intel_iommu_init(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header, void *arg); +extern int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg); +extern int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg); +extern int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert); extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); #else /* !CONFIG_INTEL_IOMMU: */ static inline int intel_iommu_init(void) { return -ENODEV; } + #define dmar_parse_one_rmrr dmar_res_noop #define dmar_parse_one_atsr dmar_res_noop +#define dmar_check_one_atsr dmar_res_noop +#define dmar_release_one_atsr dmar_res_noop + static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { return 0; } + +static inline int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert) +{ + return 0; +} #endif /* CONFIG_INTEL_IOMMU */ +#ifdef CONFIG_IRQ_REMAP +extern int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert); +#else /* CONFIG_IRQ_REMAP */ +static inline int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert) +{ return 0; } +#endif /* 
CONFIG_IRQ_REMAP */ + +#else /* CONFIG_DMAR_TABLE */ + +static inline int dmar_device_add(void *handle) +{ + return 0; +} + +static inline int dmar_device_remove(void *handle) +{ + return 0; +} + #endif /* CONFIG_DMAR_TABLE */ struct irte { -- cgit v1.1