From 3b5d56b9317fa7b5407dff1aa7b115bf6cdbd494 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Sat, 10 Mar 2012 14:37:26 -0500 Subject: kvmclock: Add functions to check if the host has stopped the vm When a host stops or suspends a VM it will set a flag to show this. The watchdog will use these functions to determine if a softlockup is real, or the result of a suspended VM. Signed-off-by: Eric B Munson asm-generic changes Acked-by: Arnd Bergmann Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/asm-generic/kvm_para.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/asm-generic/kvm_para.h (limited to 'include/asm-generic') diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h new file mode 100644 index 0000000..05ef7e7 --- /dev/null +++ b/include/asm-generic/kvm_para.h @@ -0,0 +1,14 @@ +#ifndef _ASM_GENERIC_KVM_PARA_H +#define _ASM_GENERIC_KVM_PARA_H + + +/* + * This function is used by architectures that support kvm to avoid issuing + * false soft lockup messages. + */ +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + +#endif -- cgit v1.1 From 1f15d10984c854e077da5aa1a23f901496b49773 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 20 Apr 2012 18:21:46 -0300 Subject: KVM: add kvm_arch_para_features stub to asm-generic/kvm_para.h Needed by kvm_para_has_feature(). Reported-by: Stephen Rothwell Signed-off-by: Marcelo Tosatti --- include/asm-generic/kvm_para.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 05ef7e7..9a7bbad 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h @@ -11,4 +11,9 @@ static inline bool kvm_check_and_clear_guest_paused(void) return false; } +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + #endif -- cgit v1.1 From 09d71ff19404b3957fab6de942fb8026ccfd8524 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 2 May 2012 12:46:46 +0100 Subject: gpiolib: Implement devm_gpio_request_one() Allow drivers to use the modern request and configure idiom together with devres. As with plain gpio_request() and gpio_request_one() we can't implement the old school version in terms of _one() as this would force the explicit selection of a direction in gpio_request() which could break systems if we pick the wrong one. Implementing devm_gpio_request_one() in terms of devm_gpio_request() would needlessly complicate things or lead to duplication from the unmanaged version depending on how it's done. Signed-off-by: Mark Brown Acked-by: Linus Walleij Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 5f52690..4ead123 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -179,6 +179,8 @@ extern void gpio_free_array(const struct gpio *array, size_t num); /* bindings for managed devices that want to request gpios */ int devm_gpio_request(struct device *dev, unsigned gpio, const char *label); +int devm_gpio_request_one(struct device *dev, unsigned gpio, + unsigned long flags, const char *label); void devm_gpio_free(struct device *dev, unsigned int gpio); #ifdef CONFIG_GPIO_SYSFS -- cgit v1.1 From 3d0f7cf0f3633f92ddeb767eb59cab73963d4dee Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 17 May 2012 13:54:40 -0600 Subject: gpio: Adjust of_xlate API to support multiple GPIO chips This patch changes the of_xlate API to make it possible for multiple gpio_chips to refer to the same device tree node. This is useful for banked GPIO controllers that use multiple gpio_chips for a single device. With this change the core code will try calling of_xlate on each gpio_chip that references the device_node and will return the gpio number for the first one to return 'true'. Tested-by: Roland Stigge Acked-by: Arnd Bergmann Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 4ead123..1ba08f0 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -144,7 +144,7 @@ extern int gpiochip_add(struct gpio_chip *chip); extern int __must_check gpiochip_remove(struct gpio_chip *chip); extern struct gpio_chip *gpiochip_find(const void *data, int (*match)(struct gpio_chip *chip, - const void *data)); + void *data)); /* Always use the library code for GPIO management calls, -- cgit v1.1 From 07ce8ec7308ab3fa55fe2861671b157f857fff58 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 18 May 2012 23:01:05 -0600 Subject: gpiolib: Remove 'const' from data argument of gpiochip_find() Commit 3d0f7cf0 "gpio: Adjust of_xlate API to support multiple GPIO chips" changed the api of gpiochip_find to drop const from the data parameter of the match hook, but didn't also drop const from data causing a build warning. Signed-off-by: Grant Likely --- include/asm-generic/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 1ba08f0..365ea09 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -142,7 +142,7 @@ extern int __must_check gpiochip_reserve(int start, int ngpio); /* add/remove chips */ extern int gpiochip_add(struct gpio_chip *chip); extern int __must_check gpiochip_remove(struct gpio_chip *chip); -extern struct gpio_chip *gpiochip_find(const void *data, +extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); -- cgit v1.1 From bca0fa5f12a6744a2b2e53154af65a51402b3426 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 23 Mar 2012 13:05:14 +0100 Subject: common: add dma_mmap_from_coherent() function Add a common helper for dma-mapping core for mapping a coherent buffer to userspace. Reported-by: Subash Patel Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Tested-By: Subash Patel --- include/asm-generic/dma-coherent.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h index 85a3ffa..abfb268 100644 --- a/include/asm-generic/dma-coherent.h +++ b/include/asm-generic/dma-coherent.h @@ -3,13 +3,15 @@ #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT /* - * These two functions are only for dma allocator. + * These three functions are only for dma allocator. * Don't use them in device drivers. */ int dma_alloc_from_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); /* * Standard interface */ -- cgit v1.1 From c64be2bb1c6eb43c838b2c6d57b074078be208dd Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 29 Dec 2011 13:09:51 +0100 Subject: drivers: add Contiguous Memory Allocator The Contiguous Memory Allocator is a set of helper functions for DMA mapping framework that improves allocations of contiguous memory chunks. CMA grabs memory on system boot, marks it with MIGRATE_CMA migrate type and gives back to the system. Kernel is allowed to allocate only movable pages within CMA's managed memory so that it can be used for example for page cache when DMA mapping do not use it. On dma_alloc_from_contiguous() request such pages are migrated out of CMA area to free required contiguous block and fulfill the request. This allows to allocate large contiguous chunks of memory at any time assuming that there is enough free memory available in the system. This code is heavily based on earlier works by Michal Nazarewicz. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Michal Nazarewicz Acked-by: Arnd Bergmann Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- include/asm-generic/dma-contiguous.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/asm-generic/dma-contiguous.h (limited to 'include/asm-generic') diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h new file mode 100644 index 0000000..c544356 --- /dev/null +++ b/include/asm-generic/dma-contiguous.h @@ -0,0 +1,28 @@ +#ifndef ASM_DMA_CONTIGUOUS_H +#define ASM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_CMA + +#include +#include + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) +{ + if (dev) + dev->cma_area = cma; + if (!dev || !dma_contiguous_default_area) + dma_contiguous_default_area = cma; +} + +#endif +#endif + +#endif -- cgit v1.1 From 322728e55aa7834e2fab2786b76df183c4843a12 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 18 May 2012 13:59:39 -0400 Subject: KVM: make asm-generic/kvm_para.h have an ifdef __KERNEL__ block There are two functions in this asm-generic file. Looking at other arch which do not use the generic version, these two fcns are within an #ifdef __KERNEL__ block, so make the generic one consistent with those. Signed-off-by: Paul Gortmaker Signed-off-by: Avi Kivity --- include/asm-generic/kvm_para.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 9a7bbad..5cba37f 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h @@ -1,6 +1,7 @@ #ifndef _ASM_GENERIC_KVM_PARA_H #define _ASM_GENERIC_KVM_PARA_H +#ifdef __KERNEL__ /* * This function is used by architectures that support kvm to avoid issuing @@ -16,4 +17,6 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +#endif /* _KERNEL__ */ + #endif -- cgit v1.1 From 73636b1aacb1a07e6fbe0d25e560e69b024a8e25 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 28 Mar 2012 13:59:18 -0400 Subject: arch/tile: allow building Linux with transparent huge pages enabled The change adds some infrastructure for managing tile pmd's more generally, using pte_pmd() and pmd_pte() methods to translate pmd values to and from ptes, since on TILEPro a pmd is really just a nested structure holding a pgd (aka pte). Several existing pmd methods are moved into this framework, and a whole raft of additional pmd accessors are defined that are used by the transparent hugepage framework. The tile PTE now has a "client2" bit. The bit is used to indicate a transparent huge page is in the process of being split into subpages. This change also fixes a generic bug where the return value of the generic pmdp_splitting_flush() was incorrect. Signed-off-by: Chris Metcalf --- include/asm-generic/pgtable.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 125c54e..e2768f1 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -158,9 +158,8 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, #endif #ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern pmd_t pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp); +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); #endif #ifndef __HAVE_ARCH_PTE_SAME -- cgit v1.1 From 36126f8f2ed8168eb13aa0662b9b9585cba100a9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 May 2012 10:43:17 -0700 Subject: word-at-a-time: make the interfaces truly generic This changes the interfaces in to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an *exact* mask of which byte had the first zero. This is run directly *outside* the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly *which* byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. - The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds --- include/asm-generic/word-at-a-time.h | 52 ++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/asm-generic/word-at-a-time.h (limited to 'include/asm-generic') diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h new file mode 100644 index 0000000..3f21f1b --- /dev/null +++ b/include/asm-generic/word-at-a-time.h @@ -0,0 +1,52 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * This says "generic", but it's actually big-endian only. + * Little-endian can use more efficient versions of these + * interfaces, see for example + * arch/x86/include/asm/word-at-a-time.h + * for those. + */ + +#include + +struct word_at_a_time { + const unsigned long high_bits, low_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) } + +/* Bit set in the bytes that have a zero */ +static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c) +{ + unsigned long mask = (val & c->low_bits) + c->low_bits; + return ~(mask | rhs); +} + +#define create_zero_mask(mask) (mask) + +static inline long find_zero(unsigned long mask) +{ + long byte = 0; +#ifdef CONFIG_64BIT + if (mask >> 32) + mask >>= 32; + else + byte = 4; +#endif + if (mask >> 16) + mask >>= 16; + else + byte += 2; + return (mask >> 8) ? byte : byte + 1; +} + +static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +{ + unsigned long rhs = val | c->low_bits; + *data = rhs; + return (val + c->high_bits) & ~rhs; +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v1.1 From 56457f38f212344fb38b250cfa7e7311c065022f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 28 May 2012 17:35:22 +0300 Subject: KVM: Export asm-generic/kvm_para.h Prevents build failures on non-KVM archs. Tested-by: Geert Uytterhoeven Signed-off-by: Avi Kivity --- include/asm-generic/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 53f91b1..2c85a0f 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -8,6 +8,7 @@ header-y += int-ll64.h header-y += ioctl.h header-y += ioctls.h header-y += ipcbuf.h +header-y += kvm_para.h header-y += mman-common.h header-y += mman.h header-y += msgbuf.h -- cgit v1.1 From 26c191788f18129af0eb32a358cdaea0c7479626 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 29 May 2012 15:06:49 -0700 Subject: mm: pmd_read_atomic: fix 32bit PAE pmd walk vs pmd_populate SMP race condition When holding the mmap_sem for reading, pmd_offset_map_lock should only run on a pmd_t that has been read atomically from the pmdp pointer, otherwise we may read only half of it leading to this crash. PID: 11679 TASK: f06e8000 CPU: 3 COMMAND: "do_race_2_panic" #0 [f06a9dd8] crash_kexec at c049b5ec #1 [f06a9e2c] oops_end at c083d1c2 #2 [f06a9e40] no_context at c0433ded #3 [f06a9e64] bad_area_nosemaphore at c043401a #4 [f06a9e6c] __do_page_fault at c0434493 #5 [f06a9eec] do_page_fault at c083eb45 #6 [f06a9f04] error_code (via page_fault) at c083c5d5 EAX: 01fb470c EBX: fff35000 ECX: 00000003 EDX: 00000100 EBP: 00000000 DS: 007b ESI: 9e201000 ES: 007b EDI: 01fb4700 GS: 00e0 CS: 0060 EIP: c083bc14 ERR: ffffffff EFLAGS: 00010246 #7 [f06a9f38] _spin_lock at c083bc14 #8 [f06a9f44] sys_mincore at c0507b7d #9 [f06a9fb0] system_call at c083becd start len EAX: ffffffda EBX: 9e200000 ECX: 00001000 EDX: 6228537f DS: 007b ESI: 00000000 ES: 007b EDI: 003d0f00 SS: 007b ESP: 62285354 EBP: 62285388 GS: 0033 CS: 0073 EIP: 00291416 ERR: 000000da EFLAGS: 00000286 This should be a longstanding bug affecting x86 32bit PAE without THP. Only archs with 64bit large pmd_t and 32bit unsigned long should be affected. With THP enabled the barrier() in pmd_none_or_trans_huge_or_clear_bad() would partly hide the bug when the pmd transition from none to stable, by forcing a re-read of the *pmd in pmd_offset_map_lock, but when THP is enabled a new set of problem arises by the fact could then transition freely in any of the none, pmd_trans_huge or pmd_trans_stable states. So making the barrier in pmd_none_or_trans_huge_or_clear_bad() unconditional isn't good idea and it would be a flakey solution. This should be fully fixed by introducing a pmd_read_atomic that reads the pmd in order with THP disabled, or by reading the pmd atomically with cmpxchg8b with THP enabled. Luckily this new race condition only triggers in the places that must already be covered by pmd_none_or_trans_huge_or_clear_bad() so the fix is localized there but this bug is not related to THP. NOTE: this can trigger on x86 32bit systems with PAE enabled with more than 4G of ram, otherwise the high part of the pmd will never risk to be truncated because it would be zero at all times, in turn so hiding the SMP race. This bug was discovered and fully debugged by Ulrich, quote: ---- [..] pmd_none_or_trans_huge_or_clear_bad() loads the content of edx and eax. 496 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) 497 { 498 /* depend on compiler for an atomic pmd read */ 499 pmd_t pmdval = *pmd; // edi = pmd pointer 0xc0507a74 : mov 0x8(%esp),%edi ... // edx = PTE page table high address 0xc0507a84 : mov 0x4(%edi),%edx ... // eax = PTE page table low address 0xc0507a8e : mov (%edi),%eax [..] Please note that the PMD is not read atomically. These are two "mov" instructions where the high order bits of the PMD entry are fetched first. Hence, the above machine code is prone to the following race. - The PMD entry {high|low} is 0x0000000000000000. The "mov" at 0xc0507a84 loads 0x00000000 into edx. - A page fault (on another CPU) sneaks in between the two "mov" instructions and instantiates the PMD. - The PMD entry {high|low} is now 0x00000003fda38067. The "mov" at 0xc0507a8e loads 0xfda38067 into eax. ---- Reported-by: Ulrich Obergfell Signed-off-by: Andrea Arcangeli Cc: Mel Gorman Cc: Hugh Dickins Cc: Larry Woodman Cc: Petr Matousek Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2768f1..6f2b45a 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd) #endif /* __HAVE_ARCH_PMD_WRITE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifndef pmd_read_atomic +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + /* + * Depend on compiler for an atomic pmd read. NOTE: this is + * only going to work, if the pmdval_t isn't larger than + * an unsigned long. + */ + return *pmdp; +} +#endif + /* * This function is meant to be used by sites walking pagetables with * the mmap_sem hold in read mode to protect against MADV_DONTNEED and @@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd) * undefined so behaving like if the pmd was none is safe (because it * can return none anyway). The compiler level barrier() is critically * important to compute the two checks atomically on the same pmdval. + * + * For 32bit kernels with a 64bit large pmd_t this automatically takes + * care of reading the pmd atomically to avoid SMP race conditions + * against pmd_populate() when the mmap_sem is hold for reading by the + * caller (a special atomic read not done by "gcc" as in the generic + * version above, is also needed when THP is disabled because the page + * fault can populate the pmd from under us). */ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) { - /* depend on compiler for an atomic pmd read */ - pmd_t pmdval = *pmd; + pmd_t pmdval = pmd_read_atomic(pmd); /* * The barrier will stabilize the pmdval in a register or on * the stack so that it will stop changing under the code. -- cgit v1.1 From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:25:23 -0400 Subject: bury __kernel_nlink_t, make internal nlink_t consistent Signed-off-by: Al Viro --- include/asm-generic/posix_types.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 91d44bd..fe74fcc 100644 --- a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h @@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t; typedef unsigned int __kernel_mode_t; #endif -#ifndef __kernel_nlink_t -typedef __kernel_ulong_t __kernel_nlink_t; -#endif - #ifndef __kernel_pid_t typedef int __kernel_pid_t; #endif -- cgit v1.1 From 133fd9f5cda2d86904126f4b9fa4e8f4330c9569 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 31 May 2012 16:26:08 -0700 Subject: vsprintf: further optimize decimal conversion Previous code was using optimizations which were developed to work well even on narrow-word CPUs (by today's standards). But Linux runs only on 32-bit and wider CPUs. We can use that. First: using 32x32->64 multiply and trivial 32-bit shift, we can correctly divide by 10 much larger numbers, and thus we can print groups of 9 digits instead of groups of 5 digits. Next: there are two algorithms to print larger numbers. One is generic: divide by 1000000000 and repeatedly print groups of (up to) 9 digits. It's conceptually simple, but requires an (unsigned long long) / 1000000000 division. Second algorithm splits 64-bit unsigned long long into 16-bit chunks, manipulates them cleverly and generates groups of 4 decimal digits. It so happens that it does NOT require long long division. If long is > 32 bits, division of 64-bit values is relatively easy, and we will use the first algorithm. If long long is > 64 bits (strange architecture with VERY large long long), second algorithm can't be used, and we again use the first one. Else (if long is 32 bits and long long is 64 bits) we use second one. And third: there is a simple optimization which takes fast path not only for zero as was done before, but for all one-digit numbers. In all tested cases new code is faster than old one, in many cases by 30%, in few cases by more than 50% (for example, on x86-32, conversion of 12345678). Code growth is ~0 in 32-bit case and ~130 bytes in 64-bit case. This patch is based upon an original from Michal Nazarewicz. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Michal Nazarewicz Signed-off-by: Denys Vlasenko Cc: Douglas W Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bitsperlong.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h index 4ae54e0..a7b0914 100644 --- a/include/asm-generic/bitsperlong.h +++ b/include/asm-generic/bitsperlong.h @@ -28,5 +28,9 @@ #error Inconsistent word size. Check asm/bitsperlong.h #endif +#ifndef BITS_PER_LONG_LONG +#define BITS_PER_LONG_LONG 64 +#endif + #endif /* __KERNEL__ */ #endif /* __ASM_GENERIC_BITS_PER_LONG */ -- cgit v1.1