From 82985258390e85289940d3663344197344e071f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Apr 2017 17:20:01 -0400 Subject: kill strlen_user() no callers, no consistent semantics, no sane way to use it... Signed-off-by: Al Viro --- arch/ia64/include/asm/uaccess.h | 12 --- arch/ia64/lib/Makefile | 2 +- arch/ia64/lib/strlen_user.S | 200 ---------------------------------------- 3 files changed, 1 insertion(+), 213 deletions(-) delete mode 100644 arch/ia64/lib/strlen_user.S (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 82a7646..b2106b0 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -277,18 +277,6 @@ extern long __must_check __strncpy_from_user (char *to, const char __user *from, __sfu_ret; \ }) -/* Returns: 0 if bad, string length+1 (memory size) of string if ok */ -extern unsigned long __strlen_user (const char __user *); - -#define strlen_user(str) \ -({ \ - const char __user *__su_str = (str); \ - unsigned long __su_ret = 0; \ - if (__access_ok(__su_str, 0)) \ - __su_ret = __strlen_user(__su_str); \ - __su_ret; \ -}) - /* * Returns: 0 if exception before NUL or reaching the supplied limit * (N), a value greater than N if the limit would be exceeded, else diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index 0a40b14..1a36a3a 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -5,7 +5,7 @@ lib-y := io.o __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ checksum.o clear_page.o csum_partial_copy.o \ - clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ + clear_user.o strncpy_from_user.o strnlen_user.o \ flush.o ip_fast_csum.o do_csum.o \ memset.o strlen.o xor.o diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S deleted file mode 100644 index 9d25768..0000000 --- a/arch/ia64/lib/strlen_user.S +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Optimized version of the strlen_user() function - * - * Inputs: - * in0 address of buffer - * - * Outputs: - * ret0 0 in case of fault, strlen(buffer)+1 otherwise - * - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * - * 01/19/99 S.Eranian heavily enhanced version (see details below) - * 09/24/99 S.Eranian added speculation recovery code - */ - -#include -#include - -// -// int strlen_user(char *) -// ------------------------ -// Returns: -// - length of string + 1 -// - 0 in case an exception is raised -// -// This is an enhanced version of the basic strlen_user. it includes a -// combination of compute zero index (czx), parallel comparisons, speculative -// loads and loop unroll using rotating registers. -// -// General Ideas about the algorithm: -// The goal is to look at the string in chunks of 8 bytes. -// so we need to do a few extra checks at the beginning because the -// string may not be 8-byte aligned. In this case we load the 8byte -// quantity which includes the start of the string and mask the unused -// bytes with 0xff to avoid confusing czx. -// We use speculative loads and software pipelining to hide memory -// latency and do read ahead safely. This way we defer any exception. -// -// Because we don't want the kernel to be relying on particular -// settings of the DCR register, we provide recovery code in case -// speculation fails. The recovery code is going to "redo" the work using -// only normal loads. If we still get a fault then we return an -// error (ret0=0). 
Otherwise we return the strlen+1 as usual. -// The fact that speculation may fail can be caused, for instance, by -// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., -// a NaT bit will be set if the translation is not present. The normal -// load, on the other hand, will cause the translation to be inserted -// if the mapping exists. -// -// It should be noted that we execute recovery code only when we need -// to use the data that has been speculatively loaded: we don't execute -// recovery code on pure read ahead data. -// -// Remarks: -// - the cmp r0,r0 is used as a fast way to initialize a predicate -// register to 1. This is required to make sure that we get the parallel -// compare correct. -// -// - we don't use the epilogue counter to exit the loop but we need to set -// it to zero beforehand. -// -// - after the loop we must test for Nat values because neither the -// czx nor cmp instruction raise a NaT consumption fault. We must be -// careful not to look too far for a Nat for which we don't care. -// For instance we don't need to look at a NaT in val2 if the zero byte -// was in val1. -// -// - Clearly performance tuning is required. -// - -#define saved_pfs r11 -#define tmp r10 -#define base r16 -#define orig r17 -#define saved_pr r18 -#define src r19 -#define mask r20 -#define val r21 -#define val1 r22 -#define val2 r23 - -GLOBAL_ENTRY(__strlen_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,11,0,0,8 - - .rotr v[2], w[2] // declares our 4 aliases - - extr.u tmp=in0,0,3 // tmp=least significant 3 bits - mov orig=in0 // keep trackof initial byte address - dep src=0,in0,0,3 // src=8byte-aligned in0 address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) - ;; - - .body - - ld8.s v[1]=[src],8 // load the initial 8bytes (must speculate) - shl tmp=tmp,3 // multiply by 8bits/byte - mov mask=-1 // our mask - ;; - ld8.s w[1]=[src],8 // load next 8 bytes in 2nd pipeline - cmp.eq p6,p0=r0,r0 // sets p6 (required because of // cmp.and) - sub tmp=64,tmp // how many bits to shift our mask on the right - ;; - shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part - mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) - ;; - add base=-16,src // keep track of aligned base - chk.s v[1], .recover // if already NaT, then directly skip to recover - or v[1]=v[1],mask // now we have a safe initial byte pattern - ;; -1: - ld8.s v[0]=[src],8 // speculatively load next - czx1.r val1=v[1] // search 0 byte from right - czx1.r val2=w[1] // search 0 byte from right following 8bytes - ;; - ld8.s w[0]=[src],8 // speculatively load next to next - cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 - cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 -(p6) br.wtop.dptk.few 1b // loop until p6 == 0 - ;; - // - // We must return try the recovery code iff - // val1_is_nat || (val1==8 && val2_is_nat) - // - // XXX Fixme - // - there must be a better way of doing the test - // - cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) - tnat.nz p6,p7=val1 // test NaT on val1 -(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT - ;; - // - // if we come here p7 is true, i.e., initialized for // cmp - // - cmp.eq.and p7,p0=8,val1// val1==8? 
- tnat.nz.and p7,p0=val2 // test NaT if val2 -(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT - ;; -(p8) mov val1=val2 // val2 contains the value -(p8) adds src=-16,src // correct position when 3 ahead -(p9) adds src=-24,src // correct position when 4 ahead - ;; - sub ret0=src,orig // distance from origin - sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of normal execution - - // - // Outlined recovery code when speculation failed - // - // This time we don't use speculation and rely on the normal exception - // mechanism. that's why the loop is not as good as the previous one - // because read ahead is not possible - // - // XXX Fixme - // - today we restart from the beginning of the string instead - // of trying to continue where we left off. - // -.recover: - EX(.Lexit1, ld8 val=[base],8) // load the initial bytes - ;; - or val=val,mask // remask first bytes - cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop - ;; - // - // ar.ec is still zero here - // -2: - EX(.Lexit1, (p6) ld8 val=[base],8) - ;; - czx1.r val1=val // search 0 byte from right - ;; - cmp.eq p6,p0=8,val1 // val1==8 ? -(p6) br.wtop.dptk.few 2b // loop until p6 == 0 - ;; - sub ret0=base,orig // distance from base - sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of successful recovery code - - // - // We failed even on the normal load (called from exception handler) - // -.Lexit1: - mov ret0=0 - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp -END(__strlen_user) -EXPORT_SYMBOL(__strlen_user) -- cgit v1.1 From 2a76213072c8d6ac4364455e236d52b7d36601ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 14 May 2017 11:54:11 -0300 Subject: ia64, scsi: update references for the device-io book The book is now at Documentation/driver-api/device-io.rst. Update such references. Signed-off-by: Mauro Carvalho Chehab --- arch/ia64/include/asm/io.h | 2 +- arch/ia64/sn/kernel/iomv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 5de673a..a2540e2 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -117,7 +117,7 @@ extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count); * following the barrier will arrive after all previous writes. For most * ia64 platforms, this is a simple 'mf.a' instruction. * - * See Documentation/DocBook/deviceiobook.tmpl for more information. + * See Documentation/driver-api/device-io.rst for more information. */ static inline void ___ia64_mmiowb(void) { diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c index c77ebdf..2b22a716 100644 --- a/arch/ia64/sn/kernel/iomv.c +++ b/arch/ia64/sn/kernel/iomv.c @@ -63,7 +63,7 @@ EXPORT_SYMBOL(sn_io_addr); /** * __sn_mmiowb - I/O space memory barrier * - * See arch/ia64/include/asm/io.h and Documentation/DocBook/deviceiobook.tmpl + * See arch/ia64/include/asm/io.h and Documentation/driver-api/device-io.rst * for details. * * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear. 
-- cgit v1.1 From 1c4f676a68a502e7bef7d0e49952b042d00aa496 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 May 2017 23:13:37 -0400 Subject: net: Define SCM_TIMESTAMPING_PKTINFO on all architectures. A definition was only provided for platforms using asm-generic/socket.h; define it for the others as well. Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- arch/ia64/include/uapi/asm/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 2c3f4b4..8693724 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -107,4 +107,6 @@ #define SO_COOKIE 57 +#define SCM_TIMESTAMPING_PKTINFO 58 + #endif /* _ASM_IA64_SOCKET_H */ -- cgit v1.1 From 8535796579fd08f226878212b39b2682064d41b7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 6 Jun 2017 12:54:36 +0200 Subject: tty: simserial: drop unused alt_speed handling This driver was setting the deprecated and broken alt_speed based on port flags, but never provided a means to change the flags or to actually change the speed. Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- arch/ia64/hp/sim/simserial.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index de8cba1..70d52e9 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -387,19 +387,6 @@ static int activate(struct tty_port *port, struct tty_struct *tty) } state->xmit.head = state->xmit.tail = 0; - - /* - * Set up the tty->alt_speed kludge - */ - if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) - tty->alt_speed = 57600; - if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) - tty->alt_speed = 115200; - if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) - tty->alt_speed = 230400; - if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) - tty->alt_speed = 460800; - errout: local_irq_restore(flags); return retval; -- cgit v1.1 From 75fbede1cf8415d23fe1cb59cd70faa4d08813ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 21 May 2017 12:28:08 +0200 Subject: ia64: remove DMA_ERROR_CODE All ia64 dma_mapping_ops instances already have a mapping_error member. Signed-off-by: Christoph Hellwig --- arch/ia64/include/asm/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 73ec3c6..3ce5ab4 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -12,8 +12,6 @@ #define ARCH_HAS_DMA_GET_REQUIRED_MASK -#define DMA_ERROR_CODE 0 - extern const struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); -- cgit v1.1 From 28b5ba2aa0f55d80adb2624564ed2b170c19519e Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 21 Jun 2017 10:47:15 +0200 Subject: net: introduce SO_PEERGROUPS getsockopt This adds the new getsockopt(2) option SO_PEERGROUPS on SOL_SOCKET to retrieve the auxiliary groups of the remote peer. It is designed to naturally extend SO_PEERCRED. That is, the underlying data is from the same credentials. Regarding its syntax, it is based on SO_PEERSEC. That is, if the provided buffer is too small, ERANGE is returned and @optlen is updated. Otherwise, the information is copied, @optlen is set to the actual size, and 0 is returned.
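
To make this calling convention concrete, here is a minimal user-space sketch. It is illustrative only: the helper name, the zero-length probe, and the error handling are assumptions layered on the semantics just described; only SOL_SOCKET, SO_PEERGROUPS, and the gid_t array come from the patch itself.

    #include <errno.h>
    #include <stdlib.h>
    #include <sys/socket.h>
    #include <sys/types.h>

    #ifndef SO_PEERGROUPS
    #define SO_PEERGROUPS 59    /* value this patch adds to the ia64 uapi header */
    #endif

    /* Return a malloc()ed array of the peer's auxiliary gids, or NULL. */
    static gid_t *get_peer_groups(int fd, size_t *count)
    {
        socklen_t len = 0;
        gid_t *groups;

        /* Probe with a zero-length buffer; ERANGE updates len for us. */
        if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, NULL, &len) == 0) {
            *count = 0;
            return NULL;    /* peer has no auxiliary groups */
        }
        if (errno != ERANGE)
            return NULL;    /* genuine failure */

        groups = malloc(len);
        if (!groups)
            return NULL;
        if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, groups, &len) < 0) {
            free(groups);
            return NULL;
        }
        *count = len / sizeof(gid_t);
        return groups;
    }

Typical usage would be on a connected AF_UNIX socket, e.g. the fd returned by accept(2), mirroring how SO_PEERCRED is used.
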
While SO_PEERCRED (and thus `struct ucred') already returns the primary group, it lacks the auxiliary group vector. However, nearly all access controls (including kernel side VFS and SYSVIPC, but also user-space polkit, DBus, ...) consider the entire set of groups, rather than just the primary group. But this is currently not possible with pure SO_PEERCRED. Instead, user-space has to work around this and query the system database for the auxiliary groups of a UID retrieved via SO_PEERCRED. Unfortunately, there is no race-free way to query the auxiliary groups of the PID/UID retrieved via SO_PEERCRED. Hence, the current user-space solution is to use getgrouplist(3p), which itself falls back to NSS and whatever is configured in nsswitch.conf(5). This effectively checks which groups we *would* assign to the user if they logged in *now*. On normal systems it is as easy as reading /etc/group, but with NSS it can resort to querying network databases (e.g., LDAP), using IPC or network communication. Long story short: Whenever we want to use auxiliary groups for access checks on IPC, we need further IPC to talk to the user/group databases, rather than just relying on SO_PEERCRED and the incoming socket. This is unfortunate, and might even result in deadlocks if the database query uses the same IPC as the original request. So far, those recursions / deadlocks have been avoided by using primitive IPC for all crucial NSS modules. However, we want to avoid re-inventing the wheel for each NSS module that might be involved in user/group queries. Hence, we would preferably make DBus (and other IPC that supports access management based on groups) work without resorting to the user/group database. This new SO_PEERGROUPS socket option would allow us to make dbus-daemon work without ever calling into NSS. Cc: Michal Sekletar Cc: Simon McVittie Reviewed-by: Tom Gundersen Signed-off-by: David Herrmann Signed-off-by: David S. Miller --- arch/ia64/include/uapi/asm/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/ia64') diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 8693724..5dd5c5d 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -109,4 +109,6 @@ #define SCM_TIMESTAMPING_PKTINFO 58 +#define SO_PEERGROUPS 59 + #endif /* _ASM_IA64_SOCKET_H */ -- cgit v1.1 From df91b0262e2cff23db5eac77126ea0bef06d54d2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 30 Jun 2017 00:52:06 +0900 Subject: ia64: remove unneeded extra-y in Makefile.gate All the files listed in "extra-y" are generated anyway according to the dependencies; they only need to stay in "targets" so that the .*.cmd files are included for incremental building. Signed-off-by: Masahiro Yamada --- arch/ia64/kernel/Makefile.gate | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate index a32903a..92693c1 100644 --- a/arch/ia64/kernel/Makefile.gate +++ b/arch/ia64/kernel/Makefile.gate @@ -1,8 +1,6 @@ # The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o - -extra-y += gate.so gate-syms.o gate.lds gate.o +targets += gate.so gate-syms.o gate.lds gate.o CPPFLAGS_gate.lds := -P -C -U$(ARCH) -- cgit v1.1 From 26160371359ea91d8e2bdad6245f39e7ed0c17a3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 23 Jun 2017 23:44:15 +1000 Subject: ia64: thin archives fix linking The VDSO symbols can't be linked into built-in.o when building with thin archives, so change this to linking a new object file that is included into the built-in.o. Cc: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org Signed-off-by: Nicholas Piggin Signed-off-by: Masahiro Yamada --- arch/ia64/kernel/Makefile.gate | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate index 92693c1..7da7c65 100644 --- a/arch/ia64/kernel/Makefile.gate +++ b/arch/ia64/kernel/Makefile.gate @@ -1,6 +1,8 @@ # The gate DSO image is built using a special linker script. -targets += gate.so gate-syms.o gate.lds gate.o +targets += gate.so gate.lds gate.o gate-dummy.o + +obj-y += gate-syms.o CPPFLAGS_gate.lds := -P -C -U$(ARCH) @@ -12,13 +14,14 @@ GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ $(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE $(call if_changed,gate) -$(obj)/built-in.o: $(obj)/gate-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o - -GATECFLAGS_gate-syms.o = -r -$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE +GATECFLAGS_gate-dummy.o = -r +$(obj)/gate-dummy.o: $(obj)/gate.lds $(obj)/gate.o FORCE $(call if_changed,gate) +LDFLAGS_gate-syms.o := -r -R +$(obj)/gate-syms.o: $(obj)/gate-dummy.o FORCE + $(call if_changed,ld) + # gate-data.o contains the gate DSO image as data in section .data..gate. # We must build gate.so before we can assemble it. 
# Note: kbuild does not track this dependency due to usage of .incbin -- cgit v1.1 From 3170d8d226c2053355f3946b4b5ded4c006fe6d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 May 2017 20:06:33 -0400 Subject: kill {__,}{get,put}_user_unaligned() no users left Signed-off-by: Al Viro --- arch/ia64/include/asm/uaccess.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 82a7646..a217bcf 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -87,42 +87,6 @@ static inline int __access_ok(const void __user *p, unsigned long size) #define __put_user(x, ptr) __put_user_nocheck((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr))) #define __get_user(x, ptr) __get_user_nocheck((x), (ptr), sizeof(*(ptr))) -extern long __put_user_unaligned_unknown (void); - -#define __put_user_unaligned(x, ptr) \ -({ \ - long __ret; \ - switch (sizeof(*(ptr))) { \ - case 1: __ret = __put_user((x), (ptr)); break; \ - case 2: __ret = (__put_user((x), (u8 __user *)(ptr))) \ - | (__put_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break; \ - case 4: __ret = (__put_user((x), (u16 __user *)(ptr))) \ - | (__put_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break; \ - case 8: __ret = (__put_user((x), (u32 __user *)(ptr))) \ - | (__put_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break; \ - default: __ret = __put_user_unaligned_unknown(); \ - } \ - __ret; \ -}) - -extern long __get_user_unaligned_unknown (void); - -#define __get_user_unaligned(x, ptr) \ -({ \ - long __ret; \ - switch (sizeof(*(ptr))) { \ - case 1: __ret = __get_user((x), (ptr)); break; \ - case 2: __ret = (__get_user((x), (u8 __user *)(ptr))) \ - | (__get_user((x) >> 8, ((u8 __user *)(ptr) + 1))); break; \ - case 4: __ret = (__get_user((x), (u16 __user *)(ptr))) \ - | (__get_user((x) >> 16, ((u16 __user *)(ptr) + 1))); break; \ - case 8: __ret = (__get_user((x), (u32 __user *)(ptr))) \ - | (__get_user((x) >> 32, ((u32 __user *)(ptr) + 1))); break; \ - default: __ret = __get_user_unaligned_unknown(); \ - } \ - __ret; \ -}) - #ifdef ASM_SUPPORTED struct __large_struct { unsigned long buf[100]; }; # define __m(x) (*(struct __large_struct __user *)(x)) -- cgit v1.1 From 1b862aecfbd419cdc4553645bf86d07554279bed Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:37:45 -0700 Subject: mm, memory_hotplug: get rid of is_zone_device_section Device memory hotplug hooks into regular memory hotplug only half way. It needs memory sections to track struct pages but there is no need/desire to associate those sections with memory blocks and export them to userspace via sysfs because they cannot be onlined anyway. This is currently expressed by the for_device argument to arch_add_memory which then makes sure to associate the given memory range with ZONE_DEVICE. register_new_memory then relies on is_zone_device_section to distinguish special memory hotplug from the regular one. While this works now, later patches in this series want to move __add_zone outside of the arch_add_memory path so we have to come up with something else. Add want_memblock down the __add_pages path and use it to control whether the section->memblock association should be done. arch_add_memory then just trivially wants a memblock for everything but for_device hotplug. remove_memory_section doesn't need is_zone_device_section either. We can simply skip all the memblock specific cleanup if there is no memblock for the given section. This shouldn't introduce any functional change.
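
The want_memblock plumbing described above can be modeled in a few self-contained lines. This is a toy illustration, not the actual mm/memory_hotplug.c code: the function names echo the commit message, and the bodies are stand-ins.

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for the sysfs memory-block registration. */
    static int register_new_memory(int nid, int section)
    {
        printf("node %d: section %d exported as a memory block\n", nid, section);
        return 0;
    }

    static int add_section(int nid, int section, bool want_memblock)
    {
        /* struct page / sparse-section setup would happen here either way */
        if (!want_memblock)
            return 0;    /* device memory: no memblock, cannot be onlined */
        return register_new_memory(nid, section);
    }

    int main(void)
    {
        add_section(0, 42, true);     /* regular hotplug wants a memblock */
        add_section(0, 43, false);    /* for_device hotplug skips the sysfs export */
        return 0;
    }
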
Link: http://lkml.kernel.org/r/20170515085827.16474-5-mhocko@kernel.org Signed-off-by: Michal Hocko Tested-by: Dan Williams Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 8f3efa6..39e2aeb 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -658,7 +658,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) zone = pgdat->node_zones + zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); - ret = __add_pages(nid, zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", -- cgit v1.1 From f1dd2cd13c4bbbc9a7c4617b3b034fa643de98fe Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:38:11 -0700 Subject: mm, memory_hotplug: do not associate hotadded memory to zones until online The current memory hotplug implementation relies on having all the struct pages associated with a zone/node during the physical hotplug phase (arch_add_memory->__add_pages->__add_section->__add_zone). In the vast majority of cases this means that they are added to ZONE_NORMAL. This has been so since 9d99aaa31f59 ("[PATCH] x86_64: Support memory hotadd without sparsemem") and it wasn't a big deal back then because movable onlining didn't exist yet. Much later memory hotplug wanted to (ab)use ZONE_MOVABLE for movable onlining, 511c2aba8f07 ("mm, memory-hotplug: dynamic configure movable memory and portion memory"), and then things got more complicated. Rather than reconsidering the zone association, which was no longer needed (because memory hotplug already depended on SPARSEMEM), a convoluted semantic of zone shifting has been developed. Only the currently last memblock or the one adjacent to the zone_movable can be onlined movable. This essentially means that the online type changes as the new memblocks are added. Let's simulate memory hot online manually:
$ echo 0x100000000 > /sys/devices/system/memory/probe
$ grep . /sys/devices/system/memory/memory32/valid_zones
Normal Movable
$ echo $((0x100000000+(128<<20))) > /sys/devices/system/memory/probe
$ grep . /sys/devices/system/memory/memory3?/valid_zones
/sys/devices/system/memory/memory32/valid_zones:Normal
/sys/devices/system/memory/memory33/valid_zones:Normal Movable
$ echo $((0x100000000+2*(128<<20))) > /sys/devices/system/memory/probe
$ grep . /sys/devices/system/memory/memory3?/valid_zones
/sys/devices/system/memory/memory32/valid_zones:Normal
/sys/devices/system/memory/memory33/valid_zones:Normal
/sys/devices/system/memory/memory34/valid_zones:Normal Movable
$ echo online_movable > /sys/devices/system/memory/memory34/state
$ grep . /sys/devices/system/memory/memory3?/valid_zones
/sys/devices/system/memory/memory32/valid_zones:Normal
/sys/devices/system/memory/memory33/valid_zones:Normal Movable
/sys/devices/system/memory/memory34/valid_zones:Movable Normal
This is an awkward semantic because a udev event is sent as soon as the block is onlined and a udev handler might want to online it based on some policy (e.g. association with a node) but it will inherently race with new blocks showing up. This patch changes the physical online phase to not associate pages with any zone at all. All the pages are just marked reserved and wait for the onlining phase to be associated with the zone as per the online request. There are only two requirements:
- existing ZONE_NORMAL and ZONE_MOVABLE cannot overlap
- ZONE_NORMAL precedes ZONE_MOVABLE in physical addresses
The latter is not an inherent requirement and can be changed in the future; it preserves the current behavior and makes the code slightly simpler. This is subject to change in the future. This means that the same physical online steps as above will lead to the following state:
Normal Movable
/sys/devices/system/memory/memory32/valid_zones:Normal Movable
/sys/devices/system/memory/memory33/valid_zones:Normal Movable
/sys/devices/system/memory/memory32/valid_zones:Normal Movable
/sys/devices/system/memory/memory33/valid_zones:Normal Movable
/sys/devices/system/memory/memory34/valid_zones:Normal Movable
/sys/devices/system/memory/memory32/valid_zones:Normal Movable
/sys/devices/system/memory/memory33/valid_zones:Normal Movable
/sys/devices/system/memory/memory34/valid_zones:Movable
Implementation: The current move_pfn_range is reimplemented to check the above requirements (allow_online_pfn_range) and then updates the respective zone (move_pfn_range_to_zone), the pgdat, and links all the pages in the pfn range with the zone/node. __add_pages is updated to not require the zone and only initializes sections in the range. This allowed us to simplify the arch_add_memory code (s390 could get rid of quite some code). devm_memremap_pages is the only user of arch_add_memory which relies on the zone association, because it hooks into memory hotplug only half way. It uses it to associate the new memory with ZONE_DEVICE but doesn't allow it to be {on,off}lined via sysfs. This means that this particular code path has to call move_pfn_range_to_zone explicitly. The original zone shifting code is kept in place and will be removed in the follow-up patch for an easier review. Please note that this patch also changes the original behavior: offlining a memory block adjacent to another zone (Normal vs. Movable) used to allow changing its movable type. This will be handled later.
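
The zone_intersects() simplification credited in the changelog notes just below can be sketched compactly. The signature here is assumed for illustration (the in-kernel helper operates on a struct zone and a pfn range); the predicate itself is the standard half-open interval intersection needed to enforce the "cannot overlap" requirement above.

    #include <stdbool.h>

    /* Does [start_pfn, start_pfn + nr_pages) intersect [zone_start, zone_end)? */
    static bool zone_intersects(unsigned long zone_start, unsigned long zone_end,
                                unsigned long start_pfn, unsigned long nr_pages)
    {
        return start_pfn < zone_end && start_pfn + nr_pages > zone_start;
    }

An empty zone (zone_start == zone_end) correctly never intersects anything.
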
[richard.weiyang@gmail.com: simplify zone_intersects()] Link: http://lkml.kernel.org/r/20170616092335.5177-1-richard.weiyang@gmail.com [richard.weiyang@gmail.com: remove duplicate call for set_page_links] Link: http://lkml.kernel.org/r/20170616092335.5177-2-richard.weiyang@gmail.com [akpm@linux-foundation.org: remove unused local `i'] Link: http://lkml.kernel.org/r/20170515085827.16474-12-mhocko@kernel.org Signed-off-by: Michal Hocko Signed-off-by: Wei Yang Tested-by: Dan Williams Tested-by: Reza Arbab Acked-by: Heiko Carstens # For s390 bits Acked-by: Vlastimil Babka Cc: Martin Schwidefsky Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Daniel Kiper Cc: David Rientjes Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 39e2aeb..80db57d 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -648,18 +648,11 @@ mem_init (void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { - pg_data_t *pgdat; - struct zone *zone; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - pgdat = NODE_DATA(nid); - - zone = pgdat->node_zones + - zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); - ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device); - + ret = __add_pages(nid, start_pfn, nr_pages, !for_device); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); -- cgit v1.1 From 3d79a728f9b2e6ddcce4e02c91c4de1076548a4c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:38:21 -0700 Subject: mm, memory_hotplug: replace for_device by want_memblock in arch_add_memory arch_add_memory gets a for_device argument which then controls whether we want to create memblocks for the created memory sections. Simplify the logic by saying directly whether we want memblocks, rather than going through a pointless negation. This also makes the API easier to understand, because it is clear what we want, rather than an uninformative for_device which can mean anything. This shouldn't introduce any functional change.
Link: http://lkml.kernel.org/r/20170515085827.16474-13-mhocko@kernel.org Signed-off-by: Michal Hocko Tested-by: Dan Williams Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 80db57d..a4e8d6b 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -646,13 +646,13 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) +int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, !for_device); + ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); -- cgit v1.1 From 7868a2087ec13ec4a5df0c5e00999863be132ba8 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 6 Jul 2017 15:39:42 -0700 Subject: mm/hugetlb: add size parameter to huge_pte_offset() A poisoned or migrated hugepage is stored as a swap entry in the page tables. On architectures that support hugepages consisting of contiguous page table entries (such as on arm64) this leads to ambiguity in determining the page table entry to return in huge_pte_offset() when a poisoned entry is encountered. Let's remove the ambiguity by adding a size parameter to convey additional information about the requested address. Also fixup the definition/usage of huge_pte_offset() throughout the tree. Link: http://lkml.kernel.org/r/20170522133604.11392-4-punit.agrawal@arm.com Signed-off-by: Punit Agrawal Acked-by: Steve Capper Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: James Hogan (odd fixer:METAG ARCHITECTURE) Cc: Ralf Baechle (supporter:MIPS) Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Alexander Viro Cc: Michal Hocko Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: "Kirill A. 
Shutemov" Cc: Hillf Danton Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/hugetlbpage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 85de86d..ae35140 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -44,7 +44,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) } pte_t * -huge_pte_offset (struct mm_struct *mm, unsigned long addr) +huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz) { unsigned long taddr = htlbpage_to_page(addr); pgd_t *pgd; @@ -92,7 +92,7 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int writ if (REGION_NUMBER(addr) != RGN_HPAGE) return ERR_PTR(-EINVAL); - ptep = huge_pte_offset(mm, addr); + ptep = huge_pte_offset(mm, addr, HPAGE_SIZE); if (!ptep || pte_none(*ptep)) return NULL; page = pte_page(*ptep); -- cgit v1.1 From e18e340fd171fbb9bd38125c3d0ba5d3fd3ddfb9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 10 Jul 2017 03:32:44 +0900 Subject: ia64: remove redundant generic-y += kvm_para.h from asm/Kbuild "generic-y += kvm_para.h" is doubled in asm/Kbuild and uapi/asm/Kbuild. The one in the former should be simply removed because kvm_para.h is exported. Signed-off-by: Masahiro Yamada --- arch/ia64/include/asm/Kbuild | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 502a91d..1d7641f 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -1,8 +1,6 @@ - generic-y += clkdev.h generic-y += exec.h generic-y += irq_work.h -generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h -- cgit v1.1 From 203e9e41219b4e7357104e525e91ac609fba2c6c Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 12 Jul 2017 14:33:14 -0700 Subject: kexec: move vmcoreinfo out of the kernel's .bss section As Eric said, "what we need to do is move the variable vmcoreinfo_note out of the kernel's .bss section. And modify the code to regenerate and keep this information in something like the control page. Definitely something like this needs a page all to itself, and ideally far away from any other kernel data structures. I clearly was not watching closely the data someone decided to keep this silly thing in the kernel's .bss section." This patch allocates extra pages for these vmcoreinfo_XXX variables, one advantage is that it enhances some safety of vmcoreinfo, because vmcoreinfo now is kept far away from other kernel data structures. 
Link: http://lkml.kernel.org/r/1493281021-20737-1-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang Tested-by: Michael Holzheu Reviewed-by: Juergen Gross Suggested-by: Eric Biederman Cc: Benjamin Herrenschmidt Cc: Dave Young Cc: Hari Bathini Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/machine_kexec.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 599507b..c14815d 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void) #endif } -phys_addr_t paddr_vmcoreinfo_note(void) -{ - return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note); -} - -- cgit v1.1 From d778931d7b32ada39de0a45267b0c4af0600e277 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:16 -0700 Subject: ia64: move inline before return type Make the use of inline like the rest of the kernel. Link: http://lkml.kernel.org/r/d47074493af80ce12590340294bc49618165c30d.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/mca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64') diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 79c7c46..555b111 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -334,7 +334,7 @@ static void ia64_mlogbuf_dump_from_init(void) ia64_mlogbuf_dump(); } -static void inline +static inline void ia64_mca_spin(const char *func) { if (monarch_cpu == smp_processor_id()) -- cgit v1.1 From c02f2a911f2bb833f982d869e3a4ae67e1468969 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:19 -0700 Subject: ia64: sn: pci: move inline before type Make the use of inline like the rest of the kernel. Link: http://lkml.kernel.org/r/f42b2202bd0d4e7ccf79ce5348bb255a035e67bb.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/sn/pci/pcibr/pcibr_ate.c | 2 +- arch/ia64/sn/pci/tioce_provider.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/ia64') diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 5bc34ea..b67bb4c 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -140,7 +140,7 @@ static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info, /* * Update the ate. */ -void inline +inline void ate_write(struct pcibus_info *pcibus_info, int ate_index, int count, volatile u64 ate) { diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 46d3df4..3bd9abc 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -52,7 +52,7 @@ * All registers defined in struct tioce will meet that criteria. */ -static void inline +static inline void tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr) { u64 mmr_base; @@ -78,7 +78,7 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr) } } -static void inline +static inline void tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr) { u64 mmr_base; -- cgit v1.1
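
The two "move inline" patches above are purely stylistic; both orderings denote the same function type in C. A stand-alone illustration (not kernel code; GCC's -Wold-style-declaration, enabled by -Wextra, warns about the old ordering):

    /* The kernel's preferred style, which these patches enforce,
     * puts inline before the return type. */
    static void inline old_style(void) { }    /* legal, but reads oddly */
    static inline void new_style(void) { }    /* preferred ordering */

    int main(void)
    {
        old_style();
        new_style();
        return 0;
    }
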