diff options
Diffstat (limited to 'sys')
84 files changed, 2324 insertions, 335 deletions
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 8022c22..33d2542 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -599,6 +599,11 @@ device cpuctl options ENABLE_ALART # Control alarm on Intel intpm driver # +# AMD System Management Network (SMN) +# +device amdsmn + +# # Number of initial kernel page table pages used for early bootstrap. # This number should include enough pages to map the kernel and any # modules or other data loaded with the kernel by the loader. Each diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index 9beafbc..1f35e84 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -964,6 +964,7 @@ svm_save_intinfo(struct svm_softc *svm_sc, int vcpu) vm_exit_intinfo(svm_sc->vm, vcpu, intinfo); } +#ifdef INVARIANTS static __inline int vintr_intercept_enabled(struct svm_softc *sc, int vcpu) { @@ -971,6 +972,7 @@ vintr_intercept_enabled(struct svm_softc *sc, int vcpu) return (svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR)); } +#endif static __inline void enable_intr_window_exiting(struct svm_softc *sc, int vcpu) diff --git a/sys/arm/allwinner/if_awg.c b/sys/arm/allwinner/if_awg.c index cded892..6170adb 100644 --- a/sys/arm/allwinner/if_awg.c +++ b/sys/arm/allwinner/if_awg.c @@ -87,7 +87,7 @@ __FBSDID("$FreeBSD$"); #define TX_SKIP(n, o) (((n) + (o)) & (TX_DESC_COUNT - 1)) #define RX_NEXT(n) (((n) + 1) & (RX_DESC_COUNT - 1)) -#define TX_MAX_SEGS 10 +#define TX_MAX_SEGS 20 #define SOFT_RST_RETRY 1000 #define MII_BUSY_RETRY 1000 @@ -148,6 +148,7 @@ struct awg_softc { struct resource *res[2]; struct mtx mtx; if_t ifp; + device_t dev; device_t miibus; struct callout stat_ch; struct task link_task; @@ -375,14 +376,18 @@ awg_setup_txbuf(struct awg_softc *sc, int index, struct mbuf **mp) sc->tx.buf_map[index].map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(m, M_NOWAIT, TX_MAX_SEGS); - if (m == NULL) + if (m == NULL) { + device_printf(sc->dev, "awg_setup_txbuf: 
m_collapse failed\n"); return (0); + } *mp = m; error = bus_dmamap_load_mbuf_sg(sc->tx.buf_tag, sc->tx.buf_map[index].map, m, segs, &nsegs, BUS_DMA_NOWAIT); } - if (error != 0) + if (error != 0) { + device_printf(sc->dev, "awg_setup_txbuf: bus_dmamap_load_mbuf_sg failed\n"); return (0); + } bus_dmamap_sync(sc->tx.buf_tag, sc->tx.buf_map[index].map, BUS_DMASYNC_PREWRITE); @@ -1324,6 +1329,7 @@ awg_attach(device_t dev) int error; sc = device_get_softc(dev); + sc->dev = dev; node = ofw_bus_get_node(dev); if (bus_alloc_resources(dev, awg_spec, sc->res) != 0) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index e01b89f..0a7239a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -4237,6 +4237,7 @@ arc_available_memory(void) free_memory_reason_t r = FMR_UNKNOWN; #ifdef _KERNEL +#ifdef __FreeBSD__ /* * Cooperate with pagedaemon when it's time for it to scan * and reclaim some pages. @@ -4247,7 +4248,15 @@ arc_available_memory(void) r = FMR_LOTSFREE; } -#ifdef illumos +#else + if (needfree > 0) { + n = PAGESIZE * (-needfree); + if (n < lowest) { + lowest = n; + r = FMR_NEEDFREE; + } + } + /* * check that we're out of range of the pageout scanner. It starts to * schedule paging if freemem is less than lotsfree and needfree. 
@@ -4290,7 +4299,7 @@ arc_available_memory(void) r = FMR_PAGES_PP_MAXIMUM; } -#endif /* illumos */ +#endif /* __FreeBSD__ */ #if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) /* * If we're on an i386 platform, it's possible that we'll exhaust the @@ -4501,6 +4510,11 @@ arc_reclaim_thread(void *dummy __unused) int64_t to_free = (arc_c >> arc_shrink_shift) - free_memory; if (to_free > 0) { +#ifdef _KERNEL +#ifdef illumos + to_free = MAX(to_free, ptob(needfree)); +#endif +#endif arc_shrink(to_free); } } else if (free_memory < arc_c >> arc_no_grow_shift) { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c index 8731af4..3ec78c3 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c @@ -59,6 +59,12 @@ static dnode_phys_t dnode_phys_zero; int zfs_default_bs = SPA_MINBLOCKSHIFT; int zfs_default_ibs = DN_MAX_INDBLKSHIFT; +SYSCTL_DECL(_vfs_zfs); +SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN, + &zfs_default_bs, 0, "Default dnode block shift"); +SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN, + &zfs_default_ibs, 0, "Default dnode indirect block shift"); + #ifdef illumos static kmem_cbrc_t dnode_move(void *, void *, size_t, void *); #endif diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c index 9395f3c..e9b46da 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c @@ -2398,7 +2398,7 @@ vdev_raidz_io_done(zio_t *zio) */ if (parity_errors + parity_untried < rm->rm_firstdatacol || - (zio->io_flags & ZIO_FLAG_RESILVER)) { + (zio->io_flags & (ZIO_FLAG_RESILVER | ZIO_FLAG_SCRUB))) { n = raidz_parity_verify(zio, rm); unexpected_errors += n; ASSERT(parity_errors + n <= @@ -2450,7 +2450,7 @@ vdev_raidz_io_done(zio_t *zio) * out to failed devices 
later. */ if (parity_errors < rm->rm_firstdatacol - n || - (zio->io_flags & ZIO_FLAG_RESILVER)) { + (zio->io_flags & (ZIO_FLAG_RESILVER | ZIO_FLAG_SCRUB))) { n = raidz_parity_verify(zio, rm); unexpected_errors += n; ASSERT(parity_errors + n <= @@ -2552,7 +2552,8 @@ done: zio_checksum_verified(zio); if (zio->io_error == 0 && spa_writeable(zio->io_spa) && - (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER))) { + (unexpected_errors || + (zio->io_flags & (ZIO_FLAG_RESILVER | ZIO_FLAG_SCRUB)))) { /* * Use the good data we have in hand to repair damaged children. */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c index 4f92cfb..b8def48 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c @@ -886,13 +886,6 @@ zfsctl_snapdir_lookup(ap) break; /* - * The vnode must be referenced at least by this thread and - * the mount point or the thread doing the mounting. - * There can be more references from concurrent lookups. - */ - KASSERT(vrefcnt(*vpp) > 1, ("found unreferenced mountpoint")); - - /* * Check if a snapshot is already mounted on top of the vnode. 
*/ err = zfsctl_mounted_here(vpp, lkflags); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index c54cc1c..6452543 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -203,7 +203,6 @@ #include "lua.h" #include "lauxlib.h" -CTASSERT(sizeof(zfs_cmd_t) < IOCPARM_MAX); static struct cdev *zfsdev; extern void zfs_init(void); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index f8c7c47..29e89b9 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -4515,21 +4515,6 @@ zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, } static int -ioflags(int ioflags) -{ - int flags = 0; - - if (ioflags & IO_APPEND) - flags |= FAPPEND; - if (ioflags & IO_NDELAY) - flags |= FNONBLOCK; - if (ioflags & IO_SYNC) - flags |= (FSYNC | FDSYNC | FRSYNC); - - return (flags); -} - -static int zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind, int *rahead) { @@ -4716,7 +4701,6 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, } if (zp->z_blksz < PAGE_SIZE) { - i = 0; for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; va = zfs_map_page(ma[i], &sf); @@ -4852,6 +4836,21 @@ zfs_freebsd_ioctl(ap) } static int +ioflags(int ioflags) +{ + int flags = 0; + + if (ioflags & IO_APPEND) + flags |= FAPPEND; + if (ioflags & IO_NDELAY) + flags |= FNONBLOCK; + if (ioflags & IO_SYNC) + flags |= (FSYNC | FDSYNC | FRSYNC); + + return (flags); +} + +static int zfs_freebsd_read(ap) struct vop_read_args /* { struct vnode *a_vp; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index fac10c3..58c3807 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -1250,6 +1250,16 @@ zfs_rezget(znode_t *zp) int count = 0; uint64_t gen; + /* + * Remove cached pages before reloading the znode, so that they are not + * lingering after we run into any error. Ideally, we should vgone() + * the vnode in case of error, but currently we cannot do that + * because of the LOR between the vnode lock and z_teardown_lock. + * So, instead, we have to "doom" the znode in the illumos style. + */ + vp = ZTOV(zp); + vn_pages_remove(vp, 0, 0); + ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); mutex_enter(&zp->z_acl_lock); @@ -1329,18 +1339,12 @@ zfs_rezget(znode_t *zp) * (e.g. via a look-up). The old vnode and znode will be * recycled when the last vnode reference is dropped. */ - vp = ZTOV(zp); if (vp->v_type != IFTOVT((mode_t)zp->z_mode)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); - return (EIO); + return (SET_ERROR(EIO)); } - zp->z_blksz = doi.doi_data_block_size; - vn_pages_remove(vp, 0, 0); - if (zp->z_size != size) - vnode_pager_setsize(vp, zp->z_size); - /* * If the file has zero links, then it has been unlinked on the send * side and it must be in the received unlinked set. @@ -1351,8 +1355,15 @@ zfs_rezget(znode_t *zp) * when the unlinked set gets processed. 
*/ zp->z_unlinked = (zp->z_links == 0); - if (zp->z_unlinked) + if (zp->z_unlinked) { zfs_znode_dmu_fini(zp); + ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + return (0); + } + + zp->z_blksz = doi.doi_data_block_size; + if (zp->z_size != size) + vnode_pager_setsize(vp, zp->z_size); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); diff --git a/sys/compat/linux/linux_mmap.c b/sys/compat/linux/linux_mmap.c index 11da2ebf..94dec5a 100644 --- a/sys/compat/linux/linux_mmap.c +++ b/sys/compat/linux/linux_mmap.c @@ -129,7 +129,7 @@ linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot, error = fget(td, fd, cap_rights_init(&rights, CAP_MMAP), &fp); if (error != 0) return (error); - if (fp->f_type != DTYPE_VNODE) { + if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_DEV) { fdrop(fp, td); return (EINVAL); } diff --git a/sys/compat/linuxkpi/common/include/asm/atomic.h b/sys/compat/linuxkpi/common/include/asm/atomic.h index 7f25319..4b2610c 100644 --- a/sys/compat/linuxkpi/common/include/asm/atomic.h +++ b/sys/compat/linuxkpi/common/include/asm/atomic.h @@ -159,46 +159,80 @@ atomic_cmpxchg(atomic_t *v, int old, int new) return (ret); } -#define cmpxchg(ptr, old, new) ({ \ - __typeof(*(ptr)) __ret; \ - \ - CTASSERT(sizeof(__ret) == 1 || sizeof(__ret) == 2 || \ - sizeof(__ret) == 4 || sizeof(__ret) == 8); \ - \ - __ret = (old); \ - switch (sizeof(__ret)) { \ - case 1: \ - while (!atomic_fcmpset_8((volatile int8_t *)(ptr), \ - (int8_t *)&__ret, (new)) && __ret == (old)) \ - ; \ - break; \ - case 2: \ - while (!atomic_fcmpset_16((volatile int16_t *)(ptr), \ - (int16_t *)&__ret, (new)) && __ret == (old)) \ - ; \ - break; \ - case 4: \ - while (!atomic_fcmpset_32((volatile int32_t *)(ptr), \ - (int32_t *)&__ret, (new)) && __ret == (old)) \ - ; \ - break; \ - case 8: \ - while (!atomic_fcmpset_64((volatile int64_t *)(ptr), \ - (int64_t *)&__ret, (new)) && __ret == (old)) \ - ; \ - break; \ - } \ - __ret; \ +#define cmpxchg(ptr, old, new) ({ \ + union { \ + __typeof(*(ptr)) val; \ + 
u8 u8[0]; \ + u16 u16[0]; \ + u32 u32[0]; \ + u64 u64[0]; \ + } __ret = { .val = (old) }, __new = { .val = (new) }; \ + \ + CTASSERT(sizeof(__ret.val) == 1 || sizeof(__ret.val) == 2 || \ + sizeof(__ret.val) == 4 || sizeof(__ret.val) == 8); \ + \ + switch (sizeof(__ret.val)) { \ + case 1: \ + while (!atomic_fcmpset_8((volatile u8 *)(ptr), \ + __ret.u8, __new.u8[0]) && __ret.val == (old)) \ + ; \ + break; \ + case 2: \ + while (!atomic_fcmpset_16((volatile u16 *)(ptr), \ + __ret.u16, __new.u16[0]) && __ret.val == (old)) \ + ; \ + break; \ + case 4: \ + while (!atomic_fcmpset_32((volatile u32 *)(ptr), \ + __ret.u32, __new.u32[0]) && __ret.val == (old)) \ + ; \ + break; \ + case 8: \ + while (!atomic_fcmpset_64((volatile u64 *)(ptr), \ + __ret.u64, __new.u64[0]) && __ret.val == (old)) \ + ; \ + break; \ + } \ + __ret.val; \ }) #define cmpxchg_relaxed(...) cmpxchg(__VA_ARGS__) -#define xchg(ptr, v) ({ \ - __typeof(*(ptr)) __ret; \ - \ - __ret = *(ptr); \ - *(ptr) = v; \ - __ret; \ +#define xchg(ptr, new) ({ \ + union { \ + __typeof(*(ptr)) val; \ + u8 u8[0]; \ + u16 u16[0]; \ + u32 u32[0]; \ + u64 u64[0]; \ + } __ret, __new = { .val = (new) }; \ + \ + CTASSERT(sizeof(__ret.val) == 1 || sizeof(__ret.val) == 2 || \ + sizeof(__ret.val) == 4 || sizeof(__ret.val) == 8); \ + \ + switch (sizeof(__ret.val)) { \ + case 1: \ + __ret.val = READ_ONCE(*ptr); \ + while (!atomic_fcmpset_8((volatile u8 *)(ptr), \ + __ret.u8, __new.u8[0])) \ + ; \ + break; \ + case 2: \ + __ret.val = READ_ONCE(*ptr); \ + while (!atomic_fcmpset_16((volatile u16 *)(ptr), \ + __ret.u16, __new.u16[0])) \ + ; \ + break; \ + case 4: \ + __ret.u32[0] = atomic_swap_32((volatile u32 *)(ptr), \ + __new.u32[0]); \ + break; \ + case 8: \ + __ret.u64[0] = atomic_swap_64((volatile u64 *)(ptr), \ + __new.u64[0]); \ + break; \ + } \ + __ret.val; \ }) #define LINUX_ATOMIC_OP(op, c_op) \ diff --git a/sys/compat/linuxkpi/common/include/linux/compiler.h b/sys/compat/linuxkpi/common/include/linux/compiler.h index 
ae60553..2b9ae10 100644 --- a/sys/compat/linuxkpi/common/include/linux/compiler.h +++ b/sys/compat/linuxkpi/common/include/linux/compiler.h @@ -56,6 +56,8 @@ #define __devexit #define __exit #define __rcu +#define __percpu +#define __weak __weak_symbol #define __malloc #define ___stringify(...) #__VA_ARGS__ #define __stringify(...) ___stringify(__VA_ARGS__) diff --git a/sys/compat/linuxkpi/common/include/linux/dcache.h b/sys/compat/linuxkpi/common/include/linux/dcache.h new file mode 100644 index 0000000..1bafa3d --- /dev/null +++ b/sys/compat/linuxkpi/common/include/linux/dcache.h @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2017 Limelight Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef __LINUX_DCACHE_H +#define __LINUX_DCACHE_H + +struct vnode; +struct pfs_node; + +struct dentry { + struct vnode *d_inode; + struct pfs_node *d_pfs_node; /* FreeBSD specific field */ +}; + +static inline struct vnode * +d_inode(const struct dentry *dentry) +{ + return (dentry->d_inode); +} + +#endif /* __LINUX_DCACHE_H */ diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h index edc6cd8..7062478 100644 --- a/sys/compat/linuxkpi/common/include/linux/device.h +++ b/sys/compat/linuxkpi/common/include/linux/device.h @@ -46,9 +46,6 @@ #include <sys/bus.h> -enum irqreturn { IRQ_NONE = 0, IRQ_HANDLED, IRQ_WAKE_THREAD, }; -typedef enum irqreturn irqreturn_t; - struct device; struct fwnode_handle; diff --git a/sys/compat/linuxkpi/common/include/linux/fs.h b/sys/compat/linuxkpi/common/include/linux/fs.h index 04fc78d..e05debf 100644 --- a/sys/compat/linuxkpi/common/include/linux/fs.h +++ b/sys/compat/linuxkpi/common/include/linux/fs.h @@ -42,6 +42,7 @@ #include <linux/wait.h> #include <linux/semaphore.h> #include <linux/spinlock.h> +#include <linux/dcache.h> struct module; struct kiocb; @@ -65,11 +66,6 @@ struct pfs_node; typedef struct files_struct *fl_owner_t; -struct dentry { - struct inode *d_inode; - struct pfs_node *d_pfs_node; -}; - struct file_operations; struct linux_file_wait_queue { @@ -288,6 +284,20 @@ noop_llseek(struct linux_file *file, loff_t offset, int whence) return (file->_file->f_offset); } +static inline struct vnode * +file_inode(const struct linux_file *file) +{ + + return (file->f_vnode); +} + +static inline int +call_mmap(struct linux_file *file, struct vm_area_struct *vma) +{ + + return (file->f_op->mmap(file, vma)); +} + /* Shared memory support */ unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t); struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t); diff --git a/sys/compat/linuxkpi/common/include/linux/gfp.h 
b/sys/compat/linuxkpi/common/include/linux/gfp.h index 3d9ddba..2eea6d2 100644 --- a/sys/compat/linuxkpi/common/include/linux/gfp.h +++ b/sys/compat/linuxkpi/common/include/linux/gfp.h @@ -54,6 +54,8 @@ #define __GFP_NO_KSWAPD 0 #define __GFP_WAIT M_WAITOK #define __GFP_DMA32 (1U << 24) /* LinuxKPI only */ +#define __GFP_BITS_SHIFT 25 +#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) #define GFP_NOWAIT M_NOWAIT #define GFP_ATOMIC (M_NOWAIT | M_USE_RESERVE) @@ -67,6 +69,9 @@ #define GFP_TEMPORARY M_NOWAIT #define GFP_NATIVE_MASK (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_ZERO) +CTASSERT((__GFP_DMA32 & GFP_NATIVE_MASK) == 0); +CTASSERT((__GFP_BITS_MASK & GFP_NATIVE_MASK) == GFP_NATIVE_MASK); + /* * Resolve a page into a virtual address: * diff --git a/sys/compat/linuxkpi/common/include/linux/interrupt.h b/sys/compat/linuxkpi/common/include/linux/interrupt.h index 6f7b96d..9e78b8a 100644 --- a/sys/compat/linuxkpi/common/include/linux/interrupt.h +++ b/sys/compat/linuxkpi/common/include/linux/interrupt.h @@ -33,14 +33,13 @@ #include <linux/device.h> #include <linux/pci.h> +#include <linux/irqreturn.h> #include <sys/bus.h> #include <sys/rman.h> typedef irqreturn_t (*irq_handler_t)(int, void *); -#define IRQ_RETVAL(x) ((x) != IRQ_NONE) - #define IRQF_SHARED RF_SHAREABLE struct irq_ent { @@ -112,6 +111,39 @@ request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, } static inline int +enable_irq(unsigned int irq) +{ + struct irq_ent *irqe; + struct device *dev; + + dev = linux_pci_find_irq_dev(irq); + if (dev == NULL) + return -EINVAL; + irqe = linux_irq_ent(dev, irq); + if (irqe == NULL || irqe->tag != NULL) + return -EINVAL; + return -bus_setup_intr(dev->bsddev, irqe->res, INTR_TYPE_NET | INTR_MPSAFE, + NULL, linux_irq_handler, irqe, &irqe->tag); +} + +static inline void +disable_irq(unsigned int irq) +{ + struct irq_ent *irqe; + struct device *dev; + + dev = linux_pci_find_irq_dev(irq); + if (dev == NULL) + return; + irqe = linux_irq_ent(dev, irq); 
+ if (irqe == NULL) + return; + if (irqe->tag != NULL) + bus_teardown_intr(dev->bsddev, irqe->res, irqe->tag); + irqe->tag = NULL; +} + +static inline int bind_irq_to_cpu(unsigned int irq, int cpu_id) { struct irq_ent *irqe; @@ -142,7 +174,8 @@ free_irq(unsigned int irq, void *device) irqe = linux_irq_ent(dev, irq); if (irqe == NULL) return; - bus_teardown_intr(dev->bsddev, irqe->res, irqe->tag); + if (irqe->tag != NULL) + bus_teardown_intr(dev->bsddev, irqe->res, irqe->tag); bus_release_resource(dev->bsddev, SYS_RES_IRQ, rid, irqe->res); list_del(&irqe->links); kfree(irqe); @@ -168,5 +201,7 @@ extern void tasklet_schedule(struct tasklet_struct *); extern void tasklet_kill(struct tasklet_struct *); extern void tasklet_init(struct tasklet_struct *, tasklet_func_t *, unsigned long data); +extern void tasklet_enable(struct tasklet_struct *); +extern void tasklet_disable(struct tasklet_struct *); #endif /* _LINUX_INTERRUPT_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/irqreturn.h b/sys/compat/linuxkpi/common/include/linux/irqreturn.h new file mode 100644 index 0000000..780fcca --- /dev/null +++ b/sys/compat/linuxkpi/common/include/linux/irqreturn.h @@ -0,0 +1,40 @@ +/*- + * Copyright (c) 2017 Limelight Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_IRQRETURN_H +#define _LINUX_IRQRETURN_H + +typedef enum irqreturn { + IRQ_NONE = 0, + IRQ_HANDLED = 1, + IRQ_WAKE_THREAD = 2 +} irqreturn_t; + +#define IRQ_RETVAL(x) ((x) ? IRQ_HANDLED : IRQ_NONE) + +#endif /* _LINUX_IRQRETURN_H */ diff --git a/sys/compat/linuxkpi/common/include/linux/kernel.h b/sys/compat/linuxkpi/common/include/linux/kernel.h index c264132..5053330 100644 --- a/sys/compat/linuxkpi/common/include/linux/kernel.h +++ b/sys/compat/linuxkpi/common/include/linux/kernel.h @@ -89,6 +89,7 @@ #define BUILD_BUG_ON(x) CTASSERT(!(x)) #define BUILD_BUG_ON_MSG(x, msg) BUILD_BUG_ON(x) #define BUILD_BUG_ON_NOT_POWER_OF_2(x) BUILD_BUG_ON(!powerof2(x)) +#define BUILD_BUG_ON_INVALID(expr) while (0) { (void)(expr); } #define BUG() panic("BUG at %s:%d", __FILE__, __LINE__) #define BUG_ON(cond) do { \ diff --git a/sys/compat/linuxkpi/common/include/linux/kref.h b/sys/compat/linuxkpi/common/include/linux/kref.h index 80fd271..7411694 100644 --- a/sys/compat/linuxkpi/common/include/linux/kref.h +++ b/sys/compat/linuxkpi/common/include/linux/kref.h @@ -52,6 +52,13 @@ kref_init(struct kref *kref) refcount_init(&kref->refcount.counter, 1); } +static inline unsigned int +kref_read(const struct kref 
*kref) +{ + + return (atomic_read(&kref->refcount)); +} + static inline void kref_get(struct kref *kref) { diff --git a/sys/compat/linuxkpi/common/include/linux/ktime.h b/sys/compat/linuxkpi/common/include/linux/ktime.h index f5f11e4..1c6df95 100644 --- a/sys/compat/linuxkpi/common/include/linux/ktime.h +++ b/sys/compat/linuxkpi/common/include/linux/ktime.h @@ -1,5 +1,6 @@ /*- - * Copyright (c) 2014-2015 Mellanox Technologies, Ltd. + * Copyright (c) 2018 Limelight Networks, Inc. + * Copyright (c) 2014-2018 Mellanox Technologies, Ltd. * Copyright (c) 2015 François Tigeot * All rights reserved. * @@ -34,104 +35,96 @@ #include <linux/time.h> #include <linux/jiffies.h> -#define ktime_get_ts(x) getnanouptime(x) +#define ktime_get_ts(x) getnanouptime(x) /* time values in nanoseconds */ -union ktime { - int64_t tv64; -}; +typedef s64 ktime_t; -typedef union ktime ktime_t; - -#define KTIME_MAX ((s64)~((u64)1 << 63)) -#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) +#define KTIME_MAX ((s64)~((u64)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) static inline int64_t ktime_to_ns(ktime_t kt) { - return kt.tv64; + return (kt); } static inline ktime_t ns_to_ktime(uint64_t nsec) { - ktime_t kt; - - kt.tv64 = nsec; - return (kt); + return (nsec); } static inline int64_t ktime_divns(const ktime_t kt, int64_t div) { - return kt.tv64 / div; + return (kt / div); } static inline int64_t ktime_to_us(ktime_t kt) { - return ktime_divns(kt, NSEC_PER_USEC); + return (ktime_divns(kt, NSEC_PER_USEC)); } static inline int64_t ktime_to_ms(ktime_t kt) { - return ktime_divns(kt, NSEC_PER_MSEC); + return (ktime_divns(kt, NSEC_PER_MSEC)); } static inline struct timeval ktime_to_timeval(ktime_t kt) { - return ns_to_timeval(kt.tv64); + return (ns_to_timeval(kt)); } static inline ktime_t ktime_add_ns(ktime_t kt, int64_t ns) { - kt.tv64 += ns; - return kt; + return (kt + ns); } static inline ktime_t ktime_sub_ns(ktime_t kt, int64_t ns) { - kt.tv64 -= ns; - return kt; + return (kt - ns); } 
static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) { - ktime_t retval = { (s64)secs * NSEC_PER_SEC + (s64)nsecs }; + ktime_t retval = {(s64) secs * NSEC_PER_SEC + (s64) nsecs}; + return (retval); } static inline ktime_t ktime_sub(ktime_t lhs, ktime_t rhs) { - lhs.tv64 -= rhs.tv64; - return (lhs); + return (lhs - rhs); } static inline int64_t ktime_us_delta(ktime_t later, ktime_t earlier) { - ktime_t diff = ktime_sub(later, earlier); - return ktime_to_us(diff); + ktime_t diff = ktime_sub(later, earlier); + + return (ktime_to_us(diff)); } static inline int64_t ktime_ms_delta(ktime_t later, ktime_t earlier) { - ktime_t diff = ktime_sub(later, earlier); - return ktime_to_ms(diff); + ktime_t diff = ktime_sub(later, earlier); + + return (ktime_to_ms(diff)); } static inline ktime_t ktime_add(ktime_t lhs, ktime_t rhs) { - lhs.tv64 += rhs.tv64; - return (lhs); + return (lhs + rhs); } static inline ktime_t @@ -146,22 +139,19 @@ timeval_to_ktime(struct timeval tv) return (ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC)); } -#define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) -#define ktime_to_timeval(kt) ns_to_timeval((kt).tv64) -#define ktime_to_ns(kt) ((kt).tv64) +#define ktime_to_timespec(kt) ns_to_timespec(kt) +#define ktime_to_timeval(kt) ns_to_timeval(kt) +#define ktime_to_ns(kt) (kt) static inline int64_t ktime_get_ns(void) { struct timespec ts; - ktime_t kt; ktime_get_ts(&ts); - kt = timespec_to_ktime(ts); - return (ktime_to_ns(kt)); -} -#define ktime_get_raw_ns() ktime_get_ns() + return (ktime_to_ns(timespec_to_ktime(ts))); +} static inline ktime_t ktime_get(void) @@ -190,4 +180,22 @@ ktime_get_real(void) return (timespec_to_ktime(ts)); } +static inline ktime_t +ktime_get_real_seconds(void) +{ + struct timespec ts; + + nanotime(&ts); + return (ts.tv_sec); +} + +static inline u64 +ktime_get_raw_ns(void) +{ + struct timespec ts; + + nanouptime(&ts); + return (ktime_to_ns(timespec_to_ktime(ts))); +} + #endif /* _LINUX_KTIME_H */ diff --git 
a/sys/compat/linuxkpi/common/include/linux/list.h b/sys/compat/linuxkpi/common/include/linux/list.h index c235c26..826a8cf 100644 --- a/sys/compat/linuxkpi/common/include/linux/list.h +++ b/sys/compat/linuxkpi/common/include/linux/list.h @@ -117,6 +117,13 @@ __list_del(struct list_head *prev, struct list_head *next) } static inline void +__list_del_entry(struct list_head *entry) +{ + + __list_del(entry->prev, entry->next); +} + +static inline void list_del(struct list_head *entry) { @@ -172,6 +179,9 @@ list_del_init(struct list_head *entry) #define list_next_entry(ptr, member) \ list_entry(((ptr)->member.next), typeof(*(ptr)), member) +#define list_safe_reset_next(ptr, n, member) \ + (n) = list_next_entry(ptr, member) + #define list_prev_entry(ptr, member) \ list_entry(((ptr)->member.prev), typeof(*(ptr)), member) diff --git a/sys/compat/linuxkpi/common/include/linux/lockdep.h b/sys/compat/linuxkpi/common/include/linux/lockdep.h index 4bf902d..6b9f71d 100644 --- a/sys/compat/linuxkpi/common/include/linux/lockdep.h +++ b/sys/compat/linuxkpi/common/include/linux/lockdep.h @@ -48,5 +48,10 @@ struct lock_class_key { #define lockdep_is_held(m) (sx_xholder(&(m)->sx) == curthread) #define might_lock(m) do { } while (0) +#define might_lock_read(m) do { } while (0) + +#define lock_acquire(...) do { } while (0) +#define lock_release(...) do { } while (0) +#define lock_acquire_shared_recursive(...) 
do { } while (0) #endif /* _LINUX_LOCKDEP_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/mm.h b/sys/compat/linuxkpi/common/include/linux/mm.h index a649c8c..ad90382 100644 --- a/sys/compat/linuxkpi/common/include/linux/mm.h +++ b/sys/compat/linuxkpi/common/include/linux/mm.h @@ -118,8 +118,13 @@ struct vm_area_struct { struct vm_fault { unsigned int flags; pgoff_t pgoff; - void *virtual_address; /* user-space address */ + union { + /* user-space address */ + void *virtual_address; + unsigned long address; + }; struct page *page; + struct vm_area_struct *vma; }; struct vm_operations_struct { @@ -243,7 +248,8 @@ static inline void put_page(struct vm_page *page) { vm_page_lock(page); - vm_page_unwire(page, PQ_ACTIVE); + if (vm_page_unwire(page, PQ_ACTIVE) && page->object == NULL) + vm_page_free(page); vm_page_unlock(page); } diff --git a/sys/compat/linuxkpi/common/include/linux/mm_types.h b/sys/compat/linuxkpi/common/include/linux/mm_types.h index 44aad34..81eb278 100644 --- a/sys/compat/linuxkpi/common/include/linux/mm_types.h +++ b/sys/compat/linuxkpi/common/include/linux/mm_types.h @@ -62,6 +62,12 @@ mmput(struct mm_struct *mm) mmdrop(mm); } +static inline void +mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + extern struct mm_struct *linux_get_task_mm(struct task_struct *); #define get_task_mm(task) linux_get_task_mm(task) diff --git a/sys/compat/linuxkpi/common/include/linux/mutex.h b/sys/compat/linuxkpi/common/include/linux/mutex.h index 36911b1..bbf6023 100644 --- a/sys/compat/linuxkpi/common/include/linux/mutex.h +++ b/sys/compat/linuxkpi/common/include/linux/mutex.h @@ -63,7 +63,7 @@ typedef struct mutex { #define mutex_lock_interruptible(_m) ({ \ MUTEX_SKIP() ? 0 : \ - (sx_xlock_sig(&(_m)->sx) ? 
-EINTR : 0); \ + linux_mutex_lock_interruptible(_m); \ }) #define mutex_unlock(_m) do { \ @@ -77,6 +77,21 @@ typedef struct mutex { !!sx_try_xlock(&(_m)->sx); \ }) +enum mutex_trylock_recursive_enum { + MUTEX_TRYLOCK_FAILED = 0, + MUTEX_TRYLOCK_SUCCESS = 1, + MUTEX_TRYLOCK_RECURSIVE = 2, +}; + +static inline __must_check enum mutex_trylock_recursive_enum +mutex_trylock_recursive(struct mutex *lock) +{ + if (unlikely(sx_xholder(&lock->sx) == curthread)) + return (MUTEX_TRYLOCK_RECURSIVE); + + return (mutex_trylock(lock)); +} + #define mutex_init(_m) \ linux_mutex_init(_m, mutex_name(#_m), SX_NOWITNESS) @@ -128,4 +143,6 @@ linux_mutex_destroy(mutex_t *m) sx_destroy(&m->sx); } +extern int linux_mutex_lock_interruptible(mutex_t *m); + #endif /* _LINUX_MUTEX_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/pid.h b/sys/compat/linuxkpi/common/include/linux/pid.h index 2c7e0ea..73d8f1f 100644 --- a/sys/compat/linuxkpi/common/include/linux/pid.h +++ b/sys/compat/linuxkpi/common/include/linux/pid.h @@ -58,6 +58,11 @@ enum pid_type { __ts; \ }) +#define get_task_pid(task, type) ({ \ + CTASSERT((type) == PIDTYPE_PID); \ + (task)->task_thread->td_tid; \ +}) + struct task_struct; extern struct task_struct *linux_pid_task(pid_t); extern struct task_struct *linux_get_pid_task(pid_t); diff --git a/sys/compat/linuxkpi/common/include/linux/printk.h b/sys/compat/linuxkpi/common/include/linux/printk.h index 1480fc6..6e8e3da 100644 --- a/sys/compat/linuxkpi/common/include/linux/printk.h +++ b/sys/compat/linuxkpi/common/include/linux/printk.h @@ -106,10 +106,16 @@ print_hex_dump_bytes(const char *prefix_str, const int prefix_type, print_hex_dump(NULL, prefix_str, prefix_type, 16, 1, buf, len, 0); } -#define printk_ratelimited(...) do { \ +#define printk_ratelimit() ({ \ static linux_ratelimit_t __ratelimited; \ - if (linux_ratelimited(&__ratelimited)) \ + linux_ratelimited(&__ratelimited); \ +}) + +#define printk_ratelimited(...) 
({ \ + bool __retval = printk_ratelimit(); \ + if (__retval) \ printk(__VA_ARGS__); \ -} while (0) + __retval; \ +}) #endif /* _LINUX_PRINTK_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/radix-tree.h b/sys/compat/linuxkpi/common/include/linux/radix-tree.h index 0edf04e..cd7c56cb 100644 --- a/sys/compat/linuxkpi/common/include/linux/radix-tree.h +++ b/sys/compat/linuxkpi/common/include/linux/radix-tree.h @@ -2,7 +2,7 @@ * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2018 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,10 +34,14 @@ #include <linux/types.h> #define RADIX_TREE_MAP_SHIFT 6 -#define RADIX_TREE_MAP_SIZE (1 << RADIX_TREE_MAP_SHIFT) -#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE - 1) -#define RADIX_TREE_MAX_HEIGHT \ - DIV_ROUND_UP((sizeof(long) * NBBY), RADIX_TREE_MAP_SHIFT) +#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) +#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE - 1UL) +#define RADIX_TREE_MAX_HEIGHT \ + howmany(sizeof(long) * NBBY, RADIX_TREE_MAP_SHIFT) + +#define RADIX_TREE_ENTRY_MASK 3UL +#define RADIX_TREE_EXCEPTIONAL_ENTRY 2UL +#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 struct radix_tree_node { void *slots[RADIX_TREE_MAP_SIZE]; @@ -50,6 +54,10 @@ struct radix_tree_root { int height; }; +struct radix_tree_iter { + unsigned long index; +}; + #define RADIX_TREE_INIT(mask) \ { .rnode = NULL, .gfp_mask = mask, .height = 0 }; #define INIT_RADIX_TREE(root, mask) \ @@ -57,8 +65,19 @@ struct radix_tree_root { #define RADIX_TREE(name, mask) \ struct radix_tree_root name = RADIX_TREE_INIT(mask) +#define radix_tree_for_each_slot(slot, root, iter, start) \ + for ((iter)->index = (start); \ + radix_tree_iter_find(root, iter, &(slot)); (iter)->index++) + +static inline int +radix_tree_exception(void *arg) +{ + 
return ((uintptr_t)arg & RADIX_TREE_ENTRY_MASK); +} + void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void *radix_tree_delete(struct radix_tree_root *, unsigned long); int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); +bool radix_tree_iter_find(struct radix_tree_root *, struct radix_tree_iter *, void ***); #endif /* _LINUX_RADIX_TREE_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/rcupdate.h b/sys/compat/linuxkpi/common/include/linux/rcupdate.h index b2dd2ae..e4afa5a 100644 --- a/sys/compat/linuxkpi/common/include/linux/rcupdate.h +++ b/sys/compat/linuxkpi/common/include/linux/rcupdate.h @@ -74,14 +74,17 @@ } while (0) #define rcu_access_pointer(p) \ - ((__typeof(*p) *)(READ_ONCE(p))) + ((__typeof(*p) *)READ_ONCE(p)) #define rcu_dereference_protected(p, c) \ - ((__typeof(*p) *)(p)) + ((__typeof(*p) *)READ_ONCE(p)) #define rcu_dereference(p) \ rcu_dereference_protected(p, 0) +#define rcu_dereference_raw(p) \ + ((__typeof(*p) *)READ_ONCE(p)) + #define rcu_pointer_handoff(p) (p) #define rcu_assign_pointer(p, v) do { \ diff --git a/sys/compat/linuxkpi/common/include/linux/rwsem.h b/sys/compat/linuxkpi/common/include/linux/rwsem.h index 3042dcf..34e51c1 100644 --- a/sys/compat/linuxkpi/common/include/linux/rwsem.h +++ b/sys/compat/linuxkpi/common/include/linux/rwsem.h @@ -47,7 +47,7 @@ struct rw_semaphore { #define up_read(_rw) sx_sunlock(&(_rw)->sx) #define down_read_trylock(_rw) !!sx_try_slock(&(_rw)->sx) #define down_write_trylock(_rw) !!sx_try_xlock(&(_rw)->sx) -#define down_write_killable(_rw) !!sx_xlock_sig(&(_rw)->sx) +#define down_write_killable(_rw) linux_down_write_killable(_rw) #define downgrade_write(_rw) sx_downgrade(&(_rw)->sx) #define down_read_nested(_rw, _sc) down_read(_rw) #define init_rwsem(_rw) linux_init_rwsem(_rw, rwsem_name("lnxrwsem")) @@ -79,4 +79,6 @@ linux_init_rwsem(struct rw_semaphore *rw, const char *name) sx_init_flags(&rw->sx, name, SX_NOWITNESS); } +extern int 
linux_down_write_killable(struct rw_semaphore *); + #endif /* _LINUX_RWSEM_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/sched.h b/sys/compat/linuxkpi/common/include/linux/sched.h index 817e16c..b24de2c 100644 --- a/sys/compat/linuxkpi/common/include/linux/sched.h +++ b/sys/compat/linuxkpi/common/include/linux/sched.h @@ -2,7 +2,7 @@ * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013-2017 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2018 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -77,6 +77,7 @@ struct task_struct { struct completion exited; TAILQ_ENTRY(task_struct) rcu_entry; int rcu_recurse; + int bsd_interrupt_value; }; #define current ({ \ @@ -127,12 +128,26 @@ void linux_send_sig(int signo, struct task_struct *task); #define signal_pending_state(state, task) \ linux_signal_pending_state(state, task) #define send_sig(signo, task, priv) do { \ - CTASSERT(priv == 0); \ + CTASSERT((priv) == 0); \ linux_send_sig(signo, task); \ } while (0) int linux_schedule_timeout(int timeout); +static inline void +linux_schedule_save_interrupt_value(struct task_struct *task, int value) +{ + task->bsd_interrupt_value = value; +} + +static inline int +linux_schedule_get_interrupt_value(struct task_struct *task) +{ + int value = task->bsd_interrupt_value; + task->bsd_interrupt_value = 0; + return (value); +} + #define schedule() \ (void)linux_schedule_timeout(MAX_SCHEDULE_TIMEOUT) #define schedule_timeout(timeout) \ diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h index a0fdd42..f22a19f 100644 --- a/sys/compat/linuxkpi/common/include/linux/slab.h +++ b/sys/compat/linuxkpi/common/include/linux/slab.h @@ -65,6 +65,10 @@ MALLOC_DECLARE(M_KMALLOC); #define kmem_cache_free(...) 
linux_kmem_cache_free(__VA_ARGS__) #define kmem_cache_destroy(...) linux_kmem_cache_destroy(__VA_ARGS__) +#define KMEM_CACHE(__struct, flags) \ + linux_kmem_cache_create(#__struct, sizeof(struct __struct), \ + __alignof(struct __struct), (flags), NULL) + typedef void linux_kmem_ctor_t (void *); struct linux_kmem_cache { diff --git a/sys/compat/linuxkpi/common/include/linux/spinlock.h b/sys/compat/linuxkpi/common/include/linux/spinlock.h index dbd7a5a..d88d200 100644 --- a/sys/compat/linuxkpi/common/include/linux/spinlock.h +++ b/sys/compat/linuxkpi/common/include/linux/spinlock.h @@ -98,6 +98,9 @@ typedef struct { __ret; \ }) +#define spin_trylock_irq(_l) \ + spin_trylock(_l) + #define spin_lock_nested(_l, _n) do { \ if (SPIN_SKIP()) \ break; \ diff --git a/sys/compat/linuxkpi/common/include/linux/string.h b/sys/compat/linuxkpi/common/include/linux/string.h index a47eb42..918ff81 100644 --- a/sys/compat/linuxkpi/common/include/linux/string.h +++ b/sys/compat/linuxkpi/common/include/linux/string.h @@ -71,6 +71,22 @@ memdup_user(const void *ptr, size_t len) } static inline void * +memdup_user_nul(const void *ptr, size_t len) +{ + char *retval; + int error; + + retval = malloc(len + 1, M_KMALLOC, M_WAITOK); + error = linux_copyin(ptr, retval, len); + if (error != 0) { + free(retval, M_KMALLOC); + return (ERR_PTR(error)); + } + retval[len] = '\0'; + return (retval); +} + +static inline void * kmemdup(const void *src, size_t len, gfp_t gfp) { void *dst; diff --git a/sys/compat/linuxkpi/common/include/linux/uaccess.h b/sys/compat/linuxkpi/common/include/linux/uaccess.h index c046e1c..a69e9cc 100644 --- a/sys/compat/linuxkpi/common/include/linux/uaccess.h +++ b/sys/compat/linuxkpi/common/include/linux/uaccess.h @@ -58,7 +58,7 @@ linux_copyout(&(__x), (_p), sizeof(*(_p))); \ }) #define get_user(_x, _p) linux_copyin((_p), &(_x), sizeof(*(_p))) -#define put_user(_x, _p) linux_copyout(&(_x), (_p), sizeof(*(_p))) +#define put_user(_x, _p) __put_user(_x, _p) #define 
clear_user(...) linux_clear_user(__VA_ARGS__) #define access_ok(...) linux_access_ok(__VA_ARGS__) diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index 08ddbbe..b39d718 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -2,7 +2,7 @@ * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013-2017 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2018 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -538,6 +538,7 @@ linux_cdev_pager_populate(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type, vmf.flags = (fault_type & VM_PROT_WRITE) ? FAULT_FLAG_WRITE : 0; vmf.pgoff = 0; vmf.page = NULL; + vmf.vma = vmap; vmap->vm_pfn_count = 0; vmap->vm_pfn_pcount = &vmap->vm_pfn_count; @@ -827,10 +828,27 @@ linux_access_ok(int rw, const void *uaddr, size_t len) (eaddr > saddr && eaddr <= VM_MAXUSER_ADDRESS)); } +/* + * This function should return either EINTR or ERESTART depending on + * the signal type sent to this thread: + */ +static int +linux_get_error(struct task_struct *task, int error) +{ + /* check for signal type interrupt code */ + if (error == EINTR || error == ERESTARTSYS || error == ERESTART) { + error = -linux_schedule_get_interrupt_value(task); + if (error == 0) + error = EINTR; + } + return (error); +} + static int linux_file_ioctl_sub(struct file *fp, struct linux_file *filp, u_long cmd, caddr_t data, struct thread *td) { + struct task_struct *task = current; unsigned size; int error; @@ -843,8 +861,8 @@ linux_file_ioctl_sub(struct file *fp, struct linux_file *filp, * Background: Linux code expects a user-space address * while FreeBSD supplies a kernel-space address. 
*/ - current->bsd_ioctl_data = data; - current->bsd_ioctl_len = size; + task->bsd_ioctl_data = data; + task->bsd_ioctl_len = size; data = (void *)LINUX_IOCTL_MIN_PTR; } else { /* fetch user-space pointer */ @@ -868,16 +886,17 @@ linux_file_ioctl_sub(struct file *fp, struct linux_file *filp, else error = ENOTTY; if (size > 0) { - current->bsd_ioctl_data = NULL; - current->bsd_ioctl_len = 0; + task->bsd_ioctl_data = NULL; + task->bsd_ioctl_len = 0; } if (error == EWOULDBLOCK) { /* update kqfilter status, if any */ linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ | LINUX_KQ_FLAG_HAS_WRITE); - } else if (error == ERESTARTSYS) - error = ERESTART; + } else { + error = linux_get_error(task, error); + } return (error); } @@ -1110,6 +1129,7 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot, struct thread *td) { + struct task_struct *task; struct vm_area_struct *vmap; struct mm_struct *mm; struct linux_file *filp; @@ -1131,7 +1151,8 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset, * The atomic reference below makes sure the mm_struct is * available as long as the vmap is in the linux_vma_head. 
*/ - mm = current->mm; + task = current; + mm = task->mm; if (atomic_inc_not_zero(&mm->mm_users) == 0) return (EINVAL); @@ -1146,11 +1167,10 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset, vmap->vm_mm = mm; if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) { - error = EINTR; + error = linux_get_error(task, EINTR); } else { error = -OPW(fp, td, filp->f_op->mmap(filp, vmap)); - if (error == ERESTARTSYS) - error = ERESTART; + error = linux_get_error(task, error); up_write(&vmap->vm_mm->mmap_sem); } @@ -1289,9 +1309,7 @@ linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else { - error = -bytes; - if (error == ERESTARTSYS) - error = ERESTART; + error = linux_get_error(current, -bytes); } } else error = ENXIO; @@ -1328,9 +1346,7 @@ linux_file_write(struct file *file, struct uio *uio, struct ucred *active_cred, uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else { - error = -bytes; - if (error == ERESTARTSYS) - error = ERESTART; + error = linux_get_error(current, -bytes); } } else error = ENXIO; @@ -1779,6 +1795,7 @@ linux_complete_common(struct completion *c, int all) int linux_wait_for_common(struct completion *c, int flags) { + struct task_struct *task; int error; if (SCHEDULER_STOPPED()) @@ -1786,6 +1803,8 @@ linux_wait_for_common(struct completion *c, int flags) DROP_GIANT(); + task = current; + if (flags != 0) flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; else @@ -1797,7 +1816,9 @@ linux_wait_for_common(struct completion *c, int flags) break; sleepq_add(c, NULL, "completion", flags, 0); if (flags & SLEEPQ_INTERRUPTIBLE) { - if (sleepq_wait_sig(c, 0) != 0) { + error = -sleepq_wait_sig(c, 0); + if (error != 0) { + linux_schedule_save_interrupt_value(task, error); error = -ERESTARTSYS; goto intr; } @@ -1819,22 +1840,22 @@ intr: int linux_wait_for_timeout_common(struct completion *c, int timeout, int flags) { + struct task_struct *task; int end = 
jiffies + timeout; int error; - int ret; if (SCHEDULER_STOPPED()) return (0); DROP_GIANT(); + task = current; + if (flags != 0) flags = SLEEPQ_INTERRUPTIBLE | SLEEPQ_SLEEP; else flags = SLEEPQ_SLEEP; - error = 0; - ret = 0; for (;;) { sleepq_lock(c); if (c->done) @@ -1842,26 +1863,30 @@ linux_wait_for_timeout_common(struct completion *c, int timeout, int flags) sleepq_add(c, NULL, "completion", flags, 0); sleepq_set_timeout(c, linux_timer_jiffies_until(end)); if (flags & SLEEPQ_INTERRUPTIBLE) - ret = sleepq_timedwait_sig(c, 0); + error = -sleepq_timedwait_sig(c, 0); else - ret = sleepq_timedwait(c, 0); - if (ret != 0) { - /* check for timeout or signal */ - if (ret == EWOULDBLOCK) - error = 0; - else + error = -sleepq_timedwait(c, 0); + if (error != 0) { + /* check for timeout */ + if (error == -EWOULDBLOCK) { + error = 0; /* timeout */ + } else { + /* signal happened */ + linux_schedule_save_interrupt_value(task, error); error = -ERESTARTSYS; - goto intr; + } + goto done; } } c->done--; sleepq_release(c); -intr: + /* return how many jiffies are left */ + error = linux_timer_jiffies_until(end); +done: PICKUP_GIANT(); - /* return how many jiffies are left */ - return (ret != 0 ? 
error : linux_timer_jiffies_until(end)); + return (error); } int diff --git a/sys/compat/linuxkpi/common/src/linux_hrtimer.c b/sys/compat/linuxkpi/common/src/linux_hrtimer.c index c650256..a0041b8 100644 --- a/sys/compat/linuxkpi/common/src/linux_hrtimer.c +++ b/sys/compat/linuxkpi/common/src/linux_hrtimer.c @@ -98,7 +98,7 @@ linux_hrtimer_start_range_ns(struct hrtimer *hrtimer, ktime_t time, int64_t nsec { mtx_lock(&hrtimer->mtx); - callout_reset_sbt(&hrtimer->callout, nstosbt(time.tv64), nstosbt(nsec), + callout_reset_sbt(&hrtimer->callout, nstosbt(time), nstosbt(nsec), hrtimer_call_handler, hrtimer, 0); mtx_unlock(&hrtimer->mtx); } diff --git a/sys/compat/linuxkpi/common/src/linux_lock.c b/sys/compat/linuxkpi/common/src/linux_lock.c index ff91514..f037cd3 100644 --- a/sys/compat/linuxkpi/common/src/linux_lock.c +++ b/sys/compat/linuxkpi/common/src/linux_lock.c @@ -28,6 +28,7 @@ #include <sys/queue.h> +#include <linux/sched.h> #include <linux/ww_mutex.h> struct ww_mutex_thread { @@ -72,10 +73,13 @@ linux_ww_unlock(void) int linux_ww_mutex_lock_sub(struct ww_mutex *lock, int catch_signal) { + struct task_struct *task; struct ww_mutex_thread entry; struct ww_mutex_thread *other; int retval = 0; + task = current; + linux_ww_lock(); if (unlikely(sx_try_xlock(&lock->base.sx) == 0)) { entry.thread = curthread; @@ -105,7 +109,9 @@ linux_ww_mutex_lock_sub(struct ww_mutex *lock, int catch_signal) } } if (catch_signal) { - if (cv_wait_sig(&lock->condvar, &ww_mutex_global) != 0) { + retval = -cv_wait_sig(&lock->condvar, &ww_mutex_global); + if (retval != 0) { + linux_schedule_save_interrupt_value(task, retval); retval = -EINTR; goto done; } @@ -134,3 +140,29 @@ linux_ww_mutex_unlock_sub(struct ww_mutex *lock) cv_signal(&lock->condvar); linux_ww_unlock(); } + +int +linux_mutex_lock_interruptible(mutex_t *m) +{ + int error; + + error = -sx_xlock_sig(&m->sx); + if (error != 0) { + linux_schedule_save_interrupt_value(current, error); + error = -EINTR; + } + return (error); +} + 
+int +linux_down_write_killable(struct rw_semaphore *rw) +{ + int error; + + error = -sx_xlock_sig(&rw->sx); + if (error != 0) { + linux_schedule_save_interrupt_value(current, error); + error = -EINTR; + } + return (error); +} diff --git a/sys/compat/linuxkpi/common/src/linux_radix.c b/sys/compat/linuxkpi/common/src/linux_radix.c index 6a8bd11..053f08b 100644 --- a/sys/compat/linuxkpi/common/src/linux_radix.c +++ b/sys/compat/linuxkpi/common/src/linux_radix.c @@ -2,7 +2,7 @@ * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2018 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,10 +43,10 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_RADIX, "radix", "Linux radix compat"); -static inline int +static inline unsigned long radix_max(struct radix_tree_root *root) { - return (1 << (root->height * RADIX_TREE_MAP_SHIFT)) - 1; + return ((1UL << (root->height * RADIX_TREE_MAP_SHIFT)) - 1UL); } static inline int @@ -76,6 +76,45 @@ out: return (item); } +bool +radix_tree_iter_find(struct radix_tree_root *root, struct radix_tree_iter *iter, + void ***pppslot) +{ + struct radix_tree_node *node; + unsigned long index = iter->index; + int height; + +restart: + node = root->rnode; + if (node == NULL) + return (false); + height = root->height - 1; + if (height == -1 || index > radix_max(root)) + return (false); + do { + unsigned long mask = RADIX_TREE_MAP_MASK << (RADIX_TREE_MAP_SHIFT * height); + unsigned long step = 1UL << (RADIX_TREE_MAP_SHIFT * height); + int pos = radix_pos(index, height); + struct radix_tree_node *next; + + /* track last slot */ + *pppslot = node->slots + pos; + + next = node->slots[pos]; + if (next == NULL) { + index += step; + index &= -step; + if ((index & mask) == 0) + goto restart; + } else { + node = next; + height--; + } + } while 
(height != -1); + iter->index = index; + return (true); +} + void * radix_tree_delete(struct radix_tree_root *root, unsigned long index) { diff --git a/sys/compat/linuxkpi/common/src/linux_schedule.c b/sys/compat/linuxkpi/common/src/linux_schedule.c index dc3dd91..0958b3a 100644 --- a/sys/compat/linuxkpi/common/src/linux_schedule.c +++ b/sys/compat/linuxkpi/common/src/linux_schedule.c @@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$"); #include <linux/wait.h> static int -linux_add_to_sleepqueue(void *wchan, const char *wmesg, int timeout, int state) +linux_add_to_sleepqueue(void *wchan, struct task_struct *task, + const char *wmesg, int timeout, int state) { int flags, ret; @@ -66,8 +67,10 @@ linux_add_to_sleepqueue(void *wchan, const char *wmesg, int timeout, int state) ret = -sleepq_timedwait(wchan, 0); } /* filter return value */ - if (ret != 0 && ret != -EWOULDBLOCK) + if (ret != 0 && ret != -EWOULDBLOCK) { + linux_schedule_save_interrupt_value(task, ret); ret = -ERESTARTSYS; + } return (ret); } @@ -235,10 +238,10 @@ linux_wait_event_common(wait_queue_head_t *wqh, wait_queue_t *wq, int timeout, PHOLD(task->task_thread->td_proc); sleepq_lock(task); if (atomic_read(&task->state) != TASK_WAKING) { - ret = linux_add_to_sleepqueue(task, "wevent", timeout, state); + ret = linux_add_to_sleepqueue(task, task, "wevent", timeout, state); } else { sleepq_release(task); - ret = linux_signal_pending_state(state, task) ? 
-ERESTARTSYS : 0; + ret = 0; } PRELE(task->task_thread->td_proc); @@ -253,6 +256,7 @@ int linux_schedule_timeout(int timeout) { struct task_struct *task; + int ret; int state; int remainder; @@ -270,10 +274,12 @@ linux_schedule_timeout(int timeout) sleepq_lock(task); state = atomic_read(&task->state); - if (state != TASK_WAKING) - (void)linux_add_to_sleepqueue(task, "sched", timeout, state); - else + if (state != TASK_WAKING) { + ret = linux_add_to_sleepqueue(task, task, "sched", timeout, state); + } else { sleepq_release(task); + ret = 0; + } set_task_state(task, TASK_RUNNING); PICKUP_GIANT(); @@ -283,7 +289,11 @@ linux_schedule_timeout(int timeout) /* range check return value */ remainder -= ticks; - if (remainder < 0) + + /* range check return value */ + if (ret == -ERESTARTSYS && remainder < 1) + remainder = 1; + else if (remainder < 0) remainder = 0; else if (remainder > timeout) remainder = timeout; @@ -337,7 +347,7 @@ linux_wait_on_bit_timeout(unsigned long *word, int bit, unsigned int state, break; } set_task_state(task, state); - ret = linux_add_to_sleepqueue(wchan, "wbit", timeout, state); + ret = linux_add_to_sleepqueue(wchan, task, "wbit", timeout, state); if (ret != 0) break; } @@ -374,7 +384,7 @@ linux_wait_on_atomic_t(atomic_t *a, unsigned int state) break; } set_task_state(task, state); - ret = linux_add_to_sleepqueue(wchan, "watomic", 0, state); + ret = linux_add_to_sleepqueue(wchan, task, "watomic", 0, state); if (ret != 0) break; } diff --git a/sys/compat/linuxkpi/common/src/linux_tasklet.c b/sys/compat/linuxkpi/common/src/linux_tasklet.c index 5fe9455..549af86 100644 --- a/sys/compat/linuxkpi/common/src/linux_tasklet.c +++ b/sys/compat/linuxkpi/common/src/linux_tasklet.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #define TASKLET_ST_BUSY 1 #define TASKLET_ST_EXEC 2 #define TASKLET_ST_LOOP 3 +#define TASKLET_ST_PAUSED 4 #define TASKLET_ST_CMPSET(ts, old, new) \ atomic_cmpset_ptr((volatile uintptr_t *)&(ts)->entry.tqe_prev, old, new) @@ -196,3 
+197,21 @@ tasklet_kill(struct tasklet_struct *ts) while (TASKLET_ST_GET(ts) != TASKLET_ST_IDLE) pause("W", 1); } + +void +tasklet_enable(struct tasklet_struct *ts) +{ + (void) TASKLET_ST_CMPSET(ts, TASKLET_ST_PAUSED, TASKLET_ST_IDLE); +} + +void +tasklet_disable(struct tasklet_struct *ts) +{ + while (1) { + if (TASKLET_ST_GET(ts) == TASKLET_ST_PAUSED) + break; + if (TASKLET_ST_CMPSET(ts, TASKLET_ST_IDLE, TASKLET_ST_PAUSED)) + break; + pause("W", 1); + } +} diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 7d5081b..28e7e2b 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2541,8 +2541,10 @@ device smb # SMBus peripheral devices # +# jedec_dimm Asset and temperature reporting for DDR3 and DDR4 DIMMs # jedec_ts Temperature Sensor compliant with JEDEC Standard 21-C # +device jedec_dimm device jedec_ts # I2C Bus diff --git a/sys/conf/files b/sys/conf/files index aeebb47..59351b2 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2164,6 +2164,7 @@ dev/ixgbe/ixgbe_dcb_82598.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82599.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" +dev/jedec_dimm/jedec_dimm.c optional jedec_dimm smbus dev/jedec_ts/jedec_ts.c optional jedec_ts smbus dev/jme/if_jme.c optional jme pci dev/joy/joy.c optional joy diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 4bbe63c..3e19915 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -192,6 +192,7 @@ dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_via.c optional agp dev/amdsbwd/amdsbwd.c optional amdsbwd +dev/amdsmn/amdsmn.c optional amdsmn | amdtemp dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index e96f783..8aee3cf 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -175,6 +175,7 @@ dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional 
agp dev/aic/aic_isa.c optional aic isa dev/amdsbwd/amdsbwd.c optional amdsbwd +dev/amdsmn/amdsmn.c optional amdsmn | amdtemp dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa diff --git a/sys/dev/amdsmn/amdsmn.c b/sys/dev/amdsmn/amdsmn.c new file mode 100644 index 0000000..bc2ed7c --- /dev/null +++ b/sys/dev/amdsmn/amdsmn.c @@ -0,0 +1,193 @@ +/*- + * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Driver for the AMD Family 17h CPU System Management Network. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/lock.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <machine/cpufunc.h> +#include <machine/md_var.h> +#include <machine/specialreg.h> + +#include <dev/pci/pcivar.h> +#include <x86/pci_cfgreg.h> + +#include <dev/amdsmn/amdsmn.h> + +#define SMN_ADDR_REG 0x60 +#define SMN_DATA_REG 0x64 + +struct amdsmn_softc { + struct mtx smn_lock; +}; + +static struct pciid { + uint32_t device_id; +} amdsmn_ids[] = { + { 0x14501022 }, +}; + +/* + * Device methods. + */ +static void amdsmn_identify(driver_t *driver, device_t parent); +static int amdsmn_probe(device_t dev); +static int amdsmn_attach(device_t dev); +static int amdsmn_detach(device_t dev); + +static device_method_t amdsmn_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, amdsmn_identify), + DEVMETHOD(device_probe, amdsmn_probe), + DEVMETHOD(device_attach, amdsmn_attach), + DEVMETHOD(device_detach, amdsmn_detach), + DEVMETHOD_END +}; + +static driver_t amdsmn_driver = { + "amdsmn", + amdsmn_methods, + sizeof(struct amdsmn_softc), +}; + +static devclass_t amdsmn_devclass; +DRIVER_MODULE(amdsmn, hostb, amdsmn_driver, amdsmn_devclass, NULL, NULL); +MODULE_VERSION(amdsmn, 1); + +static bool +amdsmn_match(device_t parent) +{ + uint32_t devid; + size_t i; + + devid = pci_get_devid(parent); + for (i = 0; i < nitems(amdsmn_ids); i++) + if (amdsmn_ids[i].device_id == devid) + return (true); + return (false); +} + +static void +amdsmn_identify(driver_t *driver, device_t parent) +{ + device_t child; + + /* Make sure we're not being doubly invoked. 
*/ + if (device_find_child(parent, "amdsmn", -1) != NULL) + return; + if (!amdsmn_match(parent)) + return; + + child = device_add_child(parent, "amdsmn", -1); + if (child == NULL) + device_printf(parent, "add amdsmn child failed\n"); +} + +static int +amdsmn_probe(device_t dev) +{ + uint32_t family; + + if (resource_disabled("amdsmn", 0)) + return (ENXIO); + if (!amdsmn_match(device_get_parent(dev))) + return (ENXIO); + + family = CPUID_TO_FAMILY(cpu_id); + + switch (family) { + case 0x17: + break; + default: + return (ENXIO); + } + device_set_desc(dev, "AMD Family 17h System Management Network"); + + return (BUS_PROBE_GENERIC); +} + +static int +amdsmn_attach(device_t dev) +{ + struct amdsmn_softc *sc = device_get_softc(dev); + + mtx_init(&sc->smn_lock, "SMN mtx", "SMN", MTX_DEF); + return (0); +} + +static int +amdsmn_detach(device_t dev) +{ + struct amdsmn_softc *sc = device_get_softc(dev); + + mtx_destroy(&sc->smn_lock); + return (0); +} + +int +amdsmn_read(device_t dev, uint32_t addr, uint32_t *value) +{ + struct amdsmn_softc *sc = device_get_softc(dev); + device_t parent; + + parent = device_get_parent(dev); + + mtx_lock(&sc->smn_lock); + pci_write_config(parent, SMN_ADDR_REG, addr, 4); + *value = pci_read_config(parent, SMN_DATA_REG, 4); + mtx_unlock(&sc->smn_lock); + + return (0); +} + +int +amdsmn_write(device_t dev, uint32_t addr, uint32_t value) +{ + struct amdsmn_softc *sc = device_get_softc(dev); + device_t parent; + + parent = device_get_parent(dev); + + mtx_lock(&sc->smn_lock); + pci_write_config(parent, SMN_ADDR_REG, addr, 4); + pci_write_config(parent, SMN_DATA_REG, value, 4); + mtx_unlock(&sc->smn_lock); + + return (0); +} diff --git a/sys/dev/amdsmn/amdsmn.h b/sys/dev/amdsmn/amdsmn.h new file mode 100644 index 0000000..c3225ff --- /dev/null +++ b/sys/dev/amdsmn/amdsmn.h @@ -0,0 +1,32 @@ +/*- + * Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#pragma once + +int amdsmn_read(device_t dev, uint32_t addr, uint32_t *value); +int amdsmn_write(device_t dev, uint32_t addr, uint32_t value); diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c index 1e658e6..2080c92 100644 --- a/sys/dev/amdtemp/amdtemp.c +++ b/sys/dev/amdtemp/amdtemp.c @@ -49,6 +49,8 @@ __FBSDID("$FreeBSD$"); #include <dev/pci/pcivar.h> #include <x86/pci_cfgreg.h> +#include <dev/amdsmn/amdsmn.h> + typedef enum { CORE0_SENSOR0, CORE0_SENSOR1, @@ -59,7 +61,6 @@ typedef enum { } amdsensor_t; struct amdtemp_softc { - device_t sc_dev; int sc_ncores; int sc_ntemps; int sc_flags; @@ -70,6 +71,7 @@ struct amdtemp_softc { int32_t (*sc_gettemp)(device_t, amdsensor_t); struct sysctl_oid *sc_sysctl_cpu[MAXCPU]; struct intr_config_hook sc_ich; + device_t sc_smn; }; #define VENDORID_AMD 0x1022 @@ -82,6 +84,7 @@ struct amdtemp_softc { #define DEVICEID_AMD_MISC16 0x1533 #define DEVICEID_AMD_MISC16_M30H 0x1583 #define DEVICEID_AMD_MISC17 0x141d +#define DEVICEID_AMD_HOSTB17H 0x1450 static struct amdtemp_product { uint16_t amdtemp_vendorid; @@ -96,6 +99,7 @@ static struct amdtemp_product { { VENDORID_AMD, DEVICEID_AMD_MISC16 }, { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H }, { VENDORID_AMD, DEVICEID_AMD_MISC17 }, + { VENDORID_AMD, DEVICEID_AMD_HOSTB17H }, { 0, 0 } }; @@ -105,6 +109,11 @@ static struct amdtemp_product { #define AMDTEMP_REPTMP_CTRL 0xa4 /* + * Reported Temperature, Family 17h + */ +#define AMDTEMP_17H_CUR_TMP 0x59800 + +/* * Thermaltrip Status Register (Family 0Fh only) */ #define AMDTEMP_THERMTP_STAT 0xe4 @@ -133,6 +142,7 @@ static int amdtemp_detach(device_t dev); static int amdtemp_match(device_t dev); static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor); +static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor); static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS); static device_method_t amdtemp_methods[] = { @@ -153,6 +163,8 
@@ static driver_t amdtemp_driver = { static devclass_t amdtemp_devclass; DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, amdtemp_devclass, NULL, NULL); +MODULE_VERSION(amdtemp, 1); +MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1); static int amdtemp_match(device_t dev) @@ -195,6 +207,8 @@ amdtemp_probe(device_t dev) if (resource_disabled("amdtemp", 0)) return (ENXIO); + if (!amdtemp_match(device_get_parent(dev))) + return (ENXIO); family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); @@ -211,6 +225,7 @@ amdtemp_probe(device_t dev) case 0x14: case 0x15: case 0x16: + case 0x17: break; default: return (ENXIO); @@ -240,7 +255,7 @@ amdtemp_attach(device_t dev) cpuid = cpu_id; family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); - if (family != 0x0f || model >= 0x40) { + if ((family != 0x0f || model >= 0x40) && family != 0x17) { cpuid = pci_read_config(dev, AMDTEMP_CPUID, 4); family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); @@ -342,6 +357,17 @@ amdtemp_attach(device_t dev) sc->sc_gettemp = amdtemp_gettemp; break; + case 0x17: + sc->sc_ntemps = 1; + sc->sc_gettemp = amdtemp_gettemp17h; + sc->sc_smn = device_find_child( + device_get_parent(dev), "amdsmn", -1); + if (sc->sc_smn == NULL) { + if (bootverbose) + device_printf(dev, "No SMN device found\n"); + return (ENXIO); + } + break; } /* Find number of cores per package. 
*/ @@ -557,3 +583,19 @@ amdtemp_gettemp(device_t dev, amdsensor_t sensor) return (temp); } + +static int32_t +amdtemp_gettemp17h(device_t dev, amdsensor_t sensor) +{ + struct amdtemp_softc *sc = device_get_softc(dev); + uint32_t temp; + int error; + + error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &temp); + KASSERT(error == 0, ("amdsmn_read")); + + temp = ((temp >> 21) & 0x7ff) * 5 / 4; + temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; + + return (temp); +} diff --git a/sys/dev/jedec_dimm/jedec_dimm.c b/sys/dev/jedec_dimm/jedec_dimm.c new file mode 100644 index 0000000..c496742 --- /dev/null +++ b/sys/dev/jedec_dimm/jedec_dimm.c @@ -0,0 +1,1010 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Authors: Ravi Pokala (rpokala@freebsd.org), Andriy Gapon (avg@FreeBSD.org) + * + * Copyright (c) 2016 Andriy Gapon <avg@FreeBSD.org> + * Copyright (c) 2018 Panasas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This driver is a super-set of jedec_ts(4), and most of the code for reading + * and reporting the temperature is either based on that driver, or copied + * from it verbatim. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/endian.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <dev/jedec_dimm/jedec_dimm.h> +#include <dev/smbus/smbconf.h> +#include <dev/smbus/smbus.h> + +#include "smbus_if.h" + +struct jedec_dimm_softc { + device_t dev; + device_t smbus; + uint8_t spd_addr; /* SMBus address of the SPD EEPROM. */ + uint8_t tsod_addr; /* Address of the Thermal Sensor On DIMM */ + uint32_t capacity_mb; + char type_str[5]; + char part_str[21]; /* 18 (DDR3) or 20 (DDR4) chars, plus terminator */ + char serial_str[9]; /* 4 bytes = 8 nybble characters, plus terminator */ + char *slotid_str; /* Optional DIMM slot identifier (silkscreen) */ +}; + +/* General Thermal Sensor on DIMM (TSOD) identification notes. + * + * The JEDEC TSE2004av specification defines the device ID that all compliant + * devices should use, but very few do in practice. Maybe that's because the + * earlier TSE2002av specification was rather vague about that. + * Rare examples are IDT TSE2004GB2B0 and Atmel AT30TSE004A, not sure if + * they are TSE2004av compliant by design or by accident. 
+ * Also, the specification mandates that PCI SIG manufacturer IDs are to be + * used, but in practice the JEDEC manufacturer IDs are often used. + */ +const struct jedec_dimm_tsod_dev { + uint16_t vendor_id; + uint8_t device_id; + const char *description; +} known_tsod_devices[] = { + /* Analog Devices ADT7408. + * http://www.analog.com/media/en/technical-documentation/data-sheets/ADT7408.pdf + */ + { 0x11d4, 0x08, "Analog Devices TSOD" }, + + /* Atmel AT30TSE002B, AT30TSE004A. + * http://www.atmel.com/images/doc8711.pdf + * http://www.atmel.com/images/atmel-8868-dts-at30tse004a-datasheet.pdf + * Note how one chip uses the JEDEC Manufacturer ID while the other + * uses the PCI SIG one. + */ + { 0x001f, 0x82, "Atmel TSOD" }, + { 0x1114, 0x22, "Atmel TSOD" }, + + /* Integrated Device Technology (IDT) TS3000B3A, TSE2002B3C, + * TSE2004GB2B0 chips and their variants. + * http://www.idt.com/sites/default/files/documents/IDT_TSE2002B3C_DST_20100512_120303152056.pdf + * http://www.idt.com/sites/default/files/documents/IDT_TS3000B3A_DST_20101129_120303152013.pdf + * https://www.idt.com/document/dst/tse2004gb2b0-datasheet + */ + { 0x00b3, 0x29, "IDT TSOD" }, + { 0x00b3, 0x22, "IDT TSOD" }, + + /* Maxim Integrated MAX6604. + * Different document revisions specify different Device IDs. + * Document 19-3837; Rev 0; 10/05 has 0x3e00 while + * 19-3837; Rev 3; 10/11 has 0x5400. + * http://datasheets.maximintegrated.com/en/ds/MAX6604.pdf + */ + { 0x004d, 0x3e, "Maxim Integrated TSOD" }, + { 0x004d, 0x54, "Maxim Integrated TSOD" }, + + /* Microchip Technology MCP9805, MCP9843, MCP98242, MCP98243 + * and their variants. + * http://ww1.microchip.com/downloads/en/DeviceDoc/21977b.pdf + * Microchip Technology EMC1501. + * http://ww1.microchip.com/downloads/en/DeviceDoc/00001605A.pdf + */ + { 0x0054, 0x00, "Microchip TSOD" }, + { 0x0054, 0x20, "Microchip TSOD" }, + { 0x0054, 0x21, "Microchip TSOD" }, + { 0x1055, 0x08, "Microchip TSOD" }, + + /* NXP Semiconductors SE97 and SE98. 
+ * http://www.nxp.com/docs/en/data-sheet/SE97B.pdf + */ + { 0x1131, 0xa1, "NXP TSOD" }, + { 0x1131, 0xa2, "NXP TSOD" }, + + /* ON Semiconductor CAT34TS02 revisions B and C, CAT6095 and compatible. + * https://www.onsemi.com/pub/Collateral/CAT34TS02-D.PDF + * http://www.onsemi.com/pub/Collateral/CAT6095-D.PDF + */ + { 0x1b09, 0x08, "ON Semiconductor TSOD" }, + { 0x1b09, 0x0a, "ON Semiconductor TSOD" }, + + /* ST[Microelectronics] STTS424E02, STTS2002 and others. + * http://www.st.com/resource/en/datasheet/cd00157558.pdf + * http://www.st.com/resource/en/datasheet/stts2002.pdf + */ + { 0x104a, 0x00, "ST Microelectronics TSOD" }, + { 0x104a, 0x03, "ST Microelectronics TSOD" }, +}; + +static int jedec_dimm_attach(device_t dev); + +static int jedec_dimm_capacity(struct jedec_dimm_softc *sc, enum dram_type type, + uint32_t *capacity_mb); + +static int jedec_dimm_detach(device_t dev); + +static int jedec_dimm_dump(struct jedec_dimm_softc *sc, enum dram_type type); + +static int jedec_dimm_field_to_str(struct jedec_dimm_softc *sc, char *dst, + size_t dstsz, uint16_t offset, uint16_t len, bool ascii); + +static int jedec_dimm_probe(device_t dev); + +static int jedec_dimm_readw_be(struct jedec_dimm_softc *sc, uint8_t reg, + uint16_t *val); + +static int jedec_dimm_temp_sysctl(SYSCTL_HANDLER_ARGS); + +static const char *jedec_dimm_tsod_match(uint16_t vid, uint16_t did); + + +/** + * device_attach() method. Read the DRAM type, use that to determine the offsets + * and lengths of the asset string fields. Calculate the capacity. If a TSOD is + * present, figure out exactly what it is, and update the device description. + * If all of that was successful, create the sysctls for the DIMM. If an + * optional slotid has been hinted, create a sysctl for that too. + * + * @author rpokala + * + * @param[in,out] dev + * Device being attached. 
+ */ +static int +jedec_dimm_attach(device_t dev) +{ + uint8_t byte; + uint16_t devid; + uint16_t partnum_len; + uint16_t partnum_offset; + uint16_t serial_len; + uint16_t serial_offset; + uint16_t tsod_present_offset; + uint16_t vendorid; + bool tsod_present; + int rc; + int new_desc_len; + enum dram_type type; + struct jedec_dimm_softc *sc; + struct sysctl_ctx_list *ctx; + struct sysctl_oid *oid; + struct sysctl_oid_list *children; + const char *tsod_match; + const char *slotid_str; + char *new_desc; + + sc = device_get_softc(dev); + ctx = device_get_sysctl_ctx(dev); + oid = device_get_sysctl_tree(dev); + children = SYSCTL_CHILDREN(oid); + + bzero(sc, sizeof(*sc)); + sc->dev = dev; + sc->smbus = device_get_parent(dev); + sc->spd_addr = smbus_get_addr(dev); + + /* The TSOD address has a different DTI from the SPD address, but shares + * the LSA bits. + */ + sc->tsod_addr = JEDEC_DTI_TSOD | (sc->spd_addr & 0x0f); + + /* Read the DRAM type, and set the various offsets and lengths. */ + rc = smbus_readb(sc->smbus, sc->spd_addr, SPD_OFFSET_DRAM_TYPE, &byte); + if (rc != 0) { + device_printf(dev, "failed to read dram_type: %d\n", rc); + goto out; + } + type = (enum dram_type) byte; + switch (type) { + case DRAM_TYPE_DDR3_SDRAM: + (void) snprintf(sc->type_str, sizeof(sc->type_str), "DDR3"); + partnum_len = SPD_LEN_DDR3_PARTNUM; + partnum_offset = SPD_OFFSET_DDR3_PARTNUM; + serial_len = SPD_LEN_DDR3_SERIAL; + serial_offset = SPD_OFFSET_DDR3_SERIAL; + tsod_present_offset = SPD_OFFSET_DDR3_TSOD_PRESENT; + break; + case DRAM_TYPE_DDR4_SDRAM: + (void) snprintf(sc->type_str, sizeof(sc->type_str), "DDR4"); + partnum_len = SPD_LEN_DDR4_PARTNUM; + partnum_offset = SPD_OFFSET_DDR4_PARTNUM; + serial_len = SPD_LEN_DDR4_SERIAL; + serial_offset = SPD_OFFSET_DDR4_SERIAL; + tsod_present_offset = SPD_OFFSET_DDR4_TSOD_PRESENT; + break; + default: + device_printf(dev, "unsupported dram_type 0x%02x\n", type); + rc = EINVAL; + goto out; + } + + if (bootverbose) { + /* bootverbose debuggery 
is best-effort, so ignore the rc. */ + (void) jedec_dimm_dump(sc, type); + } + + /* Read all the required info from the SPD. If any of it fails, error + * out without creating the sysctls. + */ + rc = jedec_dimm_capacity(sc, type, &sc->capacity_mb); + if (rc != 0) { + goto out; + } + + rc = jedec_dimm_field_to_str(sc, sc->part_str, sizeof(sc->part_str), + partnum_offset, partnum_len, true); + if (rc != 0) { + goto out; + } + + rc = jedec_dimm_field_to_str(sc, sc->serial_str, sizeof(sc->serial_str), + serial_offset, serial_len, false); + if (rc != 0) { + goto out; + } + + /* The MSBit of the TSOD-presence byte reports whether or not the TSOD + * is in fact present. If it is, read manufacturer and device info from + * it to confirm that it's a valid TSOD device. It's an error if any of + * those bytes are unreadable; it's not an error if the device is simply + * not known to us (tsod_match == NULL). + * While DDR3 and DDR4 don't explicitly require a TSOD, essentially all + * DDR3 and DDR4 DIMMs include one. 
+ */ + rc = smbus_readb(sc->smbus, sc->spd_addr, tsod_present_offset, &byte); + if (rc != 0) { + device_printf(dev, "failed to read TSOD-present byte: %d\n", + rc); + goto out; + } + if (byte & 0x80) { + tsod_present = true; + rc = jedec_dimm_readw_be(sc, TSOD_REG_MANUFACTURER, &vendorid); + if (rc != 0) { + device_printf(dev, + "failed to read TSOD Manufacturer ID\n"); + goto out; + } + rc = jedec_dimm_readw_be(sc, TSOD_REG_DEV_REV, &devid); + if (rc != 0) { + device_printf(dev, "failed to read TSOD Device ID\n"); + goto out; + } + + tsod_match = jedec_dimm_tsod_match(vendorid, devid); + if (bootverbose) { + if (tsod_match == NULL) { + device_printf(dev, + "Unknown TSOD Manufacturer and Device IDs," + " 0x%x and 0x%x\n", vendorid, devid); + } else { + device_printf(dev, + "TSOD: %s\n", tsod_match); + } + } + } else { + tsod_match = NULL; + tsod_present = false; + } + + SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "type", + CTLFLAG_RD | CTLFLAG_MPSAFE, sc->type_str, 0, + "DIMM type"); + + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "capacity", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, sc->capacity_mb, + "DIMM capacity (MB)"); + + SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "part", + CTLFLAG_RD | CTLFLAG_MPSAFE, sc->part_str, 0, + "DIMM Part Number"); + + SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial", + CTLFLAG_RD | CTLFLAG_MPSAFE, sc->serial_str, 0, + "DIMM Serial Number"); + + /* Create the temperature sysctl IFF the TSOD is present and valid */ + if (tsod_present && (tsod_match != NULL)) { + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temp", + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, 0, + jedec_dimm_temp_sysctl, "IK", "DIMM temperature (deg C)"); + } + + /* If a "slotid" was hinted, add the sysctl for it. 
*/ + if (resource_string_value(device_get_name(dev), device_get_unit(dev), + "slotid", &slotid_str) == 0) { + if (slotid_str != NULL) { + sc->slotid_str = malloc(strlen(slotid_str) + 1, + M_DEVBUF, (M_WAITOK | M_ZERO)); + strlcpy(sc->slotid_str, slotid_str, + strlen(slotid_str) + 1); + SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "slotid", + CTLFLAG_RD | CTLFLAG_MPSAFE, sc->slotid_str, 0, + "DIMM Slot Identifier"); + } + } + + /* If a TSOD type string or a slotid are present, add them to the + * device description. + */ + if ((tsod_match != NULL) || (sc->slotid_str != NULL)) { + new_desc_len = strlen(device_get_desc(dev)); + if (tsod_match != NULL) { + new_desc_len += strlen(tsod_match); + new_desc_len += 4; /* " w/ " */ + } + if (sc->slotid_str != NULL) { + new_desc_len += strlen(sc->slotid_str); + new_desc_len += 3; /* space + parens */ + } + new_desc_len++; /* terminator */ + new_desc = malloc(new_desc_len, M_TEMP, (M_WAITOK | M_ZERO)); + (void) snprintf(new_desc, new_desc_len, "%s%s%s%s%s%s", + device_get_desc(dev), + (tsod_match ? " w/ " : ""), + (tsod_match ? tsod_match : ""), + (sc->slotid_str ? " (" : ""), + (sc->slotid_str ? sc->slotid_str : ""), + (sc->slotid_str ? ")" : "")); + device_set_desc_copy(dev, new_desc); + free(new_desc, M_TEMP); + } + +out: + return (rc); +} + +/** + * Calculate the capacity of a DIMM. Both DDR3 and DDR4 encode "geometry" + * information in various SPD bytes. The standards documents codify everything + * in look-up tables, but it's trivial to reverse-engineer the formulas for + * most of them. Unless otherwise noted, the same formulas apply for both DDR3 + * and DDR4. The SPD offsets of where the data comes from are different between + * the two types, because having them be the same would be too easy. + * + * @author rpokala + * + * @param[in] sc + * Instance-specific context data + * + * @param[in] dram_type + * The locations of the data used to calculate the capacity depend on the + * type of the DIMM.
+ * + * @param[out] capacity_mb + * The calculated capacity, in MB + */ +static int +jedec_dimm_capacity(struct jedec_dimm_softc *sc, enum dram_type type, + uint32_t *capacity_mb) +{ + uint8_t bus_width_byte; + uint8_t bus_width_offset; + uint8_t dimm_ranks_byte; + uint8_t dimm_ranks_offset; + uint8_t sdram_capacity_byte; + uint8_t sdram_capacity_offset; + uint8_t sdram_pkg_type_byte; + uint8_t sdram_pkg_type_offset; + uint8_t sdram_width_byte; + uint8_t sdram_width_offset; + uint32_t bus_width; + uint32_t dimm_ranks; + uint32_t sdram_capacity; + uint32_t sdram_pkg_type; + uint32_t sdram_width; + int rc; + + switch (type) { + case DRAM_TYPE_DDR3_SDRAM: + bus_width_offset = SPD_OFFSET_DDR3_BUS_WIDTH; + dimm_ranks_offset = SPD_OFFSET_DDR3_DIMM_RANKS; + sdram_capacity_offset = SPD_OFFSET_DDR3_SDRAM_CAPACITY; + sdram_width_offset = SPD_OFFSET_DDR3_SDRAM_WIDTH; + break; + case DRAM_TYPE_DDR4_SDRAM: + bus_width_offset = SPD_OFFSET_DDR4_BUS_WIDTH; + dimm_ranks_offset = SPD_OFFSET_DDR4_DIMM_RANKS; + sdram_capacity_offset = SPD_OFFSET_DDR4_SDRAM_CAPACITY; + sdram_pkg_type_offset = SPD_OFFSET_DDR4_SDRAM_PKG_TYPE; + sdram_width_offset = SPD_OFFSET_DDR4_SDRAM_WIDTH; + break; + default: + device_printf(sc->dev, "unsupported dram_type 0x%02x\n", type); + rc = EINVAL; + goto out; + } + + rc = smbus_readb(sc->smbus, sc->spd_addr, bus_width_offset, + &bus_width_byte); + if (rc != 0) { + device_printf(sc->dev, "failed to read bus_width: %d\n", rc); + goto out; + } + + rc = smbus_readb(sc->smbus, sc->spd_addr, dimm_ranks_offset, + &dimm_ranks_byte); + if (rc != 0) { + device_printf(sc->dev, "failed to read dimm_ranks: %d\n", rc); + goto out; + } + + rc = smbus_readb(sc->smbus, sc->spd_addr, sdram_capacity_offset, + &sdram_capacity_byte); + if (rc != 0) { + device_printf(sc->dev, "failed to read sdram_capacity: %d\n", + rc); + goto out; + } + + rc = smbus_readb(sc->smbus, sc->spd_addr, sdram_width_offset, + &sdram_width_byte); + if (rc != 0) { + device_printf(sc->dev, "failed to read 
sdram_width: %d\n", rc); + goto out; + } + + /* The "SDRAM Package Type" is only needed for DDR4 DIMMs. */ + if (type == DRAM_TYPE_DDR4_SDRAM) { + rc = smbus_readb(sc->smbus, sc->spd_addr, sdram_pkg_type_offset, + &sdram_pkg_type_byte); + if (rc != 0) { + device_printf(sc->dev, + "failed to read sdram_pkg_type: %d\n", rc); + goto out; + } + } + + /* "Primary bus width, in bits" is in bits [2:0]. */ + bus_width_byte &= 0x07; + if (bus_width_byte <= 3) { + bus_width = 1 << bus_width_byte; + bus_width *= 8; + } else { + device_printf(sc->dev, "invalid bus width info\n"); + rc = EINVAL; + goto out; + } + + /* "Number of ranks per DIMM" is in bits [5:3]. Values 4-7 are only + * valid for DDR4. + */ + dimm_ranks_byte >>= 3; + dimm_ranks_byte &= 0x07; + if (dimm_ranks_byte <= 7) { + dimm_ranks = dimm_ranks_byte + 1; + } else { + device_printf(sc->dev, "invalid DIMM Rank info\n"); + rc = EINVAL; + goto out; + } + if ((dimm_ranks_byte >= 4) && (type != DRAM_TYPE_DDR4_SDRAM)) { + device_printf(sc->dev, "invalid DIMM Rank info\n"); + rc = EINVAL; + goto out; + } + + /* "Total SDRAM capacity per die, in Mb" is in bits [3:0]. There are two + * different formulas, for values 0-7 and for values 8-9. Also, values + * 7-9 are only valid for DDR4. + */ + sdram_capacity_byte &= 0x0f; + if (sdram_capacity_byte <= 7) { + sdram_capacity = 1 << sdram_capacity_byte; + sdram_capacity *= 256; + } else if (sdram_capacity_byte <= 9) { + sdram_capacity = 12 << (sdram_capacity_byte - 8); + sdram_capacity *= 1024; + } else { + device_printf(sc->dev, "invalid SDRAM capacity info\n"); + rc = EINVAL; + goto out; + } + if ((sdram_capacity_byte >= 7) && (type != DRAM_TYPE_DDR4_SDRAM)) { + device_printf(sc->dev, "invalid SDRAM capacity info\n"); + rc = EINVAL; + goto out; + } + + /* "SDRAM device width" is in bits [2:0]. 
*/ + sdram_width_byte &= 0x7; + if (sdram_width_byte <= 3) { + sdram_width = 1 << sdram_width_byte; + sdram_width *= 4; + } else { + device_printf(sc->dev, "invalid SDRAM width info\n"); + rc = EINVAL; + goto out; + } + + /* DDR4 has something called "3DS", which is indicated by [1:0] = 2; + * when that is the case, the die count is encoded in [6:4], and + * dimm_ranks is multiplied by it. + */ + if ((type == DRAM_TYPE_DDR4_SDRAM) && + ((sdram_pkg_type_byte & 0x3) == 2)) { + sdram_pkg_type_byte >>= 4; + sdram_pkg_type_byte &= 0x07; + sdram_pkg_type = sdram_pkg_type_byte + 1; + dimm_ranks *= sdram_pkg_type; + } + + /* Finally, assemble the actual capacity. The formula is the same for + * both DDR3 and DDR4. + */ + *capacity_mb = sdram_capacity / 8 * bus_width / sdram_width * + dimm_ranks; + +out: + return (rc); +} + +/** + * device_detach() method. If we allocated sc->slotid_str, free it. Even if we + * didn't allocate, free it anyway; free(NULL) is safe. + * + * @author rpokala + * + * @param[in,out] dev + * Device being detached. + */ +static int +jedec_dimm_detach(device_t dev) +{ + struct jedec_dimm_softc *sc; + + sc = device_get_softc(dev); + free(sc->slotid_str, M_DEVBUF); + + return (0); +} + +/** + * Read and dump the entire SPD contents. + * + * @author rpokala + * + * @param[in] sc + * Instance-specific context data + * + * @param[in] dram_type + * The length of data which needs to be read and dumped differs based on + * the type of the DIMM. + */ +static int +jedec_dimm_dump(struct jedec_dimm_softc *sc, enum dram_type type) +{ + int i; + int rc; + bool page_changed; + uint8_t bytes[512]; + + page_changed = false; + + for (i = 0; i < 256; i++) { + rc = smbus_readb(sc->smbus, sc->spd_addr, i, &bytes[i]); + if (rc != 0) { + device_printf(sc->dev, + "unable to read page0:0x%02x: %d\n", i, rc); + goto out; + } + } + + /* The DDR4 SPD is 512 bytes, but SMBus only allows for 8-bit offsets. + * JEDEC gets around this by defining the "PAGE" DTI and LSAs. 
+ */ + if (type == DRAM_TYPE_DDR4_SDRAM) { + page_changed = true; + rc = smbus_writeb(sc->smbus, + (JEDEC_DTI_PAGE | JEDEC_LSA_PAGE_SET1), 0, 0); + if (rc != 0) { + device_printf(sc->dev, "unable to change page: %d\n", + rc); + goto out; + } + /* Add 256 to the store location, because we're in the second + * page. + */ + for (i = 0; i < 256; i++) { + rc = smbus_readb(sc->smbus, sc->spd_addr, i, + &bytes[256 + i]); + if (rc != 0) { + device_printf(sc->dev, + "unable to read page1:0x%02x: %d\n", i, rc); + goto out; + } + } + } + + /* Display the data in a nice hexdump format, with byte offsets. */ + hexdump(bytes, (page_changed ? 512 : 256), NULL, 0); + +out: + if (page_changed) { + int rc2; + /* Switch back to page0 before returning. */ + rc2 = smbus_writeb(sc->smbus, + (JEDEC_DTI_PAGE | JEDEC_LSA_PAGE_SET0), 0, 0); + if (rc2 != 0) { + device_printf(sc->dev, "unable to restore page: %d\n", + rc2); + } + } + return (rc); +} + +/** + * Read a specified range of bytes from the SPD, convert them to a string, and + * store them in the provided buffer. Some SPD fields are space-padded ASCII, + * and some are just a string of bits that we want to convert to a hex string. + * + * @author rpokala + * + * @param[in] sc + * Instance-specific context data + * + * @param[out] dst + * The output buffer to populate + * + * @param[in] dstsz + * The size of the output buffer + * + * @param[in] offset + * The starting offset of the field within the SPD + * + * @param[in] len + * The length in bytes of the field within the SPD + * + * @param[in] ascii + * Is the field a sequence of ASCII characters? If not, it is binary data + * which should be converted to characters. + */ +static int +jedec_dimm_field_to_str(struct jedec_dimm_softc *sc, char *dst, size_t dstsz, + uint16_t offset, uint16_t len, bool ascii) +{ + uint8_t byte; + int i; + int rc; + bool page_changed; + + /* Change to the proper page. Offsets [0, 255] are in page0; offsets + * [256, 512] are in page1. 
+ * + * *The page must be reset to page0 before returning.* + * + * For the page-change operation, only the DTI and LSA matter; the + * offset and write-value are ignored, so use just 0. + * + * Mercifully, JEDEC defined the fields such that none of them cross + * pages, so we don't need to worry about that complication. + */ + if (offset < JEDEC_SPD_PAGE_SIZE) { + page_changed = false; + } else if (offset < (2 * JEDEC_SPD_PAGE_SIZE)) { + page_changed = true; + rc = smbus_writeb(sc->smbus, + (JEDEC_DTI_PAGE | JEDEC_LSA_PAGE_SET1), 0, 0); + if (rc != 0) { + device_printf(sc->dev, + "unable to change page for offset 0x%04x: %d\n", + offset, rc); + } + /* Adjust the offset to account for the page change. */ + offset -= JEDEC_SPD_PAGE_SIZE; + } else { + page_changed = false; + rc = EINVAL; + device_printf(sc->dev, "invalid offset 0x%04x\n", offset); + goto out; + } + + /* Sanity-check (adjusted) offset and length; everything must be within + * the same page. + */ + if (offset >= JEDEC_SPD_PAGE_SIZE) { + rc = EINVAL; + device_printf(sc->dev, "invalid offset 0x%04x\n", offset); + goto out; + } + if ((offset + len) >= JEDEC_SPD_PAGE_SIZE) { + rc = EINVAL; + device_printf(sc->dev, + "(offset + len) would cross page (0x%04x + 0x%04x)\n", + offset, len); + goto out; + } + + /* Sanity-check the destination string length. If we're dealing with + * ASCII chars, then the destination must be at least the same length; + * otherwise, it must be *twice* the length, because each byte must + * be converted into two nybble characters. + * + * And, of course, there needs to be an extra byte for the terminator. 
*/ + if (ascii) { + if (dstsz < (len + 1)) { + rc = EINVAL; + device_printf(sc->dev, + "destination too short (%u < %u)\n", + (uint16_t) dstsz, (len + 1)); + goto out; + } + } else { + if (dstsz < ((2 * len) + 1)) { + rc = EINVAL; + device_printf(sc->dev, + "destination too short (%u < %u)\n", + (uint16_t) dstsz, ((2 * len) + 1)); + goto out; + } + } + + /* Read a byte at a time. */ + for (i = 0; i < len; i++) { + rc = smbus_readb(sc->smbus, sc->spd_addr, (offset + i), &byte); + if (rc != 0) { + device_printf(sc->dev, + "failed to read byte at 0x%02x: %d\n", + (offset + i), rc); + goto out; + } + if (ascii) { + /* chars can be copied directly. */ + dst[i] = byte; + } else { + /* Raw bytes need to be converted to a two-byte hex + * string, plus the terminator. + */ + (void) snprintf(&dst[(2 * i)], 3, "%02x", byte); + } + } + + /* If we're dealing with ASCII, convert trailing spaces to NULs. */ + if (ascii) { + for (i = dstsz - 1; i > 0; i--) { + if (dst[i] == ' ') { + dst[i] = 0; + } else if (dst[i] == 0) { + continue; + } else { + break; + } + } + } + +out: + if (page_changed) { + int rc2; + /* Switch back to page0 before returning. */ + rc2 = smbus_writeb(sc->smbus, + (JEDEC_DTI_PAGE | JEDEC_LSA_PAGE_SET0), 0, 0); + if (rc2 != 0) { + device_printf(sc->dev, + "unable to restore page for offset 0x%04x: %d\n", + offset, rc2); + } + } + + return (rc); +} + +/** + * device_probe() method. Validate the address that was given as a hint, and + * display an error if it's bogus. Make sure that we're dealing with one of the + * SPD versions that we can handle. + * + * @author rpokala + * + * @param[in] dev + * Device being probed. + */ +static int +jedec_dimm_probe(device_t dev) +{ + uint8_t addr; + uint8_t byte; + int rc; + enum dram_type type; + device_t smbus; + + smbus = device_get_parent(dev); + addr = smbus_get_addr(dev); + + /* Don't bother if this isn't an SPD address, or if the LSBit is set.
*/ + if (((addr & 0xf0) != JEDEC_DTI_SPD) || + ((addr & 0x01) != 0)) { + device_printf(dev, + "invalid \"addr\" hint; address must start with \"0x%x\"," + " and the least-significant bit must be 0\n", + JEDEC_DTI_SPD); + rc = ENXIO; + goto out; + } + + /* Try to read the DRAM_TYPE from the SPD. */ + rc = smbus_readb(smbus, addr, SPD_OFFSET_DRAM_TYPE, &byte); + if (rc != 0) { + device_printf(dev, "failed to read dram_type\n"); + goto out; + } + + /* This driver currently only supports DDR3 and DDR4 SPDs. */ + type = (enum dram_type) byte; + switch (type) { + case DRAM_TYPE_DDR3_SDRAM: + rc = BUS_PROBE_DEFAULT; + device_set_desc(dev, "DDR3 DIMM"); + break; + case DRAM_TYPE_DDR4_SDRAM: + rc = BUS_PROBE_DEFAULT; + device_set_desc(dev, "DDR4 DIMM"); + break; + default: + rc = ENXIO; + break; + } + +out: + return (rc); +} + +/** + * SMBus specifies little-endian byte order, but it looks like the TSODs use + * big-endian. Read and convert. + * + * @author avg + * + * @param[in] sc + * Instance-specific context data + * + * @param[in] reg + * The register number to read. + * + * @param[out] val + * Pointer to populate with the value read. + */ +static int +jedec_dimm_readw_be(struct jedec_dimm_softc *sc, uint8_t reg, uint16_t *val) +{ + int rc; + + rc = smbus_readw(sc->smbus, sc->tsod_addr, reg, val); + if (rc != 0) { + goto out; + } + *val = be16toh(*val); + +out: + return (rc); +} + +/** + * Read the temperature data from the TSOD and convert it to the deciKelvin + * value that the sysctl expects. + * + * @author avg + */ +static int +jedec_dimm_temp_sysctl(SYSCTL_HANDLER_ARGS) +{ + uint16_t val; + int rc; + int temp; + device_t dev = arg1; + struct jedec_dimm_softc *sc; + + sc = device_get_softc(dev); + + rc = jedec_dimm_readw_be(sc, TSOD_REG_TEMPERATURE, &val); + if (rc != 0) { + goto out; + } + + /* The three MSBits are flags, and the next bit is a sign bit. 
*/ + temp = val & 0xfff; + if ((val & 0x1000) != 0) + temp = -temp; + /* Each step is 0.0625 degrees, so convert to 1000ths of a degree C. */ + temp *= 625; + /* ... and then convert to 1000ths of a Kelvin */ + temp += 2731500; + /* As a practical matter, few (if any) TSODs are more accurate than + * about a tenth of a degree, so round accordingly. This correlates with + * the "IK" formatting used for this sysctl. + */ + temp = (temp + 500) / 1000; + + rc = sysctl_handle_int(oidp, &temp, 0, req); + +out: + return (rc); +} + +/** + * Check the TSOD's Vendor ID and Device ID against the list of known TSOD + * devices. Return the description, or NULL if this doesn't look like a valid + * TSOD. + * + * @author avg + * + * @param[in] vid + * The Vendor ID of the TSOD device + * + * @param[in] did + * The Device ID of the TSOD device + * + * @return + * The description string, or NULL for a failure to match. + */ +static const char * +jedec_dimm_tsod_match(uint16_t vid, uint16_t did) +{ + const struct jedec_dimm_tsod_dev *d; + int i; + + for (i = 0; i < nitems(known_tsod_devices); i++) { + d = &known_tsod_devices[i]; + if ((vid == d->vendor_id) && ((did >> 8) == d->device_id)) { + return (d->description); + } + } + + /* If no matches for a specific device, then check for a generic + * TSE2004av-compliant device. 
+ */ + if ((did >> 8) == 0x22) { + return ("TSE2004av compliant TSOD"); + } + + return (NULL); +} + +static device_method_t jedec_dimm_methods[] = { + /* Methods from the device interface */ + DEVMETHOD(device_probe, jedec_dimm_probe), + DEVMETHOD(device_attach, jedec_dimm_attach), + DEVMETHOD(device_detach, jedec_dimm_detach), + DEVMETHOD_END +}; + +static driver_t jedec_dimm_driver = { + .name = "jedec_dimm", + .methods = jedec_dimm_methods, + .size = sizeof(struct jedec_dimm_softc), +}; + +static devclass_t jedec_dimm_devclass; + +DRIVER_MODULE(jedec_dimm, smbus, jedec_dimm_driver, jedec_dimm_devclass, 0, 0); +MODULE_DEPEND(jedec_dimm, smbus, SMBUS_MINVER, SMBUS_PREFVER, SMBUS_MAXVER); +MODULE_VERSION(jedec_dimm, 1); + +/* vi: set ts=8 sw=4 sts=8 noet: */ diff --git a/sys/dev/jedec_dimm/jedec_dimm.h b/sys/dev/jedec_dimm/jedec_dimm.h new file mode 100644 index 0000000..f6c5485 --- /dev/null +++ b/sys/dev/jedec_dimm/jedec_dimm.h @@ -0,0 +1,147 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Authors: Ravi Pokala (rpokala@freebsd.org) + * + * Copyright (c) 2018 Panasas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _DEV__JEDEC_DIMM__JEDEC_DIMM_H_ +#define _DEV__JEDEC_DIMM__JEDEC_DIMM_H_ + +/* JEDEC DIMMs include one or more SMBus devices. + * + * At a minimum, they have an EEPROM containing either 256 bytes (DDR3) or 512 + * bytes (DDR4) of "Serial Presence Detect" (SPD) information. The SPD contains + * data used by the memory controller to configure itself, and it also includes + * asset information. The layout of SPD data is defined in: + * + * JEDEC Standard 21-C, Annex K (DDR3) + * JEDEC Standard 21-C, Annex L (DDR4) + * + * DIMMs may also include a "Thermal Sensor on DIMM" (TSOD), which reports + * temperature data. While not strictly required, the TSOD is so often included + * that JEDEC defined standards for single chips which include both SPD and TSOD + * functions. They respond on multiple SMBus addresses, depending on the + * function. + * + * JEDEC Standard 21-C, TSE2002av (DDR3) + * JEDEC Standard 21-C, TSE2004av (DDR4) + */ + +/* TSE2004av defines several Device Type Identifiers (DTIs), which are the high + * nybble of the SMBus address. Addresses with DTIs of PROTECT (or PAGE, which + * has the same value) are essentially "broadcast" addresses; all SPD devices + * respond to them, changing their mode based on the Logical Serial Address + * (LSA) encoded in bits [3:1]. For normal SPD access, bits [3:1] encode the + * DIMM slot number. 
+ */ +#define JEDEC_SPD_PAGE_SIZE 256 +#define JEDEC_DTI_SPD 0xa0 +#define JEDEC_DTI_TSOD 0x30 +#define JEDEC_DTI_PROTECT 0x60 +#define JEDEC_LSA_PROTECT_SET0 0x02 +#define JEDEC_LSA_PROTECT_SET1 0x08 +#define JEDEC_LSA_PROTECT_SET2 0x0a +#define JEDEC_LSA_PROTECT_SET3 0x00 +#define JEDEC_LSA_PROTECT_CLR 0x06 +#define JEDEC_LSA_PROTECT_GET0 0x03 +#define JEDEC_LSA_PROTECT_GET1 0x09 +#define JEDEC_LSA_PROTECT_GET2 0x0b +#define JEDEC_LSA_PROTECT_GET3 0x01 +#define JEDEC_DTI_PAGE 0x60 +#define JEDEC_LSA_PAGE_SET0 0x0c +#define JEDEC_LSA_PAGE_SET1 0x0e +#define JEDEC_LSA_PAGE_GET 0x0d + +/* The offsets and lengths of various SPD bytes are defined in Annex K (DDR3) + * and Annex L (DDR4). Conveniently, the DRAM type is at the same offset for + * both versions. + * + * This list only includes information needed to get the asset information and + * calculate the DIMM capacity. + */ +#define SPD_OFFSET_DRAM_TYPE 2 +#define SPD_OFFSET_DDR3_SDRAM_CAPACITY 4 +#define SPD_OFFSET_DDR3_DIMM_RANKS 7 +#define SPD_OFFSET_DDR3_SDRAM_WIDTH 7 +#define SPD_OFFSET_DDR3_BUS_WIDTH 8 +#define SPD_OFFSET_DDR3_TSOD_PRESENT 32 +#define SPD_OFFSET_DDR3_SERIAL 122 +#define SPD_LEN_DDR3_SERIAL 4 +#define SPD_OFFSET_DDR3_PARTNUM 128 +#define SPD_LEN_DDR3_PARTNUM 18 +#define SPD_OFFSET_DDR4_SDRAM_CAPACITY 4 +#define SPD_OFFSET_DDR4_SDRAM_PKG_TYPE 6 +#define SPD_OFFSET_DDR4_DIMM_RANKS 12 +#define SPD_OFFSET_DDR4_SDRAM_WIDTH 12 +#define SPD_OFFSET_DDR4_BUS_WIDTH 13 +#define SPD_OFFSET_DDR4_TSOD_PRESENT 14 +#define SPD_OFFSET_DDR4_SERIAL 325 +#define SPD_LEN_DDR4_SERIAL 4 +#define SPD_OFFSET_DDR4_PARTNUM 329 +#define SPD_LEN_DDR4_PARTNUM 20 + +/* The "DRAM Type" field of the SPD enumerates various memory technologies which + * have been used over the years. The list is append-only, so we need only refer + * to the latest SPD specification. In this case, Annex L for DDR4. 
+ */ +enum dram_type { + DRAM_TYPE_RESERVED = 0x00, + DRAM_TYPE_FAST_PAGE_MODE = 0x01, + DRAM_TYPE_EDO = 0x02, + DRAM_TYPE_PIPLEINED_NYBBLE = 0x03, + DRAM_TYPE_SDRAM = 0x04, + DRAM_TYPE_ROM = 0x05, + DRAM_TYPE_DDR_SGRAM = 0x06, + DRAM_TYPE_DDR_SDRAM = 0x07, + DRAM_TYPE_DDR2_SDRAM = 0x08, + DRAM_TYPE_DDR2_SDRAM_FBDIMM = 0x09, + DRAM_TYPE_DDR2_SDRAM_FBDIMM_PROBE = 0x0a, + DRAM_TYPE_DDR3_SDRAM = 0x0b, + DRAM_TYPE_DDR4_SDRAM = 0x0c, + DRAM_TYPE_RESERVED_0D = 0x0d, + DRAM_TYPE_DDR4E_SDRAM = 0x0e, + DRAM_TYPE_LPDDR3_SDRAM = 0x0f, + DRAM_TYPE_LPDDR4_SDRAM = 0x10, +}; + +/* The TSOD is accessed using a simple word interface, which is identical + * between TSE2002av (DDR3) and TSE2004av (DDR4). + */ +#define TSOD_REG_CAPABILITES 0 +#define TSOD_REG_CONFIG 1 +#define TSOD_REG_LIM_HIGH 2 +#define TSOD_REG_LIM_LOW 3 +#define TSOD_REG_LIM_CRIT 4 +#define TSOD_REG_TEMPERATURE 5 +#define TSOD_REG_MANUFACTURER 6 +#define TSOD_REG_DEV_REV 7 + +#endif /* _DEV__JEDEC_DIMM__JEDEC_DIMM_H_ */ + +/* vi: set ts=8 sw=4 sts=8 noet: */ diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c index c14fda9..f7d09e6 100644 --- a/sys/dev/mxge/if_mxge.c +++ b/sys/dev/mxge/if_mxge.c @@ -4161,11 +4161,6 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) err = 0; switch (command) { - case SIOCSIFADDR: - case SIOCGIFADDR: - err = ether_ioctl(ifp, command, data); - break; - case SIOCSIFMTU: err = mxge_change_mtu(sc, ifr->ifr_mtu); break; @@ -4289,7 +4284,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) break; default: - err = ENOTTY; + err = ether_ioctl(ifp, command, data); + break; } return err; } diff --git a/sys/dev/nctgpio/nctgpio.c b/sys/dev/nctgpio/nctgpio.c index 30c364f..e0edbd9 100644 --- a/sys/dev/nctgpio/nctgpio.c +++ b/sys/dev/nctgpio/nctgpio.c @@ -140,6 +140,10 @@ struct nuvoton_vendor_device_id { .chip_id = 0xc452, .descr = "Nuvoton NCT5104D (PC-Engines APU)", }, + { + .chip_id = 0xc453, + .descr = "Nuvoton NCT5104D (PC-Engines APU3)", + }, }; static void 
diff --git a/sys/dev/usb/quirk/usb_quirk.c b/sys/dev/usb/quirk/usb_quirk.c index a451ee0..42b7cc7 100644 --- a/sys/dev/usb/quirk/usb_quirk.c +++ b/sys/dev/usb/quirk/usb_quirk.c @@ -136,6 +136,8 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = { USB_QUIRK(CORSAIR, K60, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), /* Quirk for Corsair Vengeance K70 keyboard */ USB_QUIRK(CORSAIR, K70, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), + /* Quirk for Corsair K70 RGB keyboard */ + USB_QUIRK(CORSAIR, K70_RGB, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), /* Quirk for Corsair STRAFE Gaming keyboard */ USB_QUIRK(CORSAIR, STRAFE, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), /* umodem(4) device quirks */ diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs index f191a24..5717a99 100644 --- a/sys/dev/usb/usbdevs +++ b/sys/dev/usb/usbdevs @@ -1520,6 +1520,7 @@ product COREGA FETHER_USB_TXC 0x9601 FEther USB-TXC /* Corsair products */ product CORSAIR K60 0x0a60 Corsair Vengeance K60 keyboard product CORSAIR K70 0x1b09 Corsair Vengeance K70 keyboard +product CORSAIR K70_RGB 0x1b13 Corsair K70 RGB Keyboard product CORSAIR STRAFE 0x1b15 Cossair STRAFE Gaming keyboard /* Creative products */ diff --git a/sys/dev/usb/usbdi.h b/sys/dev/usb/usbdi.h index 202ad89..1a59fbb 100644 --- a/sys/dev/usb/usbdi.h +++ b/sys/dev/usb/usbdi.h @@ -334,7 +334,7 @@ struct usb_device_id { unsigned long driver_info; } __aligned(32); -#define USB_STD_PNP_INFO "M16:mask;U16:vendor;U16:product;L16:product;G16:product;" \ +#define USB_STD_PNP_INFO "M16:mask;U16:vendor;U16:product;L16:release;G16:release;" \ "U8:devclass;U8:devsubclass;U8:devprotocol;" \ "U8:intclass;U8:intsubclass;U8:intprotocol;" #define USB_STD_PNP_HOST_INFO USB_STD_PNP_INFO "T:mode=host;" diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index 216adb5..8903dfe 100644 --- a/sys/geom/mirror/g_mirror.c +++ b/sys/geom/mirror/g_mirror.c @@ -1334,9 +1334,7 @@ g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp) */ switch (bp->bio_cmd) { 
case BIO_READ: { - struct g_mirror_disk *d; struct g_consumer *cp; - int readable; KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read, bp->bio_error); @@ -1347,31 +1345,17 @@ g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp) bp->bio_error); /* - * If there's at least one other disk from which we can - * read the block, retry the request. - */ - readable = 0; - LIST_FOREACH(d, &sc->sc_disks, d_next) - if (d->d_state == G_MIRROR_DISK_STATE_ACTIVE && - !(d->d_flags & G_MIRROR_DISK_FLAG_BROKEN)) - readable++; - - /* * The read error will trigger a syncid bump, so there's * no need to do that here. * - * If we can retry the read from another disk, do so. - * Otherwise, all we can do is kick out the new disk. + * The read error handling for regular requests will + * retry the read from all active mirrors before passing + * the error back up, so there's no need to retry here. */ - if (readable == 0) { - g_mirror_sync_request_free(disk, bp); - g_mirror_event_send(disk, - G_MIRROR_DISK_STATE_DISCONNECTED, - G_MIRROR_EVENT_DONTWAIT); - } else { - g_mirror_sync_reinit(disk, bp, bp->bio_offset); - goto retry_read; - } + g_mirror_sync_request_free(disk, bp); + g_mirror_event_send(disk, + G_MIRROR_DISK_STATE_DISCONNECTED, + G_MIRROR_EVENT_DONTWAIT); return; } G_MIRROR_LOGREQ(3, bp, @@ -1427,7 +1411,6 @@ g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp) g_mirror_sync_reinit(disk, bp, sync->ds_offset); sync->ds_offset += bp->bio_length; -retry_read: G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); sync->ds_consumer->index++; diff --git a/sys/geom/nop/g_nop.c b/sys/geom/nop/g_nop.c index f36472d..743811a 100644 --- a/sys/geom/nop/g_nop.c +++ b/sys/geom/nop/g_nop.c @@ -124,6 +124,11 @@ g_nop_start(struct bio *bp) break; case BIO_GETATTR: sc->sc_getattrs++; + if (sc->sc_physpath && + g_handleattr_str(bp, "GEOM::physpath", sc->sc_physpath)) { + mtx_unlock(&sc->sc_lock); + return; + } break; case BIO_FLUSH: sc->sc_flushes++; @@ -180,7 +185,7 
@@ g_nop_access(struct g_provider *pp, int dr, int dw, int de) static int g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, int ioerror, u_int rfailprob, u_int wfailprob, off_t offset, off_t size, - u_int secsize, u_int stripesize, u_int stripeoffset) + u_int secsize, u_int stripesize, u_int stripeoffset, const char *physpath) { struct g_nop_softc *sc; struct g_geom *gp; @@ -251,6 +256,10 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, sc->sc_explicitsize = explicitsize; sc->sc_stripesize = stripesize; sc->sc_stripeoffset = stripeoffset; + if (physpath && strcmp(physpath, G_NOP_PHYSPATH_PASSTHROUGH)) { + sc->sc_physpath = strndup(physpath, MAXPATHLEN, M_GEOM); + } else + sc->sc_physpath = NULL; sc->sc_error = ioerror; sc->sc_rfailprob = rfailprob; sc->sc_wfailprob = wfailprob; @@ -297,6 +306,7 @@ fail: g_destroy_consumer(cp); g_destroy_provider(newpp); mtx_destroy(&sc->sc_lock); + free(sc->sc_physpath, M_GEOM); g_free(gp->softc); g_destroy_geom(gp); return (error); @@ -312,6 +322,7 @@ g_nop_destroy(struct g_geom *gp, boolean_t force) sc = gp->softc; if (sc == NULL) return (ENXIO); + free(sc->sc_physpath, M_GEOM); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { @@ -346,7 +357,7 @@ g_nop_ctl_create(struct gctl_req *req, struct g_class *mp) struct g_provider *pp; intmax_t *error, *rfailprob, *wfailprob, *offset, *secsize, *size, *stripesize, *stripeoffset; - const char *name; + const char *name, *physpath; char param[16]; int i, *nargs; @@ -429,6 +440,7 @@ g_nop_ctl_create(struct gctl_req *req, struct g_class *mp) gctl_error(req, "Invalid '%s' argument", "stripeoffset"); return; } + physpath = gctl_get_asciiparam(req, "physpath"); for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); @@ -450,7 +462,8 @@ g_nop_ctl_create(struct gctl_req *req, struct g_class *mp) *rfailprob == -1 ? 0 : (u_int)*rfailprob, *wfailprob == -1 ? 
0 : (u_int)*wfailprob, (off_t)*offset, (off_t)*size, (u_int)*secsize, - (u_int)*stripesize, (u_int)*stripeoffset) != 0) { + (u_int)*stripesize, (u_int)*stripeoffset, + physpath) != 0) { return; } } diff --git a/sys/geom/nop/g_nop.h b/sys/geom/nop/g_nop.h index beba43e..34a0526 100644 --- a/sys/geom/nop/g_nop.h +++ b/sys/geom/nop/g_nop.h @@ -32,6 +32,11 @@ #define G_NOP_CLASS_NAME "NOP" #define G_NOP_VERSION 4 #define G_NOP_SUFFIX ".nop" +/* + * Special flag to instruct gnop to passthrough the underlying provider's + * physical path + */ +#define G_NOP_PHYSPATH_PASSTHROUGH "\255" #ifdef _KERNEL #define G_NOP_DEBUG(lvl, ...) do { \ @@ -73,6 +78,7 @@ struct g_nop_softc { uintmax_t sc_cmd2s; uintmax_t sc_readbytes; uintmax_t sc_wrotebytes; + char* sc_physpath; struct mtx sc_lock; }; #endif /* _KERNEL */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 3a26b4e..2a375f2 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1004,7 +1004,7 @@ exec_map_first_page(imgp) if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; - if (vm_page_tryxbusy(ma[i])) + if (!vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, i, diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c index 48f6e64..b7cdafd 100644 --- a/sys/kern/sysv_msg.c +++ b/sys/kern/sysv_msg.c @@ -1493,7 +1493,8 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "Number of message segments"); SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, - NULL, 0, sysctl_msqids, "", "Message queue IDs"); + NULL, 0, sysctl_msqids, "", + "Array of struct msqid_kernel for each potential message queue"); static int msg_prison_check(void *obj, void *data) diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index deae3b0..9f2203d 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -221,7 +221,8 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RWTUN, &seminfo.semaem, 0, "Adjust on exit max value"); 
SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, - NULL, 0, sysctl_sema, "", "Semaphore id pool"); + NULL, 0, sysctl_sema, "", + "Array of struct semid_kernel for each potential semaphore"); static struct syscall_helper_data sem_syscalls[] = { SYSCALL_INIT_HELPER(__semctl), diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index c17d791..6c77ff2 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -190,7 +190,7 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RWTUN, "Enable/Disable attachment to attached segments marked for removal"); SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_shmsegs, "", - "Current number of shared memory segments allocated"); + "Array of struct shmid_kernel for each potential shared memory segment"); static struct sx sysvshmsx; #define SYSVSHM_LOCK() sx_xlock(&sysvshmsx) diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 5569895..812f688 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -36,6 +36,7 @@ SUBDIR= \ alq \ ${_amd_ecc_inject} \ ${_amdsbwd} \ + ${_amdsmn} \ ${_amdtemp} \ amr \ ${_an} \ @@ -631,6 +632,7 @@ _aesni= aesni .endif _amd_ecc_inject=amd_ecc_inject _amdsbwd= amdsbwd +_amdsmn= amdsmn _amdtemp= amdtemp _arcmsr= arcmsr _asmc= asmc diff --git a/sys/modules/amdsmn/Makefile b/sys/modules/amdsmn/Makefile new file mode 100644 index 0000000..1f03027 --- /dev/null +++ b/sys/modules/amdsmn/Makefile @@ -0,0 +1,8 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/dev/amdsmn + +KMOD= amdsmn +SRCS= amdsmn.c bus_if.h device_if.h pci_if.h + +.include <bsd.kmod.mk> diff --git a/sys/modules/i2c/Makefile b/sys/modules/i2c/Makefile index 5e73eb3..fa41c4f 100644 --- a/sys/modules/i2c/Makefile +++ b/sys/modules/i2c/Makefile @@ -14,6 +14,7 @@ SUBDIR = \ iicsmb \ isl \ isl12xx \ + jedec_dimm \ jedec_ts \ nxprtc \ s35390a \ diff --git a/sys/modules/i2c/jedec_dimm/Makefile b/sys/modules/i2c/jedec_dimm/Makefile new file 
mode 100644 index 0000000..35e66c3 --- /dev/null +++ b/sys/modules/i2c/jedec_dimm/Makefile @@ -0,0 +1,7 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/jedec_dimm +KMOD = jedec_dimm +SRCS = jedec_dimm.c jedec_dimm.h bus_if.h device_if.h smbus_if.h + +.include <bsd.kmod.mk> diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 790c431..53a228c 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -2302,6 +2302,14 @@ icmp6_redirect_input(struct mbuf *m, int off) goto bad; } + /* + * Embed scope zone id into next hop address, since + * fib6_lookup_nh_basic() returns address without embedded + * scope zone id. + */ + if (in6_setscope(&nh6.nh_addr, m->m_pkthdr.rcvif, NULL)) + goto freeit; + if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index d473995..a6e2841 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -1449,7 +1449,7 @@ in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) /* - * find the internet address corresponding to a given address. + * find the interface address corresponding to a given IPv6 address. * ifaddr is returned referenced. 
*/ struct in6_ifaddr * diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index c5ce353..e8044f7 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -406,8 +406,11 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) int change = (ndi->linkmtu != mtu); ndi->linkmtu = mtu; - if (change) /* in6_maxmtu may change */ + if (change) { + /* in6_maxmtu may change */ in6_setmaxmtu(); + rt_updatemtu(ifp); + } } else { nd6log((LOG_INFO, "nd6_ra_input: bogus mtu " "mtu=%lu sent from %s; " diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c index a00842e..e5da367 100644 --- a/sys/netinet6/scope6.c +++ b/sys/netinet6/scope6.c @@ -409,7 +409,7 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id) if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL || scope == IPV6_ADDR_SCOPE_LINKLOCAL) { /* - * Currently we use interface indeces as the + * Currently we use interface indices as the * zone IDs for interface-local and link-local * scopes. */ diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index fd53da9..92dbaa9 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -108,7 +108,6 @@ VNET_PCPUSTAT_SYSINIT(ipsec4stat); VNET_PCPUSTAT_SYSUNINIT(ipsec4stat); #endif /* VIMAGE */ -VNET_DEFINE(int, ip4_ah_offsetmask) = 0; /* maybe IP_DF? */ /* DF bit on encap. 
0: clear 1: set 2: copy */ VNET_DEFINE(int, ip4_ipsec_dfbit) = 0; VNET_DEFINE(int, ip4_esp_trans_deflev) = IPSEC_LEVEL_USE; @@ -117,7 +116,6 @@ VNET_DEFINE(int, ip4_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_net_deflev) = IPSEC_LEVEL_USE; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ VNET_DEFINE(int, ip4_ipsec_ecn) = 0; -VNET_DEFINE(int, ip4_esp_randpad) = -1; static VNET_DEFINE(int, ip4_filtertunnel) = 0; #define V_ip4_filtertunnel VNET(ip4_filtertunnel) @@ -192,9 +190,6 @@ SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, "If set, clear type-of-service field when doing AH computation."); -SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_offsetmask), 0, - "If not set, clear offset field mask when doing AH computation."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_dfbit), 0, "Do not fragment bit on encap."); diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 1e73c01..46ce853 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -278,10 +278,8 @@ VNET_DECLARE(int, ip4_esp_trans_deflev); VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); VNET_DECLARE(int, ip4_ah_net_deflev); -VNET_DECLARE(int, ip4_ah_offsetmask); VNET_DECLARE(int, ip4_ipsec_dfbit); VNET_DECLARE(int, ip4_ipsec_ecn); -VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); VNET_DECLARE(int, async_crypto); VNET_DECLARE(int, natt_cksum_policy); @@ -292,10 +290,8 @@ VNET_DECLARE(int, natt_cksum_policy); #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) #define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) -#define V_ip4_ah_offsetmask VNET(ip4_ah_offsetmask) #define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) #define 
V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) -#define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) #define V_async_crypto VNET(async_crypto) #define V_natt_cksum_policy VNET(natt_cksum_policy) diff --git a/sys/netipsec/xform_ah.c b/sys/netipsec/xform_ah.c index 530542d..44d4b1b 100644 --- a/sys/netipsec/xform_ah.c +++ b/sys/netipsec/xform_ah.c @@ -582,6 +582,16 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) error = EACCES; goto bad; } + if (skip + authsize + rplen > m->m_pkthdr.len) { + DPRINTF(("%s: bad mbuf length %u (expecting %lu)" + " for packet in SA %s/%08lx\n", __func__, + m->m_pkthdr.len, (u_long) (skip + authsize + rplen), + ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), + (u_long) ntohl(sav->spi))); + AHSTAT_INC(ahs_badauthl); + error = EACCES; + goto bad; + } AHSTAT_ADD(ahs_ibytes, m->m_pkthdr.len - skip - hl); /* Get crypto descriptors. */ @@ -626,6 +636,9 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* Zeroize the authenticator on the packet. */ m_copyback(m, skip + rplen, authsize, ipseczeroes); + /* Save ah_nxt, since ah pointer can become invalid after "massage" */ + hl = ah->ah_nxt; + /* "Massage" the packet headers for crypto processing. */ error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, skip, ahx->type, 0); @@ -650,7 +663,7 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* These are passed as-is to the callback. 
*/ xd->sav = sav; - xd->nxt = ah->ah_nxt; + xd->nxt = hl; xd->protoff = protoff; xd->skip = skip; xd->cryptoid = cryptoid; diff --git a/sys/opencrypto/cryptodev.c b/sys/opencrypto/cryptodev.c index bda77c7..0784570 100644 --- a/sys/opencrypto/cryptodev.c +++ b/sys/opencrypto/cryptodev.c @@ -443,6 +443,7 @@ cryptof_ioctl( default: CRYPTDEB("invalid cipher"); + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); } @@ -490,6 +491,7 @@ cryptof_ioctl( break; default: CRYPTDEB("invalid mac"); + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); } @@ -503,6 +505,8 @@ cryptof_ioctl( sop->keylen < txform->minkey) { CRYPTDEB("invalid cipher parameters"); error = EINVAL; + SDT_PROBE1(opencrypto, dev, ioctl, error, + __LINE__); goto bail; } @@ -511,6 +515,8 @@ cryptof_ioctl( if ((error = copyin(sop->key, crie.cri_key, crie.cri_klen / 8))) { CRYPTDEB("invalid key"); + SDT_PROBE1(opencrypto, dev, ioctl, error, + __LINE__); goto bail; } if (thash) @@ -523,6 +529,8 @@ cryptof_ioctl( if (sop->mackeylen != thash->keysize) { CRYPTDEB("invalid mac key length"); error = EINVAL; + SDT_PROBE1(opencrypto, dev, ioctl, error, + __LINE__); goto bail; } @@ -532,6 +540,8 @@ cryptof_ioctl( if ((error = copyin(sop->mackey, cria.cri_key, cria.cri_klen / 8))) { CRYPTDEB("invalid mac key"); + SDT_PROBE1(opencrypto, dev, ioctl, + error, __LINE__); goto bail; } } @@ -547,6 +557,8 @@ cryptof_ioctl( error = checkforsoftware(&crid); if (error) { CRYPTDEB("checkforsoftware"); + SDT_PROBE1(opencrypto, dev, ioctl, error, + __LINE__); goto bail; } } else @@ -554,6 +566,7 @@ cryptof_ioctl( error = crypto_newsession(&sid, (txform ? 
&crie : &cria), crid); if (error) { CRYPTDEB("crypto_newsession"); + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; } @@ -564,6 +577,7 @@ cryptof_ioctl( if (cse == NULL) { crypto_freesession(sid); error = EINVAL; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); CRYPTDEB("csecreate"); goto bail; } @@ -596,8 +610,10 @@ bail: case CIOCFSESSION: ses = *(u_int32_t *)data; cse = csefind(fcr, ses); - if (cse == NULL) + if (cse == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); + } csedelete(fcr, cse); error = csefree(cse); break; @@ -627,8 +643,10 @@ bail: case CIOCKEY32: case CIOCKEY232: #endif - if (!crypto_userasymcrypto) + if (!crypto_userasymcrypto) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EPERM); /* XXX compat? */ + } #ifdef COMPAT_FREEBSD32 if (cmd == CIOCKEY32 || cmd == CIOCKEY232) { kop = &kopc; @@ -662,8 +680,12 @@ bail: * fallback to doing them in software. */ *(int *)data = 0; - } else + } else { error = crypto_getfeat((int *)data); + if (error) + SDT_PROBE1(opencrypto, dev, ioctl, error, + __LINE__); + } break; case CIOCFINDDEV: error = cryptodev_find((struct crypt_find_op *)data); @@ -671,12 +693,15 @@ bail: case CIOCCRYPTAEAD: caead = (struct crypt_aead *)data; cse = csefind(fcr, caead->ses); - if (cse == NULL) + if (cse == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); + } error = cryptodev_aead(cse, caead, active_cred, td); break; default: error = EINVAL; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); break; } return (error); @@ -887,18 +912,22 @@ cryptodev_aead( struct cryptodesc *crde = NULL, *crda = NULL; int error; - if (caead->len > 256*1024-4 || caead->aadlen > 256*1024-4) + if (caead->len > 256*1024-4 || caead->aadlen > 256*1024-4) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (E2BIG); + } if (cse->txform == NULL || cse->thash == NULL || caead->tag == NULL || - (caead->len % cse->txform->blocksize) != 0) + 
(caead->len % cse->txform->blocksize) != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); + } uio = &cse->uio; uio->uio_iov = &cse->iovec; uio->uio_iovcnt = 1; uio->uio_offset = 0; - uio->uio_resid = caead->len + caead->aadlen + cse->thash->hashsize; + uio->uio_resid = caead->aadlen + caead->len + cse->thash->hashsize; uio->uio_segflg = UIO_SYSSPACE; uio->uio_rw = UIO_WRITE; uio->uio_td = td; @@ -910,23 +939,28 @@ cryptodev_aead( crp = crypto_getreq(2); if (crp == NULL) { error = ENOMEM; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; } crda = crp->crp_desc; crde = crda->crd_next; - if ((error = copyin(caead->src, cse->uio.uio_iov[0].iov_base, - caead->len))) + if ((error = copyin(caead->aad, cse->uio.uio_iov[0].iov_base, + caead->aadlen))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } - if ((error = copyin(caead->aad, (char *)cse->uio.uio_iov[0].iov_base + - caead->len, caead->aadlen))) + if ((error = copyin(caead->src, (char *)cse->uio.uio_iov[0].iov_base + + caead->aadlen, caead->len))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } - crda->crd_skip = caead->len; + crda->crd_skip = 0; crda->crd_len = caead->aadlen; - crda->crd_inject = caead->len + caead->aadlen; + crda->crd_inject = caead->aadlen + caead->len; crda->crd_alg = cse->mac; crda->crd_key = cse->mackey; @@ -936,15 +970,15 @@ cryptodev_aead( crde->crd_flags |= CRD_F_ENCRYPT; else crde->crd_flags &= ~CRD_F_ENCRYPT; - /* crde->crd_skip set below */ + crde->crd_skip = caead->aadlen; crde->crd_len = caead->len; - crde->crd_inject = 0; + crde->crd_inject = caead->aadlen; crde->crd_alg = cse->cipher; crde->crd_key = cse->key; crde->crd_klen = cse->keylen * 8; - crp->crp_ilen = caead->len + caead->aadlen; + crp->crp_ilen = caead->aadlen + caead->len; crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIMM | (caead->flags & COP_F_BATCH); crp->crp_buf = (caddr_t)&cse->uio.uio_iov; @@ -955,23 +989,27 @@ cryptodev_aead( if 
(caead->iv) { if (caead->ivlen > sizeof cse->tmp_iv) { error = EINVAL; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; } - if ((error = copyin(caead->iv, cse->tmp_iv, caead->ivlen))) + if ((error = copyin(caead->iv, cse->tmp_iv, caead->ivlen))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } bcopy(cse->tmp_iv, crde->crd_iv, caead->ivlen); crde->crd_flags |= CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; - crde->crd_skip = 0; } else { crde->crd_flags |= CRD_F_IV_PRESENT; - crde->crd_skip = cse->txform->blocksize; + crde->crd_skip += cse->txform->blocksize; crde->crd_len -= cse->txform->blocksize; } if ((error = copyin(caead->tag, (caddr_t)cse->uio.uio_iov[0].iov_base + - caead->len + caead->aadlen, cse->thash->hashsize))) + caead->len + caead->aadlen, cse->thash->hashsize))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } again: /* * Let the dispatch run unlocked, then, interlock against the @@ -986,8 +1024,10 @@ again: error = msleep(crp, &cse->lock, PWAIT, "crydev", 0); mtx_unlock(&cse->lock); - if (error != 0) + if (error != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if (crp->crp_etype == EAGAIN) { crp->crp_etype = 0; @@ -997,21 +1037,28 @@ again: if (crp->crp_etype != 0) { error = crp->crp_etype; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; } if (cse->error) { error = cse->error; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; } - if (caead->dst && (error = copyout(cse->uio.uio_iov[0].iov_base, - caead->dst, caead->len))) + if (caead->dst && (error = copyout( + (caddr_t)cse->uio.uio_iov[0].iov_base + caead->aadlen, caead->dst, + caead->len))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if ((error = copyout((caddr_t)cse->uio.uio_iov[0].iov_base + - caead->len + caead->aadlen, caead->tag, cse->thash->hashsize))) + caead->aadlen + caead->len, caead->tag, cse->thash->hashsize))) { + SDT_PROBE1(opencrypto, dev, 
ioctl, error, __LINE__); goto bail; + } bail: crypto_freereq(crp); @@ -1050,6 +1097,7 @@ cryptodev_key(struct crypt_kop *kop) int in, out, size, i; if (kop->crk_iparams + kop->crk_oparams > CRK_MAXPARAM) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EFBIG); } @@ -1059,30 +1107,38 @@ cryptodev_key(struct crypt_kop *kop) case CRK_MOD_EXP: if (in == 3 && out == 1) break; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); case CRK_MOD_EXP_CRT: if (in == 6 && out == 1) break; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); case CRK_DSA_SIGN: if (in == 5 && out == 2) break; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); case CRK_DSA_VERIFY: if (in == 7 && out == 0) break; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); case CRK_DH_COMPUTE_KEY: if (in == 3 && out == 1) break; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); default: + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); } krp = (struct cryptkop *)malloc(sizeof *krp, M_XDATA, M_WAITOK|M_ZERO); - if (!krp) + if (!krp) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (ENOMEM); + } krp->krp_op = kop->crk_op; krp->krp_status = kop->crk_status; krp->krp_iparams = kop->crk_iparams; @@ -1092,9 +1148,11 @@ cryptodev_key(struct crypt_kop *kop) krp->krp_callback = (int (*) (struct cryptkop *)) cryptodevkey_cb; for (i = 0; i < CRK_MAXPARAM; i++) { - if (kop->crk_param[i].crp_nbits > 65536) + if (kop->crk_param[i].crp_nbits > 65536) { /* Limit is the same as in OpenBSD */ + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto fail; + } krp->krp_param[i].crp_nbits = kop->crk_param[i].crp_nbits; } for (i = 0; i < krp->krp_iparams + krp->krp_oparams; i++) { @@ -1105,22 +1163,28 @@ cryptodev_key(struct crypt_kop *kop) if (i >= krp->krp_iparams) continue; error = copyin(kop->crk_param[i].crp_p, krp->krp_param[i].crp_p, size); - if (error) + if (error) { 
+ SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto fail; + } } error = crypto_kdispatch(krp); - if (error) + if (error) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto fail; + } error = tsleep(krp, PSOCK, "crydev", 0); if (error) { /* XXX can this happen? if so, how do we recover? */ + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto fail; } kop->crk_crid = krp->krp_crid; /* device that did the work */ if (krp->krp_status != 0) { error = krp->krp_status; + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto fail; } @@ -1129,8 +1193,10 @@ cryptodev_key(struct crypt_kop *kop) if (size == 0) continue; error = copyout(krp->krp_param[i].crp_p, kop->crk_param[i].crp_p, size); - if (error) + if (error) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto fail; + } } fail: diff --git a/sys/sys/param.h b/sys/sys/param.h index dba258e..249b325 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1101510 /* Master, propagated to newvers */ +#define __FreeBSD_version 1101511 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h index 1ff09de..a5c9dc6 100644 --- a/sys/sys/vmmeter.h +++ b/sys/sys/vmmeter.h @@ -202,13 +202,12 @@ u_int vm_meter_cnt(size_t); #endif -/* systemwide totals computed every five seconds */ struct vmtotal { int16_t t_rq; /* length of the run queue */ - int16_t t_dw; /* jobs in ``disk wait'' (neg priority) */ - int16_t t_pw; /* jobs in page wait */ - int16_t t_sl; /* jobs sleeping in core */ - int16_t t_sw; /* swapped out runnable/short block jobs */ + int16_t t_dw; /* threads in ``disk wait'' (neg priority) */ + int16_t t_pw; /* threads in page wait */ + int16_t t_sl; /* threads sleeping in core */ + int16_t t_sw; /* swapped out runnable/short block threads */ int32_t t_vm; /* total virtual memory */ 
int32_t t_avm; /* active virtual memory */ int32_t t_rm; /* total real memory in use */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 22f1c0f..d827256 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1083,16 +1083,16 @@ swap_pager_unswapped(vm_page_t m) /* * swap_pager_getpages() - bring pages in from swap * - * Attempt to page in the pages in array "m" of length "count". The caller - * may optionally specify that additional pages preceding and succeeding - * the specified range be paged in. The number of such pages is returned - * in the "rbehind" and "rahead" parameters, and they will be in the - * inactive queue upon return. + * Attempt to page in the pages in array "ma" of length "count". The + * caller may optionally specify that additional pages preceding and + * succeeding the specified range be paged in. The number of such pages + * is returned in the "rbehind" and "rahead" parameters, and they will + * be in the inactive queue upon return. * - * The pages in "m" must be busied and will remain busied upon return. + * The pages in "ma" must be busied and will remain busied upon return. 
*/ static int -swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, +swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead) { struct buf *bp; @@ -1107,7 +1107,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, bp = getpbuf(&nsw_rcount); VM_OBJECT_WLOCK(object); - if (!swap_pager_haspage(object, m[0]->pindex, &maxbehind, &maxahead)) { + if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) { relpbuf(bp, &nsw_rcount); return (VM_PAGER_FAIL); } @@ -1119,15 +1119,15 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, KASSERT(reqcount - 1 <= maxahead, ("page count %d extends beyond swap block", reqcount)); *rahead = imin(*rahead, maxahead - (reqcount - 1)); - pindex = m[reqcount - 1]->pindex; - msucc = TAILQ_NEXT(m[reqcount - 1], listq); + pindex = ma[reqcount - 1]->pindex; + msucc = TAILQ_NEXT(ma[reqcount - 1], listq); if (msucc != NULL && msucc->pindex - pindex - 1 < *rahead) *rahead = msucc->pindex - pindex - 1; } if (rbehind != NULL) { *rbehind = imin(*rbehind, maxbehind); - pindex = m[0]->pindex; - mpred = TAILQ_PREV(m[0], pglist, listq); + pindex = ma[0]->pindex; + mpred = TAILQ_PREV(ma[0], pglist, listq); if (mpred != NULL && pindex - mpred->pindex - 1 < *rbehind) *rbehind = pindex - mpred->pindex - 1; } @@ -1138,7 +1138,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, shift = rbehind != NULL ? *rbehind : 0; if (shift != 0) { for (i = 1; i <= shift; i++) { - p = vm_page_alloc(object, m[0]->pindex - i, + p = vm_page_alloc(object, ma[0]->pindex - i, VM_ALLOC_NORMAL); if (p == NULL) { /* Shift allocated pages to the left. 
*/ @@ -1153,11 +1153,11 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, *rbehind = shift; } for (i = 0; i < reqcount; i++) - bp->b_pages[i + shift] = m[i]; + bp->b_pages[i + shift] = ma[i]; if (rahead != NULL) { for (i = 0; i < *rahead; i++) { p = vm_page_alloc(object, - m[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL); + ma[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL); if (p == NULL) break; bp->b_pages[shift + reqcount + i] = p; @@ -1202,7 +1202,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, * Instead, we look at the one page we are interested in which we * still hold a lock on even through the I/O completion. * - * The other pages in our m[] array are also released on completion, + * The other pages in our ma[] array are also released on completion, * so we cannot assume they are valid anymore either. * * NOTE: b_blkno is destroyed by the call to swapdev_strategy @@ -1216,8 +1216,8 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, * is set in the metadata for each page in the request. */ VM_OBJECT_WLOCK(object); - while ((m[0]->oflags & VPO_SWAPINPROG) != 0) { - m[0]->oflags |= VPO_SWAPSLEEP; + while ((ma[0]->oflags & VPO_SWAPINPROG) != 0) { + ma[0]->oflags |= VPO_SWAPSLEEP; PCPU_INC(cnt.v_intrans); if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP, "swread", hz * 20)) { @@ -1231,7 +1231,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, * If we had an unrecoverable read error pages will not be valid. */ for (i = 0; i < reqcount; i++) - if (m[i]->valid != VM_PAGE_BITS_ALL) + if (ma[i]->valid != VM_PAGE_BITS_ALL) return (VM_PAGER_ERROR); return (VM_PAGER_OK); @@ -1251,12 +1251,12 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, * swap_pager_getpages(). 
*/ static int -swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, +swap_pager_getpages_async(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { int r, error; - r = swap_pager_getpages(object, m, count, rbehind, rahead); + r = swap_pager_getpages(object, ma, count, rbehind, rahead); VM_OBJECT_WUNLOCK(object); switch (r) { case VM_PAGER_OK: @@ -1271,7 +1271,7 @@ swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, default: panic("unhandled swap_pager_getpages() error %d", r); } - (iodone)(arg, m, count, error); + (iodone)(arg, ma, count, error); VM_OBJECT_WLOCK(object); return (r); @@ -1300,16 +1300,16 @@ swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, * We need to unbusy the rest on I/O completion. */ static void -swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, +swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, int flags, int *rtvals) { int i, n; boolean_t sync; - if (count && m[0]->object != object) { + if (count && ma[0]->object != object) { panic("swap_pager_putpages: object mismatch %p/%p", object, - m[0]->object + ma[0]->object ); } @@ -1387,7 +1387,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, VM_OBJECT_WLOCK(object); for (j = 0; j < n; ++j) { - vm_page_t mreq = m[i+j]; + vm_page_t mreq = ma[i+j]; swp_pager_meta_build( mreq->object, diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 92c761b..e829bd7 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1133,6 +1133,10 @@ readrest: */ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; + if ((fault_flags & VM_FAULT_WIRE) == 0) { + prot &= ~VM_PROT_WRITE; + fault_type &= ~VM_PROT_WRITE; + } if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { vm_page_lock(fs.first_m); @@ -1217,6 +1221,12 @@ readrest: * write-enabled after all. 
*/ prot &= retry_prot; + fault_type &= retry_prot; + if (prot == 0) { + release_page(&fs); + unlock_and_deallocate(&fs); + goto RetryFault; + } } } diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c index ce3289e..65b65e2 100644 --- a/sys/vm/vm_reserv.c +++ b/sys/vm/vm_reserv.c @@ -231,7 +231,7 @@ static long vm_reserv_reclaimed; SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations"); -static void vm_reserv_break(vm_reserv_t rv, vm_page_t m); +static void vm_reserv_break(vm_reserv_t rv); static void vm_reserv_depopulate(vm_reserv_t rv, int index); static vm_reserv_t vm_reserv_from_page(vm_page_t m); static boolean_t vm_reserv_has_pindex(vm_reserv_t rv, @@ -726,16 +726,15 @@ found: } /* - * Breaks the given reservation. Except for the specified free page, all free - * pages in the reservation are returned to the physical memory allocator. - * The reservation's population count and map are reset to their initial - * state. + * Breaks the given reservation. All free pages in the reservation + * are returned to the physical memory allocator. The reservation's + * population count and map are reset to their initial state. * * The given reservation must not be in the partially populated reservation * queue. The free page queue lock must be held. */ static void -vm_reserv_break(vm_reserv_t rv, vm_page_t m) +vm_reserv_break(vm_reserv_t rv) { int begin_zeroes, hi, i, lo; @@ -746,18 +745,7 @@ vm_reserv_break(vm_reserv_t rv, vm_page_t m) ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv)); LIST_REMOVE(rv, objq); rv->object = NULL; - if (m != NULL) { - /* - * Since the reservation is being broken, there is no harm in - * abusing the population map to stop "m" from being returned - * to the physical memory allocator. 
- */ - i = m - rv->pages; - KASSERT(popmap_is_clear(rv->popmap, i), - ("vm_reserv_break: reserv %p's popmap is corrupted", rv)); - popmap_set(rv->popmap, i); - rv->popcnt++; - } + rv->pages->psind = 0; i = hi = 0; do { /* Find the next 0 bit. Any previous 0 bits are < "hi". */ @@ -818,7 +806,7 @@ vm_reserv_break_all(vm_object_t object) TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); rv->inpartpopq = FALSE; } - vm_reserv_break(rv, NULL); + vm_reserv_break(rv); } mtx_unlock(&vm_page_queue_free_mtx); } @@ -927,7 +915,7 @@ vm_reserv_reclaim(vm_reserv_t rv) ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv)); TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); rv->inpartpopq = FALSE; - vm_reserv_break(rv, NULL); + vm_reserv_break(rv); vm_reserv_reclaimed++; } diff --git a/sys/x86/iommu/intel_gas.c b/sys/x86/iommu/intel_gas.c index d4aca10..8cebe37 100644 --- a/sys/x86/iommu/intel_gas.c +++ b/sys/x86/iommu/intel_gas.c @@ -79,7 +79,7 @@ intel_gas_init(void) dmar_map_entry_zone = uma_zcreate("DMAR_MAP_ENTRY", sizeof(struct dmar_map_entry), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, 0); + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP); } SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL); diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 9cbdca9..0db3d9b 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -520,6 +520,9 @@ native_lapic_init(vm_paddr_t addr) do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; + } else if (cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) >= 0x12) { + arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c index cd10782..41e37d7 100644 --- a/sys/x86/x86/mp_x86.c +++ b/sys/x86/x86/mp_x86.c @@ -74,15 +74,6 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> #include <machine/cpu.h> -#define WARMBOOT_TARGET 0 -#define WARMBOOT_OFF (KERNBASE + 0x0467) -#define WARMBOOT_SEG (KERNBASE + 
0x0469) - -#define CMOS_REG (0x70) -#define CMOS_DATA (0x71) -#define BIOS_RESET (0x0f) -#define BIOS_WARM (0x0a) - /* lock region used by kernel profiling */ int mcount_lock; |