author      dim <dim@FreeBSD.org>  2015-02-17 19:53:41 +0000
committer   dim <dim@FreeBSD.org>  2015-02-17 19:53:41 +0000
commit      d27bd4650ea928097e260433cb5c69be0dda440f (patch)
tree        9b840acec4a7a5a64b3092e4f4cbb8197e629360
parent      9377b5ad0feb5dd018ed6cfc6378ac19c1252dfe (diff)
parent      68a4902d98c88ebff2f1dfd8bee849d62233ba60 (diff)
download    FreeBSD-src-d27bd4650ea928097e260433cb5c69be0dda440f.zip
            FreeBSD-src-d27bd4650ea928097e260433cb5c69be0dda440f.tar.gz
Merge ^/head r278756 through r278915.
-rw-r--r--  Makefile.inc1 | 2
-rw-r--r--  ObsoleteFiles.inc | 2
-rw-r--r--  bin/ln/symlink.7 | 3
-rw-r--r--  bin/pkill/tests/pkill-j_test.sh | 141
-rw-r--r--  bin/sh/cd.c | 12
-rw-r--r--  bin/sh/eval.c | 2
-rw-r--r--  bin/sh/expand.c | 123
-rw-r--r--  bin/sh/mail.c | 2
-rw-r--r--  bin/sh/memalloc.c | 12
-rw-r--r--  bin/sh/memalloc.h | 1
-rw-r--r--  bin/sh/miscbltin.c | 2
-rw-r--r--  bin/sh/nodetypes | 2
-rw-r--r--  bin/sh/parser.c | 2
-rw-r--r--  bin/sh/tests/expansion/Makefile | 2
-rw-r--r--  bin/sh/tests/expansion/ifs5.0 | 4
-rw-r--r--  bin/sh/tests/expansion/pathname5.0 | 3
-rw-r--r--  bin/sh/var.c | 2
-rw-r--r--  contrib/elftoolchain/addr2line/addr2line.c | 2
-rw-r--r--  contrib/elftoolchain/common/_elftc.h | 15
-rw-r--r--  contrib/elftoolchain/common/elfdefinitions.h | 9
-rw-r--r--  contrib/elftoolchain/elfcopy/main.c | 2
-rw-r--r--  contrib/elftoolchain/elfcopy/sections.c | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/_libdwarf.h | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_attrval.c | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3 | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3 | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3 | 6
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_hasattr.3 | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_reloc.c | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3 | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/dwarf_whatattr.3 | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/libdwarf.c | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/libdwarf.h | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/libdwarf_elf_init.c | 2
-rw-r--r--  contrib/elftoolchain/libdwarf/libdwarf_reloc.c | 2
-rw-r--r--  contrib/elftoolchain/libelf/_libelf_config.h | 2
-rw-r--r--  contrib/elftoolchain/libelf/elf.3 | 2
-rw-r--r--  contrib/elftoolchain/libelf/elf_scn.c | 2
-rw-r--r--  contrib/elftoolchain/libelf/libelf_ar_util.c | 3
-rw-r--r--  contrib/elftoolchain/libelf/libelf_convert.m4 | 2
-rw-r--r--  contrib/elftoolchain/nm/nm.1 | 4
-rw-r--r--  contrib/elftoolchain/nm/nm.c | 2
-rw-r--r--  contrib/elftoolchain/readelf/readelf.c | 46
-rw-r--r--  contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 5
-rw-r--r--  lib/libc/gen/dlopen.3 | 4
-rw-r--r--  lib/libc/gen/fstab.c | 2
-rw-r--r--  lib/libc/gen/getgrent.c | 2
-rw-r--r--  lib/libc/gen/getpwent.c | 2
-rw-r--r--  lib/libc/gen/ulimit.c | 12
-rw-r--r--  lib/libc/include/libc_private.h | 2
-rw-r--r--  lib/libc/sparc64/sys/Makefile.inc | 2
-rw-r--r--  lib/libc/sparc64/sys/sigaction1.S (renamed from lib/libc/sparc64/sys/sigaction.S) | 5
-rw-r--r--  lib/libc/sys/Makefile.inc | 8
-rw-r--r--  lib/libcompat/4.3/rexec.c | 8
-rw-r--r--  lib/libelftc/elftc_version.c | 2
-rw-r--r--  lib/liblzma/config.h | 1
-rw-r--r--  share/man/man4/Makefile | 1
-rw-r--r--  share/man/man4/udl.4 | 67
-rw-r--r--  share/man/man4/wlan.4 | 4
-rw-r--r--  sys/amd64/amd64/mp_machdep.c | 1
-rw-r--r--  sys/arm/arm/cpuinfo.c | 8
-rw-r--r--  sys/arm/arm/db_trace.c | 340
-rw-r--r--  sys/arm/arm/unwind.c | 369
-rw-r--r--  sys/arm/broadcom/bcm2835/bcm2835_bsc.c | 2
-rw-r--r--  sys/arm/broadcom/bcm2835/bcm2835_bscvar.h | 5
-rw-r--r--  sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h | 47
-rw-r--r--  sys/arm/include/stack.h | 18
-rw-r--r--  sys/boot/fdt/dts/arm/bcm2835.dtsi | 4
-rw-r--r--  sys/conf/files | 20
-rw-r--r--  sys/conf/files.arm | 1
-rw-r--r--  sys/conf/kern.mk | 1
-rw-r--r--  sys/conf/kern.pre.mk | 4
-rw-r--r--  sys/conf/kmod.mk | 1
-rw-r--r--  sys/conf/options | 3
-rw-r--r--  sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h | 2
-rw-r--r--  sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c | 3
-rw-r--r--  sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c | 55
-rw-r--r--  sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c | 13
-rw-r--r--  sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c | 50
-rw-r--r--  sys/contrib/rdma/krping/krping.c | 18
-rw-r--r--  sys/dev/acpica/acpi_pcib.c | 7
-rw-r--r--  sys/dev/ath/if_ath_pci.c | 35
-rw-r--r--  sys/dev/atkbdc/atkbd.c | 92
-rw-r--r--  sys/dev/atkbdc/atkbdc.c | 39
-rw-r--r--  sys/dev/atkbdc/atkbdcreg.h | 5
-rw-r--r--  sys/dev/atkbdc/psm.c | 22
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c | 35
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c | 14
-rw-r--r--  sys/dev/cxgbe/iw_cxgbe/cq.c | 3
-rw-r--r--  sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h | 5
-rw-r--r--  sys/dev/cxgbe/iw_cxgbe/mem.c | 43
-rw-r--r--  sys/dev/fb/fbd.c | 3
-rw-r--r--  sys/dev/ic/hd64570.h | 372
-rw-r--r--  sys/dev/lmc/if_lmc.h | 113
-rw-r--r--  sys/dev/netmap/netmap.c | 19
-rw-r--r--  sys/dev/pci/pci.c | 2
-rw-r--r--  sys/dev/random/dummy_rng.c | 10
-rw-r--r--  sys/dev/random/random_adaptors.c | 6
-rw-r--r--  sys/dev/random/randomdev.c | 11
-rw-r--r--  sys/dev/random/randomdev.h | 4
-rw-r--r--  sys/dev/sfxge/common/efx_ev.c | 6
-rw-r--r--  sys/dev/sfxge/common/efx_mac.c | 4
-rw-r--r--  sys/dev/sfxge/common/efx_mcdi.c | 6
-rw-r--r--  sys/dev/sfxge/common/efx_nic.c | 3
-rw-r--r--  sys/dev/sfxge/common/efx_rx.c | 7
-rw-r--r--  sys/dev/sfxge/common/efx_tx.c | 4
-rw-r--r--  sys/dev/sfxge/common/siena_mon.c | 12
-rw-r--r--  sys/dev/sfxge/common/siena_nic.c | 3
-rw-r--r--  sys/dev/sfxge/common/siena_vpd.c | 6
-rw-r--r--  sys/dev/sfxge/sfxge_port.c | 61
-rw-r--r--  sys/dev/sfxge/sfxge_rx.c | 6
-rw-r--r--  sys/dev/sfxge/sfxge_tx.c | 27
-rw-r--r--  sys/dev/usb/controller/musb_otg.c | 9
-rw-r--r--  sys/dev/usb/controller/uhci.c | 3
-rw-r--r--  sys/dev/usb/usbdevs | 23
-rw-r--r--  sys/dev/usb/video/udl.c | 1075
-rw-r--r--  sys/dev/usb/video/udl.h | 311
-rw-r--r--  sys/dev/videomode/Makefile.ediddevs | 16
-rw-r--r--  sys/dev/videomode/Makefile.videomode | 18
-rw-r--r--  sys/dev/videomode/devlist2h.awk | 193
-rw-r--r--  sys/dev/videomode/edid.c | 647
-rw-r--r--  sys/dev/videomode/ediddevs | 106
-rw-r--r--  sys/dev/videomode/ediddevs.h | 91
-rw-r--r--  sys/dev/videomode/ediddevs_data.h | 107
-rw-r--r--  sys/dev/videomode/edidreg.h | 256
-rw-r--r--  sys/dev/videomode/edidvar.h | 96
-rw-r--r--  sys/dev/videomode/modelines | 181
-rw-r--r--  sys/dev/videomode/modelines2c.awk | 150
-rw-r--r--  sys/dev/videomode/pickmode.c | 205
-rw-r--r--  sys/dev/videomode/test.c | 26
-rw-r--r--  sys/dev/videomode/vesagtf.c | 703
-rw-r--r--  sys/dev/videomode/vesagtf.h | 86
-rw-r--r--  sys/dev/videomode/videomode.c | 130
-rw-r--r--  sys/dev/videomode/videomode.h | 74
-rw-r--r--  sys/dev/vt/vt_core.c | 15
-rw-r--r--  sys/dev/wpi/if_wpi.c | 120
-rw-r--r--  sys/dev/wpi/if_wpireg.h | 6
-rw-r--r--  sys/dev/xen/xenstore/xenstore_dev.c | 54
-rw-r--r--  sys/fs/ext2fs/ext2_htree.c | 2
-rw-r--r--  sys/fs/ext2fs/ext2_vfsops.c | 11
-rw-r--r--  sys/i386/i386/mp_machdep.c | 1
-rw-r--r--  sys/kern/imgact_elf.c | 3
-rw-r--r--  sys/kern/kern_procctl.c | 10
-rw-r--r--  sys/kern/subr_taskqueue.c | 37
-rw-r--r--  sys/kern/uipc_mbuf.c | 12
-rw-r--r--  sys/kern/uipc_socket.c | 12
-rw-r--r--  sys/kern/vfs_subr.c | 15
-rw-r--r--  sys/mips/conf/AR71XX_BASE | 1
-rw-r--r--  sys/modules/cxgbe/if_cxgbe/Makefile | 1
-rw-r--r--  sys/modules/drm2/radeonkms/Makefile | 2
-rw-r--r--  sys/modules/ibcore/Makefile | 6
-rw-r--r--  sys/modules/ipoib/Makefile | 2
-rw-r--r--  sys/modules/mlx4/Makefile | 3
-rw-r--r--  sys/modules/mlx4ib/Makefile | 4
-rw-r--r--  sys/modules/mlxen/Makefile | 3
-rw-r--r--  sys/modules/mthca/Makefile | 2
-rw-r--r--  sys/modules/usb/udl/Makefile | 12
-rw-r--r--  sys/modules/videomode/Makefile | 40
-rw-r--r--  sys/net/if_bridge.c | 11
-rw-r--r--  sys/net/pfvar.h | 11
-rw-r--r--  sys/netinet6/frag6.c | 51
-rw-r--r--  sys/netinet6/ip6_output.c | 111
-rw-r--r--  sys/netinet6/ip6_var.h | 2
-rw-r--r--  sys/netpfil/pf/pf.c | 51
-rw-r--r--  sys/netpfil/pf/pf.h | 2
-rw-r--r--  sys/netpfil/pf/pf_mtag.h | 1
-rw-r--r--  sys/netpfil/pf/pf_norm.c | 906
-rw-r--r--  sys/ofed/drivers/infiniband/core/Makefile | 32
-rw-r--r--  sys/ofed/drivers/infiniband/core/addr.c | 278
-rw-r--r--  sys/ofed/drivers/infiniband/core/cache.c | 117
-rw-r--r--  sys/ofed/drivers/infiniband/core/cm.c | 283
-rw-r--r--  sys/ofed/drivers/infiniband/core/cm_msgs.h | 45
-rw-r--r--  sys/ofed/drivers/infiniband/core/cma.c | 1242
-rw-r--r--  sys/ofed/drivers/infiniband/core/core_priv.h | 3
-rw-r--r--  sys/ofed/drivers/infiniband/core/device.c | 65
-rw-r--r--  sys/ofed/drivers/infiniband/core/fmr_pool.c | 1
-rw-r--r--  sys/ofed/drivers/infiniband/core/iwcm.c | 33
-rw-r--r--  sys/ofed/drivers/infiniband/core/local_sa.c | 1273
-rw-r--r--  sys/ofed/drivers/infiniband/core/mad.c | 755
-rw-r--r--  sys/ofed/drivers/infiniband/core/mad_priv.h | 35
-rw-r--r--  sys/ofed/drivers/infiniband/core/mad_rmpp.c | 2
-rw-r--r--  sys/ofed/drivers/infiniband/core/multicast.c | 60
-rw-r--r--  sys/ofed/drivers/infiniband/core/notice.c | 749
-rw-r--r--  sys/ofed/drivers/infiniband/core/packer.c | 1
-rw-r--r--  sys/ofed/drivers/infiniband/core/peer_mem.c | 461
-rw-r--r--  sys/ofed/drivers/infiniband/core/sa.h | 39
-rw-r--r--  sys/ofed/drivers/infiniband/core/sa_query.c | 412
-rw-r--r--  sys/ofed/drivers/infiniband/core/smi.c | 8
-rw-r--r--  sys/ofed/drivers/infiniband/core/sysfs.c | 129
-rw-r--r--  sys/ofed/drivers/infiniband/core/ucm.c | 62
-rw-r--r--  sys/ofed/drivers/infiniband/core/ucma.c | 188
-rw-r--r--  sys/ofed/drivers/infiniband/core/ud_header.c | 63
-rw-r--r--  sys/ofed/drivers/infiniband/core/umem.c | 559
-rw-r--r--  sys/ofed/drivers/infiniband/core/user_mad.c | 237
-rw-r--r--  sys/ofed/drivers/infiniband/core/uverbs.h | 95
-rw-r--r--  sys/ofed/drivers/infiniband/core/uverbs_cmd.c | 2848
-rw-r--r--  sys/ofed/drivers/infiniband/core/uverbs_main.c | 758
-rw-r--r--  sys/ofed/drivers/infiniband/core/uverbs_marshall.c | 5
-rw-r--r--  sys/ofed/drivers/infiniband/core/verbs.c | 663
-rw-r--r--  sys/ofed/drivers/infiniband/debug/memtrack.c | 658
-rw-r--r--  sys/ofed/drivers/infiniband/debug/memtrack.h | 71
-rw-r--r--  sys/ofed/drivers/infiniband/debug/mtrack.h | 778
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/Makefile | 31
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/ah.c | 24
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c | 23
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/cm.c | 60
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/cq.c | 140
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c | 4
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/mad.c | 98
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/main.c | 1544
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/mcg.c | 30
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c | 116
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h | 46
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h | 76
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/mr.c | 248
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/qp.c | 368
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c | 4
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c | 50
-rw-r--r--  sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c | 3
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/ipoib/Makefile | 11
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h | 2
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c | 1
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 8
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c | 10
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c | 2
-rw-r--r--  sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c | 2
-rw-r--r--  sys/ofed/drivers/net/mlx4/Makefile | 2
-rw-r--r--  sys/ofed/drivers/net/mlx4/en_rx.c | 2
-rw-r--r--  sys/ofed/include/linux/device.h | 13
-rw-r--r--  sys/ofed/include/linux/linux_compat.c | 14
-rw-r--r--  sys/ofed/include/linux/printk.h | 40
-rw-r--r--  sys/ofed/include/rdma/ib_addr.h | 124
-rw-r--r--  sys/ofed/include/rdma/ib_cache.h | 16
-rw-r--r--  sys/ofed/include/rdma/ib_cm.h | 4
-rw-r--r--  sys/ofed/include/rdma/ib_mad.h | 9
-rw-r--r--  sys/ofed/include/rdma/ib_pack.h | 2
-rw-r--r--  sys/ofed/include/rdma/ib_peer_mem.h | 59
-rw-r--r--  sys/ofed/include/rdma/ib_sa.h | 177
-rw-r--r--  sys/ofed/include/rdma/ib_umem.h | 75
-rw-r--r--  sys/ofed/include/rdma/ib_user_verbs.h | 354
-rw-r--r--  sys/ofed/include/rdma/ib_user_verbs_exp.h | 204
-rw-r--r--  sys/ofed/include/rdma/ib_verbs.h | 852
-rw-r--r--  sys/ofed/include/rdma/ib_verbs_exp.h | 100
-rw-r--r--  sys/ofed/include/rdma/iw_cm.h | 15
-rw-r--r--  sys/ofed/include/rdma/peer_mem.h | 73
-rw-r--r--  sys/ofed/include/rdma/rdma_cm.h | 77
-rw-r--r--  sys/ofed/include/rdma/rdma_user_cm.h | 9
-rw-r--r--  sys/powerpc/powerpc/db_trace.c | 3
-rw-r--r--  sys/sparc64/include/asm.h | 10
-rw-r--r--  sys/sys/fbio.h | 2
-rw-r--r--  sys/sys/mbuf.h | 12
-rw-r--r--  sys/sys/param.h | 2
-rw-r--r--  sys/sys/taskqueue.h | 3
-rw-r--r--  sys/vm/memguard.c | 6
-rw-r--r--  sys/x86/acpica/acpi_wakeup.c | 1
-rw-r--r--  sys/x86/x86/local_apic.c | 1
-rw-r--r--  sys/x86/xen/pvcpu_enum.c | 22
-rw-r--r--  sys/x86/xen/xen_intr.c | 49
-rw-r--r--  usr.bin/compress/compress.c | 11
-rw-r--r--  usr.bin/gcore/elfcore.c | 3
-rw-r--r--  usr.bin/gzip/gzip.c | 10
-rw-r--r--  usr.bin/kdump/kdump.c | 2
-rw-r--r--  usr.bin/timeout/timeout.c | 32
-rw-r--r--  usr.bin/touch/touch.c | 39
-rw-r--r--  usr.sbin/binmiscctl/binmiscctl.8 | 11
-rw-r--r--  usr.sbin/binmiscctl/binmiscctl.c | 4
-rw-r--r--  usr.sbin/devctl/Makefile | 1
-rwxr-xr-x  usr.sbin/pw/tests/pw_useradd.sh | 111
-rwxr-xr-x  usr.sbin/pw/tests/pw_usernext.sh | 27
270 files changed, 18326 insertions, 8713 deletions
diff --git a/Makefile.inc1 b/Makefile.inc1
index 9fefae8..4dcbc0f 100644
--- a/Makefile.inc1
+++ b/Makefile.inc1
@@ -929,7 +929,7 @@ packageworld:
@${DESTDIR}/${DISTDIR}/${dist}.debug.meta
. else
${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
- tar cvJfL ${DESTDIR}/${DISTDIR}/${dist}-dbg.txz \
+ tar cvJLf ${DESTDIR}/${DISTDIR}/${dist}-dbg.txz \
usr/lib/debug
. endif
.endfor
diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc
index c29e87d..0c969ff 100644
--- a/ObsoleteFiles.inc
+++ b/ObsoleteFiles.inc
@@ -93,6 +93,8 @@ OLD_FILES+=usr/lib/clang/3.5.1/lib/freebsd/libclang_rt.ubsan_cxx-x86_64.a
OLD_DIRS+=usr/lib/clang/3.5.1/lib/freebsd
OLD_DIRS+=usr/lib/clang/3.5.1/lib
OLD_DIRS+=usr/lib/clang/3.5.1
+# 20150217: Removed remnants of ar(4) driver
+OLD_FILES+=usr/include/sys/dev/ic/hd64570.h
# 20150212: /usr/games moving into /usr/bin
OLD_FILES+=usr/games/bcd
OLD_FILES+=usr/games/caesar
diff --git a/bin/ln/symlink.7 b/bin/ln/symlink.7
index 821a312..089c010 100644
--- a/bin/ln/symlink.7
+++ b/bin/ln/symlink.7
@@ -29,7 +29,7 @@
.\" @(#)symlink.7 8.3 (Berkeley) 3/31/94
.\" $FreeBSD$
.\"
-.Dd January 23, 2015
+.Dd February 16, 2015
.Dt SYMLINK 7
.Os
.Sh NAME
@@ -146,6 +146,7 @@ The following system calls follow symbolic links
unless given the
.Dv AT_SYMLINK_NOFOLLOW
flag:
+.Xr chflagsat 2 ,
.Xr fchmodat 2 ,
.Xr fchownat 2 ,
.Xr fstatat 2
diff --git a/bin/pkill/tests/pkill-j_test.sh b/bin/pkill/tests/pkill-j_test.sh
index a844149..5635df3 100644
--- a/bin/pkill/tests/pkill-j_test.sh
+++ b/bin/pkill/tests/pkill-j_test.sh
@@ -4,99 +4,90 @@
jail_name_to_jid()
{
local check_name="$1"
- (
- line="$(jls -n 2> /dev/null | grep name=$check_name )"
- for nv in $line; do
- local name="${nv%=*}"
- if [ "${name}" = "jid" ]; then
- eval $nv
- echo $jid
- break
- fi
- done
- )
+ jls -j "$check_name" -s 2>/dev/null | tr ' ' '\n' | grep jid= | sed -e 's/.*=//g'
}
base=pkill_j_test
+if [ `id -u` -ne 0 ]; then
+ echo "1..0 # skip Test needs uid 0."
+ exit 0
+fi
+
echo "1..3"
+sleep=$(pwd)/sleep.txt
+ln -sf /bin/sleep $sleep
+
name="pkill -j <jid>"
-if [ `id -u` -eq 0 ]; then
- sleep=$(pwd)/sleep.txt
- ln -sf /bin/sleep $sleep
- jail -c path=/ name=${base}_1_1 ip4.addr=127.0.0.1 \
- command=daemon -p ${PWD}/${base}_1_1.pid $sleep 5 &
+sleep_amount=5
+jail -c path=/ name=${base}_1_1 ip4.addr=127.0.0.1 \
+ command=daemon -p ${PWD}/${base}_1_1.pid $sleep $sleep_amount &
- jail -c path=/ name=${base}_1_2 ip4.addr=127.0.0.1 \
- command=daemon -p ${PWD}/${base}_1_2.pid $sleep 5 &
+jail -c path=/ name=${base}_1_2 ip4.addr=127.0.0.1 \
+ command=daemon -p ${PWD}/${base}_1_2.pid $sleep $sleep_amount &
- $sleep 5 &
- sleep 0.5
+$sleep $sleep_amount &
+
+for i in `seq 1 10`; do
jid1=$(jail_name_to_jid ${base}_1_1)
jid2=$(jail_name_to_jid ${base}_1_2)
jid="${jid1},${jid2}"
- if pkill -f -j "$jid" $sleep && sleep 0.5 &&
- ! -f ${PWD}/${base}_1_1.pid &&
- ! -f ${PWD}/${base}_1_2.pid ; then
- echo "ok 1 - $name"
- else
- echo "not ok 1 - $name"
- fi 2>/dev/null
- rm -f $sleep
- [ -f ${PWD}/${base}_1_1.pid ] && kill $(cat ${PWD}/${base}_1_1.pid)
- [ -f ${PWD}/${base}_1_2.pid ] && kill $(cat ${PWD}/${base}_1_2.pid)
- wait
+ case "$jid" in
+ [0-9]+,[0-9]+)
+ break
+ ;;
+ esac
+ sleep 0.1
+done
+
+if pkill -f -j "$jid" $sleep && sleep 0.5 &&
+ ! -f ${PWD}/${base}_1_1.pid &&
+ ! -f ${PWD}/${base}_1_2.pid ; then
+ echo "ok 1 - $name"
else
- echo "ok 1 - $name # skip Test needs uid 0."
-fi
+ echo "not ok 1 - $name"
+fi 2>/dev/null
+[ -f ${PWD}/${base}_1_1.pid ] && kill $(cat ${PWD}/${base}_1_1.pid)
+[ -f ${PWD}/${base}_1_2.pid ] && kill $(cat ${PWD}/${base}_1_2.pid)
+wait
name="pkill -j any"
-if [ `id -u` -eq 0 ]; then
- sleep=$(pwd)/sleep.txt
- ln -sf /bin/sleep $sleep
- jail -c path=/ name=${base}_2_1 ip4.addr=127.0.0.1 \
- command=daemon -p ${PWD}/${base}_2_1.pid $sleep 5 &
+sleep_amount=6
+jail -c path=/ name=${base}_2_1 ip4.addr=127.0.0.1 \
+ command=daemon -p ${PWD}/${base}_2_1.pid $sleep $sleep_amount &
- jail -c path=/ name=${base}_2_2 ip4.addr=127.0.0.1 \
- command=daemon -p ${PWD}/${base}_2_2.pid $sleep 5 &
+jail -c path=/ name=${base}_2_2 ip4.addr=127.0.0.1 \
+ command=daemon -p ${PWD}/${base}_2_2.pid $sleep $sleep_amount &
- $sleep 5 &
- sleep 0.5
- chpid3=$!
- if pkill -f -j any $sleep && sleep 0.5 &&
- [ ! -f ${PWD}/${base}_2_1.pid -a
- ! -f ${PWD}/${base}_2_2.pid ] && kill $chpid3; then
- echo "ok 2 - $name"
- else
- echo "not ok 2 - $name"
- fi 2>/dev/null
- rm -f $sleep
- [ -f ${PWD}/${base}_2_1.pid ] && kill $(cat ${PWD}/${base}_2_1.pid)
- [ -f ${PWD}/${base}_2_2.pid ] && kill $(cat ${PWD}/${base}_2_2.pid)
- wait
+$sleep $sleep_amount &
+chpid3=$!
+sleep 0.5
+if pkill -f -j any $sleep && sleep 0.5 &&
+ [ ! -f ${PWD}/${base}_2_1.pid -a
+ ! -f ${PWD}/${base}_2_2.pid ] && kill $chpid3; then
+ echo "ok 2 - $name"
else
- echo "ok 2 - $name # skip Test needs uid 0."
-fi
+ echo "not ok 2 - $name"
+fi 2>/dev/null
+[ -f ${PWD}/${base}_2_1.pid ] && kill $(cat ${PWD}/${base}_2_1.pid)
+[ -f ${PWD}/${base}_2_2.pid ] && kill $(cat ${PWD}/${base}_2_2.pid)
+wait
name="pkill -j none"
-if [ `id -u` -eq 0 ]; then
- sleep=$(pwd)/sleep.txt
- ln -sf /bin/sleep $sleep
- daemon -p ${PWD}/${base}_3_1.pid $sleep 5
- jail -c path=/ name=${base}_3_2 ip4.addr=127.0.0.1 \
- command=daemon -p ${PWD}/${base}_3_2.pid $sleep 5 &
- sleep 1
- if pkill -f -j none "$sleep 5" && sleep 1 &&
- [ ! -f ${PWD}/${base}_3_1.pid -a -f ${PWD}/${base}_3_2.pid ] ; then
- echo "ok 3 - $name"
- else
- ls ${PWD}/*.pid
- echo "not ok 3 - $name"
- fi 2>/dev/null
- rm -f $sleep
- [ -f ${PWD}/${base}_3_1.pid ] && kill $(cat ${base}_3_1.pid)
- [ -f ${PWD}/${base}_3_2.pid ] && kill $(cat ${base}_3_2.pid)
+sleep_amount=7
+daemon -p ${PWD}/${base}_3_1.pid $sleep $sleep_amount
+jail -c path=/ name=${base}_3_2 ip4.addr=127.0.0.1 \
+ command=daemon -p ${PWD}/${base}_3_2.pid $sleep $sleep_amount &
+sleep 1
+if pkill -f -j none "$sleep $sleep_amount" && sleep 1 &&
+ [ ! -f ${PWD}/${base}_3_1.pid -a -f ${PWD}/${base}_3_2.pid ] ; then
+ echo "ok 3 - $name"
else
- echo "ok 3 - $name # skip Test needs uid 0."
-fi
+ ls ${PWD}/*.pid
+ echo "not ok 3 - $name"
+fi 2>/dev/null
+[ -f ${PWD}/${base}_3_1.pid ] && kill $(cat ${base}_3_1.pid)
+[ -f ${PWD}/${base}_3_2.pid ] && kill $(cat ${base}_3_2.pid)
+
+rm -f $sleep
diff --git a/bin/sh/cd.c b/bin/sh/cd.c
index 7720fad..88f03f5 100644
--- a/bin/sh/cd.c
+++ b/bin/sh/cd.c
@@ -122,7 +122,7 @@ cdcmd(int argc __unused, char **argv __unused)
(dest[0] == '.' && (dest[1] == '/' || dest[1] == '\0')) ||
(dest[0] == '.' && dest[1] == '.' && (dest[2] == '/' || dest[2] == '\0')) ||
(path = bltinlookup("CDPATH", 1)) == NULL)
- path = nullstr;
+ path = "";
while ((p = padvance(&path, dest)) != NULL) {
if (stat(p, &statb) < 0) {
if (errno != ENOENT)
@@ -182,7 +182,6 @@ cdlogical(char *dest)
struct stat statb;
int first;
int badstat;
- size_t len;
/*
* Check each component of the path. If we find a symlink or
@@ -190,9 +189,7 @@ cdlogical(char *dest)
* next time we get the value of the current directory.
*/
badstat = 0;
- len = strlen(dest);
- cdcomppath = stalloc(len + 1);
- memcpy(cdcomppath, dest, len + 1);
+ cdcomppath = stsavestr(dest);
STARTSTACKSTR(p);
if (*dest == '/') {
STPUTC('/', p);
@@ -277,7 +274,6 @@ findcwd(char *dir)
{
char *new;
char *p;
- size_t len;
/*
* If our argument is NULL, we don't know the current directory
@@ -286,9 +282,7 @@ findcwd(char *dir)
*/
if (dir == NULL || curdir == NULL)
return getpwd2();
- len = strlen(dir);
- cdcomppath = stalloc(len + 1);
- memcpy(cdcomppath, dir, len + 1);
+ cdcomppath = stsavestr(dir);
STARTSTACKSTR(new);
if (*dir != '/') {
STPUTS(curdir, new);
diff --git a/bin/sh/eval.c b/bin/sh/eval.c
index 486de9c..347824a 100644
--- a/bin/sh/eval.c
+++ b/bin/sh/eval.c
@@ -498,7 +498,7 @@ exphere(union node *redir, struct arglist *fn)
struct localvar *savelocalvars;
int need_longjmp = 0;
- redir->nhere.expdoc = nullstr;
+ redir->nhere.expdoc = "";
savelocalvars = localvars;
localvars = NULL;
forcelocal++;
diff --git a/bin/sh/expand.c b/bin/sh/expand.c
index b542303..e1c1a2e 100644
--- a/bin/sh/expand.c
+++ b/bin/sh/expand.c
@@ -105,11 +105,12 @@ static void expbackq(union node *, int, int);
static int subevalvar(char *, char *, int, int, int, int, int);
static char *evalvar(char *, int);
static int varisset(const char *, int);
+static void strtodest(const char *, int, int, int);
static void varvalue(const char *, int, int, int);
static void recordregion(int, int, int);
static void removerecordregions(int);
static void ifsbreakup(char *, struct arglist *);
-static void expandmeta(struct strlist *, int);
+static void expandmeta(struct strlist *);
static void expmeta(char *, char *);
static void addfname(char *);
static struct strlist *expsort(struct strlist *);
@@ -175,7 +176,7 @@ expandarg(union node *arg, struct arglist *arglist, int flag)
ifsbreakup(p, &exparg);
*exparg.lastp = NULL;
exparg.lastp = &exparg.list;
- expandmeta(exparg.list, flag);
+ expandmeta(exparg.list);
} else {
sp = (struct strlist *)stalloc(sizeof (struct strlist));
sp->text = p;
@@ -298,9 +299,9 @@ exptilde(char *p, int flag)
char c, *startp = p;
struct passwd *pw;
char *home;
- int quotes = flag & (EXP_FULL | EXP_CASE);
- while ((c = *p) != '\0') {
+ for (;;) {
+ c = *p;
switch(c) {
case CTLESC: /* This means CTL* are always considered quoted. */
case CTLVAR:
@@ -311,31 +312,27 @@ exptilde(char *p, int flag)
case CTLQUOTEMARK:
return (startp);
case ':':
- if (flag & EXP_VARTILDE)
- goto done;
- break;
+ if ((flag & EXP_VARTILDE) == 0)
+ break;
+ /* FALLTHROUGH */
+ case '\0':
case '/':
case CTLENDVAR:
- goto done;
+ *p = '\0';
+ if (*(startp+1) == '\0') {
+ home = lookupvar("HOME");
+ } else {
+ pw = getpwnam(startp+1);
+ home = pw != NULL ? pw->pw_dir : NULL;
+ }
+ *p = c;
+ if (home == NULL || *home == '\0')
+ return (startp);
+ strtodest(home, flag, VSNORMAL, 1);
+ return (p);
}
p++;
}
-done:
- *p = '\0';
- if (*(startp+1) == '\0') {
- home = lookupvar("HOME");
- } else {
- pw = getpwnam(startp+1);
- home = pw != NULL ? pw->pw_dir : NULL;
- }
- *p = c;
- if (home == NULL || *home == '\0')
- return (startp);
- if (quotes)
- STPUTS_QUOTES(home, DQSYNTAX, expdest);
- else
- STPUTS(home, expdest);
- return (p);
}
@@ -496,6 +493,17 @@ expbackq(union node *cmd, int quoted, int flag)
+static void
+recordleft(const char *str, const char *loc, char *startp)
+{
+ int amount;
+
+ amount = ((str - 1) - (loc - startp)) - expdest;
+ STADJUST(amount, expdest);
+ while (loc != str - 1)
+ *startp++ = *loc++;
+}
+
static int
subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
int varflags, int quotes)
@@ -530,8 +538,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
error((char *)NULL);
}
error("%.*s: parameter %snot set", (int)(p - str - 1),
- str, (varflags & VSNUL) ? "null or "
- : nullstr);
+ str, (varflags & VSNUL) ? "null or " : "");
return 0;
case VSTRIMLEFT:
@@ -540,7 +547,8 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
*loc = '\0';
if (patmatch(str, startp, quotes)) {
*loc = c;
- goto recordleft;
+ recordleft(str, loc, startp);
+ return 1;
}
*loc = c;
if (quotes && *loc == CTLESC)
@@ -554,7 +562,8 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
*loc = '\0';
if (patmatch(str, startp, quotes)) {
*loc = c;
- goto recordleft;
+ recordleft(str, loc, startp);
+ return 1;
}
*loc = c;
loc--;
@@ -602,13 +611,6 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
default:
abort();
}
-
-recordleft:
- amount = ((str - 1) - (loc - startp)) - expdest;
- STADJUST(amount, expdest);
- while (loc != str - 1)
- *startp++ = *loc++;
- return 1;
}
@@ -633,6 +635,7 @@ evalvar(char *p, int flag)
int varlenb;
int easy;
int quotes = flag & (EXP_FULL | EXP_CASE);
+ int record;
varflags = (unsigned char)*p++;
subtype = varflags & VSTYPE;
@@ -690,22 +693,15 @@ again: /* jump here after setting a variable with ${var=text} */
STADJUST(-varlenb, expdest);
}
} else {
- char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
- : BASESYNTAX;
-
if (subtype == VSLENGTH) {
for (;*val; val++)
if (!localeisutf8 ||
(*val & 0xC0) != 0x80)
varlen++;
}
- else {
- if (quotes)
- STPUTS_QUOTES(val, syntax, expdest);
- else
- STPUTS(val, expdest);
-
- }
+ else
+ strtodest(val, flag, subtype,
+ varflags & VSQUOTE);
}
}
@@ -719,15 +715,11 @@ again: /* jump here after setting a variable with ${var=text} */
switch (subtype) {
case VSLENGTH:
expdest = cvtnum(varlen, expdest);
- goto record;
+ record = 1;
+ break;
case VSNORMAL:
- if (!easy)
- break;
-record:
- recordregion(startloc, expdest - stackblock(),
- varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' &&
- (*var == '@' || *var == '*')));
+ record = easy;
break;
case VSPLUS:
@@ -737,8 +729,7 @@ record:
(varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
break;
}
- if (easy)
- goto record;
+ record = easy;
break;
case VSTRIMLEFT:
@@ -760,7 +751,8 @@ record:
}
/* Remove any recorded regions beyond start of variable */
removerecordregions(startloc);
- goto record;
+ record = 1;
+ break;
case VSASSIGN:
case VSQUESTION:
@@ -777,8 +769,7 @@ record:
}
break;
}
- if (easy)
- goto record;
+ record = easy;
break;
case VSERROR:
@@ -790,6 +781,11 @@ record:
abort();
}
+ if (record)
+ recordregion(startloc, expdest - stackblock(),
+ varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' &&
+ (*var == '@' || *var == '*')));
+
if (subtype != VSNORMAL) { /* skip to end of alternative */
int nesting = 1;
for (;;) {
@@ -1093,7 +1089,7 @@ static char expdir[PATH_MAX];
* The results are stored in the list exparg.
*/
static void
-expandmeta(struct strlist *str, int flag __unused)
+expandmeta(struct strlist *str)
{
char *p;
struct strlist **savelastp;
@@ -1284,11 +1280,8 @@ addfname(char *name)
{
char *p;
struct strlist *sp;
- size_t len;
- len = strlen(name);
- p = stalloc(len + 1);
- memcpy(p, name, len + 1);
+ p = stsavestr(name);
sp = (struct strlist *)stalloc(sizeof *sp);
sp->text = p;
*exparg.lastp = sp;
@@ -1478,16 +1471,14 @@ patmatch(const char *pattern, const char *string, int squoted)
endp = p;
if (*endp == '!' || *endp == '^')
endp++;
- for (;;) {
+ do {
while (*endp == CTLQUOTEMARK)
endp++;
if (*endp == 0)
goto dft; /* no matching ] */
if (*endp == CTLESC)
endp++;
- if (*++endp == ']')
- break;
- }
+ } while (*++endp != ']');
invert = 0;
if (*p == '!' || *p == '^') {
invert++;
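
The expand.c hunks above are largely a control-flow cleanup: the shared "record:" label and its gotos in evalvar() become a record flag tested once after the switch, and the old "recordleft:" tail is factored into the new recordleft() helper. A minimal compilable sketch of the goto-to-flag shape; the names below are invented stand-ins, not sh internals:

/*
 * Goto-to-flag refactor in the style of the evalvar() change above.
 * finish() stands in for recordregion(); none of these names are
 * from sh itself.
 */
#include <stdbool.h>
#include <stdio.h>

static void
finish(void)
{
	printf("region recorded\n");
}

static void
handle(int subtype, bool easy)
{
	bool record = false;	/* replaces the shared "record:" label */

	switch (subtype) {
	case 0:			/* was: goto record */
		record = true;
		break;
	case 1:			/* was: if (easy) goto record */
		record = easy;
		break;
	default:
		break;
	}
	if (record)		/* single exit point instead of gotos */
		finish();
}

int
main(void)
{
	handle(0, false);
	handle(1, true);
	return (0);
}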
diff --git a/bin/sh/mail.c b/bin/sh/mail.c
index 597e733..720cab0 100644
--- a/bin/sh/mail.c
+++ b/bin/sh/mail.c
@@ -85,7 +85,7 @@ chkmail(int silent)
setstackmark(&smark);
mpath = mpathset()? mpathval() : mailval();
for (i = 0 ; i < nmboxes ; i++) {
- p = padvance(&mpath, nullstr);
+ p = padvance(&mpath, "");
if (p == NULL)
break;
if (*p == '\0')
diff --git a/bin/sh/memalloc.c b/bin/sh/memalloc.c
index 119f12e..a04020f 100644
--- a/bin/sh/memalloc.c
+++ b/bin/sh/memalloc.c
@@ -180,6 +180,18 @@ stunalloc(pointer p)
}
+char *
+stsavestr(const char *s)
+{
+ char *p;
+ size_t len;
+
+ len = strlen(s);
+ p = stalloc(len + 1);
+ memcpy(p, s, len + 1);
+ return p;
+}
+
void
setstackmark(struct stackmark *mark)
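
The new stsavestr() collapses a strlen/stalloc/memcpy sequence that the cd.c and expand.c hunks above previously spelled out at each call site. A standalone approximation follows; the real function allocates from the shell's string stack via stalloc(), and malloc() is used here only so the sketch compiles on its own:

/*
 * Approximation of sh's stsavestr(). The real stalloc() never
 * returns NULL (it raises a shell error instead), so the NULL
 * check below exists only for this malloc-based stand-in.
 */
#include <stdlib.h>
#include <string.h>

static char *
stsavestr_sketch(const char *s)
{
	char *p;
	size_t len;

	len = strlen(s);
	p = malloc(len + 1);		/* sh: stalloc(len + 1) */
	if (p != NULL)
		memcpy(p, s, len + 1);	/* copies the NUL too */
	return (p);
}

int
main(void)
{
	char *copy = stsavestr_sketch("/usr/local");

	free(copy);
	return (0);
}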
diff --git a/bin/sh/memalloc.h b/bin/sh/memalloc.h
index a22fa39..e8df7cb 100644
--- a/bin/sh/memalloc.h
+++ b/bin/sh/memalloc.h
@@ -52,6 +52,7 @@ void ckfree(pointer);
char *savestr(const char *);
pointer stalloc(int);
void stunalloc(pointer);
+char *stsavestr(const char *);
void setstackmark(struct stackmark *);
void popstackmark(struct stackmark *);
char *growstackstr(void);
diff --git a/bin/sh/miscbltin.c b/bin/sh/miscbltin.c
index 027d8ae..715e324 100644
--- a/bin/sh/miscbltin.c
+++ b/bin/sh/miscbltin.c
@@ -265,7 +265,7 @@ readcmd(int argc __unused, char **argv __unused)
/* Set any remaining args to "" */
while (*++ap != NULL)
- setvar(*ap, nullstr, 0);
+ setvar(*ap, "", 0);
return status;
}
diff --git a/bin/sh/nodetypes b/bin/sh/nodetypes
index 603c777..d480093 100644
--- a/bin/sh/nodetypes
+++ b/bin/sh/nodetypes
@@ -138,7 +138,7 @@ NXHERE nhere # fd<<!
fd int # file descriptor being redirected
next nodeptr # next redirection in list
doc nodeptr # input to command (NARG node)
- expdoc temp char *expdoc # actual document (for NXHERE)
+ expdoc temp const char *expdoc # actual document (for NXHERE)
NNOT nnot # ! command (actually pipeline)
type int
diff --git a/bin/sh/parser.c b/bin/sh/parser.c
index 0048314..2bba84e 100644
--- a/bin/sh/parser.c
+++ b/bin/sh/parser.c
@@ -1940,7 +1940,7 @@ getprompt(void *unused __unused)
*/
switch (whichprompt) {
case 0:
- fmt = nullstr;
+ fmt = "";
break;
case 1:
fmt = ps1val();
diff --git a/bin/sh/tests/expansion/Makefile b/bin/sh/tests/expansion/Makefile
index 027bc95..0c3e89e 100644
--- a/bin/sh/tests/expansion/Makefile
+++ b/bin/sh/tests/expansion/Makefile
@@ -48,6 +48,7 @@ FILES+= ifs1.0
FILES+= ifs2.0
FILES+= ifs3.0
FILES+= ifs4.0
+FILES+= ifs5.0
FILES+= length1.0
FILES+= length2.0
FILES+= length3.0
@@ -62,6 +63,7 @@ FILES+= pathname1.0
FILES+= pathname2.0
FILES+= pathname3.0
FILES+= pathname4.0
+FILES+= pathname5.0
FILES+= plus-minus1.0
FILES+= plus-minus2.0
FILES+= plus-minus3.0
diff --git a/bin/sh/tests/expansion/ifs5.0 b/bin/sh/tests/expansion/ifs5.0
new file mode 100644
index 0000000..ab0e646
--- /dev/null
+++ b/bin/sh/tests/expansion/ifs5.0
@@ -0,0 +1,4 @@
+# $FreeBSD$
+
+set -- $(echo a b c d)
+[ "$#" = 4 ]
diff --git a/bin/sh/tests/expansion/pathname5.0 b/bin/sh/tests/expansion/pathname5.0
new file mode 100644
index 0000000..bc27812
--- /dev/null
+++ b/bin/sh/tests/expansion/pathname5.0
@@ -0,0 +1,3 @@
+# $FreeBSD$
+
+[ `echo '/[e]tc'` = /etc ]
diff --git a/bin/sh/var.c b/bin/sh/var.c
index ebeff16..def1e0c 100644
--- a/bin/sh/var.c
+++ b/bin/sh/var.c
@@ -872,7 +872,7 @@ unsetvar(const char *s)
if (vp->flags & VREADONLY)
return (1);
if (vp->text[vp->name_len + 1] != '\0')
- setvar(s, nullstr, 0);
+ setvar(s, "", 0);
if ((vp->flags & VEXPORT) && localevar(vp->text)) {
change_env(s, 0);
setlocale(LC_ALL, "");
diff --git a/contrib/elftoolchain/addr2line/addr2line.c b/contrib/elftoolchain/addr2line/addr2line.c
index e1db599..6dcf19c 100644
--- a/contrib/elftoolchain/addr2line/addr2line.c
+++ b/contrib/elftoolchain/addr2line/addr2line.c
@@ -40,7 +40,7 @@
#include "_elftc.h"
-ELFTC_VCSID("$Id: addr2line.c 2185 2011-11-19 16:07:16Z jkoshy $");
+ELFTC_VCSID("$Id: addr2line.c 3148 2015-02-15 18:47:39Z emaste $");
static struct option longopts[] = {
{"target" , required_argument, NULL, 'b'},
diff --git a/contrib/elftoolchain/common/_elftc.h b/contrib/elftoolchain/common/_elftc.h
index 0b8c77c..d6c8784 100644
--- a/contrib/elftoolchain/common/_elftc.h
+++ b/contrib/elftoolchain/common/_elftc.h
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: _elftc.h 2922 2013-03-17 22:53:15Z kaiwang27 $
+ * $Id: _elftc.h 3139 2015-01-05 03:17:06Z kaiwang27 $
*/
/**
@@ -76,10 +76,17 @@
* SUCH DAMAGE.
*/
+#ifndef LIST_FOREACH_SAFE
+#define LIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = LIST_FIRST((head)); \
+ (var) && ((tvar) = LIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+#endif
+
#ifndef SLIST_FOREACH_SAFE
-#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
- for ((var) = SLIST_FIRST((head)); \
- (var) && ((tvar) = SLIST_NEXT((var), field), 1); \
+#define SLIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = SLIST_FIRST((head)); \
+ (var) && ((tvar) = SLIST_NEXT((var), field), 1); \
(var) = (tvar))
#endif
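
LIST_FOREACH_SAFE, added above for platforms whose <sys/queue.h> lacks it, saves the successor in a temporary before the loop body runs, so the body may unlink or free the current element. A small usage sketch in the queue(3) style:

/*
 * Using LIST_FOREACH_SAFE to destroy a list: tmp already holds the
 * next element when free() runs, so the iteration stays valid.
 */
#include <sys/queue.h>
#include <stdlib.h>

struct node {
	int		  val;
	LIST_ENTRY(node)  link;
};

static LIST_HEAD(, node) head = LIST_HEAD_INITIALIZER(head);

static void
free_all(void)
{
	struct node *n, *tmp;

	LIST_FOREACH_SAFE(n, &head, link, tmp) {
		LIST_REMOVE(n, link);
		free(n);
	}
}

int
main(void)
{
	struct node *n = calloc(1, sizeof(*n));

	if (n != NULL)
		LIST_INSERT_HEAD(&head, n, link);
	free_all();
	return (0);
}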
diff --git a/contrib/elftoolchain/common/elfdefinitions.h b/contrib/elftoolchain/common/elfdefinitions.h
index 8b28aeb..f0a2fc2 100644
--- a/contrib/elftoolchain/common/elfdefinitions.h
+++ b/contrib/elftoolchain/common/elfdefinitions.h
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: elfdefinitions.h 3110 2014-12-20 08:32:46Z kaiwang27 $
+ * $Id: elfdefinitions.h 3149 2015-02-15 19:00:06Z emaste $
*/
/*
@@ -1396,6 +1396,12 @@ _ELF_DEFINE_RELOC(R_386_8, 22) \
_ELF_DEFINE_RELOC(R_386_PC8, 23)
/*
+ */
+#define _ELF_DEFINE_AARCH64_RELOCATIONS() \
+_ELF_DEFINE_RELOC(R_AARCH64_ABS64, 257) \
+_ELF_DEFINE_RELOC(R_AARCH64_ABS32, 258) \
+
+/*
* These are the symbols used in the Sun ``Linkers and Loaders
* Guide'', Document No: 817-1984-17. See the X86_64 relocations list
* below for the spellings used in the ELF specification.
@@ -1962,6 +1968,7 @@ _ELF_DEFINE_RELOC(R_X86_64_IRELATIVE, 37)
#define _ELF_DEFINE_RELOCATIONS() \
_ELF_DEFINE_386_RELOCATIONS() \
+_ELF_DEFINE_AARCH64_RELOCATIONS() \
_ELF_DEFINE_AMD64_RELOCATIONS() \
_ELF_DEFINE_ARM_RELOCATIONS() \
_ELF_DEFINE_IA64_RELOCATIONS() \
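
The relocation lists in elfdefinitions.h are X-macros: each _ELF_DEFINE_*_RELOCATIONS() block expands _ELF_DEFINE_RELOC once per entry, and a consumer redefines that macro to stamp out constants, name tables, and so on. A reduced sketch of the idiom, using only the two AArch64 entries added above and a local RELOC() stand-in for _ELF_DEFINE_RELOC():

/*
 * X-macro idiom in the style of elfdefinitions.h, reduced to the
 * two relocations added above.
 */
#include <stdio.h>

#define	AARCH64_RELOCS()		\
	RELOC(R_AARCH64_ABS64, 257)	\
	RELOC(R_AARCH64_ABS32, 258)

/* First expansion: numeric constants. */
#define	RELOC(N, V)	N = V,
enum { AARCH64_RELOCS() };
#undef	RELOC

/* Second expansion: a name lookup for pretty-printers. */
#define	RELOC(N, V)	case V: return (#N);
static const char *
reloc_name(unsigned int type)
{
	switch (type) {
	AARCH64_RELOCS()
	default:
		return ("<unknown>");
	}
}
#undef	RELOC

int
main(void)
{
	printf("%d -> %s\n", R_AARCH64_ABS64, reloc_name(257));
	return (0);
}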
diff --git a/contrib/elftoolchain/elfcopy/main.c b/contrib/elftoolchain/elfcopy/main.c
index 4a693ca..e5bea86 100644
--- a/contrib/elftoolchain/elfcopy/main.c
+++ b/contrib/elftoolchain/elfcopy/main.c
@@ -40,7 +40,7 @@
#include "elfcopy.h"
-ELFTC_VCSID("$Id: main.c 3111 2014-12-20 08:33:01Z kaiwang27 $");
+ELFTC_VCSID("$Id: main.c 3156 2015-02-15 21:40:01Z emaste $");
enum options
{
diff --git a/contrib/elftoolchain/elfcopy/sections.c b/contrib/elftoolchain/elfcopy/sections.c
index 4d23bc7..ee6d172 100644
--- a/contrib/elftoolchain/elfcopy/sections.c
+++ b/contrib/elftoolchain/elfcopy/sections.c
@@ -35,7 +35,7 @@
#include "elfcopy.h"
-ELFTC_VCSID("$Id: sections.c 3134 2014-12-23 10:43:59Z kaiwang27 $");
+ELFTC_VCSID("$Id: sections.c 3150 2015-02-15 19:07:46Z emaste $");
static void add_gnu_debuglink(struct elfcopy *ecp);
static uint32_t calc_crc32(const char *p, size_t len, uint32_t crc);
diff --git a/contrib/elftoolchain/libdwarf/_libdwarf.h b/contrib/elftoolchain/libdwarf/_libdwarf.h
index a7669e2..06413be 100644
--- a/contrib/elftoolchain/libdwarf/_libdwarf.h
+++ b/contrib/elftoolchain/libdwarf/_libdwarf.h
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: _libdwarf.h 3106 2014-12-19 16:00:58Z kaiwang27 $
+ * $Id: _libdwarf.h 3161 2015-02-15 21:43:36Z emaste $
*/
#ifndef __LIBDWARF_H_
diff --git a/contrib/elftoolchain/libdwarf/dwarf_attrval.c b/contrib/elftoolchain/libdwarf/dwarf_attrval.c
index 179deed..0dd38a4 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_attrval.c
+++ b/contrib/elftoolchain/libdwarf/dwarf_attrval.c
@@ -26,7 +26,7 @@
#include "_libdwarf.h"
-ELFTC_VCSID("$Id: dwarf_attrval.c 2977 2014-01-21 20:13:31Z kaiwang27 $");
+ELFTC_VCSID("$Id: dwarf_attrval.c 3159 2015-02-15 21:43:27Z emaste $");
int
dwarf_attrval_flag(Dwarf_Die die, Dwarf_Half attr, Dwarf_Bool *valp, Dwarf_Error *err)
diff --git a/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3 b/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3
index e88e3cf..473adc3 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3
+++ b/contrib/elftoolchain/libdwarf/dwarf_get_AT_name.3
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $Id: dwarf_get_AT_name.3 2071 2011-10-27 03:20:00Z jkoshy $
+.\" $Id: dwarf_get_AT_name.3 3142 2015-01-29 23:11:14Z jkoshy $
.\"
.Dd April 22, 2011
.Os
diff --git a/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3 b/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3
index 2e67871..3878edd 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3
+++ b/contrib/elftoolchain/libdwarf/dwarf_get_arange_info.3
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $Id: dwarf_get_arange_info.3 2134 2011-11-10 08:40:14Z jkoshy $
+.\" $Id: dwarf_get_arange_info.3 3142 2015-01-29 23:11:14Z jkoshy $
.\"
.Dd April 16, 2011
.Os
diff --git a/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3 b/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3
index 6f79341..963d4ac 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3
+++ b/contrib/elftoolchain/libdwarf/dwarf_get_section_max_offsets.3
@@ -22,9 +22,9 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $Id: dwarf_get_section_max_offsets.3 3098 2014-09-02 22:18:29Z kaiwang27 $
+.\" $Id: dwarf_get_section_max_offsets.3 3141 2015-01-29 23:11:10Z jkoshy $
.\"
-.Dd July 27, 2014
+.Dd December 21, 2014
.Os
.Dt DWARF_GET_SECTION_MAX_OFFSETS
.Sh NAME
@@ -101,7 +101,7 @@ is identical to function
.Fn dwarf_get_section_max_offsets_b
except that it does not provide argument
.Ar debug_types ,
-thus it can not retrieve the size of the
+and thus cannot return the size of the
.Dq \&.debug_types
section.
.Sh RETURN VALUES
diff --git a/contrib/elftoolchain/libdwarf/dwarf_hasattr.3 b/contrib/elftoolchain/libdwarf/dwarf_hasattr.3
index d3bcb27..5875848 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_hasattr.3
+++ b/contrib/elftoolchain/libdwarf/dwarf_hasattr.3
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $Id: dwarf_hasattr.3 2073 2011-10-27 03:30:47Z jkoshy $
+.\" $Id: dwarf_hasattr.3 3142 2015-01-29 23:11:14Z jkoshy $
.\"
.Dd April 17, 2010
.Os
diff --git a/contrib/elftoolchain/libdwarf/dwarf_reloc.c b/contrib/elftoolchain/libdwarf/dwarf_reloc.c
index c912f27..0430e4d 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_reloc.c
+++ b/contrib/elftoolchain/libdwarf/dwarf_reloc.c
@@ -26,7 +26,7 @@
#include "_libdwarf.h"
-ELFTC_VCSID("$Id: dwarf_reloc.c 2075 2011-10-27 03:47:28Z jkoshy $");
+ELFTC_VCSID("$Id: dwarf_reloc.c 3161 2015-02-15 21:43:36Z emaste $");
int
dwarf_set_reloc_application(int apply)
diff --git a/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3 b/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3
index e62b262..db40cbb 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3
+++ b/contrib/elftoolchain/libdwarf/dwarf_set_reloc_application.3
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $Id: dwarf_set_reloc_application.3 2075 2011-10-27 03:47:28Z jkoshy $
+.\" $Id: dwarf_set_reloc_application.3 3161 2015-02-15 21:43:36Z emaste $
.\"
.Dd February 11, 2015
.Os
diff --git a/contrib/elftoolchain/libdwarf/dwarf_whatattr.3 b/contrib/elftoolchain/libdwarf/dwarf_whatattr.3
index 96d9ad2..a975d3e 100644
--- a/contrib/elftoolchain/libdwarf/dwarf_whatattr.3
+++ b/contrib/elftoolchain/libdwarf/dwarf_whatattr.3
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $Id: dwarf_whatattr.3 2075 2011-10-27 03:47:28Z jkoshy $
+.\" $Id: dwarf_whatattr.3 3142 2015-01-29 23:11:14Z jkoshy $
.\"
.Dd May 22, 2010
.Os
diff --git a/contrib/elftoolchain/libdwarf/libdwarf.c b/contrib/elftoolchain/libdwarf/libdwarf.c
index 961fe2c..b2406cb 100644
--- a/contrib/elftoolchain/libdwarf/libdwarf.c
+++ b/contrib/elftoolchain/libdwarf/libdwarf.c
@@ -26,7 +26,7 @@
#include "_libdwarf.h"
-ELFTC_VCSID("$Id: libdwarf.c 2070 2011-10-27 03:05:32Z jkoshy $");
+ELFTC_VCSID("$Id: libdwarf.c 3161 2015-02-15 21:43:36Z emaste $");
struct _libdwarf_globals _libdwarf = {
.errhand = NULL,
diff --git a/contrib/elftoolchain/libdwarf/libdwarf.h b/contrib/elftoolchain/libdwarf/libdwarf.h
index 20360a7..fdbcb4e 100644
--- a/contrib/elftoolchain/libdwarf/libdwarf.h
+++ b/contrib/elftoolchain/libdwarf/libdwarf.h
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: libdwarf.h 3064 2014-06-06 19:35:55Z kaiwang27 $
+ * $Id: libdwarf.h 3149 2015-02-15 19:00:06Z emaste $
*/
#ifndef _LIBDWARF_H_
diff --git a/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c b/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c
index 731a20d..af2d370 100644
--- a/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c
+++ b/contrib/elftoolchain/libdwarf/libdwarf_elf_init.c
@@ -26,7 +26,7 @@
#include "_libdwarf.h"
-ELFTC_VCSID("$Id: libdwarf_elf_init.c 2972 2013-12-23 06:46:04Z kaiwang27 $");
+ELFTC_VCSID("$Id: libdwarf_elf_init.c 3161 2015-02-15 21:43:36Z emaste $");
static const char *debug_name[] = {
".debug_abbrev",
diff --git a/contrib/elftoolchain/libdwarf/libdwarf_reloc.c b/contrib/elftoolchain/libdwarf/libdwarf_reloc.c
index e3bba67..96bb785 100644
--- a/contrib/elftoolchain/libdwarf/libdwarf_reloc.c
+++ b/contrib/elftoolchain/libdwarf/libdwarf_reloc.c
@@ -26,7 +26,7 @@
#include "_libdwarf.h"
-ELFTC_VCSID("$Id: libdwarf_reloc.c 2948 2013-05-30 21:25:52Z kaiwang27 $");
+ELFTC_VCSID("$Id: libdwarf_reloc.c 3149 2015-02-15 19:00:06Z emaste $");
Dwarf_Unsigned
_dwarf_get_reloc_type(Dwarf_P_Debug dbg, int is64)
diff --git a/contrib/elftoolchain/libelf/_libelf_config.h b/contrib/elftoolchain/libelf/_libelf_config.h
index 2ad0630..45d8714 100644
--- a/contrib/elftoolchain/libelf/_libelf_config.h
+++ b/contrib/elftoolchain/libelf/_libelf_config.h
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: _libelf_config.h 2287 2011-12-04 06:45:47Z jkoshy $
+ * $Id: _libelf_config.h 3143 2015-02-15 17:57:38Z emaste $
*/
#ifdef __DragonFly__
diff --git a/contrib/elftoolchain/libelf/elf.3 b/contrib/elftoolchain/libelf/elf.3
index e057a92..618b4f7 100644
--- a/contrib/elftoolchain/libelf/elf.3
+++ b/contrib/elftoolchain/libelf/elf.3
@@ -21,7 +21,7 @@
.\" out of the use of this software, even if advised of the possibility of
.\" such damage.
.\"
-.\" $Id: elf.3 3082 2014-07-28 09:13:33Z jkoshy $
+.\" $Id: elf.3 3142 2015-01-29 23:11:14Z jkoshy $
.\"
.Dd July 28, 2014
.Os
diff --git a/contrib/elftoolchain/libelf/elf_scn.c b/contrib/elftoolchain/libelf/elf_scn.c
index 9a9c816..0d1ac5c 100644
--- a/contrib/elftoolchain/libelf/elf_scn.c
+++ b/contrib/elftoolchain/libelf/elf_scn.c
@@ -37,7 +37,7 @@
#include "_libelf.h"
-ELFTC_VCSID("$Id: elf_scn.c 3013 2014-03-23 06:16:59Z jkoshy $");
+ELFTC_VCSID("$Id: elf_scn.c 3147 2015-02-15 18:45:23Z emaste $");
/*
* Load an ELF section table and create a list of Elf_Scn structures.
diff --git a/contrib/elftoolchain/libelf/libelf_ar_util.c b/contrib/elftoolchain/libelf/libelf_ar_util.c
index 62630ac..7b824fb 100644
--- a/contrib/elftoolchain/libelf/libelf_ar_util.c
+++ b/contrib/elftoolchain/libelf/libelf_ar_util.c
@@ -34,7 +34,7 @@
#include "_libelf.h"
#include "_libelf_ar.h"
-ELFTC_VCSID("$Id: libelf_ar_util.c 3013 2014-03-23 06:16:59Z jkoshy $");
+ELFTC_VCSID("$Id: libelf_ar_util.c 3157 2015-02-15 21:42:02Z emaste $");
/*
* Convert a string bounded by `start' and `start+sz' (exclusive) to a
@@ -278,7 +278,6 @@ _libelf_ar_open(Elf *e, int reporterror)
* Handle special archive members for the SVR4 format.
*/
if (arh.ar_name[0] == '/') {
-
if (sz == 0)
goto error;
diff --git a/contrib/elftoolchain/libelf/libelf_convert.m4 b/contrib/elftoolchain/libelf/libelf_convert.m4
index a11ace4..f400367 100644
--- a/contrib/elftoolchain/libelf/libelf_convert.m4
+++ b/contrib/elftoolchain/libelf/libelf_convert.m4
@@ -32,7 +32,7 @@
#include "_libelf.h"
-ELFTC_VCSID("$Id: libelf_convert.m4 3009 2014-03-23 01:49:59Z jkoshy $");
+ELFTC_VCSID("$Id: libelf_convert.m4 3158 2015-02-15 21:42:07Z emaste $");
/* WARNING: GENERATED FROM __file__. */
diff --git a/contrib/elftoolchain/nm/nm.1 b/contrib/elftoolchain/nm/nm.1
index d9a0325..35439e4 100644
--- a/contrib/elftoolchain/nm/nm.1
+++ b/contrib/elftoolchain/nm/nm.1
@@ -22,9 +22,9 @@
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.\" $Id: nm.1 2377 2012-01-03 07:10:59Z jkoshy $
+.\" $Id: nm.1 3145 2015-02-15 18:04:37Z emaste $
.\"
-.Dd January 8, 2015
+.Dd February 15, 2015
.Os
.Dt NM 1
.Sh NAME
diff --git a/contrib/elftoolchain/nm/nm.c b/contrib/elftoolchain/nm/nm.c
index 8d9cbbd..f984966 100644
--- a/contrib/elftoolchain/nm/nm.c
+++ b/contrib/elftoolchain/nm/nm.c
@@ -48,7 +48,7 @@
#include "_elftc.h"
-ELFTC_VCSID("$Id: nm.c 3124 2014-12-21 05:46:28Z kaiwang27 $");
+ELFTC_VCSID("$Id: nm.c 3145 2015-02-15 18:04:37Z emaste $");
/* symbol information list */
STAILQ_HEAD(sym_head, sym_entry);
diff --git a/contrib/elftoolchain/readelf/readelf.c b/contrib/elftoolchain/readelf/readelf.c
index a8c15c4..93e53a1 100644
--- a/contrib/elftoolchain/readelf/readelf.c
+++ b/contrib/elftoolchain/readelf/readelf.c
@@ -47,7 +47,7 @@
#include "_elftc.h"
-ELFTC_VCSID("$Id: readelf.c 3110 2014-12-20 08:32:46Z kaiwang27 $");
+ELFTC_VCSID("$Id: readelf.c 3155 2015-02-15 19:15:57Z emaste $");
/*
* readelf(1) options.
@@ -1503,7 +1503,8 @@ r_type(unsigned int mach, unsigned int type)
static const char *
note_type(const char *name, unsigned int et, unsigned int nt)
{
- if (strcmp(name, "CORE") == 0 && et == ET_CORE)
+ if ((strcmp(name, "CORE") == 0 || strcmp(name, "LINUX") == 0) &&
+ et == ET_CORE)
return note_type_linux_core(nt);
else if (strcmp(name, "FreeBSD") == 0)
if (et == ET_CORE)
@@ -1559,13 +1560,27 @@ note_type_linux_core(unsigned int nt)
case 1: return "NT_PRSTATUS (Process status)";
case 2: return "NT_FPREGSET (Floating point information)";
case 3: return "NT_PRPSINFO (Process information)";
+ case 4: return "NT_TASKSTRUCT (Task structure)";
case 6: return "NT_AUXV (Auxiliary vector)";
- case 0x46E62B7FUL: return "NT_PRXFPREG (Linux user_xfpregs structure)";
case 10: return "NT_PSTATUS (Linux process status)";
case 12: return "NT_FPREGS (Linux floating point regset)";
case 13: return "NT_PSINFO (Linux process information)";
case 16: return "NT_LWPSTATUS (Linux lwpstatus_t type)";
case 17: return "NT_LWPSINFO (Linux lwpinfo_t type)";
+ case 18: return "NT_WIN32PSTATUS (win32_pstatus structure)";
+ case 0x100: return "NT_PPC_VMX (ppc Altivec registers)";
+ case 0x102: return "NT_PPC_VSX (ppc VSX registers)";
+ case 0x202: return "NT_X86_XSTATE (x86 XSAVE extended state)";
+ case 0x300: return "NT_S390_HIGH_GPRS (s390 upper register halves)";
+ case 0x301: return "NT_S390_TIMER (s390 timer register)";
+ case 0x302: return "NT_S390_TODCMP (s390 TOD comparator register)";
+ case 0x303: return "NT_S390_TODPREG (s390 TOD programmable register)";
+ case 0x304: return "NT_S390_CTRS (s390 control registers)";
+ case 0x305: return "NT_S390_PREFIX (s390 prefix register)";
+ case 0x400: return "NT_ARM_VFP (arm VFP registers)";
+ case 0x46494c45UL: return "NT_FILE (mapped files)";
+ case 0x46E62B7FUL: return "NT_PRXFPREG (Linux user_xfpregs structure)";
+ case 0x53494749UL: return "NT_SIGINFO (siginfo_t data)";
default: return (note_type_unknown(nt));
}
}
@@ -1605,7 +1620,8 @@ note_type_unknown(unsigned int nt)
{
static char s_nt[32];
- snprintf(s_nt, sizeof(s_nt), "<unknown: %u>", nt);
+ snprintf(s_nt, sizeof(s_nt),
+ nt >= 0x100 ? "<unknown: 0x%x>" : "<unknown: %u>", nt);
return (s_nt);
}
@@ -3154,6 +3170,10 @@ dump_rel(struct readelf *re, struct section *s, Elf_Data *d)
warnx("gelf_getrel failed: %s", elf_errmsg(-1));
continue;
}
+ if (s->link >= re->shnum) {
+ warnx("invalid section link index %u", s->link);
+ continue;
+ }
symname = get_symbol_name(re, s->link, GELF_R_SYM(r.r_info));
symval = get_symbol_value(re, s->link, GELF_R_SYM(r.r_info));
if (re->ec == ELFCLASS32) {
@@ -3206,6 +3226,10 @@ dump_rela(struct readelf *re, struct section *s, Elf_Data *d)
warnx("gelf_getrel failed: %s", elf_errmsg(-1));
continue;
}
+ if (s->link >= re->shnum) {
+ warnx("invalid section link index %u", s->link);
+ continue;
+ }
symname = get_symbol_name(re, s->link, GELF_R_SYM(r.r_info));
symval = get_symbol_value(re, s->link, GELF_R_SYM(r.r_info));
if (re->ec == ELFCLASS32) {
@@ -4219,14 +4243,22 @@ dump_attributes(struct readelf *re)
len = d->d_size - 1;
p++;
while (len > 0) {
+ if (len < 4) {
+ warnx("truncated attribute section length");
+ break;
+ }
seclen = re->dw_decode(&p, 4);
if (seclen > len) {
warnx("invalid attribute section length");
break;
}
len -= seclen;
- printf("Attribute Section: %s\n", (char *) p);
nlen = strlen((char *) p) + 1;
+ if (nlen + 4 > seclen) {
+ warnx("invalid attribute section name");
+ break;
+ }
+ printf("Attribute Section: %s\n", (char *) p);
p += nlen;
seclen -= nlen + 4;
while (seclen > 0) {
@@ -6696,10 +6728,8 @@ load_sections(struct readelf *re)
return;
}
- if ((scn = elf_getscn(re->elf, 0)) == NULL) {
- warnx("elf_getscn failed: %s", elf_errmsg(-1));
+ if ((scn = elf_getscn(re->elf, 0)) == NULL)
return;
- }
(void) elf_errno();
do {
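
The readelf hunks above share one defensive pattern: validate an untrusted index or length against its container before using it (s->link against re->shnum, seclen against the remaining len, nlen + 4 against seclen). A freestanding sketch of the section-link check; the struct layout here is invented for illustration and much smaller than readelf's real one:

/*
 * Bounds check in the style of the dump_rel()/dump_rela() change
 * above: reject a section link index before indexing with it.
 */
#include <stddef.h>
#include <stdio.h>

struct section {
	const char	*name;
	unsigned int	 link;	/* index of the associated symtab */
};

static const char *
linked_name(const struct section *s, const struct section *tab,
    size_t nsec)
{
	if (s->link >= nsec) {	/* reject out-of-range link values */
		fprintf(stderr, "invalid section link index %u\n",
		    s->link);
		return (NULL);
	}
	return (tab[s->link].name);
}

int
main(void)
{
	struct section tab[2] = {
		{ ".symtab", 0 },
		{ ".rela.text", 5 },	/* deliberately corrupt link */
	};

	return (linked_name(&tab[1], tab, 2) == NULL ? 1 : 0);
}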
diff --git a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index e7bf20e..772adb6 100644
--- a/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/contrib/llvm/tools/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1395,8 +1395,11 @@ ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl &section_headers,
}
// If there are no section headers we are done.
- if (header.e_shnum == 0)
+ if (header.e_shnum == 0) {
+ if (arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS)
+ arch_spec.GetTriple().setOSName(HostInfo::GetOSString().data());
return 0;
+ }
Log *log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_MODULES));
diff --git a/lib/libc/gen/dlopen.3 b/lib/libc/gen/dlopen.3
index 089e631..1963528 100644
--- a/lib/libc/gen/dlopen.3
+++ b/lib/libc/gen/dlopen.3
@@ -32,7 +32,7 @@
.\" @(#) dlopen.3 1.6 90/01/31 SMI
.\" $FreeBSD$
.\"
-.Dd December 21, 2011
+.Dd February 14, 2015
.Dt DLOPEN 3
.Os
.Sh NAME
@@ -236,7 +236,7 @@ as follows, in the given order:
The referencing object itself (or the object from which the call to
.Fn dlsym
is made), if that object was linked using the
-.Fl Wsymbolic
+.Fl Bsymbolic
option to
.Xr ld 1 .
.It
diff --git a/lib/libc/gen/fstab.c b/lib/libc/gen/fstab.c
index 6a77abd..c21ceb3 100644
--- a/lib/libc/gen/fstab.c
+++ b/lib/libc/gen/fstab.c
@@ -181,7 +181,7 @@ fstabscan(void)
if (cp != NULL)
_fs_fstab.fs_passno = atoi(cp);
}
- strcpy(subline, _fs_fstab.fs_mntops);
+ (void)strlcpy(subline, _fs_fstab.fs_mntops, sizeof(subline));
p = subline;
for (typexx = 0, cp = strsep(&p, ","); cp;
cp = strsep(&p, ",")) {
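
The strcpy-to-strlcpy change above is the usual FreeBSD string hardening: strlcpy(3) bounds the copy by the destination size and always NUL-terminates. A minimal sketch of the pattern; the truncation check shown is optional and not part of the fstab.c change itself:

/*
 * strlcpy(3) pattern per the fstab.c hunk above. strlcpy returns
 * the length it tried to create, so a result >= the buffer size
 * signals truncation.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char subline[16];
	const char *mntops = "rw,noatime,nosuid,userquota";

	if (strlcpy(subline, mntops, sizeof(subline)) >= sizeof(subline))
		fprintf(stderr, "warning: mount options truncated\n");
	printf("%s\n", subline);	/* always NUL-terminated */
	return (0);
}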
diff --git a/lib/libc/gen/getgrent.c b/lib/libc/gen/getgrent.c
index caa5ad5..4ba24ae 100644
--- a/lib/libc/gen/getgrent.c
+++ b/lib/libc/gen/getgrent.c
@@ -1450,7 +1450,7 @@ docompat:
pos = ftello(st->fp);
}
fin:
- if (!stayopen && st->fp != NULL) {
+ if (st->fp != NULL && !stayopen) {
fclose(st->fp);
st->fp = NULL;
}
diff --git a/lib/libc/gen/getpwent.c b/lib/libc/gen/getpwent.c
index 7cf7f47..6cd7eaf 100644
--- a/lib/libc/gen/getpwent.c
+++ b/lib/libc/gen/getpwent.c
@@ -815,7 +815,7 @@ files_passwd(void *retval, void *mdata, va_list ap)
size_t bufsize, namesize;
uid_t uid;
uint32_t store;
- int rv, stayopen, *errnop;
+ int rv, stayopen = 0, *errnop;
name = NULL;
uid = (uid_t)-1;
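
The getgrent.c and getpwent.c hunks appear to close the same latent hazard from two sides: stayopen is a local that only some lookup paths assign, so getpwent.c now initializes it and getgrent.c reorders the && so st->fp is tested first and stayopen is read only on paths that actually opened the file. A simplified sketch of the ordering fix, with the structure heavily reduced from the real nsdispatch backends:

/*
 * Simplified shape of the getgrent.c reordering above: stayopen is
 * assigned only on the path that also opens the file, so testing
 * st->fp first keeps the flag from ever being read uninitialized.
 */
#include <stdio.h>

struct state {
	FILE	*fp;
};

static void
lookup(struct state *st, int want_setent)
{
	int stayopen;			/* deliberately uninitialized */

	if (want_setent) {
		st->fp = fopen("/etc/group", "r");
		stayopen = 1;		/* assigned on this path only */
	}

	/* ... lookup work would happen here ... */

	if (st->fp != NULL && !stayopen) {	/* fp tested first */
		fclose(st->fp);
		st->fp = NULL;
	}
}

int
main(void)
{
	struct state st = { NULL };

	lookup(&st, 0);		/* fp stays NULL: stayopen never read */
	return (0);
}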
diff --git a/lib/libc/gen/ulimit.c b/lib/libc/gen/ulimit.c
index e1bc020..2c090c0 100644
--- a/lib/libc/gen/ulimit.c
+++ b/lib/libc/gen/ulimit.c
@@ -40,7 +40,7 @@ ulimit(int cmd, ...)
{
struct rlimit limit;
va_list ap;
- long arg;
+ rlim_t arg;
if (cmd == UL_GETFSIZE) {
if (getrlimit(RLIMIT_FSIZE, &limit) == -1)
@@ -53,14 +53,16 @@ ulimit(int cmd, ...)
va_start(ap, cmd);
arg = va_arg(ap, long);
va_end(ap);
- limit.rlim_max = limit.rlim_cur = (rlim_t)arg * 512;
+ if (arg < 0)
+ arg = LONG_MAX;
+ if (arg > RLIM_INFINITY / 512)
+ arg = RLIM_INFINITY / 512;
+ limit.rlim_max = limit.rlim_cur = arg * 512;
/* The setrlimit() function sets errno to EPERM if needed. */
if (setrlimit(RLIMIT_FSIZE, &limit) == -1)
return (-1);
- if (arg * 512 > LONG_MAX)
- return (LONG_MAX);
- return (arg);
+ return ((long)arg);
} else {
errno = EINVAL;
return (-1);
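
The ulimit(3) fix above widens arg to rlim_t and clamps it before the multiply by 512; the old code multiplied first and compared against LONG_MAX afterwards, i.e. after any overflow had already happened. A sketch of the clamp-before-multiply step, with a plain uint64_t standing in for rlim_t and RLIM_INF for RLIM_INFINITY:

/*
 * Clamp-before-multiply in the style of the ulimit.c change above.
 * The types are stand-ins chosen so the sketch is self-contained.
 */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define	RLIM_INF	UINT64_MAX

static uint64_t
blocks_to_bytes(long request)
{
	uint64_t arg = (uint64_t)request;

	if (request < 0)		/* negative request: use maximum */
		arg = LONG_MAX;
	if (arg > RLIM_INF / 512)	/* clamp BEFORE multiplying */
		arg = RLIM_INF / 512;
	return (arg * 512);		/* can no longer overflow */
}

int
main(void)
{
	printf("%ju\n", (uintmax_t)blocks_to_bytes(LONG_MAX));
	return (0);
}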
diff --git a/lib/libc/include/libc_private.h b/lib/libc/include/libc_private.h
index 71fc8df..e4bf4a6 100644
--- a/lib/libc/include/libc_private.h
+++ b/lib/libc/include/libc_private.h
@@ -271,7 +271,7 @@ void _malloc_first_thread(void);
/*
* Function to clean up streams, called from abort() and exit().
*/
-void (*__cleanup)(void) __hidden;
+extern void (*__cleanup)(void) __hidden;
/*
* Get kern.osreldate to detect ABI revisions. Explicitly
diff --git a/lib/libc/sparc64/sys/Makefile.inc b/lib/libc/sparc64/sys/Makefile.inc
index 031af19..726c0c9 100644
--- a/lib/libc/sparc64/sys/Makefile.inc
+++ b/lib/libc/sparc64/sys/Makefile.inc
@@ -12,7 +12,7 @@ SRCS+= __sparc_sigtramp_setup.c \
CFLAGS+= -I${LIBC_SRCTOP}/sparc64/fpu
-MDASM+= brk.S cerror.S exect.S pipe.S ptrace.S sbrk.S setlogin.S sigaction.S
+MDASM+= brk.S cerror.S exect.S pipe.S ptrace.S sbrk.S setlogin.S sigaction1.S
# Don't generate default code for these syscalls:
NOASM= break.o exit.o getlogin.o openbsd_poll.o sstk.o yield.o
diff --git a/lib/libc/sparc64/sys/sigaction.S b/lib/libc/sparc64/sys/sigaction1.S
index 7d32f97..219a238 100644
--- a/lib/libc/sparc64/sys/sigaction.S
+++ b/lib/libc/sparc64/sys/sigaction1.S
@@ -29,7 +29,8 @@ __FBSDID("$FreeBSD$");
#include "SYS.h"
-_SYSENTRY(sigaction)
+ WEAK_REFERENCE(__sys_sigaction, _sigaction)
+ENTRY(__sys_sigaction)
PIC_PROLOGUE(%o3, %o4)
SET(sigcode_installed, %o4, %o3)
lduw [%o3], %o4
@@ -44,6 +45,6 @@ _SYSENTRY(sigaction)
1: _SYSCALL(sigaction)
retl
nop
-_SYSEND(sigaction)
+END(__sys_sigaction)
.comm sigcode_installed, 4, 4
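
The renamed assembly stub now defines the strong __sys_sigaction symbol and hangs _sigaction off it as a weak reference, matching libc's interposing convention. In C the same arrangement is usually spelled with weak aliases; a hedged sketch using GCC/Clang attribute syntax rather than libc's actual SYS.h macros, with an invented function name:

/*
 * Weak-alias arrangement in the spirit of WEAK_REFERENCE() above:
 * the strong __sys_ name does the work, and the weak public names
 * can be interposed (e.g. by a threading library).
 */
#include <stdio.h>

int
__sys_example(int arg)			/* strong "raw syscall" symbol */
{
	printf("raw implementation: %d\n", arg);
	return (0);
}

int _example(int arg) __attribute__((weak, alias("__sys_example")));
int example(int arg) __attribute__((weak, alias("__sys_example")));

int
main(void)
{
	return (example(42));		/* resolves to __sys_example */
}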
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
index e8ec58e..0edf644 100644
--- a/lib/libc/sys/Makefile.inc
+++ b/lib/libc/sys/Makefile.inc
@@ -65,7 +65,6 @@ INTERPOSED = \
sendmsg \
sendto \
setcontext \
- sigaction \
sigprocmask \
sigsuspend \
sigtimedwait \
@@ -76,6 +75,13 @@ INTERPOSED = \
write \
writev
+.if ${MACHINE_CPUARCH} == "sparc64"
+SRCS+= sigaction.c
+NOASM+= sigaction.o
+.else
+INTERPOSED+= sigaction
+.endif
+
SRCS+= ${INTERPOSED:S/$/.c/}
NOASM+= ${INTERPOSED:S/$/.o/}
PSEUDO+= ${INTERPOSED:C/^.*$/_&.o/}
diff --git a/lib/libcompat/4.3/rexec.c b/lib/libcompat/4.3/rexec.c
index 4e01eb6..92357aa 100644
--- a/lib/libcompat/4.3/rexec.c
+++ b/lib/libcompat/4.3/rexec.c
@@ -332,10 +332,10 @@ retry:
perror(hp->h_name);
return (-1);
}
- if (fd2p == 0) {
- (void) write(s, "", 1);
- port = 0;
- } else {
+ port = 0;
+ if (fd2p == 0)
+ (void) write(s, "", 1);
+ else {
char num[8];
int s2, sin2len;
diff --git a/lib/libelftc/elftc_version.c b/lib/libelftc/elftc_version.c
index a6bf571..5df0587 100644
--- a/lib/libelftc/elftc_version.c
+++ b/lib/libelftc/elftc_version.c
@@ -6,5 +6,5 @@
const char *
elftc_version(void)
{
- return "elftoolchain r3136M";
+ return "elftoolchain r3163M";
}
diff --git a/lib/liblzma/config.h b/lib/liblzma/config.h
index 2a0087b..29b7fdb 100644
--- a/lib/liblzma/config.h
+++ b/lib/liblzma/config.h
@@ -26,6 +26,7 @@
#define HAVE_ENCODER_SPARC 1
#define HAVE_ENCODER_X86 1
#define HAVE_FCNTL_H 1
+#define HAVE_FUTIMENS 1
#define HAVE_FUTIMES 1
#define HAVE_GETOPT_H 1
#define HAVE_GETOPT_LONG 1
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index e6f21ba..8d83f62 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -843,6 +843,7 @@ MAN+= \
udbp.4 \
udp.4 \
udplite.4 \
+ udl.4 \
uep.4 \
ufm.4 \
ufoma.4 \
diff --git a/share/man/man4/udl.4 b/share/man/man4/udl.4
new file mode 100644
index 0000000..2699cc7
--- /dev/null
+++ b/share/man/man4/udl.4
@@ -0,0 +1,67 @@
+.\" $OpenBSD: udl.4,v 1.20 2012/09/18 17:11:41 jasper Exp $
+.\" $FreeBSD$
+.\"
+.\" Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd February 15, 2015
+.Dt UDL 4
+.Os
+.Sh NAME
+.Nm udl
+.Nd DisplayLink DL-120 / DL-160 USB display devices
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device udl"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+udl_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver supports USB display devices based on the DisplayLink DL-120 / DL-160
+graphic chip.
+.Sh HARDWARE
+The following devices should work:
+.Pp
+.Bl -tag -width Ds -offset indent -compact
+.It Century Corp. Japan Plus One LCD-8000U
+.It Century Corp. Japan Plus One LCD-4300U
+.It DisplayLink USB to DVI
+.It ForwardVideo EasyCAP008 USB to DVI
+.It HP USB 2.0 Docking Station (FQ834)
+.It HP USB Graphics Adapter (NL571)
+.It IOGEAR USB 2.0 External DVI (GUC2020)
+.It Koenig CMP-USBVGA10 and CMP-USBVGA11
+.It Lenovo 45K5296 USB to DVI
+.It Lenovo ThinkVision LT1421
+.It Lilliput UM-70
+.It Nanovision MiMo UM-710 and UM-740
+.It Rextron VCUD60 USB to DVI
+.It Samsung LD220
+.It StarTech CONV-USB2DVI
+.It Sunweit USB to DVI
+.It Unitek Y-2240 USB to DVI
+.It VideoHome NBdock1920
+.El
+.Sh SEE ALSO
+.Xr usb 4
diff --git a/share/man/man4/wlan.4 b/share/man/man4/wlan.4
index 16d1f2e..1581778 100644
--- a/share/man/man4/wlan.4
+++ b/share/man/man4/wlan.4
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd February 22, 2012
+.Dd February 16, 2015
.Dt WLAN 4
.Os
.Sh NAME
@@ -182,12 +182,14 @@ may not interoperate.
.Xr mwl 4 ,
.Xr netintro 4 ,
.Xr ral 4 ,
+.Xr rsu 4 ,
.Xr rum 4 ,
.Xr run 4 ,
.Xr uath 4 ,
.Xr upgt 4 ,
.Xr ural 4 ,
.Xr urtw 4 ,
+.Xr urtwn 4 ,
.Xr wi 4 ,
.Xr wlan_acl 4 ,
.Xr wlan_ccmp 4 ,
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 13c3d43..b767691 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -1507,6 +1507,7 @@ cpususpend_handler(void)
vmm_resume_p();
/* Resume MCA and local APIC */
+ lapic_xapic_mode();
mca_resume();
lapic_setup(0);
diff --git a/sys/arm/arm/cpuinfo.c b/sys/arm/arm/cpuinfo.c
index d20c561..b0b8a88 100644
--- a/sys/arm/arm/cpuinfo.c
+++ b/sys/arm/arm/cpuinfo.c
@@ -58,9 +58,13 @@ cpuinfo_init(void)
/* ARMv4T CPU */
cpuinfo.architecture = 1;
cpuinfo.revision = (cpuinfo.midr >> 16) & 0x7F;
- }
+ } else {
+ /* ARM new id scheme */
+ cpuinfo.architecture = (cpuinfo.midr >> 16) & 0x0F;
+ cpuinfo.revision = (cpuinfo.midr >> 20) & 0x0F;
+ }
} else {
- /* must be new id scheme */
+ /* non-ARM -> must be new id scheme */
cpuinfo.architecture = (cpuinfo.midr >> 16) & 0x0F;
cpuinfo.revision = (cpuinfo.midr >> 20) & 0x0F;
}
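The cpuinfo fix matters because ARM-implementer parts that report the "new" CPUID scheme keep the architecture field in MIDR bits [19:16] and the variant (reported here as revision) in bits [23:20]; only genuine ARMv4T parts use the old layout. A hedged, stand-alone sketch of the full MIDR decoding (field layout per the ARM Architecture Reference Manual; the struct and function names are illustrative):

    #include <stdint.h>

    struct midr_fields {
            uint32_t implementer;   /* bits [31:24]; 0x41 ('A') means ARM Ltd. */
            uint32_t variant;       /* bits [23:20]; reported above as revision */
            uint32_t architecture;  /* bits [19:16]; 0xf selects the new scheme */
            uint32_t part;          /* bits [15:4]; primary part number */
            uint32_t rev;           /* bits [3:0]; minor revision */
    };

    struct midr_fields
    midr_decode(uint32_t midr)
    {
            struct midr_fields f;

            f.implementer = (midr >> 24) & 0xff;
            f.variant = (midr >> 20) & 0x0f;
            f.architecture = (midr >> 16) & 0x0f;
            f.part = (midr >> 4) & 0xfff;
            f.rev = midr & 0x0f;
            return (f);
    }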
diff --git a/sys/arm/arm/db_trace.c b/sys/arm/arm/db_trace.c
index cbeee1f..1719ec5 100644
--- a/sys/arm/arm/db_trace.c
+++ b/sys/arm/arm/db_trace.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/kdb.h>
#include <sys/stack.h>
+
#include <machine/armreg.h>
#include <machine/asm.h>
#include <machine/cpufunc.h>
@@ -45,322 +46,15 @@ __FBSDID("$FreeBSD$");
#include <machine/pcb.h>
#include <machine/stack.h>
#include <machine/vmparam.h>
+
#include <ddb/ddb.h>
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
-/*
- * Definitions for the instruction interpreter.
- *
- * The ARM EABI specifies how to perform the frame unwinding in the
- * Exception Handling ABI for the ARM Architecture document. To perform
- * the unwind we need to know the initial frame pointer, stack pointer,
- * link register and program counter. We then find the entry within the
- * index table that points to the function the program counter is within.
- * This gives us either a list of three instructions to process, a 31-bit
- * relative offset to a table of instructions, or a value telling us
- * we can't unwind any further.
- *
- * When we have the instructions to process we need to decode them
- * following table 4 in section 9.3. This describes a collection of bit
- * patterns to encode the steps to take to update the stack pointer and
- * link register to the correct values at the start of the function.
- */
-
-/* A special case when we are unable to unwind past this function */
-#define EXIDX_CANTUNWIND 1
-
-/* The register names */
-#define FP 11
-#define SP 13
-#define LR 14
-#define PC 15
-
-/*
- * These are set in the linker script. Their addresses will be
- * either the start or end of the exception table or index.
- */
-extern int extab_start, extab_end, exidx_start, exidx_end;
-
-/*
- * Entry types.
- * These are the only entry types that have been seen in the kernel.
- */
-#define ENTRY_MASK 0xff000000
-#define ENTRY_ARM_SU16 0x80000000
-#define ENTRY_ARM_LU16 0x81000000
-
-/* Instruction masks. */
-#define INSN_VSP_MASK 0xc0
-#define INSN_VSP_SIZE_MASK 0x3f
-#define INSN_STD_MASK 0xf0
-#define INSN_STD_DATA_MASK 0x0f
-#define INSN_POP_TYPE_MASK 0x08
-#define INSN_POP_COUNT_MASK 0x07
-#define INSN_VSP_LARGE_INC_MASK 0xff
-
-/* Instruction definitions */
-#define INSN_VSP_INC 0x00
-#define INSN_VSP_DEC 0x40
-#define INSN_POP_MASKED 0x80
-#define INSN_VSP_REG 0x90
-#define INSN_POP_COUNT 0xa0
-#define INSN_FINISH 0xb0
-#define INSN_POP_REGS 0xb1
-#define INSN_VSP_LARGE_INC 0xb2
-
-/* An item in the exception index table */
-struct unwind_idx {
- uint32_t offset;
- uint32_t insn;
-};
-
-/* The state of the unwind process */
-struct unwind_state {
- uint32_t registers[16];
- uint32_t start_pc;
- uint32_t *insn;
- u_int entries;
- u_int byte;
- uint16_t update_mask;
-};
-
-/* Expand a 31-bit signed value to a 32-bit signed value */
-static __inline int32_t
-db_expand_prel31(uint32_t prel31)
-{
-
- return ((int32_t)(prel31 & 0x7fffffffu) << 1) / 2;
-}
-
-/*
- * Perform a binary search of the index table to find the function
- * with the largest address that doesn't exceed addr.
- */
-static struct unwind_idx *
-db_find_index(uint32_t addr)
-{
- unsigned int min, mid, max;
- struct unwind_idx *start;
- struct unwind_idx *item;
- int32_t prel31_addr;
- uint32_t func_addr;
-
- start = (struct unwind_idx *)&exidx_start;
-
- min = 0;
- max = (&exidx_end - &exidx_start) / 2;
-
- while (min != max) {
- mid = min + (max - min + 1) / 2;
-
- item = &start[mid];
-
- prel31_addr = db_expand_prel31(item->offset);
- func_addr = (uint32_t)&item->offset + prel31_addr;
-
- if (func_addr <= addr) {
- min = mid;
- } else {
- max = mid - 1;
- }
- }
-
- return &start[min];
-}
-
-/* Reads the next byte from the instruction list */
-static uint8_t
-db_unwind_exec_read_byte(struct unwind_state *state)
-{
- uint8_t insn;
-
- /* Read the unwind instruction */
- insn = (*state->insn) >> (state->byte * 8);
-
- /* Update the location of the next instruction */
- if (state->byte == 0) {
- state->byte = 3;
- state->insn++;
- state->entries--;
- } else
- state->byte--;
-
- return insn;
-}
-
-/* Executes the next instruction on the list */
-static int
-db_unwind_exec_insn(struct unwind_state *state)
-{
- unsigned int insn;
- uint32_t *vsp = (uint32_t *)state->registers[SP];
- int update_vsp = 0;
-
- /* This should never happen */
- if (state->entries == 0)
- return 1;
-
- /* Read the next instruction */
- insn = db_unwind_exec_read_byte(state);
-
- if ((insn & INSN_VSP_MASK) == INSN_VSP_INC) {
- state->registers[SP] += ((insn & INSN_VSP_SIZE_MASK) << 2) + 4;
-
- } else if ((insn & INSN_VSP_MASK) == INSN_VSP_DEC) {
- state->registers[SP] -= ((insn & INSN_VSP_SIZE_MASK) << 2) + 4;
-
- } else if ((insn & INSN_STD_MASK) == INSN_POP_MASKED) {
- unsigned int mask, reg;
-
- /* Load the mask */
- mask = db_unwind_exec_read_byte(state);
- mask |= (insn & INSN_STD_DATA_MASK) << 8;
-
- /* We have a refuse-to-unwind instruction */
- if (mask == 0)
- return 1;
-
- /* Update SP */
- update_vsp = 1;
-
- /* Load the registers */
- for (reg = 4; mask && reg < 16; mask >>= 1, reg++) {
- if (mask & 1) {
- state->registers[reg] = *vsp++;
- state->update_mask |= 1 << reg;
-
- /* If we have updated SP, keep its value */
- if (reg == SP)
- update_vsp = 0;
- }
- }
-
- } else if ((insn & INSN_STD_MASK) == INSN_VSP_REG &&
- ((insn & INSN_STD_DATA_MASK) != 13) &&
- ((insn & INSN_STD_DATA_MASK) != 15)) {
- /* sp = register */
- state->registers[SP] =
- state->registers[insn & INSN_STD_DATA_MASK];
-
- } else if ((insn & INSN_STD_MASK) == INSN_POP_COUNT) {
- unsigned int count, reg;
-
- /* Read how many registers to load */
- count = insn & INSN_POP_COUNT_MASK;
-
- /* Update sp */
- update_vsp = 1;
-
- /* Pop the registers */
- for (reg = 4; reg <= 4 + count; reg++) {
- state->registers[reg] = *vsp++;
- state->update_mask |= 1 << reg;
- }
-
- /* Check if we are in the pop r14 version */
- if ((insn & INSN_POP_TYPE_MASK) != 0) {
- state->registers[14] = *vsp++;
- }
-
- } else if (insn == INSN_FINISH) {
- /* Stop processing */
- state->entries = 0;
-
- } else if (insn == INSN_POP_REGS) {
- unsigned int mask, reg;
-
- mask = db_unwind_exec_read_byte(state);
- if (mask == 0 || (mask & 0xf0) != 0)
- return 1;
-
- /* Update SP */
- update_vsp = 1;
-
- /* Load the registers */
- for (reg = 0; mask && reg < 4; mask >>= 1, reg++) {
- if (mask & 1) {
- state->registers[reg] = *vsp++;
- state->update_mask |= 1 << reg;
- }
- }
-
- } else if ((insn & INSN_VSP_LARGE_INC_MASK) == INSN_VSP_LARGE_INC) {
- unsigned int uleb128;
-
- /* Read the increment value */
- uleb128 = db_unwind_exec_read_byte(state);
-
- state->registers[SP] += 0x204 + (uleb128 << 2);
-
- } else {
- /* We hit a new instruction that needs to be implemented */
- db_printf("Unhandled instruction %.2x\n", insn);
- return 1;
- }
-
- if (update_vsp) {
- state->registers[SP] = (uint32_t)vsp;
- }
-
-#if 0
- db_printf("fp = %08x, sp = %08x, lr = %08x, pc = %08x\n",
- state->registers[FP], state->registers[SP], state->registers[LR],
- state->registers[PC]);
-#endif
-
- return 0;
-}
-
-/* Performs the unwind of a function */
-static int
-db_unwind_tab(struct unwind_state *state)
-{
- uint32_t entry;
-
- /* Set PC to a known value */
- state->registers[PC] = 0;
-
- /* Read the personality */
- entry = *state->insn & ENTRY_MASK;
-
- if (entry == ENTRY_ARM_SU16) {
- state->byte = 2;
- state->entries = 1;
- } else if (entry == ENTRY_ARM_LU16) {
- state->byte = 1;
- state->entries = ((*state->insn >> 16) & 0xFF) + 1;
- } else {
- db_printf("Unknown entry: %x\n", entry);
- return 1;
- }
-
- while (state->entries > 0) {
- if (db_unwind_exec_insn(state) != 0)
- return 1;
- }
-
- /*
- * If the program counter was not updated, load it from the link register.
- */
- if (state->registers[PC] == 0) {
- state->registers[PC] = state->registers[LR];
-
- /*
- * If the program counter changed, flag it in the update mask.
- */
- if (state->start_pc != state->registers[PC])
- state->update_mask |= 1 << PC;
- }
-
- return 0;
-}
-
static void
db_stack_trace_cmd(struct unwind_state *state)
{
- struct unwind_idx *index;
const char *name;
db_expr_t value;
db_expr_t offset;
@@ -372,28 +66,7 @@ db_stack_trace_cmd(struct unwind_state *state)
finished = false;
while (!finished) {
- /* Reset the mask of updated registers */
- state->update_mask = 0;
-
- /* The pc value is correct and will be overwritten; save it */
- state->start_pc = state->registers[PC];
-
- /* Find the item to run */
- index = db_find_index(state->start_pc);
-
- if (index->insn != EXIDX_CANTUNWIND) {
- if (index->insn & (1U << 31)) {
- /* The data is within the instruction */
- state->insn = &index->insn;
- } else {
- /* A prel31 offset to the unwind table */
- state->insn = (uint32_t *)
- ((uintptr_t)&index->insn +
- db_expand_prel31(index->insn));
- }
- /* Run the unwind function */
- finished = db_unwind_tab(state);
- }
+ finished = unwind_stack_one(state);
/* Print the frame details */
sym = db_search_symbol(state->start_pc, DB_STGY_ANY, &offset);
@@ -432,6 +105,9 @@ db_stack_trace_cmd(struct unwind_state *state)
}
db_printf("\n");
+ if (finished)
+ break;
+
/*
* Stop if directed to do so, or if we've unwound back to the
* kernel entry point, or if the unwind function didn't change
@@ -441,9 +117,7 @@ db_stack_trace_cmd(struct unwind_state *state)
* the last frame printed before you see the unwind failure
* message (maybe it needs a STOP_UNWINDING).
*/
- if (index->insn == EXIDX_CANTUNWIND) {
- finished = true;
- } else if (state->registers[PC] < VM_MIN_KERNEL_ADDRESS) {
+ if (state->registers[PC] < VM_MIN_KERNEL_ADDRESS) {
db_printf("Unable to unwind into user mode\n");
finished = true;
} else if (state->update_mask == 0) {
diff --git a/sys/arm/arm/unwind.c b/sys/arm/arm/unwind.c
new file mode 100644
index 0000000..29f8200
--- /dev/null
+++ b/sys/arm/arm/unwind.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright 2013-2014 Andrew Turner.
+ * Copyright 2013-2014 Ian Lepore.
+ * Copyright 2013-2014 Rui Paulo.
+ * Copyright 2013 Eitan Adler.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/stack.h>
+
+/*
+ * Definitions for the instruction interpreter.
+ *
+ * The ARM EABI specifies how to perform the frame unwinding in the
+ * Exception Handling ABI for the ARM Architecture document. To perform
+ * the unwind we need to know the initial frame pointer, stack pointer,
+ * link register and program counter. We then find the entry within the
+ * index table that points to the function the program counter is within.
+ * This gives us either a list of three instructions to process, a 31-bit
+ * relative offset to a table of instructions, or a value telling us
+ * we can't unwind any further.
+ *
+ * When we have the instructions to process we need to decode them
+ * following table 4 in section 9.3. This describes a collection of bit
+ * patterns to encode the steps to take to update the stack pointer and
+ * link register to the correct values at the start of the function.
+ */
+
+/* A special case when we are unable to unwind past this function */
+#define EXIDX_CANTUNWIND 1
+
+/*
+ * These are set in the linker script. Their addresses will be
+ * either the start or end of the exception table or index.
+ */
+extern int extab_start, extab_end, exidx_start, exidx_end;
+
+/*
+ * Entry types.
+ * These are the only entry types that have been seen in the kernel.
+ */
+#define ENTRY_MASK 0xff000000
+#define ENTRY_ARM_SU16 0x80000000
+#define ENTRY_ARM_LU16 0x81000000
+
+/* Instruction masks. */
+#define INSN_VSP_MASK 0xc0
+#define INSN_VSP_SIZE_MASK 0x3f
+#define INSN_STD_MASK 0xf0
+#define INSN_STD_DATA_MASK 0x0f
+#define INSN_POP_TYPE_MASK 0x08
+#define INSN_POP_COUNT_MASK 0x07
+#define INSN_VSP_LARGE_INC_MASK 0xff
+
+/* Instruction definitions */
+#define INSN_VSP_INC 0x00
+#define INSN_VSP_DEC 0x40
+#define INSN_POP_MASKED 0x80
+#define INSN_VSP_REG 0x90
+#define INSN_POP_COUNT 0xa0
+#define INSN_FINISH 0xb0
+#define INSN_POP_REGS 0xb1
+#define INSN_VSP_LARGE_INC 0xb2
+
+/* An item in the exception index table */
+struct unwind_idx {
+ uint32_t offset;
+ uint32_t insn;
+};
+
+/* Expand a 31-bit signed value to a 32-bit signed value */
+static __inline int32_t
+expand_prel31(uint32_t prel31)
+{
+
+ return ((int32_t)(prel31 & 0x7fffffffu) << 1) / 2;
+}
+
+/*
+ * Perform a binary search of the index table to find the function
+ * with the largest address that doesn't exceed addr.
+ */
+static struct unwind_idx *
+find_index(uint32_t addr)
+{
+ unsigned int min, mid, max;
+ struct unwind_idx *start;
+ struct unwind_idx *item;
+ int32_t prel31_addr;
+ uint32_t func_addr;
+
+ start = (struct unwind_idx *)&exidx_start;
+
+ min = 0;
+ max = (&exidx_end - &exidx_start) / 2;
+
+ while (min != max) {
+ mid = min + (max - min + 1) / 2;
+
+ item = &start[mid];
+
+ prel31_addr = expand_prel31(item->offset);
+ func_addr = (uint32_t)&item->offset + prel31_addr;
+
+ if (func_addr <= addr) {
+ min = mid;
+ } else {
+ max = mid - 1;
+ }
+ }
+
+ return &start[min];
+}
+
+/* Reads the next byte from the instruction list */
+static uint8_t
+unwind_exec_read_byte(struct unwind_state *state)
+{
+ uint8_t insn;
+
+ /* Read the unwind instruction */
+ insn = (*state->insn) >> (state->byte * 8);
+
+ /* Update the location of the next instruction */
+ if (state->byte == 0) {
+ state->byte = 3;
+ state->insn++;
+ state->entries--;
+ } else
+ state->byte--;
+
+ return insn;
+}
+
+/* Executes the next instruction on the list */
+static int
+unwind_exec_insn(struct unwind_state *state)
+{
+ unsigned int insn;
+ uint32_t *vsp = (uint32_t *)state->registers[SP];
+ int update_vsp = 0;
+
+ /* This should never happen */
+ if (state->entries == 0)
+ return 1;
+
+ /* Read the next instruction */
+ insn = unwind_exec_read_byte(state);
+
+ if ((insn & INSN_VSP_MASK) == INSN_VSP_INC) {
+ state->registers[SP] += ((insn & INSN_VSP_SIZE_MASK) << 2) + 4;
+
+ } else if ((insn & INSN_VSP_MASK) == INSN_VSP_DEC) {
+ state->registers[SP] -= ((insn & INSN_VSP_SIZE_MASK) << 2) + 4;
+
+ } else if ((insn & INSN_STD_MASK) == INSN_POP_MASKED) {
+ unsigned int mask, reg;
+
+ /* Load the mask */
+ mask = unwind_exec_read_byte(state);
+ mask |= (insn & INSN_STD_DATA_MASK) << 8;
+
+ /* We have a refuse-to-unwind instruction */
+ if (mask == 0)
+ return 1;
+
+ /* Update SP */
+ update_vsp = 1;
+
+ /* Load the registers */
+ for (reg = 4; mask && reg < 16; mask >>= 1, reg++) {
+ if (mask & 1) {
+ state->registers[reg] = *vsp++;
+ state->update_mask |= 1 << reg;
+
+ /* If we have updated SP, keep its value */
+ if (reg == SP)
+ update_vsp = 0;
+ }
+ }
+
+ } else if ((insn & INSN_STD_MASK) == INSN_VSP_REG &&
+ ((insn & INSN_STD_DATA_MASK) != 13) &&
+ ((insn & INSN_STD_DATA_MASK) != 15)) {
+ /* sp = register */
+ state->registers[SP] =
+ state->registers[insn & INSN_STD_DATA_MASK];
+
+ } else if ((insn & INSN_STD_MASK) == INSN_POP_COUNT) {
+ unsigned int count, reg;
+
+ /* Read how many registers to load */
+ count = insn & INSN_POP_COUNT_MASK;
+
+ /* Update sp */
+ update_vsp = 1;
+
+ /* Pop the registers */
+ for (reg = 4; reg <= 4 + count; reg++) {
+ state->registers[reg] = *vsp++;
+ state->update_mask |= 1 << reg;
+ }
+
+ /* Check if we are in the pop r14 version */
+ if ((insn & INSN_POP_TYPE_MASK) != 0) {
+ state->registers[14] = *vsp++;
+ }
+
+ } else if (insn == INSN_FINISH) {
+ /* Stop processing */
+ state->entries = 0;
+
+ } else if (insn == INSN_POP_REGS) {
+ unsigned int mask, reg;
+
+ mask = unwind_exec_read_byte(state);
+ if (mask == 0 || (mask & 0xf0) != 0)
+ return 1;
+
+ /* Update SP */
+ update_vsp = 1;
+
+ /* Load the registers */
+ for (reg = 0; mask && reg < 4; mask >>= 1, reg++) {
+ if (mask & 1) {
+ state->registers[reg] = *vsp++;
+ state->update_mask |= 1 << reg;
+ }
+ }
+
+ } else if ((insn & INSN_VSP_LARGE_INC_MASK) == INSN_VSP_LARGE_INC) {
+ unsigned int uleb128;
+
+ /* Read the increment value */
+ uleb128 = unwind_exec_read_byte(state);
+
+ state->registers[SP] += 0x204 + (uleb128 << 2);
+
+ } else {
+ /* We hit a new instruction that needs to be implemented */
+#if 0
+ db_printf("Unhandled instruction %.2x\n", insn);
+#endif
+ return 1;
+ }
+
+ if (update_vsp) {
+ state->registers[SP] = (uint32_t)vsp;
+ }
+
+#if 0
+ db_printf("fp = %08x, sp = %08x, lr = %08x, pc = %08x\n",
+ state->registers[FP], state->registers[SP], state->registers[LR],
+ state->registers[PC]);
+#endif
+
+ return 0;
+}
+
+/* Performs the unwind of a function */
+static int
+unwind_tab(struct unwind_state *state)
+{
+ uint32_t entry;
+
+ /* Set PC to a known value */
+ state->registers[PC] = 0;
+
+ /* Read the personality */
+ entry = *state->insn & ENTRY_MASK;
+
+ if (entry == ENTRY_ARM_SU16) {
+ state->byte = 2;
+ state->entries = 1;
+ } else if (entry == ENTRY_ARM_LU16) {
+ state->byte = 1;
+ state->entries = ((*state->insn >> 16) & 0xFF) + 1;
+ } else {
+#if 0
+ db_printf("Unknown entry: %x\n", entry);
+#endif
+ return 1;
+ }
+
+ while (state->entries > 0) {
+ if (unwind_exec_insn(state) != 0)
+ return 1;
+ }
+
+ /*
+ * If the program counter was not updated, load it from the link register.
+ */
+ if (state->registers[PC] == 0) {
+ state->registers[PC] = state->registers[LR];
+
+ /*
+ * If the program counter changed, flag it in the update mask.
+ */
+ if (state->start_pc != state->registers[PC])
+ state->update_mask |= 1 << PC;
+ }
+
+ return 0;
+}
+
+int
+unwind_stack_one(struct unwind_state *state)
+{
+ struct unwind_idx *index;
+ int finished;
+
+ /* Reset the mask of updated registers */
+ state->update_mask = 0;
+
+ /* The pc value is correct and will be overwritten; save it */
+ state->start_pc = state->registers[PC];
+
+ /* Find the item to run */
+ index = find_index(state->start_pc);
+
+ finished = 0;
+ if (index->insn != EXIDX_CANTUNWIND) {
+ if (index->insn & (1U << 31)) {
+ /* The data is within the instruction */
+ state->insn = &index->insn;
+ } else {
+ /* A prel31 offset to the unwind table */
+ state->insn = (uint32_t *)
+ ((uintptr_t)&index->insn +
+ expand_prel31(index->insn));
+ }
+ /* Run the unwind function */
+ finished = unwind_tab(state);
+ }
+
+ /* This is the top of the stack, finish */
+ if (index->insn == EXIDX_CANTUNWIND)
+ finished = 1;
+
+ return (finished);
+}
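One subtlety worth calling out in the file above: expand_prel31() sign-extends a 31-bit relative offset by shifting it left into the sign bit and then dividing by two, which shifts back arithmetically. A small stand-alone check of that behaviour (the sample offsets are arbitrary):

    #include <inttypes.h>
    #include <stdio.h>

    static int32_t
    expand_prel31(uint32_t prel31)
    {
            /* The left shift parks bit 30 in the sign bit; since the low bit
             * is now zero, signed division by two behaves exactly like an
             * arithmetic right shift, replicating the sign. */
            return ((int32_t)(prel31 & 0x7fffffffu) << 1) / 2;
    }

    int
    main(void)
    {
            printf("%" PRId32 "\n", expand_prel31(0x00000010u));    /* 16 */
            printf("%" PRId32 "\n", expand_prel31(0x7ffffff0u));    /* -16 */
            return (0);
    }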
diff --git a/sys/arm/broadcom/bcm2835/bcm2835_bsc.c b/sys/arm/broadcom/bcm2835/bcm2835_bsc.c
index 3e1afcd..debbf82 100644
--- a/sys/arm/broadcom/bcm2835/bcm2835_bsc.c
+++ b/sys/arm/broadcom/bcm2835/bcm2835_bsc.c
@@ -247,7 +247,7 @@ bcm_bsc_attach(device_t dev)
/* Check the unit we are attaching by its base address. */
start = rman_get_start(sc->sc_mem_res);
for (i = 0; i < nitems(bcm_bsc_pins); i++) {
- if (bcm_bsc_pins[i].start == start)
+ if (bcm_bsc_pins[i].start == (start & BCM_BSC_BASE_MASK))
break;
}
if (i == nitems(bcm_bsc_pins)) {
diff --git a/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h b/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h
index 5068356..6b31dc3 100644
--- a/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h
+++ b/sys/arm/broadcom/bcm2835/bcm2835_bscvar.h
@@ -35,9 +35,10 @@ struct {
uint32_t scl;
unsigned long start;
} bcm_bsc_pins[] = {
- { 0, 1, 0x20205000 }, /* BSC0 GPIO pins and base address. */
- { 2, 3, 0x20804000 } /* BSC1 GPIO pins and base address. */
+ { 0, 1, 0x205000 }, /* BSC0 GPIO pins and base address. */
+ { 2, 3, 0x804000 } /* BSC1 GPIO pins and base address. */
};
+#define BCM_BSC_BASE_MASK 0x00ffffff
struct bcm_bsc_softc {
device_t sc_dev;
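The net effect of the two BSC hunks: the pin table now stores 24-bit bus offsets instead of absolute physical addresses, and bcm_bsc_attach() masks the memory resource's start with BCM_BSC_BASE_MASK before comparing, so the lookup works whether the SoC's peripheral window is mapped at 0x20000000 (BCM2835) or elsewhere. A tiny sketch of the comparison, with the constant copied from the hunk above:

    #define BCM_BSC_BASE_MASK       0x00ffffff

    /* 0x20205000 and, say, 0x3f205000 both reduce to offset 0x205000. */
    int
    bsc_unit_matches(unsigned long res_start, unsigned long table_start)
    {
            return ((res_start & BCM_BSC_BASE_MASK) == table_start);
    }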
diff --git a/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h b/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h
index a2e212e..954ded3 100644
--- a/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h
+++ b/sys/arm/broadcom/bcm2835/bcm2835_mbox_prop.h
@@ -52,6 +52,53 @@ struct bcm2835_mbox_tag_hdr {
uint32_t val_len;
};
+#define BCM2835_MBOX_POWER_ID_EMMC 0x00000000
+#define BCM2835_MBOX_POWER_ID_UART0 0x00000001
+#define BCM2835_MBOX_POWER_ID_UART1 0x00000002
+#define BCM2835_MBOX_POWER_ID_USB_HCD 0x00000003
+#define BCM2835_MBOX_POWER_ID_I2C0 0x00000004
+#define BCM2835_MBOX_POWER_ID_I2C1 0x00000005
+#define BCM2835_MBOX_POWER_ID_I2C2 0x00000006
+#define BCM2835_MBOX_POWER_ID_SPI 0x00000007
+#define BCM2835_MBOX_POWER_ID_CCP2TX 0x00000008
+
+#define BCM2835_MBOX_POWER_ON (1 << 0)
+#define BCM2835_MBOX_POWER_WAIT (1 << 1)
+
+#define BCM2835_MBOX_TAG_GET_POWER_STATE 0x00020001
+#define BCM2835_MBOX_TAG_SET_POWER_STATE 0x00028001
+
+struct msg_get_power_state {
+ struct bcm2835_mbox_hdr hdr;
+ struct bcm2835_mbox_tag_hdr tag_hdr;
+ union {
+ struct {
+ uint32_t device_id;
+ } req;
+ struct {
+ uint32_t device_id;
+ uint32_t state;
+ } resp;
+ } body;
+ uint32_t end_tag;
+};
+
+struct msg_set_power_state {
+ struct bcm2835_mbox_hdr hdr;
+ struct bcm2835_mbox_tag_hdr tag_hdr;
+ union {
+ struct {
+ uint32_t device_id;
+ uint32_t state;
+ } req;
+ struct {
+ uint32_t device_id;
+ uint32_t state;
+ } resp;
+ } body;
+ uint32_t end_tag;
+};
+
#define BCM2835_MBOX_CLOCK_ID_EMMC 0x00000001
#define BCM2835_MBOX_CLOCK_ID_UART 0x00000002
#define BCM2835_MBOX_CLOCK_ID_ARM 0x00000003
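The new power-state tags follow the usual VideoCore property-channel shape: an overall buffer header, a tag header carrying the tag ID and value-buffer sizes, a request/response union, and a terminating zero tag. A hedged sketch of filling in a power-on-and-wait request for the USB host controller; it assumes the struct layouts from this header, and the hdr/tag_hdr field names are taken from the surrounding file rather than this hunk. How the message is then posted to the mailbox is outside the scope of the diff:

    /* Assumes the bcm2835_mbox_prop.h definitions above; sketch only. */
    static void
    usb_power_on_msg(struct msg_set_power_state *msg)
    {
            memset(msg, 0, sizeof(*msg));
            msg->hdr.buf_size = sizeof(*msg);
            msg->tag_hdr.tag = BCM2835_MBOX_TAG_SET_POWER_STATE;
            msg->tag_hdr.val_buf_size = sizeof(msg->body);
            msg->tag_hdr.val_len = sizeof(msg->body.req);
            msg->body.req.device_id = BCM2835_MBOX_POWER_ID_USB_HCD;
            msg->body.req.state = BCM2835_MBOX_POWER_ON | BCM2835_MBOX_POWER_WAIT;
            msg->end_tag = 0;
    }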
diff --git a/sys/arm/include/stack.h b/sys/arm/include/stack.h
index 0a5ebfe..c76ad66 100644
--- a/sys/arm/include/stack.h
+++ b/sys/arm/include/stack.h
@@ -39,4 +39,22 @@
#define FR_RSP (-2)
#define FR_RFP (-3)
+/* The state of the unwind process */
+struct unwind_state {
+ uint32_t registers[16];
+ uint32_t start_pc;
+ uint32_t *insn;
+ u_int entries;
+ u_int byte;
+ uint16_t update_mask;
+};
+
+/* The register names */
+#define FP 11
+#define SP 13
+#define LR 14
+#define PC 15
+
+int unwind_stack_one(struct unwind_state *);
+
#endif /* !_MACHINE_STACK_H_ */
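With the state structure and register indexes exported here, a caller seeds the register array from the frame of interest and then calls unwind_stack_one() once per frame until it reports completion, which is exactly the loop the reworked db_stack_trace_cmd() runs. A minimal sketch of that pattern (kernel context assumed; the seed values would come from a trapframe or the current PCB):

    #include <sys/param.h>
    #include <machine/stack.h>

    static void
    walk_frames(uint32_t fp, uint32_t sp, uint32_t lr, uint32_t pc)
    {
            struct unwind_state state = {{ 0 }};
            int finished;

            state.registers[FP] = fp;
            state.registers[SP] = sp;
            state.registers[LR] = lr;
            state.registers[PC] = pc;

            do {
                    /* Unwind one frame; PC then names the caller. */
                    finished = unwind_stack_one(&state);
                    /* ... consume state.start_pc / state.registers[PC] ... */
            } while (!finished && state.update_mask != 0);
    }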
diff --git a/sys/boot/fdt/dts/arm/bcm2835.dtsi b/sys/boot/fdt/dts/arm/bcm2835.dtsi
index bb30248..6ff1944 100644
--- a/sys/boot/fdt/dts/arm/bcm2835.dtsi
+++ b/sys/boot/fdt/dts/arm/bcm2835.dtsi
@@ -397,6 +397,8 @@
};
bsc0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
compatible = "broadcom,bcm2835-bsc",
"broadcom,bcm2708-bsc";
reg = <0x205000 0x20>;
@@ -405,6 +407,8 @@
};
bsc1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
compatible = "broadcom,bcm2835-bsc",
"broadcom,bcm2708-bsc";
reg = <0x804000 0x20>;
diff --git a/sys/conf/files b/sys/conf/files
index e10fd5e..c08e40c 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1141,7 +1141,7 @@ dev/cxgb/sys/uipc_mvec.c optional cxgb pci \
dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgbe/t4_mp_ring.c optional cxgbe pci \
- compile-with "${NORMAL_C} -I$S/dev/cxgbe ${GCC_MS_EXTENSIONS}"
+ compile-with "${NORMAL_C} -I$S/dev/cxgbe"
dev/cxgbe/t4_main.c optional cxgbe pci \
compile-with "${NORMAL_C} -I$S/dev/cxgbe"
dev/cxgbe/t4_netmap.c optional cxgbe pci \
@@ -2563,8 +2563,16 @@ dev/usb/template/usb_template_mtp.c optional usb_template
dev/usb/template/usb_template_phone.c optional usb_template
dev/usb/template/usb_template_serialnet.c optional usb_template
#
+# USB video drivers
+#
+dev/usb/video/udl.c optional udl
+#
# USB END
#
+dev/videomode/videomode.c optional videomode
+dev/videomode/edid.c optional videomode
+dev/videomode/pickmode.c optional videomode
+dev/videomode/vesagtf.c optional videomode
dev/utopia/idtphy.c optional utopia
dev/utopia/suni.c optional utopia
dev/utopia/utopia.c optional utopia
@@ -3616,19 +3624,16 @@ ofed/drivers/infiniband/core/fmr_pool.c optional ofed \
ofed/drivers/infiniband/core/iwcm.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
-ofed/drivers/infiniband/core/local_sa.c optional ofed \
- no-depend \
- compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/mad_rmpp.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/multicast.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
-ofed/drivers/infiniband/core/notice.c optional ofed \
+ofed/drivers/infiniband/core/packer.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
-ofed/drivers/infiniband/core/packer.c optional ofed \
+ofed/drivers/infiniband/core/peer_mem.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/sa_query.c optional ofed \
@@ -3733,6 +3738,9 @@ ofed/drivers/infiniband/hw/mlx4/mad.c optional mlx4ib \
ofed/drivers/infiniband/hw/mlx4/main.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
+ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c optional mlx4ib \
+ no-depend obj-prefix "mlx4ib_" \
+ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/mr.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
index 8ed4a7e..7fbccd1 100644
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -57,6 +57,7 @@ arm/arm/trap.c optional !armv6
arm/arm/trap-v6.c optional armv6
arm/arm/uio_machdep.c standard
arm/arm/undefined.c standard
+arm/arm/unwind.c optional ddb
arm/arm/vm_machdep.c standard
arm/arm/vfp.c standard
board_id.h standard \
diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk
index d97f4d2..65babc5 100644
--- a/sys/conf/kern.mk
+++ b/sys/conf/kern.mk
@@ -39,7 +39,6 @@ CLANG_NO_IAS34= -no-integrated-as
.endif
.if ${COMPILER_TYPE} == "gcc"
-GCC_MS_EXTENSIONS= -fms-extensions
.if ${COMPILER_VERSION} >= 40300
# Catch-all for all the things that are in our tree, but for which we're
# not yet ready for this compiler. Note: we likely only really "support"
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk
index 232d88b..ede4ae8 100644
--- a/sys/conf/kern.pre.mk
+++ b/sys/conf/kern.pre.mk
@@ -97,7 +97,7 @@ CFLAGS_PARAM_LARGE_FUNCTION_GROWTH?=1000
.if ${MACHINE_CPUARCH} == "mips"
CFLAGS_ARCH_PARAMS?=--param max-inline-insns-single=1000
.endif
-CFLAGS.gcc+= -fno-common -finline-limit=${INLINE_LIMIT}
+CFLAGS.gcc+= -fno-common -fms-extensions -finline-limit=${INLINE_LIMIT}
CFLAGS.gcc+= --param inline-unit-growth=${CFLAGS_PARAM_INLINE_UNIT_GROWTH}
CFLAGS.gcc+= --param large-function-growth=${CFLAGS_PARAM_LARGE_FUNCTION_GROWTH}
.if defined(CFLAGS_ARCH_PARAMS)
@@ -162,7 +162,7 @@ NORMAL_LINT= ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC}
# Infiniband C flags. Correct include paths and omit errors that linux
# does not honor.
OFEDINCLUDES= -I$S/ofed/include/
-OFEDNOERR= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
+OFEDNOERR= -Wno-cast-qual -Wno-pointer-arith
OFEDCFLAGS= ${CFLAGS:N-I*} ${OFEDINCLUDES} ${CFLAGS:M-I*} ${OFEDNOERR}
OFED_C_NOIMP= ${CC} -c -o ${.TARGET} ${OFEDCFLAGS} ${WERROR} ${PROF}
OFED_C= ${OFED_C_NOIMP} ${.IMPSRC}
diff --git a/sys/conf/kmod.mk b/sys/conf/kmod.mk
index 8e00aa4..91d4f49 100644
--- a/sys/conf/kmod.mk
+++ b/sys/conf/kmod.mk
@@ -105,6 +105,7 @@ CFLAGS+= -I. -I${SYSDIR}
CFLAGS+= -I${SYSDIR}/contrib/altq
CFLAGS.gcc+= -finline-limit=${INLINE_LIMIT}
+CFLAGS.gcc+= -fms-extensions
CFLAGS.gcc+= --param inline-unit-growth=100
CFLAGS.gcc+= --param large-function-growth=1000
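All three makefile hunks are one change: the per-file GCC_MS_EXTENSIONS knob disappears because -fms-extensions is now passed to gcc for every kernel and module build, not just the OFED and cxgbe objects that required it. The extension this buys is chiefly anonymous tagged struct/union members, which ISO C rejects; a minimal example that gcc accepts only with the flag:

    /* Build with: gcc -fms-extensions -c anon.c */
    struct base {
            int     refcount;
    };

    struct derived {
            struct base;    /* anonymous member: a Microsoft extension */
            int     value;
    };

    int
    get_refcount(struct derived *d)
    {
            return (d->refcount);   /* reaches into the unnamed base */
    }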
diff --git a/sys/conf/options b/sys/conf/options
index bf15767..08a5523 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -889,6 +889,9 @@ TDMA_TXRATE_QUARTER_DEFAULT opt_tdma.h
TDMA_TXRATE_11NA_DEFAULT opt_tdma.h
TDMA_TXRATE_11NG_DEFAULT opt_tdma.h
+# VideoMode
+PICKMODE_DEBUG opt_videomode.h
+
# Network stack virtualization options
VIMAGE opt_global.h
VNET_DEBUG opt_global.h
diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h
index eaabbe2..f5d1274 100644
--- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h
+++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h
@@ -1239,7 +1239,9 @@ extern HAL_BOOL ar9300_set_mac_address(struct ath_hal *ah, const u_int8_t *);
extern void ar9300_get_bss_id_mask(struct ath_hal *ah, u_int8_t *mac);
extern HAL_BOOL ar9300_set_bss_id_mask(struct ath_hal *, const u_int8_t *);
extern HAL_STATUS ar9300_select_ant_config(struct ath_hal *ah, u_int32_t cfg);
+#if 0
extern u_int32_t ar9300_ant_ctrl_common_get(struct ath_hal *ah, HAL_BOOL is_2ghz);
+#endif
extern HAL_BOOL ar9300_ant_swcom_sel(struct ath_hal *ah, u_int8_t ops,
u_int32_t *common_tbl1, u_int32_t *common_tbl2);
extern HAL_BOOL ar9300_set_regulatory_domain(struct ath_hal *ah,
diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c
index a3cca95..f4da88c 100644
--- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c
+++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c
@@ -57,6 +57,9 @@ ar9300_beacon_init(struct ath_hal *ah,
/* Add the fraction adjustment lost due to unit conversions. */
beacon_period_usec += beacon_period_fraction;
+ HALDEBUG(ah, HAL_DEBUG_BEACON,
+ "%s: next_beacon=0x%08x, beacon_period=%d, opmode=%d, beacon_period_usec=%d\n",
+ __func__, next_beacon, beacon_period, opmode, beacon_period_usec);
OS_REG_WRITE(ah, AR_BEACON_PERIOD, beacon_period_usec);
OS_REG_WRITE(ah, AR_DMA_BEACON_PERIOD, beacon_period_usec);
diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c
index 7a5919e..7ba7823 100644
--- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c
+++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c
@@ -36,6 +36,9 @@
static HAL_BOOL ar9300ClrMulticastFilterIndex(struct ath_hal *ah, uint32_t ix);
static HAL_BOOL ar9300SetMulticastFilterIndex(struct ath_hal *ah, uint32_t ix);
+static void ar9300_beacon_set_beacon_timers(struct ath_hal *ah,
+ const HAL_BEACON_TIMERS *bt);
+
static void
ar9300SetChainMasks(struct ath_hal *ah, uint32_t tx_chainmask,
uint32_t rx_chainmask)
@@ -193,10 +196,9 @@ ar9300_attach_freebsd_ops(struct ath_hal *ah)
/* Beacon functions */
/* ah_setBeaconTimers */
ah->ah_beaconInit = ar9300_freebsd_beacon_init;
- /* ah_setBeaconTimers */
+ ah->ah_setBeaconTimers = ar9300_beacon_set_beacon_timers;
ah->ah_setStationBeaconTimers = ar9300_set_sta_beacon_timers;
/* ah_resetStationBeaconTimers */
- /* ah_getNextTBTT */
ah->ah_getNextTBTT = ar9300_get_next_tbtt;
/* Interrupt functions */
@@ -669,6 +671,55 @@ ar9300SetMulticastFilterIndex(struct ath_hal *ah, uint32_t ix)
return (AH_TRUE);
}
+#define TU_TO_USEC(_tu) ((_tu) << 10)
+#define ONE_EIGHTH_TU_TO_USEC(_tu8) ((_tu8) << 7)
+
+/*
+ * Initializes all of the hardware registers used to
+ * send beacons. Note that for station operation the
+ * driver calls ar9300_set_sta_beacon_timers instead.
+ */
+static void
+ar9300_beacon_set_beacon_timers(struct ath_hal *ah,
+ const HAL_BEACON_TIMERS *bt)
+{
+ uint32_t bperiod;
+
+#if 0
+ HALASSERT(opmode == HAL_M_IBSS || opmode == HAL_M_HOSTAP);
+ if (opmode == HAL_M_IBSS) {
+ OS_REG_SET_BIT(ah, AR_TXCFG, AR_TXCFG_ADHOC_BEACON_ATIM_TX_POLICY);
+ }
+#endif
+
+ /* XXX TODO: should migrate the HAL code to always use ONE_EIGHTH_TU */
+ OS_REG_WRITE(ah, AR_NEXT_TBTT_TIMER, TU_TO_USEC(bt->bt_nexttbtt));
+ OS_REG_WRITE(ah, AR_NEXT_DMA_BEACON_ALERT, ONE_EIGHTH_TU_TO_USEC(bt->bt_nextdba));
+ OS_REG_WRITE(ah, AR_NEXT_SWBA, ONE_EIGHTH_TU_TO_USEC(bt->bt_nextswba));
+ OS_REG_WRITE(ah, AR_NEXT_NDP_TIMER, TU_TO_USEC(bt->bt_nextatim));
+
+ bperiod = TU_TO_USEC(bt->bt_intval & HAL_BEACON_PERIOD);
+ /* XXX TODO! */
+// ahp->ah_beaconInterval = bt->bt_intval & HAL_BEACON_PERIOD;
+ OS_REG_WRITE(ah, AR_BEACON_PERIOD, bperiod);
+ OS_REG_WRITE(ah, AR_DMA_BEACON_PERIOD, bperiod);
+ OS_REG_WRITE(ah, AR_SWBA_PERIOD, bperiod);
+ OS_REG_WRITE(ah, AR_NDP_PERIOD, bperiod);
+
+ /*
+ * Reset TSF if required.
+ */
+ if (bt->bt_intval & HAL_BEACON_RESET_TSF)
+ ar9300_reset_tsf(ah);
+
+ /* enable timers */
+ /* NB: flags == 0 handled specially for backwards compatibility */
+ OS_REG_SET_BIT(ah, AR_TIMER_MODE,
+ bt->bt_flags != 0 ? bt->bt_flags :
+ AR_TBTT_TIMER_EN | AR_DBA_TIMER_EN | AR_SWBA_TIMER_EN);
+}
+
+
/*
* RF attach stubs
*/
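The conversion macros above encode the 802.11 time unit: one TU is 1024 microseconds (hence the shift by 10) and one eighth of a TU is 128 microseconds (hence the shift by 7). A quick stand-alone check that the two conversions agree:

    #include <assert.h>

    #define TU_TO_USEC(_tu)                 ((_tu) << 10)
    #define ONE_EIGHTH_TU_TO_USEC(_tu8)     ((_tu8) << 7)

    int
    main(void)
    {
            /* A typical 100 TU beacon interval is 102400 microseconds... */
            assert(TU_TO_USEC(100) == 102400);
            /* ...and 800 eighth-TUs must convert to the same value. */
            assert(ONE_EIGHTH_TU_TO_USEC(800) == TU_TO_USEC(100));
            return (0);
    }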
diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c
index 5660c1f..1dcdafe 100644
--- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c
+++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c
@@ -162,7 +162,6 @@ ar9300_gpio_cfg_output(
HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins);
if ((gpio == AR9382_GPIO_PIN_8_RESERVED) ||
- (gpio == AR9382_GPIO_PIN_11_RESERVED) ||
(gpio == AR9382_GPIO_9_INPUT_ONLY))
{
return AH_FALSE;
@@ -348,7 +347,6 @@ ar9300_gpio_cfg_input(struct ath_hal *ah, u_int32_t gpio)
HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins);
if ((gpio == AR9382_GPIO_PIN_8_RESERVED) ||
- (gpio == AR9382_GPIO_PIN_11_RESERVED) ||
(gpio > AR9382_MAX_GPIO_INPUT_PIN_NUM))
{
return AH_FALSE;
@@ -378,7 +376,6 @@ ar9300_gpio_set(struct ath_hal *ah, u_int32_t gpio, u_int32_t val)
{
HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins);
if ((gpio == AR9382_GPIO_PIN_8_RESERVED) ||
- (gpio == AR9382_GPIO_PIN_11_RESERVED) ||
(gpio == AR9382_GPIO_9_INPUT_ONLY))
{
return AH_FALSE;
@@ -397,8 +394,7 @@ ar9300_gpio_get(struct ath_hal *ah, u_int32_t gpio)
{
u_int32_t gpio_in;
HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins);
- if ((gpio == AR9382_GPIO_PIN_8_RESERVED) ||
- (gpio == AR9382_GPIO_PIN_11_RESERVED))
+ if (gpio == AR9382_GPIO_PIN_8_RESERVED)
{
return 0xffffffff;
}
@@ -453,7 +449,6 @@ ar9300_gpio_set_intr(struct ath_hal *ah, u_int gpio, u_int32_t ilevel)
HALASSERT(gpio < AH_PRIVATE(ah)->ah_caps.halNumGpioPins);
if ((gpio == AR9382_GPIO_PIN_8_RESERVED) ||
- (gpio == AR9382_GPIO_PIN_11_RESERVED) ||
(gpio > AR9382_MAX_GPIO_INPUT_PIN_NUM))
{
return;
@@ -549,8 +544,7 @@ ar9300_gpio_get_mask(struct ath_hal *ah)
if (AH_PRIVATE(ah)->ah_devid == AR9300_DEVID_AR9380_PCIE) {
mask = (1 << AR9382_MAX_GPIO_PIN_NUM) - 1;
- mask &= ~(1 << AR9382_GPIO_PIN_8_RESERVED |
- 1 << AR9382_GPIO_PIN_11_RESERVED);
+ mask &= ~(1 << AR9382_GPIO_PIN_8_RESERVED);
}
return mask;
}
@@ -562,8 +556,7 @@ ar9300_gpio_set_mask(struct ath_hal *ah, u_int32_t mask, u_int32_t pol_map)
if (AH_PRIVATE(ah)->ah_devid == AR9300_DEVID_AR9380_PCIE) {
invalid = ~((1 << AR9382_MAX_GPIO_PIN_NUM) - 1);
- invalid |= 1 << AR9382_GPIO_PIN_8_RESERVED |
- 1 << AR9382_GPIO_PIN_11_RESERVED;
+ invalid |= 1 << AR9382_GPIO_PIN_8_RESERVED;
}
if (mask & invalid) {
ath_hal_printf(ah, "%s: invalid GPIO mask 0x%x\n", __func__, mask);
diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c
index dbf58f6..21c98a1 100644
--- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c
+++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c
@@ -683,6 +683,7 @@ ar9300_get_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type,
{
struct ath_hal_9300 *ahp = AH9300(ah);
const HAL_CAPABILITIES *p_cap = &AH_PRIVATE(ah)->ah_caps;
+ struct ar9300_ani_state *ani;
switch (type) {
case HAL_CAP_CIPHER: /* cipher handled in hardware */
@@ -911,6 +912,34 @@ ar9300_get_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type,
return HAL_ENOTSUPP;
}
#endif
+
+ /* FreeBSD ANI */
+ case HAL_CAP_INTMIT: /* interference mitigation */
+ switch (capability) {
+ case HAL_CAP_INTMIT_PRESENT: /* hardware capability */
+ return HAL_OK;
+ case HAL_CAP_INTMIT_ENABLE:
+ return (ahp->ah_proc_phy_err & HAL_PROCESS_ANI) ?
+ HAL_OK : HAL_ENXIO;
+ case HAL_CAP_INTMIT_NOISE_IMMUNITY_LEVEL:
+ case HAL_CAP_INTMIT_OFDM_WEAK_SIGNAL_LEVEL:
+// case HAL_CAP_INTMIT_CCK_WEAK_SIGNAL_THR:
+ case HAL_CAP_INTMIT_FIRSTEP_LEVEL:
+ case HAL_CAP_INTMIT_SPUR_IMMUNITY_LEVEL:
+ ani = ar9300_ani_get_current_state(ah);
+ if (ani == AH_NULL)
+ return HAL_ENXIO;
+ switch (capability) {
+ /* XXX AR9300 HAL has OFDM/CCK noise immunity level params? */
+ case 2: *result = ani->ofdm_noise_immunity_level; break;
+ case 3: *result = !ani->ofdm_weak_sig_detect_off; break;
+ // case 4: *result = ani->cck_weak_sig_threshold; break;
+ case 5: *result = ani->firstep_level; break;
+ case 6: *result = ani->spur_immunity_level; break;
+ }
+ return HAL_OK;
+ }
+ return HAL_EINVAL;
default:
return ath_hal_getcapability(ah, type, capability, result);
}
@@ -986,6 +1015,27 @@ ar9300_set_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type,
return AH_TRUE;
}
return AH_FALSE;
+
+ /* FreeBSD interrupt mitigation / ANI */
+ case HAL_CAP_INTMIT: { /* interference mitigation */
+ /* This maps the public ANI commands to the internal ANI commands */
+ /* Private: HAL_ANI_CMD; Public: HAL_CAP_INTMIT_CMD */
+ static const HAL_ANI_CMD cmds[] = {
+ HAL_ANI_PRESENT,
+ HAL_ANI_MODE,
+ HAL_ANI_NOISE_IMMUNITY_LEVEL,
+ HAL_ANI_OFDM_WEAK_SIGNAL_DETECTION,
+ HAL_ANI_CCK_WEAK_SIGNAL_THR,
+ HAL_ANI_FIRSTEP_LEVEL,
+ HAL_ANI_SPUR_IMMUNITY_LEVEL,
+ };
+#define N(a) (sizeof(a) / sizeof(a[0]))
+ return capability < N(cmds) ?
+ ar9300_ani_control(ah, cmds[capability], setting) :
+ AH_FALSE;
+#undef N
+ }
+
case HAL_CAP_RXBUFSIZE: /* set MAC receive buffer size */
ahp->rx_buf_size = setting & AR_DATABUF_MASK;
OS_REG_WRITE(ah, AR_DATABUF, ahp->rx_buf_size);
diff --git a/sys/contrib/rdma/krping/krping.c b/sys/contrib/rdma/krping/krping.c
index c89339e..94b2eb9 100644
--- a/sys/contrib/rdma/krping/krping.c
+++ b/sys/contrib/rdma/krping/krping.c
@@ -525,7 +525,7 @@ static void krping_setup_wr(struct krping_cb *cb)
case MW:
cb->bind_attr.wr_id = 0xabbaabba;
cb->bind_attr.send_flags = 0; /* unsignaled */
- cb->bind_attr.length = cb->size;
+ cb->bind_attr.bind_info.length = cb->size;
break;
default:
break;
@@ -627,7 +627,7 @@ static int krping_setup_buffers(struct krping_cb *cb)
cb->page_list, cb->page_list_len);
break;
case MW:
- cb->mw = ib_alloc_mw(cb->pd);
+ cb->mw = ib_alloc_mw(cb->pd, IB_MW_TYPE_1);
if (IS_ERR(cb->mw)) {
DEBUG_LOG(cb, "recv_buf alloc_mw failed\n");
ret = PTR_ERR(cb->mw);
@@ -898,15 +898,15 @@ static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv)
* Update the MW with new buf info.
*/
if (buf == (u64)cb->start_dma_addr) {
- cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
- cb->bind_attr.mr = cb->start_mr;
+ cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
+ cb->bind_attr.bind_info.mr = cb->start_mr;
} else {
- cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
- cb->bind_attr.mr = cb->rdma_mr;
+ cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+ cb->bind_attr.bind_info.mr = cb->rdma_mr;
}
- cb->bind_attr.addr = buf;
+ cb->bind_attr.bind_info.addr = buf;
DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
- cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+ cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
if (ret) {
PRINTF(cb, "bind mw error %d\n", ret);
@@ -2304,7 +2304,7 @@ int krping_doit(char *cmd, void *cookie)
goto out;
}
- cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP);
+ cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cb->cm_id)) {
ret = PTR_ERR(cb->cm_id);
PRINTF(cb, "rdma_create_id error %d\n", ret);
diff --git a/sys/dev/acpica/acpi_pcib.c b/sys/dev/acpica/acpi_pcib.c
index c4bded8..93dada5 100644
--- a/sys/dev/acpica/acpi_pcib.c
+++ b/sys/dev/acpica/acpi_pcib.c
@@ -95,7 +95,7 @@ prt_attach_devices(ACPI_PCI_ROUTING_TABLE *entry, void *arg)
int error;
/* We only care about entries that reference a link device. */
- if (entry->Source == NULL || entry->Source[0] == '\0')
+ if (entry->Source[0] == '\0')
return;
/*
@@ -222,7 +222,7 @@ acpi_pcib_route_interrupt(device_t pcib, device_t dev, int pin,
if (bootverbose) {
device_printf(pcib, "matched entry for %d.%d.INT%c",
pci_get_bus(dev), pci_get_slot(dev), 'A' + pin);
- if (prt->Source != NULL && prt->Source[0] != '\0')
+ if (prt->Source[0] != '\0')
printf(" (src %s:%u)", prt->Source, prt->SourceIndex);
printf("\n");
}
@@ -234,8 +234,7 @@ acpi_pcib_route_interrupt(device_t pcib, device_t dev, int pin,
* XXX: If the source index is non-zero, ignore the source device and
* assume that this is a hard-wired entry.
*/
- if (prt->Source == NULL || prt->Source[0] == '\0' ||
- prt->SourceIndex != 0) {
+ if (prt->Source[0] == '\0' || prt->SourceIndex != 0) {
if (bootverbose)
device_printf(pcib, "slot %d INT%c hardwired to IRQ %d\n",
pci_get_slot(dev), 'A' + pin, prt->SourceIndex);
diff --git a/sys/dev/ath/if_ath_pci.c b/sys/dev/ath/if_ath_pci.c
index 057ec2c..5610882 100644
--- a/sys/dev/ath/if_ath_pci.c
+++ b/sys/dev/ath/if_ath_pci.c
@@ -279,6 +279,13 @@ ath_pci_attach(device_t dev)
*/
sc->sc_invalid = 1;
+ ATH_LOCK_INIT(sc);
+ ATH_PCU_LOCK_INIT(sc);
+ ATH_RX_LOCK_INIT(sc);
+ ATH_TX_LOCK_INIT(sc);
+ ATH_TX_IC_LOCK_INIT(sc);
+ ATH_TXSTATUS_LOCK_INIT(sc);
+
/*
* Arrange interrupt line.
*/
@@ -329,7 +336,7 @@ ath_pci_attach(device_t dev)
if (fw == NULL) {
device_printf(dev, "%s: couldn't find firmware\n",
__func__);
- goto bad3;
+ goto bad4;
}
device_printf(dev, "%s: EEPROM firmware @ %p\n",
@@ -339,30 +346,20 @@ ath_pci_attach(device_t dev)
if (! sc->sc_eepromdata) {
device_printf(dev, "%s: can't malloc eepromdata\n",
__func__);
- goto bad3;
+ goto bad4;
}
memcpy(sc->sc_eepromdata, fw->data, fw->datasize);
firmware_put(fw, 0);
}
#endif /* ATH_EEPROM_FIRMWARE */
- ATH_LOCK_INIT(sc);
- ATH_PCU_LOCK_INIT(sc);
- ATH_RX_LOCK_INIT(sc);
- ATH_TX_LOCK_INIT(sc);
- ATH_TX_IC_LOCK_INIT(sc);
- ATH_TXSTATUS_LOCK_INIT(sc);
-
error = ath_attach(pci_get_device(dev), sc);
if (error == 0) /* success */
return 0;
- ATH_TXSTATUS_LOCK_DESTROY(sc);
- ATH_PCU_LOCK_DESTROY(sc);
- ATH_RX_LOCK_DESTROY(sc);
- ATH_TX_IC_LOCK_DESTROY(sc);
- ATH_TX_LOCK_DESTROY(sc);
- ATH_LOCK_DESTROY(sc);
+#ifdef ATH_EEPROM_FIRMWARE
+bad4:
+#endif
bus_dma_tag_destroy(sc->sc_dmat);
bad3:
bus_teardown_intr(dev, psc->sc_irq, psc->sc_ih);
@@ -370,6 +367,14 @@ bad2:
bus_release_resource(dev, SYS_RES_IRQ, 0, psc->sc_irq);
bad1:
bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, psc->sc_sr);
+
+ ATH_TXSTATUS_LOCK_DESTROY(sc);
+ ATH_PCU_LOCK_DESTROY(sc);
+ ATH_RX_LOCK_DESTROY(sc);
+ ATH_TX_IC_LOCK_DESTROY(sc);
+ ATH_TX_LOCK_DESTROY(sc);
+ ATH_LOCK_DESTROY(sc);
+
bad:
return (error);
}
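The reshuffle in ath_pci_attach() restores the usual acquire/release symmetry: the mutexes are now created before the first path that can jump into the error ladder and destroyed past its last label, so each bad label releases exactly what was acquired before that failure point. The shape, reduced to a sketch with hypothetical resources:

    /* Hypothetical acquire/release pairs; each acquire returns 0 on success. */
    static int acquire_a(void) { return (0); }
    static int acquire_b(void) { return (0); }
    static int acquire_c(void) { return (0); }
    static void release_a(void) { }
    static void release_b(void) { }

    int
    attach_sketch(void)
    {
            int error;

            if ((error = acquire_a()) != 0)
                    goto bad;
            if ((error = acquire_b()) != 0)
                    goto bad1;
            if ((error = acquire_c()) != 0)
                    goto bad2;
            return (0);
    bad2:
            release_b();
    bad1:
            release_a();
    bad:
            return (error);
    }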
diff --git a/sys/dev/atkbdc/atkbd.c b/sys/dev/atkbdc/atkbd.c
index 0d2b44b..d93c1c6 100644
--- a/sys/dev/atkbdc/atkbd.c
+++ b/sys/dev/atkbdc/atkbd.c
@@ -77,6 +77,10 @@ typedef struct atkbd_state {
static void atkbd_timeout(void *arg);
static void atkbd_shutdown_final(void *v);
+static int atkbd_reset(KBDC kbdc, int flags, int c);
+
+#define HAS_QUIRK(p, q) (((atkbdc_softc_t *)(p))->quirks & q)
+#define ALLOW_DISABLE_KBD(kbdc) !HAS_QUIRK(kbdc, KBDC_QUIRK_KEEP_ACTIVATED)
int
atkbd_probe_unit(device_t dev, int irq, int flags)
@@ -1095,6 +1099,39 @@ atkbd_shutdown_final(void *v)
#endif
}
+static int
+atkbd_reset(KBDC kbdc, int flags, int c)
+{
+ /* reset keyboard hardware */
+ if (!(flags & KB_CONF_NO_RESET) && !reset_kbd(kbdc)) {
+ /*
+ * KEYBOARD ERROR
+ * Keyboard reset may fail either because the keyboard
+ * doesn't exist, or because the keyboard doesn't pass
+ * the self-test, or the keyboard controller on the
+ * motherboard and the keyboard somehow fail to shake hands.
+ * It is just possible, particularly in the last case,
+ * that the keyboard controller may be left in a hung state.
+ * test_controller() and test_kbd_port() appear to bring
+ * the keyboard controller back (I don't know why and how,
+ * though.)
+ */
+ empty_both_buffers(kbdc, 10);
+ test_controller(kbdc);
+ test_kbd_port(kbdc);
+ /*
+ * We could disable the keyboard port and interrupt... but,
+ * the keyboard may still exist (see above).
+ */
+ set_controller_command_byte(kbdc,
+ ALLOW_DISABLE_KBD(kbdc) ? 0xff : KBD_KBD_CONTROL_BITS, c);
+ if (bootverbose)
+ printf("atkbd: failed to reset the keyboard.\n");
+ return (EIO);
+ }
+ return (0);
+}
+
/* local functions */
static int
@@ -1250,13 +1287,14 @@ probe_keyboard(KBDC kbdc, int flags)
kbdc_set_device_mask(kbdc, m | KBD_KBD_CONTROL_BITS);
} else {
/* try to restore the command byte as before */
- set_controller_command_byte(kbdc, 0xff, c);
+ set_controller_command_byte(kbdc,
+ ALLOW_DISABLE_KBD(kbdc) ? 0xff : KBD_KBD_CONTROL_BITS, c);
kbdc_set_device_mask(kbdc, m);
}
#endif
kbdc_lock(kbdc, FALSE);
- return err;
+ return (HAS_QUIRK(kbdc, KBDC_QUIRK_IGNORE_PROBE_RESULT) ? 0 : err);
}
static int
@@ -1299,6 +1337,12 @@ init_keyboard(KBDC kbdc, int *type, int flags)
return EIO;
}
+ if (HAS_QUIRK(kbdc, KBDC_QUIRK_RESET_AFTER_PROBE) &&
+ atkbd_reset(kbdc, flags, c)) {
+ kbdc_lock(kbdc, FALSE);
+ return EIO;
+ }
+
/*
* Check if we have an XT keyboard before we attempt to reset it.
* The procedure assumes that the keyboard and the controller have
@@ -1343,31 +1387,9 @@ init_keyboard(KBDC kbdc, int *type, int flags)
if (bootverbose)
printf("atkbd: keyboard ID 0x%x (%d)\n", id, *type);
- /* reset keyboard hardware */
- if (!(flags & KB_CONF_NO_RESET) && !reset_kbd(kbdc)) {
- /*
- * KEYBOARD ERROR
- * Keyboard reset may fail either because the keyboard
- * doesn't exist, or because the keyboard doesn't pass
- * the self-test, or the keyboard controller on the
- * motherboard and the keyboard somehow fail to shake hands.
- * It is just possible, particularly in the last case,
- * that the keyboard controller may be left in a hung state.
- * test_controller() and test_kbd_port() appear to bring
- * the keyboard controller back (I don't know why and how,
- * though.)
- */
- empty_both_buffers(kbdc, 10);
- test_controller(kbdc);
- test_kbd_port(kbdc);
- /*
- * We could disable the keyboard port and interrupt... but,
- * the keyboard may still exist (see above).
- */
- set_controller_command_byte(kbdc, 0xff, c);
+ if (!HAS_QUIRK(kbdc, KBDC_QUIRK_RESET_AFTER_PROBE) &&
+ atkbd_reset(kbdc, flags, c)) {
kbdc_lock(kbdc, FALSE);
- if (bootverbose)
- printf("atkbd: failed to reset the keyboard.\n");
return EIO;
}
@@ -1387,7 +1409,8 @@ init_keyboard(KBDC kbdc, int *type, int flags)
* The XT kbd isn't usable unless the proper scan
* code set is selected.
*/
- set_controller_command_byte(kbdc, 0xff, c);
+ set_controller_command_byte(kbdc, ALLOW_DISABLE_KBD(kbdc)
+ ? 0xff : KBD_KBD_CONTROL_BITS, c);
kbdc_lock(kbdc, FALSE);
printf("atkbd: unable to set the XT keyboard mode.\n");
return EIO;
@@ -1402,6 +1425,17 @@ init_keyboard(KBDC kbdc, int *type, int flags)
c |= KBD_TRANSLATION;
#endif
+ /*
+ * Some keyboards require a SETLEDS command to be sent after
+ * the reset command before they will send keystrokes to us
+ */
+ if (HAS_QUIRK(kbdc, KBDC_QUIRK_SETLEDS_ON_INIT) &&
+ send_kbd_command_and_data(kbdc, KBDC_SET_LEDS, 0) != KBD_ACK) {
+ printf("atkbd: setleds failed\n");
+ }
+ if (!ALLOW_DISABLE_KBD(kbdc))
+ send_kbd_command(kbdc, KBDC_ENABLE_KBD);
+
/* enable the keyboard port and intr. */
if (!set_controller_command_byte(kbdc,
KBD_KBD_CONTROL_BITS | KBD_TRANSLATION | KBD_OVERRIDE_KBD_LOCK,
@@ -1412,7 +1446,9 @@ init_keyboard(KBDC kbdc, int *type, int flags)
* This is serious; we are left with the disabled
* keyboard intr.
*/
- set_controller_command_byte(kbdc, 0xff, c);
+ set_controller_command_byte(kbdc, ALLOW_DISABLE_KBD(kbdc)
+ ? 0xff : (KBD_KBD_CONTROL_BITS | KBD_TRANSLATION |
+ KBD_OVERRIDE_KBD_LOCK), c);
kbdc_lock(kbdc, FALSE);
printf("atkbd: unable to enable the keyboard port and intr.\n");
return EIO;
diff --git a/sys/dev/atkbdc/atkbdc.c b/sys/dev/atkbdc/atkbdc.c
index 9368dbe..69ffa63 100644
--- a/sys/dev/atkbdc/atkbdc.c
+++ b/sys/dev/atkbdc/atkbdc.c
@@ -114,6 +114,41 @@ static int wait_for_kbd_ack(atkbdc_softc_t *kbdc);
static int wait_for_aux_data(atkbdc_softc_t *kbdc);
static int wait_for_aux_ack(atkbdc_softc_t *kbdc);
+struct atkbdc_quirks {
+ const char* bios_vendor;
+ const char* maker;
+ const char* product;
+ int quirk;
+};
+
+static struct atkbdc_quirks quirks[] = {
+ {"coreboot", "Acer", "Peppy",
+ KBDC_QUIRK_KEEP_ACTIVATED | KBDC_QUIRK_IGNORE_PROBE_RESULT |
+ KBDC_QUIRK_RESET_AFTER_PROBE | KBDC_QUIRK_SETLEDS_ON_INIT},
+
+ {NULL, NULL, NULL, 0}
+};
+
+#define QUIRK_STR_MATCH(s1, s2) (s1 == NULL || \
+ (s2 != NULL && !strcmp(s1, s2)))
+
+static int
+atkbdc_getquirks(void)
+{
+ int i;
+ char* bios_vendor = kern_getenv("smbios.bios.vendor");
+ char* maker = kern_getenv("smbios.system.maker");
+ char* product = kern_getenv("smbios.system.product");
+
+ for (i=0; quirks[i].quirk != 0; ++i)
+ if (QUIRK_STR_MATCH(quirks[i].bios_vendor, bios_vendor) &&
+ QUIRK_STR_MATCH(quirks[i].maker, maker) &&
+ QUIRK_STR_MATCH(quirks[i].product, product))
+ return (quirks[i].quirk);
+
+ return (0);
+}
+
atkbdc_softc_t
*atkbdc_get_softc(int unit)
{
@@ -295,6 +330,7 @@ atkbdc_setup(atkbdc_softc_t *sc, bus_space_tag_t tag, bus_space_handle_t h0,
#else
sc->retry = 5000;
#endif
+ sc->quirks = atkbdc_getquirks();
return 0;
}
@@ -1124,7 +1160,8 @@ void
kbdc_set_device_mask(KBDC p, int mask)
{
kbdcp(p)->command_mask =
- mask & (KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS);
+ mask & (((kbdcp(p)->quirks & KBDC_QUIRK_KEEP_ACTIVATED)
+ ? 0 : KBD_KBD_CONTROL_BITS) | KBD_AUX_CONTROL_BITS);
}
int
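QUIRK_STR_MATCH treats a NULL pattern as a wildcard and otherwise demands an exact strcmp() match against the kenv-supplied SMBIOS string, so the single table entry above fires only on coreboot-flashed Acer "Peppy" machines (the board name used by the Acer C720 Chromebook). A stand-alone check of those semantics, with the macro copied from the hunk:

    #include <stdio.h>
    #include <string.h>

    #define QUIRK_STR_MATCH(s1, s2) (s1 == NULL || \
            (s2 != NULL && !strcmp(s1, s2)))

    int
    main(void)
    {
            /* A NULL pattern matches anything, even a missing string. */
            printf("%d\n", QUIRK_STR_MATCH(NULL, "whatever"));      /* 1 */
            printf("%d\n", QUIRK_STR_MATCH(NULL, NULL));            /* 1 */
            /* A non-NULL pattern needs a present, identical string. */
            printf("%d\n", QUIRK_STR_MATCH("coreboot", "coreboot"));/* 1 */
            printf("%d\n", QUIRK_STR_MATCH("coreboot", NULL));      /* 0 */
            return (0);
    }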
diff --git a/sys/dev/atkbdc/atkbdcreg.h b/sys/dev/atkbdc/atkbdcreg.h
index 44a9801..db590b9 100644
--- a/sys/dev/atkbdc/atkbdcreg.h
+++ b/sys/dev/atkbdc/atkbdcreg.h
@@ -202,6 +202,11 @@ typedef struct atkbdc_softc {
kqueue kbd; /* keyboard data queue */
kqueue aux; /* auxiliary data queue */
int retry;
+ int quirks; /* controller doesn't like deactivate */
+#define KBDC_QUIRK_KEEP_ACTIVATED (1 << 0)
+#define KBDC_QUIRK_IGNORE_PROBE_RESULT (1 << 1)
+#define KBDC_QUIRK_RESET_AFTER_PROBE (1 << 2)
+#define KBDC_QUIRK_SETLEDS_ON_INIT (1 << 3)
} atkbdc_softc_t;
enum kbdc_device_ivar {
diff --git a/sys/dev/atkbdc/psm.c b/sys/dev/atkbdc/psm.c
index 9a6ae72..94cf880 100644
--- a/sys/dev/atkbdc/psm.c
+++ b/sys/dev/atkbdc/psm.c
@@ -371,6 +371,10 @@ static devclass_t psm_devclass;
/* other flags (flags) */
#define PSM_FLAGS_FINGERDOWN 0x0001 /* VersaPad finger down */
+#define kbdcp(p) ((atkbdc_softc_t *)(p))
+#define ALWAYS_RESTORE_CONTROLLER(kbdc) !(kbdcp(kbdc)->quirks \
+ & KBDC_QUIRK_KEEP_ACTIVATED)
+
/* Tunables */
static int tap_enabled = -1;
TUNABLE_INT("hw.psm.tap_enabled", &tap_enabled);
@@ -1231,7 +1235,8 @@ psmprobe(device_t dev)
* this is CONTROLLER ERROR; I don't know how to recover
* from this error...
*/
- restore_controller(sc->kbdc, command_byte);
+ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc))
+ restore_controller(sc->kbdc, command_byte);
printf("psm%d: unable to set the command byte.\n", unit);
endprobe(ENXIO);
}
@@ -1270,7 +1275,8 @@ psmprobe(device_t dev)
recover_from_error(sc->kbdc);
if (sc->config & PSM_CONFIG_IGNPORTERROR)
break;
- restore_controller(sc->kbdc, command_byte);
+ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc))
+ restore_controller(sc->kbdc, command_byte);
if (verbose)
printf("psm%d: the aux port is not functioning (%d).\n",
unit, i);
@@ -1293,7 +1299,8 @@ psmprobe(device_t dev)
*/
if (!reset_aux_dev(sc->kbdc)) {
recover_from_error(sc->kbdc);
- restore_controller(sc->kbdc, command_byte);
+ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc))
+ restore_controller(sc->kbdc, command_byte);
if (verbose)
printf("psm%d: failed to reset the aux "
"device.\n", unit);
@@ -1315,7 +1322,8 @@ psmprobe(device_t dev)
if (!enable_aux_dev(sc->kbdc) || !disable_aux_dev(sc->kbdc)) {
/* MOUSE ERROR */
recover_from_error(sc->kbdc);
- restore_controller(sc->kbdc, command_byte);
+ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc))
+ restore_controller(sc->kbdc, command_byte);
if (verbose)
printf("psm%d: failed to enable the aux device.\n",
unit);
@@ -1337,7 +1345,8 @@ psmprobe(device_t dev)
/* verify the device is a mouse */
sc->hw.hwid = get_aux_id(sc->kbdc);
if (!is_a_mouse(sc->hw.hwid)) {
- restore_controller(sc->kbdc, command_byte);
+ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc))
+ restore_controller(sc->kbdc, command_byte);
if (verbose)
printf("psm%d: unknown device type (%d).\n", unit,
sc->hw.hwid);
@@ -1443,7 +1452,8 @@ psmprobe(device_t dev)
* this is CONTROLLER ERROR; I don't know the proper way to
* recover from this error...
*/
- restore_controller(sc->kbdc, command_byte);
+ if (ALWAYS_RESTORE_CONTROLLER(sc->kbdc))
+ restore_controller(sc->kbdc, command_byte);
printf("psm%d: unable to set the command byte.\n", unit);
endprobe(ENXIO);
}
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
index 64ac36c..9a3d75f 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
@@ -176,7 +176,7 @@ iwch_destroy_cq(struct ib_cq *ib_cq)
}
static struct ib_cq *
-iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
+iwch_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
struct ib_ucontext *ib_context,
struct ib_udata *udata)
{
@@ -187,6 +187,7 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
struct iwch_ucontext *ucontext = NULL;
static int warned;
size_t resplen;
+ int entries = attr->cqe;
CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries);
rhp = to_iwch_dev(ibdev);
@@ -545,16 +546,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int mr_id)
{
__be64 *pages;
- int shift, i, n;
+ int shift, n, len;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
-#ifdef notyet
- int j, k, len;
-#endif
+ struct scatterlist *sg;
CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd);
@@ -575,9 +574,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
+ n = mhp->umem->nmap;
err = iwch_alloc_pbl(mhp, n);
if (err)
@@ -591,7 +588,21 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
-#ifdef notyet
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
+ mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = iwch_write_pbl(mhp, pages, i, n);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+#if 0
TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -612,9 +623,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
-#ifdef notyet
pbl_done:
-#endif
cxfree(pages);
if (err)
goto err_pbl;
@@ -672,7 +681,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
return ibmr;
}
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
struct iwch_pd *php;
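
Note: iwch_reg_user_mr() above (and c4iw_reg_user_mr() later in this merge)
switches from walking ib_umem_chunk lists to the for_each_sg() style of
iteration, flushing the page-buffer list to iwch_write_pbl() whenever
PAGE_SIZE / sizeof(*pages) entries have accumulated. A self-contained sketch
of that batched-flush loop, with simplified stand-ins for the scatterlist
machinery:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define BATCH (PAGE_SIZE / sizeof(uint64_t))

    /* stand-in for one DMA-mapped scatter/gather entry */
    struct sg_entry { uint64_t dma_addr; size_t dma_len; };

    /* stand-in for iwch_write_pbl(): pushes 'count' entries at offset 'off' */
    static int
    write_pbl(const uint64_t *pages, size_t count, size_t off)
    {
        (void)pages;
        printf("flush %zu entries at offset %zu\n", count, off);
        return (0);
    }

    int
    main(void)
    {
        struct sg_entry sg[2] = {
            { 0x100000, 8 * PAGE_SIZE },  /* 8 pages */
            { 0x900000, 4 * PAGE_SIZE },  /* 4 pages */
        };
        uint64_t pages[BATCH];
        size_t i = 0, n = 0, e, k, len;
        int shift = 12;                   /* log2(page size) */

        for (e = 0; e < 2; e++) {
            len = sg[e].dma_len >> shift;
            for (k = 0; k < len; k++) {
                pages[i++] = sg[e].dma_addr + ((uint64_t)k << shift);
                if (i == BATCH) {         /* batch full: flush and restart */
                    if (write_pbl(pages, i, n) != 0)
                        return (1);
                    n += i;
                    i = 0;
                }
            }
        }
        if (i > 0)                        /* flush the tail */
            write_pbl(pages, i, n);
        return (0);
    }
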
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
index 648d96b..3e8e6b3 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
@@ -551,18 +551,18 @@ int iwch_bind_mw(struct ib_qp *qp,
if (mw_bind->send_flags & IB_SEND_SIGNALED)
t3_wr_flags = T3_COMPLETION_FLAG;
- sgl.addr = mw_bind->addr;
- sgl.lkey = mw_bind->mr->lkey;
- sgl.length = mw_bind->length;
+ sgl.addr = mw_bind->bind_info.addr;
+ sgl.lkey = mw_bind->bind_info.mr->lkey;
+ sgl.length = mw_bind->bind_info.length;
wqe->bind.reserved = 0;
wqe->bind.type = T3_VA_BASED_TO;
/* TBD: check perms */
- wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
- wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
+ wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->bind_info.mw_access_flags);
+ wqe->bind.mr_stag = htobe32(mw_bind->bind_info.mr->lkey);
wqe->bind.mw_stag = htobe32(mw->rkey);
- wqe->bind.mw_len = htobe32(mw_bind->length);
- wqe->bind.mw_va = htobe64(mw_bind->addr);
+ wqe->bind.mw_len = htobe32(mw_bind->bind_info.length);
+ wqe->bind.mw_va = htobe64(mw_bind->bind_info.addr);
err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
if (err) {
mtx_unlock(&qhp->lock);
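
Note: the iwch_bind_mw() hunk is mechanical; tracking the updated ib_verbs
API, the address/length/key fields of struct ib_mw_bind moved into an
embedded bind_info structure. Schematically (field names taken from the
replacements above, everything else elided):

    #include <stdint.h>

    struct ib_mr;                           /* opaque here */

    struct ib_mw_bind_info {
        struct ib_mr *mr;
        uint64_t     addr;
        uint64_t     length;
        int          mw_access_flags;
    };

    struct ib_mw_bind {
        int send_flags;
        struct ib_mw_bind_info bind_info;   /* was: flat mr/addr/length members */
    };
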
diff --git a/sys/dev/cxgbe/iw_cxgbe/cq.c b/sys/dev/cxgbe/iw_cxgbe/cq.c
index ec72a6c..8710e03 100644
--- a/sys/dev/cxgbe/iw_cxgbe/cq.c
+++ b/sys/dev/cxgbe/iw_cxgbe/cq.c
@@ -775,7 +775,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
}
struct ib_cq *
-c4iw_create_cq(struct ib_device *ibdev, int entries, int vector,
+c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
struct ib_ucontext *ib_context, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
@@ -785,6 +785,7 @@ c4iw_create_cq(struct ib_device *ibdev, int entries, int vector,
int ret;
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
+ int entries = attr->cqe;
CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
index 245e045..5f2542c 100644
--- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
+++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
@@ -864,7 +864,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
int page_list_len);
struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
int c4iw_dealloc_mw(struct ib_mw *mw);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd);
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64
virt, int acc, struct ib_udata *udata, int mr_id);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
@@ -881,8 +881,7 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr,
int acc, u64 *iova_start);
int c4iw_dereg_mr(struct ib_mr *ib_mr);
int c4iw_destroy_cq(struct ib_cq *ib_cq);
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
- int vector,
+struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
struct ib_ucontext *ib_context,
struct ib_udata *udata);
int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
diff --git a/sys/dev/cxgbe/iw_cxgbe/mem.c b/sys/dev/cxgbe/iw_cxgbe/mem.c
index 50c5ed0..f7c460a 100644
--- a/sys/dev/cxgbe/iw_cxgbe/mem.c
+++ b/sys/dev/cxgbe/iw_cxgbe/mem.c
@@ -563,9 +563,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@@ -594,11 +594,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(mhp->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
-
+
+ n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
if (err)
goto err;
@@ -610,25 +607,23 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
i = n = 0;
-
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
- if (i == PAGE_SIZE / sizeof *pages) {
- err = write_pbl(&mhp->rhp->rdev,
- pages,
- mhp->attr.pbl_addr + (n << 3), i);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = write_pbl(&mhp->rhp->rdev,
+ pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+
}
}
+ }
if (i)
err = write_pbl(&mhp->rhp->rdev, pages,
@@ -662,7 +657,7 @@ err:
return ERR_PTR(err);
}
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
diff --git a/sys/dev/fb/fbd.c b/sys/dev/fb/fbd.c
index 6bd5766..f9b4d8e 100644
--- a/sys/dev/fb/fbd.c
+++ b/sys/dev/fb/fbd.c
@@ -134,7 +134,8 @@ fb_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
break;
case FBIO_BLANK: /* blank display */
- error = 0; /* TODO */
+ if (info->setblankmode != NULL)
+ error = info->setblankmode(info->fb_priv, *(int *)data);
break;
default:
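
Note: fb_ioctl() now forwards FBIO_BLANK to the driver through the new
setblankmode hook in struct fb_info instead of returning success without
doing anything; udl(4), added later in this merge, installs such a hook. A
minimal sketch of the optional-callback dispatch (simplified fb_info,
hypothetical driver):

    #include <stddef.h>
    #include <stdio.h>

    struct fb_info {
        void *fb_priv;
        int (*setblankmode)(void *priv, int mode);  /* optional hook */
    };

    static int
    demo_setblankmode(void *priv, int mode)
    {
        (void)priv;
        printf("driver blanking, mode %d\n", mode);
        return (0);
    }

    /* analogue of the FBIO_BLANK case: call the hook only if the driver set one */
    static int
    fb_blank(struct fb_info *info, int mode)
    {
        if (info->setblankmode != NULL)
            return (info->setblankmode(info->fb_priv, mode));
        return (0);  /* hookless drivers keep the old no-op behaviour */
    }

    int
    main(void)
    {
        struct fb_info with = { NULL, demo_setblankmode };
        struct fb_info without = { NULL, NULL };

        fb_blank(&with, 1);
        fb_blank(&without, 1);
        return (0);
    }
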
diff --git a/sys/dev/ic/hd64570.h b/sys/dev/ic/hd64570.h
deleted file mode 100644
index 3399e0a..0000000
--- a/sys/dev/ic/hd64570.h
+++ /dev/null
@@ -1,372 +0,0 @@
-/*-
- * Copyright (c) 1995 John Hay. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by [your name]
- * and [any other names deserving credit ]
- * 4. Neither the name of the author nor the names of any co-contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY [your name] AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-#ifndef _HD64570_H_
-#define _HD64570_H_
-
-typedef struct msci_channel
- {
- union
- {
- unsigned short us_trb; /* rw */
- struct
- {
- unsigned char uc_trbl;
- unsigned char uc_trbh;
- }uc_trb;
- }u_trb;
- unsigned char st0; /* ro */
- unsigned char st1; /* rw */
- unsigned char st2; /* rw */
- unsigned char st3; /* ro */
- unsigned char fst; /* rw */
- unsigned char unused0;
- unsigned char ie0; /* rw */
- unsigned char ie1; /* rw */
- unsigned char ie2; /* rw */
- unsigned char fie; /* rw */
- unsigned char cmd; /* wo */
- unsigned char unused1;
- unsigned char md0; /* rw */
- unsigned char md1; /* rw */
- unsigned char md2; /* rw */
- unsigned char ctl; /* rw */
- unsigned char sa0; /* rw */
- unsigned char sa1; /* rw */
- unsigned char idl; /* rw */
- unsigned char tmc; /* rw */
- unsigned char rxs; /* rw */
- unsigned char txs; /* rw */
- unsigned char trc0; /* rw */
- unsigned char trc1; /* rw */
- unsigned char rrc; /* rw */
- unsigned char unused2;
- unsigned char cst0; /* rw */
- unsigned char cst1; /* rw */
- unsigned char unused3[2];
- }msci_channel;
-
-#define trb u_trb.us_trb
-#define trbl u_trb.uc_trb.uc_trbl
-#define trbh u_trb.uc_trb.uc_trbh
-
-typedef struct timer_channel
- {
- unsigned short tcnt; /* rw */
- unsigned short tconr; /* wo */
- unsigned char tcsr; /* rw */
- unsigned char tepr; /* rw */
- unsigned char unused[2];
- }timer_channel;
-
-typedef struct dmac_channel
- {
- unsigned short dar; /* rw */
- unsigned char darb; /* rw */
- unsigned char unused0;
- unsigned short sar; /* rw On odd numbered dmacs (tx) only */
- unsigned char sarb; /* rw */
-#define cpb sarb
- unsigned char unused1;
- unsigned short cda; /* rw */
- unsigned short eda; /* rw */
- unsigned short bfl; /* rw On even numbered dmacs (rx) only */
- unsigned short bcr; /* rw */
- unsigned char dsr; /* rw */
- unsigned char dmr; /* rw */
- unsigned char unused2;
- unsigned char fct; /* rw */
- unsigned char dir; /* rw */
- unsigned char dcr; /* rw */
- unsigned char unused3[10];
- }dmac_channel;
-
-/* x is the channel number. rx channels are even numbered and tx, odd. */
-#define DMAC_RXCH(x) ((x*2) + 0)
-#define DMAC_TXCH(x) ((x*2) + 1)
-
-typedef struct sca_regs
- {
- unsigned char lpr; /* rw */
- unsigned char unused0; /* -- */
- /* Wait system */
- unsigned char pabr0; /* rw */
- unsigned char pabr1; /* rw */
- unsigned char wcrl; /* rw */
- unsigned char wcrm; /* rw */
- unsigned char wcrh; /* rw */
- unsigned char unused1;
- /* DMAC */
- unsigned char pcr; /* rw */
- unsigned char dmer; /* rw */
- unsigned char unused2[6];
- /* Interrupt */
- unsigned char isr0; /* ro */
- unsigned char isr1; /* ro */
- unsigned char isr2; /* ro */
- unsigned char unused3;
- unsigned char ier0; /* rw */
- unsigned char ier1; /* rw */
- unsigned char ier2; /* rw */
- unsigned char unused4;
- unsigned char itcr; /* rw */
- unsigned char unused5;
- unsigned char ivr; /* rw */
- unsigned char unused6;
- unsigned char imvr; /* rw */
- unsigned char unused7[3];
- /* MSCI Channel 0 */
- msci_channel msci[2];
- timer_channel timer[4];
- dmac_channel dmac[4];
- }sca_regs;
-
-#define SCA_CMD_TXRESET 0x01
-#define SCA_CMD_TXENABLE 0x02
-#define SCA_CMD_TXDISABLE 0x03
-#define SCA_CMD_TXCRCINIT 0x04
-#define SCA_CMD_TXCRCEXCL 0x05
-#define SCA_CMS_TXEOM 0x06
-#define SCA_CMD_TXABORT 0x07
-#define SCA_CMD_MPON 0x08
-#define SCA_CMD_TXBCLEAR 0x09
-
-#define SCA_CMD_RXRESET 0x11
-#define SCA_CMD_RXENABLE 0x12
-#define SCA_CMD_RXDISABLE 0x13
-#define SCA_CMD_RXCRCINIT 0x14
-#define SCA_CMD_RXMSGREJ 0x15
-#define SCA_CMD_MPSEARCH 0x16
-#define SCA_CMD_RXCRCEXCL 0x17
-#define SCA_CMD_RXCRCCALC 0x18
-
-#define SCA_CMD_NOP 0x00
-#define SCA_CMD_RESET 0x21
-#define SCA_CMD_SEARCH 0x31
-
-#define SCA_MD0_CRC_1 0x01
-#define SCA_MD0_CRC_CCITT 0x02
-#define SCA_MD0_CRC_ENABLE 0x04
-#define SCA_MD0_AUTO_ENABLE 0x10
-#define SCA_MD0_MODE_ASYNC 0x00
-#define SCA_MD0_MODE_BYTESYNC1 0x20
-#define SCA_MD0_MODE_BISYNC 0x40
-#define SCA_MD0_MODE_BYTESYNC2 0x60
-#define SCA_MD0_MODE_HDLC 0x80
-
-#define SCA_MD1_NOADDRCHK 0x00
-#define SCA_MD1_SNGLADDR1 0x40
-#define SCA_MD1_SNGLADDR2 0x80
-#define SCA_MD1_DUALADDR 0xC0
-
-#define SCA_MD2_DUPLEX 0x00
-#define SCA_MD2_ECHO 0x01
-#define SCA_MD2_LOOPBACK 0x03
-#define SCA_MD2_ADPLLx8 0x00
-#define SCA_MD2_ADPLLx16 0x08
-#define SCA_MD2_ADPLLx32 0x10
-#define SCA_MD2_NRZ 0x00
-#define SCA_MD2_NRZI 0x20
-#define SCA_MD2_MANCHESTER 0x80
-#define SCA_MD2_FM0 0xC0
-#define SCA_MD2_FM1 0xA0
-
-#define SCA_CTL_RTS 0x01
-#define SCA_CTL_IDLPAT 0x10
-#define SCA_CTL_UDRNC 0x20
-
-#define SCA_RXS_DIV_MASK 0x0F
-#define SCA_RXS_DIV1 0x00
-#define SCA_RXS_DIV2 0x01
-#define SCA_RXS_DIV4 0x02
-#define SCA_RXS_DIV8 0x03
-#define SCA_RXS_DIV16 0x04
-#define SCA_RXS_DIV32 0x05
-#define SCA_RXS_DIV64 0x06
-#define SCA_RXS_DIV128 0x07
-#define SCA_RXS_DIV256 0x08
-#define SCA_RXS_DIV512 0x09
-#define SCA_RXS_CLK_RXC0 0x00
-#define SCA_RXS_CLK_RXC1 0x20
-#define SCA_RXS_CLK_INT 0x40
-#define SCA_RXS_CLK_ADPLL_OUT 0x60
-#define SCA_RXS_CLK_ADPLL_IN 0x70
-
-#define SCA_TXS_DIV_MASK 0x0F
-#define SCA_TXS_DIV1 0x00
-#define SCA_TXS_DIV2 0x01
-#define SCA_TXS_DIV4 0x02
-#define SCA_TXS_DIV8 0x03
-#define SCA_TXS_DIV16 0x04
-#define SCA_TXS_DIV32 0x05
-#define SCA_TXS_DIV64 0x06
-#define SCA_TXS_DIV128 0x07
-#define SCA_TXS_DIV256 0x08
-#define SCA_TXS_DIV512 0x09
-#define SCA_TXS_CLK_TXC 0x00
-#define SCA_TXS_CLK_INT 0x40
-#define SCA_TXS_CLK_RX 0x60
-
-#define SCA_ST0_RXRDY 0x01
-#define SCA_ST0_TXRDY 0x02
-#define SCA_ST0_RXINT 0x40
-#define SCA_ST0_TXINT 0x80
-
-#define SCA_ST1_IDLST 0x01
-#define SCA_ST1_ABTST 0x02
-#define SCA_ST1_DCDCHG 0x04
-#define SCA_ST1_CTSCHG 0x08
-#define SCA_ST1_FLAG 0x10
-#define SCA_ST1_TXIDL 0x40
-#define SCA_ST1_UDRN 0x80
-
-/* ST2 and FST look the same */
-#define SCA_FST_CRCERR 0x04
-#define SCA_FST_OVRN 0x08
-#define SCA_FST_RESFRM 0x10
-#define SCA_FST_ABRT 0x20
-#define SCA_FST_SHRT 0x40
-#define SCA_FST_EOM 0x80
-
-#define SCA_ST3_RXENA 0x01
-#define SCA_ST3_TXENA 0x02
-#define SCA_ST3_DCD 0x04
-#define SCA_ST3_CTS 0x08
-#define SCA_ST3_ADPLLSRCH 0x10
-#define SCA_ST3_TXDATA 0x20
-
-#define SCA_FIE_EOMFE 0x80
-
-#define SCA_IE0_RXRDY 0x01
-#define SCA_IE0_TXRDY 0x02
-#define SCA_IE0_RXINT 0x40
-#define SCA_IE0_TXINT 0x80
-
-#define SCA_IE1_IDLDE 0x01
-#define SCA_IE1_ABTDE 0x02
-#define SCA_IE1_DCD 0x04
-#define SCA_IE1_CTS 0x08
-#define SCA_IE1_FLAG 0x10
-#define SCA_IE1_IDL 0x40
-#define SCA_IE1_UDRN 0x80
-
-#define SCA_IE2_CRCERR 0x04
-#define SCA_IE2_OVRN 0x08
-#define SCA_IE2_RESFRM 0x10
-#define SCA_IE2_ABRT 0x20
-#define SCA_IE2_SHRT 0x40
-#define SCA_IE2_EOM 0x80
-
-/* This is for RRC, TRC0 and TRC1. */
-#define SCA_RCR_MASK 0x1F
-
-#define SCA_IE1_
-
-#define SCA_IV_CHAN0 0x00
-#define SCA_IV_CHAN1 0x20
-
-#define SCA_IV_RXRDY 0x04
-#define SCA_IV_TXRDY 0x06
-#define SCA_IV_RXINT 0x08
-#define SCA_IV_TXINT 0x0A
-
-#define SCA_IV_DMACH0 0x00
-#define SCA_IV_DMACH1 0x08
-#define SCA_IV_DMACH2 0x20
-#define SCA_IV_DMACH3 0x28
-
-#define SCA_IV_DMIA 0x14
-#define SCA_IV_DMIB 0x16
-
-#define SCA_IV_TIMER0 0x1C
-#define SCA_IV_TIMER1 0x1E
-#define SCA_IV_TIMER2 0x3C
-#define SCA_IV_TIMER3 0x3E
-
-/*
- * DMA registers
- */
-#define SCA_DSR_EOT 0x80
-#define SCA_DSR_EOM 0x40
-#define SCA_DSR_BOF 0x20
-#define SCA_DSR_COF 0x10
-#define SCA_DSR_DE 0x02
-#define SCA_DSR_DWE 0x01
-
-#define SCA_DMR_TMOD 0x10
-#define SCA_DMR_NF 0x04
-#define SCA_DMR_CNTE 0x02
-
-#define SCA_DMER_EN 0x80
-
-#define SCA_DCR_ABRT 0x01
-#define SCA_DCR_FCCLR 0x02 /* Clear frame end intr counter */
-
-#define SCA_DIR_EOT 0x80
-#define SCA_DIR_EOM 0x40
-#define SCA_DIR_BOF 0x20
-#define SCA_DIR_COF 0x10
-
-#define SCA_PCR_BRC 0x10
-#define SCA_PCR_CCC 0x08
-#define SCA_PCR_PR2 0x04
-#define SCA_PCR_PR1 0x02
-#define SCA_PCR_PR0 0x01
-
-typedef struct sca_descriptor
- {
- unsigned short cp;
- unsigned short bp;
- unsigned char bpb;
- unsigned char unused0;
- unsigned short len;
- unsigned char stat;
- unsigned char unused1;
- }sca_descriptor;
-
-#define SCA_DESC_EOT 0x01
-#define SCA_DESC_CRC 0x04
-#define SCA_DESC_OVRN 0x08
-#define SCA_DESC_RESD 0x10
-#define SCA_DESC_ABORT 0x20
-#define SCA_DESC_SHRTFRM 0x40
-#define SCA_DESC_EOM 0x80
-#define SCA_DESC_ERRORS 0x7C
-
-/*
-***************************************************************************
-** END
-***************************************************************************
-**/
-#endif /* _HD64570_H_ */
-
diff --git a/sys/dev/lmc/if_lmc.h b/sys/dev/lmc/if_lmc.h
index fe4e7d0..8291dcd 100644
--- a/sys/dev/lmc/if_lmc.h
+++ b/sys/dev/lmc/if_lmc.h
@@ -1052,7 +1052,7 @@ struct card
*/
#define IOREF_CSR 1 /* access Tulip CSRs with IO cycles if 1 */
-#if (defined(__FreeBSD__) && defined(DEVICE_POLLING))
+#if defined(DEVICE_POLLING)
# define DEV_POLL 1
#else
# define DEV_POLL 0
@@ -1076,15 +1076,10 @@ struct softc
struct ifnet *ifp;
struct ifmedia ifm; /* hooks for ifconfig(8) */
# if NSPPP
-# if (__FreeBSD_version < 600000)
- struct sppp spppcom; /* must be first in sc for fbsd < 6 */
-# endif
struct sppp *sppp;
# elif P2P
struct p2pcom p2pcom;
struct p2pcom *p2p;
-# elif (__FreeBSD_version < 600000)
- struct ifnet ifnet; /* must be first in sc for fbsd < 6 */
# endif
#endif
@@ -1092,13 +1087,8 @@ struct softc
#if NETGRAPH
node_p ng_node; /* pointer to our node struct */
hook_p ng_hook; /* non-zero means NETGRAPH owns device */
-# if (__FreeBSD_version >= 503000)
struct ifaltq ng_sndq;
struct ifaltq ng_fastq;
-# else
- struct ifqueue ng_sndq;
- struct ifqueue ng_fastq;
-# endif
#endif
struct callout callout; /* watchdog needs this */
@@ -1115,13 +1105,8 @@ struct softc
# ifdef DEVICE_POLLING
int quota; /* used for incoming packet flow control */
# endif
-# if (__FreeBSD_version >= 500000)
struct mtx top_mtx; /* lock card->watchdog vs core_ioctl */
struct mtx bottom_mtx; /* lock for buf queues & descriptor rings */
-# else /* FreeBSD-4 */
- int top_spl; /* lock card->watchdog vs core_ioctl */
- int bottom_spl; /* lock for buf queues & descriptor rings */
-# endif
/* Top-half state used by all card types; lock with top_lock, */
@@ -1154,23 +1139,11 @@ struct softc
# define WRITE_CSR(csr, val) bus_space_write_4(sc->csr_tag, sc->csr_handle, csr, val)
# define NAME_UNIT device_get_nameunit(sc->dev)
# define DRIVER_DEBUG ((sc->config.debug) || (sc->ifp->if_flags & IFF_DEBUG))
-# if (__FreeBSD_version >= 500000)
-# define TOP_TRYLOCK mtx_trylock(&sc->top_mtx)
-# define TOP_UNLOCK mtx_unlock (&sc->top_mtx)
-# define BOTTOM_TRYLOCK mtx_trylock(&sc->bottom_mtx)
-# define BOTTOM_UNLOCK mtx_unlock (&sc->bottom_mtx)
-# if (__FreeBSD_version >= 700000)
-# define CHECK_CAP priv_check(curthread, PRIV_DRIVER)
-# else
-# define CHECK_CAP suser(curthread)
-# endif
-# else /* FreeBSD-4 */
-# define TOP_TRYLOCK (sc->top_spl = splimp())
-# define TOP_UNLOCK splx(sc->top_spl)
-# define BOTTOM_TRYLOCK 1 /* giant_lock protects */
-# define BOTTOM_UNLOCK /* nothing */
-# define CHECK_CAP suser(curproc)
-# endif
+# define TOP_TRYLOCK mtx_trylock(&sc->top_mtx)
+# define TOP_UNLOCK mtx_unlock (&sc->top_mtx)
+# define BOTTOM_TRYLOCK mtx_trylock(&sc->bottom_mtx)
+# define BOTTOM_UNLOCK mtx_unlock (&sc->bottom_mtx)
+# define CHECK_CAP priv_check(curthread, PRIV_DRIVER)
# define DISABLE_INTR /* nothing */
# define ENABLE_INTR /* nothing */
# define IRQ_NONE /* nothing */
@@ -1181,28 +1154,13 @@ struct softc
# define DMA_SYNC(map, size, flags) bus_dmamap_sync(ring->tag, map, flags)
# define DMA_LOAD(map, addr, size) bus_dmamap_load(ring->tag, map, addr, size, fbsd_dmamap_load, ring, 0)
# if (NBPFILTER != 0)
-# if (__FreeBSD_version >= 500000)
-# define LMC_BPF_MTAP(mbuf) BPF_MTAP(sc->ifp, mbuf)
-# else /* FreeBSD-4 */
-# define LMC_BPF_MTAP(mbuf) if (sc->ifp->if_bpf) bpf_mtap(sc->ifp, mbuf)
-# endif
+# define LMC_BPF_MTAP(mbuf) BPF_MTAP(sc->ifp, mbuf)
# define LMC_BPF_ATTACH(dlt, len) bpfattach(sc->ifp, dlt, len)
# define LMC_BPF_DETACH bpfdetach(sc->ifp)
# endif
-# if (__FreeBSD_version >= 500000)
-# define IF_DROP(ifq) _IF_DROP(ifq)
-# define IF_QFULL(ifq) _IF_QFULL(ifq)
-# endif
-# if (__FreeBSD_version < 500000)
-# define INTR_MPSAFE 0
-# define BUS_DMA_COHERENT 0
-# endif
-# if (__FreeBSD_version >= 600000)
-# define IFF_RUNNING IFF_DRV_RUNNING
-# endif
-
-
-
+# define IF_DROP(ifq) _IF_DROP(ifq)
+# define IF_QFULL(ifq) _IF_QFULL(ifq)
+# define IFF_RUNNING IFF_DRV_RUNNING
#if (NBPFILTER == 0)
@@ -1211,40 +1169,6 @@ struct softc
# define LMC_BPF_DETACH /* nothing */
#endif
-#if (defined(__bsdi__) || /* unconditionally */ \
- (defined(__FreeBSD__) && (__FreeBSD_version < 503000)) || \
- (defined(__NetBSD__) && (__NetBSD_Version__ < 106000000)) || \
- (defined(__OpenBSD__) && ( OpenBSD < 200111)))
-# define IFQ_ENQUEUE(ifq, m, pa, err) \
-do { \
- if (pa==0); /* suppress warning */ \
- if (IF_QFULL(ifq)) \
- { \
- IF_DROP(ifq); \
- m_freem(m); \
- err = ENOBUFS; \
- } \
- else \
- { \
- IF_ENQUEUE(ifq, m); \
- err = 0; \
- } \
- } while (0)
-# define IFQ_DEQUEUE(ifq, m) do { IF_DEQUEUE((ifq), m) } while (0)
-# define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_head == NULL)
-# define IFQ_SET_MAXLEN(ifq, len) (ifq)->ifq_maxlen = len
-# define IFQ_SET_READY(ifq) /* nothing */
-# define IFQ_PURGE(ifq) \
-do { \
- while ((ifq)->ifq_head != NULL) \
- { \
- struct mbuf *m; \
- IF_DEQUEUE(ifq, m); \
- m_freem(m); \
- } \
- } while (0)
-#endif
-
#define HSSI_DESC "SBE/LMC HSSI Card"
#define T3_DESC "SBE/LMC T3 Card"
#define SSI_DESC "SBE/LMC SSI Card"
@@ -1315,10 +1239,6 @@ static void t1_send_bop(softc_t *, int);
static int t1_ioctl(softc_t *, struct ioctl *);
#if IFNET
-# if ((defined(__FreeBSD__) && (__FreeBSD_version < 500000)) ||\
- defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__))
-static void netisr_dispatch(int, struct mbuf *);
-# endif
static void lmc_raw_input(struct ifnet *, struct mbuf *);
#endif /* IFNET */
@@ -1372,25 +1292,12 @@ static void lmc_ifnet_detach(softc_t *);
#endif /* IFNET */
#if NETGRAPH
-# if (__FreeBSD_version >= 500000)
static int ng_constructor(node_p);
-# else /* FreeBSD-4 */
-static int ng_constructor(node_p *);
-# endif
-# if (__FreeBSD_version >= 500000)
static int ng_rcvmsg(node_p, item_p, hook_p);
-# else /* FreeBSD-4 */
-static int ng_rcvmsg(node_p, struct ng_mesg *,
- const char *, struct ng_mesg **);
-# endif
static int ng_shutdown(node_p);
static int ng_newhook(node_p, hook_p, const char *);
static int ng_connect(hook_p);
-# if (__FreeBSD_version >= 500000)
static int ng_rcvdata(hook_p, item_p);
-# else /* FreeBSD-4 */
-static int ng_rcvdata(hook_p, struct mbuf *, meta_p);
-# endif
static int ng_disconnect(hook_p);
# if (IFNET == 0)
static void ng_watchdog(void *);
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 959b270..5401df3 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -656,9 +656,8 @@ netmap_update_config(struct netmap_adapter *na)
u_int txr, txd, rxr, rxd;
txr = txd = rxr = rxd = 0;
- if (na->nm_config) {
- na->nm_config(na, &txr, &txd, &rxr, &rxd);
- } else {
+ if (na->nm_config == NULL ||
+ na->nm_config(na, &txr, &txd, &rxr, &rxd)) {
/* take whatever we had at init time */
txr = na->num_tx_rings;
txd = na->num_tx_desc;
@@ -2168,7 +2167,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = ENXIO;
break;
}
- rmb(); /* make sure following reads are not from cache */
+ mb(); /* make sure following reads are not from cache */
na = priv->np_na; /* we have a reference */
@@ -3071,16 +3070,14 @@ netmap_init(void)
error = netmap_mem_init();
if (error != 0)
goto fail;
- /* XXX could use make_dev_credv() to get error number */
-#ifdef __FreeBSD__
- /* support for the 'eternal' flag */
+ /*
+ * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
+ * when the module is compiled in.
+ * XXX could use make_dev_credv() to get error number
+ */
netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
"netmap");
-#else
- netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
- "netmap");
-#endif
if (!netmap_dev)
goto fail;
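
Note: netmap_update_config() above now treats a missing nm_config callback
and a callback that fails (returns non-zero) identically: fall back to the
ring geometry recorded at attach time. The same revision strengthens the
rmb() in netmap_ioctl() to a full mb(). A sketch of the
callback-with-fallback idiom (simplified types; the memory barrier has no
user-space analogue here):

    #include <stdio.h>

    struct adapter {
        unsigned num_tx_rings, num_tx_desc;
        /* optional reconfiguration hook; returns 0 on success */
        int (*nm_config)(struct adapter *, unsigned *txr, unsigned *txd);
    };

    static void
    update_config(struct adapter *na)
    {
        unsigned txr = 0, txd = 0;

        /* use the callback only if it is present *and* it succeeds */
        if (na->nm_config == NULL || na->nm_config(na, &txr, &txd)) {
            txr = na->num_tx_rings;   /* whatever we had at init time */
            txd = na->num_tx_desc;
        }
        printf("rings=%u descriptors=%u\n", txr, txd);
    }

    int
    main(void)
    {
        struct adapter na = { 4, 1024, NULL };

        update_config(&na);           /* no hook: prints rings=4 ... */
        return (0);
    }
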
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 263904b..ba5a333 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -605,8 +605,6 @@ pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
- if (devlist_entry == NULL)
- return (NULL);
cfg = &devlist_entry->cfg;
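
Note: the pci.c hunk removes a dead check rather than changing behaviour;
malloc(9) called with M_WAITOK sleeps until the allocation succeeds and can
never return NULL, so only M_NOWAIT callers need the test. The rule in
kernel-idiom form (not compilable user-space code):

    p = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
    /* no NULL check: M_WAITOK blocks until memory is available */

    q = malloc(size, M_DEVBUF, M_NOWAIT);
    if (q == NULL)          /* M_NOWAIT may fail and must be checked */
        return (ENOMEM);
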
diff --git a/sys/dev/random/dummy_rng.c b/sys/dev/random/dummy_rng.c
index a7ca4b3..e78f5a8 100644
--- a/sys/dev/random/dummy_rng.c
+++ b/sys/dev/random/dummy_rng.c
@@ -82,19 +82,13 @@ dummy_random_init(void)
*
* Caveat Emptor.
*/
-u_int
+void
dummy_random_read_phony(uint8_t *buf, u_int count)
{
/* If no entropy device is loaded, don't spam the console with warnings */
- static int warned = 0;
u_long randval;
size_t size, i;
- if (!warned) {
- log(LOG_WARNING, "random device not loaded/active; using insecure pseudo-random number generator\n");
- warned = 1;
- }
-
/* srandom() is called in kern/init_main.c:proc0_post() */
/* Fill buf[] with random(9) output */
@@ -103,8 +97,6 @@ dummy_random_read_phony(uint8_t *buf, u_int count)
size = MIN(count - i, sizeof(randval));
memcpy(buf + i, &randval, (size_t)size);
}
-
- return (count);
}
struct random_adaptor randomdev_dummy = {
diff --git a/sys/dev/random/random_adaptors.c b/sys/dev/random/random_adaptors.c
index 30f3e3d..5a67f50 100644
--- a/sys/dev/random/random_adaptors.c
+++ b/sys/dev/random/random_adaptors.c
@@ -149,10 +149,14 @@ random_adaptor_choose(void)
(random_adaptor_previous == NULL ? "NULL" : random_adaptor_previous->ra_ident),
random_adaptor->ra_ident);
#endif
- if (random_adaptor_previous != NULL)
+ if (random_adaptor_previous != NULL) {
+ randomdev_deinit_reader();
(random_adaptor_previous->ra_deinit)();
+ }
(random_adaptor->ra_init)();
}
+
+ randomdev_init_reader(random_adaptor->ra_read);
}
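
Note: random_adaptor_choose() above now detaches the reader hook before the
outgoing adaptor's ra_deinit() runs, and installs the new adaptor's ra_read
after initialization, so read_random() never dereferences a torn-down
backend. A sketch of the function-pointer swap (names here are illustrative):

    #include <stdio.h>

    typedef void (*reader_t)(unsigned char *, unsigned int);

    static void dummy_read(unsigned char *buf, unsigned int n) { while (n--) *buf++ = 0; }
    static void hw_read(unsigned char *buf, unsigned int n) { while (n--) *buf++ = 0xA5; }

    static reader_t read_func = dummy_read;     /* always points somewhere valid */

    static void init_reader(reader_t r) { read_func = r; }
    static void deinit_reader(void)     { read_func = dummy_read; }

    static void
    switch_adaptor(reader_t new_reader, int have_old)
    {
        if (have_old)
            deinit_reader();     /* fall back before the old backend goes away */
        /* ... old adaptor ra_deinit(), new adaptor ra_init() ... */
        init_reader(new_reader); /* then point at the new backend */
    }

    int
    main(void)
    {
        unsigned char b[4];

        switch_adaptor(hw_read, 1);
        read_func(b, sizeof(b));
        printf("0x%02x\n", b[0]);   /* 0xa5 */
        return (0);
    }
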
diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c
index c61bed7..9d41aed 100644
--- a/sys/dev/random/randomdev.c
+++ b/sys/dev/random/randomdev.c
@@ -214,11 +214,11 @@ random_harvest(const void *entropy, u_int count, u_int bits, enum random_entropy
*/
/* Hold the address of the routine which is actually called */
-static u_int (*read_func)(uint8_t *, u_int) = dummy_random_read_phony;
+static void (*read_func)(uint8_t *, u_int) = dummy_random_read_phony;
/* Initialise the reader when/if it is loaded */
void
-randomdev_init_reader(u_int (*reader)(uint8_t *, u_int))
+randomdev_init_reader(void (*reader)(uint8_t *, u_int))
{
read_func = reader;
@@ -240,5 +240,10 @@ int
read_random(void *buf, int count)
{
- return ((int)(*read_func)(buf, (u_int)count));
+ if (count < 0)
+ return 0;
+
+ read_func(buf, count);
+
+ return count;
}
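
Note: with the reader callbacks changed to return void, read_random() above
becomes responsible for its own return value: negative counts yield 0, and
otherwise the full count is reported as written. A one-function sketch of the
new contract:

    #include <stdio.h>

    static void
    read_func(unsigned char *buf, unsigned int count)  /* void reader, always fills */
    {
        while (count--)
            *buf++ = 0x42;
    }

    /* callers get back exactly what they asked for (or 0 for bogus counts) */
    static int
    read_random_sketch(void *buf, int count)
    {
        if (count < 0)
            return (0);
        read_func(buf, (unsigned int)count);
        return (count);
    }

    int
    main(void)
    {
        unsigned char b[8];

        printf("%d\n", read_random_sketch(b, (int)sizeof(b)));  /* 8 */
        printf("%d\n", read_random_sketch(b, -1));              /* 0 */
        return (0);
    }
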
diff --git a/sys/dev/random/randomdev.h b/sys/dev/random/randomdev.h
index 4daf735..4ca88ff 100644
--- a/sys/dev/random/randomdev.h
+++ b/sys/dev/random/randomdev.h
@@ -37,12 +37,12 @@ typedef void random_init_func_t(void);
typedef void random_deinit_func_t(void);
void randomdev_init_harvester(void (*)(const void *, u_int, u_int, enum random_entropy_source));
-void randomdev_init_reader(u_int (*)(uint8_t *, u_int));
+void randomdev_init_reader(void (*)(uint8_t *, u_int));
void randomdev_deinit_harvester(void);
void randomdev_deinit_reader(void);
/* Stub/fake routines for when no entropy processor is loaded */
-extern u_int dummy_random_read_phony(uint8_t *, u_int);
+extern void dummy_random_read_phony(uint8_t *, u_int);
/* kern.random sysctls */
#ifdef SYSCTL_DECL /* from sysctl.h */
diff --git a/sys/dev/sfxge/common/efx_ev.c b/sys/dev/sfxge/common/efx_ev.c
index 515435b..783a047 100644
--- a/sys/dev/sfxge/common/efx_ev.c
+++ b/sys/dev/sfxge/common/efx_ev.c
@@ -89,7 +89,8 @@ efx_ev_rx_not_ok(
if (EFX_QWORD_FIELD(*eqp, FSF_AZ_RX_EV_TOBE_DISC) != 0) {
EFX_EV_QSTAT_INCR(eep, EV_RX_TOBE_DISC);
EFSYS_PROBE(tobe_disc);
- /* Assume this is a unicast address mismatch, unless below
+ /*
+ * Assume this is a unicast address mismatch, unless below
* we find either FSF_AZ_RX_EV_ETH_CRC_ERR or
* EV_RX_PAUSE_FRM_ERR is set.
*/
@@ -102,7 +103,8 @@ efx_ev_rx_not_ok(
(*flagsp) |= EFX_DISCARD;
#if (EFSYS_OPT_RX_HDR_SPLIT || EFSYS_OPT_RX_SCATTER)
- /* Lookout for payload queue ran dry errors and ignore them.
+ /*
+ * Look out for payload queue ran-dry errors and ignore them.
*
* Sadly for the header/data split cases, the descriptor
* pointer in this event refers to the header queue and
diff --git a/sys/dev/sfxge/common/efx_mac.c b/sys/dev/sfxge/common/efx_mac.c
index 5b868ed..3e9449a 100644
--- a/sys/dev/sfxge/common/efx_mac.c
+++ b/sys/dev/sfxge/common/efx_mac.c
@@ -669,11 +669,11 @@ chosen:
EFSYS_ASSERT(emop != NULL);
epp->ep_mac_type = type;
-
+
if (emop->emo_reset != NULL) {
if ((rc = emop->emo_reset(enp)) != 0)
goto fail1;
-
+
EFSYS_ASSERT(enp->en_reset_flags & EFX_RESET_MAC);
enp->en_reset_flags &= ~EFX_RESET_MAC;
}
diff --git a/sys/dev/sfxge/common/efx_mcdi.c b/sys/dev/sfxge/common/efx_mcdi.c
index 55297c0..5853b06 100644
--- a/sys/dev/sfxge/common/efx_mcdi.c
+++ b/sys/dev/sfxge/common/efx_mcdi.c
@@ -44,7 +44,8 @@ __FBSDID("$FreeBSD$");
#define MCDI_P1_REBOOT_OFST 0x1fe
#define MCDI_P2_REBOOT_OFST 0x1ff
-/* A reboot/assertion causes the MCDI status word to be set after the
+/*
+ * A reboot/assertion causes the MCDI status word to be set after the
* command word is set or a REBOOT event is sent. If we notice a reboot
* via these mechanisms then wait 10ms for the status word to be set.
*/
@@ -459,7 +460,8 @@ efx_mcdi_ev_death(
++emip->emi_aborted;
}
- /* Since we're running in parallel with a request, consume the
+ /*
+ * Since we're running in parallel with a request, consume the
* status word before dropping the lock.
*/
if (rc == EIO || rc == EINTR) {
diff --git a/sys/dev/sfxge/common/efx_nic.c b/sys/dev/sfxge/common/efx_nic.c
index fdbe5a2..2bb55d1 100644
--- a/sys/dev/sfxge/common/efx_nic.c
+++ b/sys/dev/sfxge/common/efx_nic.c
@@ -253,7 +253,8 @@ efx_nic_create(
EFX_FEATURE_LFSR_HASH_INSERT |
EFX_FEATURE_LINK_EVENTS | EFX_FEATURE_PERIODIC_MAC_STATS |
EFX_FEATURE_WOL | EFX_FEATURE_MCDI |
- EFX_FEATURE_LOOKAHEAD_SPLIT | EFX_FEATURE_MAC_HEADER_FILTERS;
+ EFX_FEATURE_LOOKAHEAD_SPLIT |
+ EFX_FEATURE_MAC_HEADER_FILTERS;
break;
#endif /* EFSYS_OPT_SIENA */
diff --git a/sys/dev/sfxge/common/efx_rx.c b/sys/dev/sfxge/common/efx_rx.c
index 5b52d86..c6e0d70 100644
--- a/sys/dev/sfxge/common/efx_rx.c
+++ b/sys/dev/sfxge/common/efx_rx.c
@@ -527,7 +527,7 @@ efx_rx_filter_insert(
EFSYS_ASSERT3P(spec, !=, NULL);
spec->efs_dmaq_id = (uint16_t)erp->er_index;
- return efx_filter_insert_filter(erp->er_enp, spec, B_FALSE);
+ return (efx_filter_insert_filter(erp->er_enp, spec, B_FALSE));
}
#endif
@@ -541,7 +541,7 @@ efx_rx_filter_remove(
EFSYS_ASSERT3P(spec, !=, NULL);
spec->efs_dmaq_id = (uint16_t)erp->er_index;
- return efx_filter_remove_filter(erp->er_enp, spec);
+ return (efx_filter_remove_filter(erp->er_enp, spec));
}
#endif
@@ -673,7 +673,8 @@ efx_rx_qcreate(
EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX);
- EFX_STATIC_ASSERT(EFX_EV_RX_NLABELS == (1 << FRF_AZ_RX_DESCQ_LABEL_WIDTH));
+ EFX_STATIC_ASSERT(EFX_EV_RX_NLABELS ==
+ (1 << FRF_AZ_RX_DESCQ_LABEL_WIDTH));
EFSYS_ASSERT3U(label, <, EFX_EV_RX_NLABELS);
EFSYS_ASSERT3U(enp->en_rx_qcount + 1, <, encp->enc_rxq_limit);
diff --git a/sys/dev/sfxge/common/efx_tx.c b/sys/dev/sfxge/common/efx_tx.c
index a7ec361..dcc225d 100644
--- a/sys/dev/sfxge/common/efx_tx.c
+++ b/sys/dev/sfxge/common/efx_tx.c
@@ -114,7 +114,7 @@ efx_tx_filter_insert(
EFSYS_ASSERT3P(spec, !=, NULL);
spec->efs_dmaq_id = (uint16_t)etp->et_index;
- return efx_filter_insert_filter(etp->et_enp, spec, B_FALSE);
+ return (efx_filter_insert_filter(etp->et_enp, spec, B_FALSE));
}
#endif
@@ -128,7 +128,7 @@ efx_tx_filter_remove(
EFSYS_ASSERT3P(spec, !=, NULL);
spec->efs_dmaq_id = (uint16_t)etp->et_index;
- return efx_filter_remove_filter(etp->et_enp, spec);
+ return (efx_filter_remove_filter(etp->et_enp, spec));
}
#endif
diff --git a/sys/dev/sfxge/common/siena_mon.c b/sys/dev/sfxge/common/siena_mon.c
index de7b793..dc3e59e 100644
--- a/sys/dev/sfxge/common/siena_mon.c
+++ b/sys/dev/sfxge/common/siena_mon.c
@@ -138,13 +138,13 @@ siena_mon_decode_stats(
efx_dword_t dword;
EFSYS_MEM_READD(esmp, 4 * mc_sensor, &dword);
emsvp->emsv_value =
- (uint16_t)EFX_DWORD_FIELD(
- dword,
- MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_VALUE);
+ (uint16_t)EFX_DWORD_FIELD(
+ dword,
+ MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_VALUE);
emsvp->emsv_state =
- (uint16_t)EFX_DWORD_FIELD(
- dword,
- MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE);
+ (uint16_t)EFX_DWORD_FIELD(
+ dword,
+ MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE);
}
}
diff --git a/sys/dev/sfxge/common/siena_nic.c b/sys/dev/sfxge/common/siena_nic.c
index 857b78a..b7d42de 100644
--- a/sys/dev/sfxge/common/siena_nic.c
+++ b/sys/dev/sfxge/common/siena_nic.c
@@ -324,7 +324,8 @@ siena_board_cfg(
efx_mcdi_execute(enp, &req);
if (req.emr_rc == 0) {
- if (req.emr_out_length_used < MC_CMD_GET_RESOURCE_LIMITS_OUT_LEN) {
+ if (req.emr_out_length_used <
+ MC_CMD_GET_RESOURCE_LIMITS_OUT_LEN) {
rc = EMSGSIZE;
goto fail3;
}
diff --git a/sys/dev/sfxge/common/siena_vpd.c b/sys/dev/sfxge/common/siena_vpd.c
index cd643c8..3f1008b 100644
--- a/sys/dev/sfxge/common/siena_vpd.c
+++ b/sys/dev/sfxge/common/siena_vpd.c
@@ -541,11 +541,9 @@ siena_vpd_write(
/* Copy in new vpd and update header */
vpd_offset = dcfg_size - vpd_length;
- EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_offset,
- EFX_DWORD_0, vpd_offset);
+ EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_offset, EFX_DWORD_0, vpd_offset);
memcpy((caddr_t)dcfg + vpd_offset, data, vpd_length);
- EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_length,
- EFX_DWORD_0, vpd_length);
+ EFX_POPULATE_DWORD_1(dcfg->dynamic_vpd_length, EFX_DWORD_0, vpd_length);
/* Update the checksum */
cksum = 0;
diff --git a/sys/dev/sfxge/sfxge_port.c b/sys/dev/sfxge/sfxge_port.c
index 6e21130..4953c92 100644
--- a/sys/dev/sfxge/sfxge_port.c
+++ b/sys/dev/sfxge/sfxge_port.c
@@ -91,16 +91,15 @@ sfxge_mac_stat_handler(SYSCTL_HANDLER_ARGS)
struct sfxge_softc *sc = arg1;
unsigned int id = arg2;
int rc;
+ uint64_t val;
SFXGE_PORT_LOCK(&sc->port);
- if ((rc = sfxge_mac_stat_update(sc)) != 0)
- goto out;
-
- rc = SYSCTL_OUT(req,
- (uint64_t *)sc->port.mac_stats.decode_buf + id,
- sizeof(uint64_t));
-out:
+ if ((rc = sfxge_mac_stat_update(sc)) == 0)
+ val = ((uint64_t *)sc->port.mac_stats.decode_buf)[id];
SFXGE_PORT_UNLOCK(&sc->port);
+
+ if (rc == 0)
+ rc = SYSCTL_OUT(req, &val, sizeof(val));
return (rc);
}
@@ -173,28 +172,29 @@ sfxge_port_wanted_fc_handler(SYSCTL_HANDLER_ARGS)
sc = arg1;
port = &sc->port;
- SFXGE_PORT_LOCK(port);
-
if (req->newptr != NULL) {
if ((error = SYSCTL_IN(req, &fcntl, sizeof(fcntl))) != 0)
- goto out;
-
- if (port->wanted_fc == fcntl)
- goto out;
+ return (error);
- port->wanted_fc = fcntl;
+ SFXGE_PORT_LOCK(port);
- if (port->init_state != SFXGE_PORT_STARTED)
- goto out;
+ if (port->wanted_fc != fcntl) {
+ if (port->init_state == SFXGE_PORT_STARTED)
+ error = efx_mac_fcntl_set(sc->enp,
+ port->wanted_fc,
+ B_TRUE);
+ if (error == 0)
+ port->wanted_fc = fcntl;
+ }
- error = efx_mac_fcntl_set(sc->enp, port->wanted_fc, B_TRUE);
+ SFXGE_PORT_UNLOCK(port);
} else {
- error = SYSCTL_OUT(req, &port->wanted_fc,
- sizeof(port->wanted_fc));
- }
+ SFXGE_PORT_LOCK(port);
+ fcntl = port->wanted_fc;
+ SFXGE_PORT_UNLOCK(port);
-out:
- SFXGE_PORT_UNLOCK(port);
+ error = SYSCTL_OUT(req, &fcntl, sizeof(fcntl));
+ }
return (error);
}
@@ -205,7 +205,6 @@ sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS)
struct sfxge_softc *sc;
struct sfxge_port *port;
unsigned int wanted_fc, link_fc;
- int error;
sc = arg1;
port = &sc->port;
@@ -215,10 +214,9 @@ sfxge_port_link_fc_handler(SYSCTL_HANDLER_ARGS)
efx_mac_fcntl_get(sc->enp, &wanted_fc, &link_fc);
else
link_fc = 0;
- error = SYSCTL_OUT(req, &link_fc, sizeof(link_fc));
SFXGE_PORT_UNLOCK(port);
- return (error);
+ return (SYSCTL_OUT(req, &link_fc, sizeof(link_fc)));
}
#endif /* SFXGE_HAVE_PAUSE_MEDIAOPTS */
@@ -499,16 +497,15 @@ sfxge_phy_stat_handler(SYSCTL_HANDLER_ARGS)
struct sfxge_softc *sc = arg1;
unsigned int id = arg2;
int rc;
+ uint32_t val;
SFXGE_PORT_LOCK(&sc->port);
- if ((rc = sfxge_phy_stat_update(sc)) != 0)
- goto out;
-
- rc = SYSCTL_OUT(req,
- (uint32_t *)sc->port.phy_stats.decode_buf + id,
- sizeof(uint32_t));
-out:
+ if ((rc = sfxge_phy_stat_update(sc)) == 0)
+ val = ((uint32_t *)sc->port.phy_stats.decode_buf)[id];
SFXGE_PORT_UNLOCK(&sc->port);
+
+ if (rc == 0)
+ rc = SYSCTL_OUT(req, &val, sizeof(val));
return (rc);
}
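
Note: both sfxge stat handlers above are restructured around the same rule:
snapshot the value into a local while holding the port lock, drop the lock,
and only then call SYSCTL_OUT(), which may sleep and therefore must not run
under a mutex. The wanted_fc handler gets the same treatment. A sketch of the
snapshot-then-copyout pattern, with a pthread mutex standing in for
SFXGE_PORT_LOCK():

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t port_lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t stats[4] = { 10, 20, 30, 40 };

    /* stand-in for SYSCTL_OUT(): may block, so never call it with a lock held */
    static int
    copy_out(const void *p, size_t len)
    {
        (void)p; (void)len;
        return (0);
    }

    static int
    stat_handler(unsigned id)
    {
        uint64_t val;
        int rc = 0;             /* would come from the stat-update call */

        pthread_mutex_lock(&port_lock);
        val = stats[id];        /* snapshot under the lock */
        pthread_mutex_unlock(&port_lock);

        if (rc == 0)
            rc = copy_out(&val, sizeof(val));  /* copy out lock-free */
        return (rc);
    }

    int
    main(void)
    {
        printf("rc=%d\n", stat_handler(2));
        return (0);
    }
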
diff --git a/sys/dev/sfxge/sfxge_rx.c b/sys/dev/sfxge/sfxge_rx.c
index 0a4b803..23101a5 100644
--- a/sys/dev/sfxge/sfxge_rx.c
+++ b/sys/dev/sfxge/sfxge_rx.c
@@ -92,8 +92,8 @@ static int lro_loss_packets = 20;
#define SFXGE_LRO_CONN_IS_TCPIPV4(c) (!((c)->l2_id & SFXGE_LRO_L2_ID_IPV6))
/* Compare IPv6 addresses, avoiding conditional branches */
-static __inline unsigned long ipv6_addr_cmp(const struct in6_addr *left,
- const struct in6_addr *right)
+static unsigned long ipv6_addr_cmp(const struct in6_addr *left,
+ const struct in6_addr *right)
{
#if LONG_BIT == 64
const uint64_t *left64 = (const uint64_t *)left;
@@ -167,7 +167,7 @@ sfxge_rx_schedule_refill(struct sfxge_rxq *rxq, boolean_t retrying)
sfxge_rx_post_refill, rxq);
}
-static inline struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
+static struct mbuf *sfxge_rx_alloc_mbuf(struct sfxge_softc *sc)
{
struct mb_args args;
struct mbuf *m;
diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c
index 394a751..d726dac 100644
--- a/sys/dev/sfxge/sfxge_tx.c
+++ b/sys/dev/sfxge/sfxge_tx.c
@@ -107,7 +107,7 @@ SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
/* Forward declarations. */
-static inline void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
+static void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
static void sfxge_tx_qunblock(struct sfxge_txq *txq);
static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
@@ -156,7 +156,7 @@ sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
#ifdef SFXGE_HAVE_MQ
-static inline unsigned int
+static unsigned int
sfxge_is_mbuf_non_tcp(struct mbuf *mbuf)
{
/* Absence of TCP checksum flags does not mean that it is non-TCP
@@ -481,7 +481,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
*
* NOTE: drops the txq mutex!
*/
-static inline void
+static void
sfxge_tx_qdpl_service(struct sfxge_txq *txq)
{
SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);
@@ -509,7 +509,7 @@ sfxge_tx_qdpl_service(struct sfxge_txq *txq)
* overload the csum_data field in the mbuf to keep track of this length
* because there is no cheap alternative to avoid races.
*/
-static inline int
+static int
sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked)
{
struct sfxge_tx_dpl *stdp;
@@ -649,7 +649,7 @@ sfxge_if_qflush(struct ifnet *ifp)
sc = ifp->if_softc;
- for (i = 0; i < SFXGE_TX_SCALE(sc); i++)
+ for (i = 0; i < SFXGE_TXQ_IP_TCP_UDP_CKSUM + SFXGE_TX_SCALE(sc); i++)
sfxge_tx_qdpl_flush(sc->txq[i]);
}
@@ -758,7 +758,7 @@ void sfxge_if_start(struct ifnet *ifp)
SFXGE_TXQ_UNLOCK(sc->txq[0]);
}
-static inline void
+static void
sfxge_tx_qdpl_service(struct sfxge_txq *txq)
{
struct ifnet *ifp = txq->sc->ifnet;
@@ -783,7 +783,6 @@ struct sfxge_tso_state {
unsigned packet_space; /* Remaining space in current packet */
/* Input position */
- unsigned dma_seg_i; /* Current DMA segment number */
uint64_t dma_addr; /* DMA address of current position */
unsigned in_len; /* Remaining length in current mbuf */
@@ -792,23 +791,21 @@ struct sfxge_tso_state {
ssize_t nh_off; /* Offset of network header */
ssize_t tcph_off; /* Offset of TCP header */
unsigned header_len; /* Number of bytes of header */
- int full_packet_size; /* Number of bytes to put in each outgoing
- * segment */
};
-static inline const struct ip *tso_iph(const struct sfxge_tso_state *tso)
+static const struct ip *tso_iph(const struct sfxge_tso_state *tso)
{
KASSERT(tso->protocol == htons(ETHERTYPE_IP),
("tso_iph() in non-IPv4 state"));
return (const struct ip *)(tso->mbuf->m_data + tso->nh_off);
}
-static inline const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso)
+static __unused const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso)
{
KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
("tso_ip6h() in non-IPv6 state"));
return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off);
}
-static inline const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso)
+static const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso)
{
return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off);
}
@@ -895,7 +892,6 @@ static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
}
tso->header_len = tso->tcph_off + 4 * tso_tcph(tso)->th_off;
- tso->full_packet_size = tso->header_len + mbuf->m_pkthdr.tso_segsz;
tso->seqnum = ntohl(tso_tcph(tso)->th_seq);
@@ -1015,7 +1011,8 @@ static int tso_start_new_packet(struct sfxge_txq *txq,
tso->seqnum += tso->mbuf->m_pkthdr.tso_segsz;
if (tso->out_len > tso->mbuf->m_pkthdr.tso_segsz) {
/* This packet will not finish the TSO burst. */
- ip_length = tso->full_packet_size - tso->nh_off;
+ ip_length = tso->header_len - tso->nh_off +
+ tso->mbuf->m_pkthdr.tso_segsz;
tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
} else {
/* This packet will be the last in the TSO burst. */
@@ -1280,7 +1277,6 @@ fail:
void
sfxge_tx_stop(struct sfxge_softc *sc)
{
- const efx_nic_cfg_t *encp;
int index;
index = SFXGE_TX_SCALE(sc);
@@ -1289,7 +1285,6 @@ sfxge_tx_stop(struct sfxge_softc *sc)
sfxge_tx_qstop(sc, SFXGE_TXQ_IP_CKSUM);
- encp = efx_nic_cfg_get(sc->enp);
sfxge_tx_qstop(sc, SFXGE_TXQ_NON_CKSUM);
/* Tear down the transmit module */
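
Note: the sfxge TSO rework drops the cached full_packet_size field;
tso_start_new_packet() now derives the IP total length of a non-final segment
directly as header_len - nh_off + tso_segsz. A worked instance with
hypothetical frame sizes:

    /*
     * Assume a 14-byte Ethernet header, 20-byte IP header, 20-byte TCP
     * header and tso_segsz = 1448 bytes of payload per segment:
     *
     *   nh_off     = 14              (start of the IP header)
     *   tcph_off   = 14 + 20 = 34    (start of the TCP header)
     *   header_len = 34 + 20 = 54    (end of the TCP header)
     *
     *   ip_length  = header_len - nh_off + tso_segsz
     *              = 54 - 14 + 1448 = 1488
     *
     * which equals the old full_packet_size - nh_off (1502 - 14), so the
     * state structure no longer needs to carry full_packet_size at all.
     */
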
diff --git a/sys/dev/usb/controller/musb_otg.c b/sys/dev/usb/controller/musb_otg.c
index a751412..eba8c65 100644
--- a/sys/dev/usb/controller/musb_otg.c
+++ b/sys/dev/usb/controller/musb_otg.c
@@ -2258,7 +2258,8 @@ repeat:
if (usb_status & (MUSB2_MASK_IRESET |
MUSB2_MASK_IRESUME | MUSB2_MASK_ISUSP |
- MUSB2_MASK_ICONN | MUSB2_MASK_IDISC)) {
+ MUSB2_MASK_ICONN | MUSB2_MASK_IDISC |
+ MUSB2_MASK_IVBUSERR)) {
DPRINTFN(4, "real bus interrupt 0x%08x\n", usb_status);
@@ -2330,6 +2331,12 @@ repeat:
* always in reset state once device is connected.
*/
if (sc->sc_mode == MUSB2_HOST_MODE) {
+ /* check for VBUS error in USB host mode */
+ if (usb_status & MUSB2_MASK_IVBUSERR) {
+ temp = MUSB2_READ_1(sc, MUSB2_REG_DEVCTL);
+ temp |= MUSB2_MASK_SESS;
+ MUSB2_WRITE_1(sc, MUSB2_REG_DEVCTL, temp);
+ }
if (usb_status & MUSB2_MASK_ICONN)
sc->sc_flags.status_bus_reset = 1;
if (usb_status & MUSB2_MASK_IDISC)
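
Note: the musb_otg change adds MUSB2_MASK_IVBUSERR to the set of
acknowledged bus interrupts and, in host mode, recovers from a VBUS error by
re-asserting the session bit with a read-modify-write of the DEVCTL register.
The generic RMW idiom, sketched against a fake register:

    #include <stdint.h>
    #include <stdio.h>

    #define MASK_SESS 0x01                  /* stand-in for MUSB2_MASK_SESS */

    static uint8_t devctl_reg = 0x40;       /* fake MUSB2_REG_DEVCTL */

    static uint8_t reg_read(void)       { return (devctl_reg); }
    static void    reg_write(uint8_t v) { devctl_reg = v; }

    int
    main(void)
    {
        uint8_t temp;

        /* read-modify-write: set the session bit, preserve everything else */
        temp = reg_read();
        temp |= MASK_SESS;
        reg_write(temp);

        printf("devctl=0x%02x\n", reg_read());  /* 0x41 */
        return (0);
    }
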
diff --git a/sys/dev/usb/controller/uhci.c b/sys/dev/usb/controller/uhci.c
index 0077615..5204d63 100644
--- a/sys/dev/usb/controller/uhci.c
+++ b/sys/dev/usb/controller/uhci.c
@@ -1476,7 +1476,8 @@ uhci_interrupt(uhci_softc_t *sc)
UHCI_STS_USBEI |
UHCI_STS_RD |
UHCI_STS_HSE |
- UHCI_STS_HCPE);
+ UHCI_STS_HCPE |
+ UHCI_STS_HCH);
if (status == 0) {
/* nothing to acknowledge */
diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs
index a40c913..d1c7374 100644
--- a/sys/dev/usb/usbdevs
+++ b/sys/dev/usb/usbdevs
@@ -686,6 +686,7 @@ vendor ASUS2 0x1761 ASUS
vendor SWEEX2 0x177f Sweex
vendor METAGEEK 0x1781 MetaGeek
vendor KAMSTRUP 0x17a8 Kamstrup A/S
+vendor DISPLAYLINK 0x17e9 DisplayLink
vendor LENOVO 0x17ef Lenovo
vendor WAVESENSE 0x17f4 WaveSense
vendor VAISALA 0x1843 Vaisala
@@ -1655,6 +1656,28 @@ product DLINK2 RT3070_4 0x3c15 RT3070
product DLINK2 RT3070_5 0x3c16 RT3070
product DLINK3 DWM652 0x3e04 DWM-652
+/* DisplayLink products */
+product DISPLAYLINK LCD4300U 0x01ba LCD-4300U
+product DISPLAYLINK LCD8000U 0x01bb LCD-8000U
+product DISPLAYLINK LD220 0x0100 Samsung LD220
+product DISPLAYLINK GUC2020 0x0059 IOGEAR DVI GUC2020
+product DISPLAYLINK VCUD60 0x0136 Rextron DVI
+product DISPLAYLINK CONV 0x0138 StarTech CONV-USB2DVI
+product DISPLAYLINK DLDVI 0x0141 DisplayLink DVI
+product DISPLAYLINK VGA10 0x015a CMP-USBVGA10
+product DISPLAYLINK WSDVI 0x0198 WS Tech DVI
+product DISPLAYLINK EC008 0x019b EasyCAP008 DVI
+product DISPLAYLINK HPDOCK 0x01d4 HP USB Docking
+product DISPLAYLINK NL571 0x01d7 HP USB DVI
+product DISPLAYLINK M01061 0x01e2 Lenovo DVI
+product DISPLAYLINK SWDVI 0x024c SUNWEIT DVI
+product DISPLAYLINK NBDOCK 0x0215 VideoHome NBdock1920
+product DISPLAYLINK LUM70 0x02a9 Lilliput UM-70
+product DISPLAYLINK UM7X0 0x401a nanovision MiMo
+product DISPLAYLINK LT1421 0x03e0 Lenovo ThinkVision LT1421
+product DISPLAYLINK POLARIS2 0x0117 Polaris2 USB dock
+product DISPLAYLINK PLUGABLE 0x0377 Plugable docking station
+
/* DMI products */
product DMI CFSM_RW 0xa109 CF/SM Reader/Writer
product DMI DISK 0x2bcf Generic Disk
diff --git a/sys/dev/usb/video/udl.c b/sys/dev/usb/video/udl.c
new file mode 100644
index 0000000..71d6fff
--- /dev/null
+++ b/sys/dev/usb/video/udl.c
@@ -0,0 +1,1075 @@
+/* $OpenBSD: udl.c,v 1.81 2014/12/09 07:05:06 doug Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2015 Hans Petter Selasky <hselasky@freebsd.org>
+ * Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Driver for the "DisplayLink DL-120 / DL-160" graphic chips based on
+ * the reverse-engineered specifications of Florian Echtler
+ * <floe@butterbrot.org>:
+ *
+ * http://floe.butterbrot.org/displaylink/doku.php
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/callout.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/consio.h>
+#include <sys/fbio.h>
+
+#include <dev/fb/fbreg.h>
+#include <dev/syscons/syscons.h>
+
+#include <dev/videomode/videomode.h>
+#include <dev/videomode/edidvar.h>
+
+#include <dev/usb/usb.h>
+#include <dev/usb/usbdi.h>
+#include <dev/usb/usbdi_util.h>
+#include "usbdevs.h"
+
+#include <dev/usb/video/udl.h>
+
+#include "fb_if.h"
+
+#undef DPRINTF
+#undef DPRINTFN
+#define USB_DEBUG_VAR udl_debug
+#include <dev/usb/usb_debug.h>
+
+#ifdef USB_DEBUG
+static int udl_debug = 0;
+
+static SYSCTL_NODE(_hw_usb, OID_AUTO, udl, CTLFLAG_RW, 0, "USB UDL");
+
+SYSCTL_INT(_hw_usb_udl, OID_AUTO, debug, CTLFLAG_RWTUN,
+ &udl_debug, 0, "Debug level");
+#endif
+
+/*
+ * Prototypes.
+ */
+static usb_callback_t udl_bulk_write_callback;
+
+static device_probe_t udl_probe;
+static device_attach_t udl_attach;
+static device_detach_t udl_detach;
+static fb_getinfo_t udl_fb_getinfo;
+static fb_setblankmode_t udl_fb_setblankmode;
+
+static void udl_select_chip(struct udl_softc *, struct usb_attach_arg *);
+static int udl_init_chip(struct udl_softc *);
+static void udl_select_mode(struct udl_softc *);
+static int udl_init_resolution(struct udl_softc *);
+static void udl_fbmem_alloc(struct udl_softc *);
+static int udl_cmd_write_buf_le16(struct udl_softc *, const uint8_t *, uint32_t, uint8_t, int);
+static int udl_cmd_buf_copy_le16(struct udl_softc *, uint32_t, uint32_t, uint8_t, int);
+static void udl_cmd_insert_int_1(struct udl_cmd_buf *, uint8_t);
+static void udl_cmd_insert_int_3(struct udl_cmd_buf *, uint32_t);
+static void udl_cmd_insert_buf_le16(struct udl_cmd_buf *, const uint8_t *, uint32_t);
+static void udl_cmd_write_reg_1(struct udl_cmd_buf *, uint8_t, uint8_t);
+static void udl_cmd_write_reg_3(struct udl_cmd_buf *, uint8_t, uint32_t);
+static int udl_power_save(struct udl_softc *, int, int);
+
+static const struct usb_config udl_config[UDL_N_TRANSFER] = {
+ [UDL_BULK_WRITE_0] = {
+ .type = UE_BULK,
+ .endpoint = UE_ADDR_ANY,
+ .direction = UE_DIR_TX,
+ .flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,},
+ .bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES,
+ .callback = &udl_bulk_write_callback,
+ .frames = UDL_CMD_MAX_FRAMES,
+ .timeout = 5000, /* 5 seconds */
+ },
+ [UDL_BULK_WRITE_1] = {
+ .type = UE_BULK,
+ .endpoint = UE_ADDR_ANY,
+ .direction = UE_DIR_TX,
+ .flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,},
+ .bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES,
+ .callback = &udl_bulk_write_callback,
+ .frames = UDL_CMD_MAX_FRAMES,
+ .timeout = 5000, /* 5 seconds */
+ },
+};
+
+/*
+ * Driver glue.
+ */
+static devclass_t udl_devclass;
+
+static device_method_t udl_methods[] = {
+ DEVMETHOD(device_probe, udl_probe),
+ DEVMETHOD(device_attach, udl_attach),
+ DEVMETHOD(device_detach, udl_detach),
+ DEVMETHOD(fb_getinfo, udl_fb_getinfo),
+ DEVMETHOD_END
+};
+
+static driver_t udl_driver = {
+ .name = "udl",
+ .methods = udl_methods,
+ .size = sizeof(struct udl_softc),
+};
+
+DRIVER_MODULE(udl, uhub, udl_driver, udl_devclass, NULL, NULL);
+MODULE_DEPEND(udl, usb, 1, 1, 1);
+MODULE_DEPEND(udl, fbd, 1, 1, 1);
+MODULE_DEPEND(udl, videomode, 1, 1, 1);
+MODULE_VERSION(udl, 1);
+
+/*
+ * Matching devices.
+ */
+static const STRUCT_USB_HOST_ID udl_devs[] = {
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD4300U, DL120)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD8000U, DL120)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_GUC2020, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LD220, DL165)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VCUD60, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_DLDVI, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VGA10, DL120)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_WSDVI, DLUNK)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_EC008, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_HPDOCK, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NL571, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_M01061, DL195)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NBDOCK, DL165)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_SWDVI, DLUNK)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_UM7X0, DL120)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_CONV, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_PLUGABLE, DL160)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LUM70, DL125)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_POLARIS2, DLUNK)},
+ {USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LT1421, DLUNK)}
+};
+
+static uint32_t
+udl_get_fb_size(struct udl_softc *sc)
+{
+ unsigned i = sc->sc_cur_mode;
+
+ return ((uint32_t)udl_modes[i].hdisplay *
+ (uint32_t)udl_modes[i].vdisplay * 2);
+}
+
+static uint32_t
+udl_get_fb_width(struct udl_softc *sc)
+{
+ unsigned i = sc->sc_cur_mode;
+
+ return (udl_modes[i].hdisplay);
+}
+
+static uint32_t
+udl_get_fb_height(struct udl_softc *sc)
+{
+ unsigned i = sc->sc_cur_mode;
+
+ return (udl_modes[i].vdisplay);
+}
+
+static uint32_t
+udl_get_fb_hz(struct udl_softc *sc)
+{
+ unsigned i = sc->sc_cur_mode;
+
+ return (udl_modes[i].hz);
+}
+
+static void
+udl_callout(void *arg)
+{
+ struct udl_softc *sc = arg;
+ const uint32_t max = udl_get_fb_size(sc);
+
+ if (sc->sc_power_save == 0) {
+ if (sc->sc_sync_off >= max)
+ sc->sc_sync_off = 0;
+ usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]);
+ usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]);
+ }
+ callout_reset(&sc->sc_callout, hz / 5, &udl_callout, sc);
+}
+
+static int
+udl_probe(device_t dev)
+{
+ struct usb_attach_arg *uaa = device_get_ivars(dev);
+
+ if (uaa->usb_mode != USB_MODE_HOST)
+ return (ENXIO);
+ if (uaa->info.bConfigIndex != 0)
+ return (ENXIO);
+ if (uaa->info.bIfaceIndex != 0)
+ return (ENXIO);
+
+ return (usbd_lookup_id_by_uaa(udl_devs, sizeof(udl_devs), uaa));
+}
+
+static int
+udl_attach(device_t dev)
+{
+ struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
+ struct sysctl_oid *tree = device_get_sysctl_tree(dev);
+ struct udl_softc *sc = device_get_softc(dev);
+ struct usb_attach_arg *uaa = device_get_ivars(dev);
+ int error;
+ int i;
+
+ device_set_usb_desc(dev);
+
+ mtx_init(&sc->sc_mtx, "UDL lock", NULL, MTX_DEF);
+ cv_init(&sc->sc_cv, "UDLCV");
+ callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
+ sc->sc_udev = uaa->device;
+
+ error = usbd_transfer_setup(uaa->device, &uaa->info.bIfaceIndex,
+ sc->sc_xfer, udl_config, UDL_N_TRANSFER, sc, &sc->sc_mtx);
+
+ if (error) {
+ DPRINTF("usbd_transfer_setup error=%s\n", usbd_errstr(error));
+ goto detach;
+ }
+ usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_0], &sc->sc_xfer_head[0]);
+ usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_1], &sc->sc_xfer_head[1]);
+
+ TAILQ_INIT(&sc->sc_xfer_head[0]);
+ TAILQ_INIT(&sc->sc_xfer_head[1]);
+ TAILQ_INIT(&sc->sc_cmd_buf_free);
+ TAILQ_INIT(&sc->sc_cmd_buf_pending);
+
+ sc->sc_def_chip = -1;
+ sc->sc_chip = USB_GET_DRIVER_INFO(uaa);
+ sc->sc_def_mode = -1;
+ sc->sc_cur_mode = UDL_MAX_MODES;
+
+ /* Allow chip ID to be overridden */
+ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid_force",
+ CTLFLAG_RWTUN, &sc->sc_def_chip, 0, "chip ID");
+
+ /* Export current chip ID */
+ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid",
+ CTLFLAG_RD, &sc->sc_chip, 0, "chip ID");
+
+ if (sc->sc_def_chip > -1 && sc->sc_def_chip <= DLMAX) {
+ device_printf(dev, "Forcing chip ID to 0x%04x\n", sc->sc_def_chip);
+ sc->sc_chip = sc->sc_def_chip;
+ }
+ /*
+ * The product might have more than one chip
+ */
+ if (sc->sc_chip == DLUNK)
+ udl_select_chip(sc, uaa);
+
+ for (i = 0; i != UDL_CMD_MAX_BUFFERS; i++) {
+ struct udl_cmd_buf *cb = &sc->sc_cmd_buf_temp[i];
+
+ TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry);
+ }
+
+ /*
+ * Initialize chip.
+ */
+ error = udl_init_chip(sc);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ goto detach;
+
+ /*
+ * Select edid mode.
+ */
+ udl_select_mode(sc);
+
+ /* Allow default mode to be overridden */
+ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode_force",
+ CTLFLAG_RWTUN, &sc->sc_def_mode, 0, "mode");
+
+ /* Export current mode */
+ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode",
+ CTLFLAG_RD, &sc->sc_cur_mode, 0, "mode");
+
+ i = sc->sc_def_mode;
+ if (i > -1 && i < UDL_MAX_MODES) {
+ if (udl_modes[i].chip <= sc->sc_chip) {
+ device_printf(dev, "Forcing mode to %d\n", i);
+ sc->sc_cur_mode = i;
+ }
+ }
+ /* Print out the current mode */
+ device_printf(dev, "Mode selected %dx%d @ %dHz\n",
+ (int)udl_get_fb_width(sc),
+ (int)udl_get_fb_height(sc),
+ (int)udl_get_fb_hz(sc));
+
+ udl_init_resolution(sc);
+
+ /* Allocate frame buffer */
+ udl_fbmem_alloc(sc);
+
+ UDL_LOCK(sc);
+ udl_callout(sc);
+ UDL_UNLOCK(sc);
+
+ sc->sc_fb_info.fb_name = device_get_nameunit(dev);
+ sc->sc_fb_info.fb_size = sc->sc_fb_size;
+ sc->sc_fb_info.fb_bpp = 16;
+ sc->sc_fb_info.fb_depth = 16;
+ sc->sc_fb_info.fb_width = udl_get_fb_width(sc);
+ sc->sc_fb_info.fb_height = udl_get_fb_height(sc);
+ sc->sc_fb_info.fb_stride = sc->sc_fb_info.fb_width * 2;
+ sc->sc_fb_info.fb_pbase = 0;
+ sc->sc_fb_info.fb_vbase = (uintptr_t)sc->sc_fb_addr;
+ sc->sc_fb_info.fb_priv = sc;
+ sc->sc_fb_info.setblankmode = &udl_fb_setblankmode;
+
+ sc->sc_fbdev = device_add_child(dev, "fbd", -1);
+ if (sc->sc_fbdev == NULL)
+ goto detach;
+ if (device_probe_and_attach(sc->sc_fbdev) != 0)
+ goto detach;
+
+ return (0);
+
+detach:
+ udl_detach(dev);
+
+ return (ENXIO);
+}
+
+static int
+udl_detach(device_t dev)
+{
+ struct udl_softc *sc = device_get_softc(dev);
+
+ if (sc->sc_fbdev != NULL) {
+ device_t bdev;
+
+ bdev = sc->sc_fbdev;
+ sc->sc_fbdev = NULL;
+ device_detach(bdev);
+ device_delete_child(dev, bdev);
+ }
+ UDL_LOCK(sc);
+ sc->sc_gone = 1;
+ callout_stop(&sc->sc_callout);
+ UDL_UNLOCK(sc);
+
+ usbd_transfer_unsetup(sc->sc_xfer, UDL_N_TRANSFER);
+
+ callout_drain(&sc->sc_callout);
+
+ mtx_destroy(&sc->sc_mtx);
+ cv_destroy(&sc->sc_cv);
+
+ /*
+ * Free framebuffer memory, if any.
+ */
+ free(sc->sc_fb_addr, M_DEVBUF);
+ free(sc->sc_fb_copy, M_DEVBUF);
+
+ return (0);
+}
+
+static struct fb_info *
+udl_fb_getinfo(device_t dev)
+{
+ struct udl_softc *sc = device_get_softc(dev);
+
+ return (&sc->sc_fb_info);
+}
+
+static int
+udl_fb_setblankmode(void *arg, int mode)
+{
+ struct udl_softc *sc = arg;
+
+ switch (mode) {
+ case V_DISPLAY_ON:
+ udl_power_save(sc, 1, M_WAITOK);
+ break;
+ case V_DISPLAY_BLANK:
+ udl_power_save(sc, 1, M_WAITOK);
+		if (sc->sc_fb_addr != NULL) {
+ const uint32_t max = udl_get_fb_size(sc);
+
+ memset((void *)sc->sc_fb_addr, 0, max);
+ }
+ break;
+ case V_DISPLAY_STAND_BY:
+ case V_DISPLAY_SUSPEND:
+ udl_power_save(sc, 0, M_WAITOK);
+ break;
+ }
+ return (0);
+}
+
+static struct udl_cmd_buf *
+udl_cmd_buf_alloc_locked(struct udl_softc *sc, int flags)
+{
+ struct udl_cmd_buf *cb;
+
+ while ((cb = TAILQ_FIRST(&sc->sc_cmd_buf_free)) == NULL) {
+ if (flags != M_WAITOK)
+ break;
+ cv_wait(&sc->sc_cv, &sc->sc_mtx);
+ }
+ if (cb != NULL) {
+ TAILQ_REMOVE(&sc->sc_cmd_buf_free, cb, entry);
+ cb->off = 0;
+ }
+ return (cb);
+}
+
+static struct udl_cmd_buf *
+udl_cmd_buf_alloc(struct udl_softc *sc, int flags)
+{
+ struct udl_cmd_buf *cb;
+
+ UDL_LOCK(sc);
+ cb = udl_cmd_buf_alloc_locked(sc, flags);
+ UDL_UNLOCK(sc);
+ return (cb);
+}
+
+static void
+udl_cmd_buf_send(struct udl_softc *sc, struct udl_cmd_buf *cb)
+{
+ UDL_LOCK(sc);
+ if (sc->sc_gone) {
+ TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry);
+ } else {
+ /* mark end of command stack */
+ udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
+ udl_cmd_insert_int_1(cb, UDL_BULK_CMD_EOC);
+
+ TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_pending, cb, entry);
+ usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]);
+ usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]);
+ }
+ UDL_UNLOCK(sc);
+}
+
+static struct udl_cmd_buf *
+udl_fb_synchronize_locked(struct udl_softc *sc)
+{
+ const uint32_t max = udl_get_fb_size(sc);
+
+	/* bail out if the framebuffer is not set up yet */
+ if (sc->sc_fb_addr == NULL ||
+ sc->sc_fb_copy == NULL)
+ return (NULL);
+
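+	/*
+	 * Scan the framebuffer in chunks of at most
+	 * UDL_CMD_MAX_PIXEL_COUNT pixels (two bytes each) and emit a
+	 * bulk write command for the first chunk that differs from the
+	 * shadow copy.  "sc_sync_off" persists across calls, so the
+	 * scan resumes where it left off.
+	 */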
+ while (sc->sc_sync_off < max) {
+ uint32_t delta = max - sc->sc_sync_off;
+
+ if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2)
+ delta = UDL_CMD_MAX_PIXEL_COUNT * 2;
+		if (bcmp(sc->sc_fb_addr + sc->sc_sync_off,
+		    sc->sc_fb_copy + sc->sc_sync_off, delta) != 0) {
+ struct udl_cmd_buf *cb;
+
+ cb = udl_cmd_buf_alloc_locked(sc, M_NOWAIT);
+ if (cb == NULL)
+ goto done;
+ memcpy(sc->sc_fb_copy + sc->sc_sync_off,
+ sc->sc_fb_addr + sc->sc_sync_off, delta);
+ udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
+ udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD);
+ udl_cmd_insert_int_3(cb, sc->sc_sync_off);
+ udl_cmd_insert_int_1(cb, delta / 2);
+ udl_cmd_insert_buf_le16(cb, sc->sc_fb_copy + sc->sc_sync_off, delta);
+ sc->sc_sync_off += delta;
+ return (cb);
+ } else {
+ sc->sc_sync_off += delta;
+ }
+ }
+done:
+ return (NULL);
+}
+
+static void
+udl_bulk_write_callback(struct usb_xfer *xfer, usb_error_t error)
+{
+ struct udl_softc *sc = usbd_xfer_softc(xfer);
+ struct udl_cmd_head *phead = usbd_xfer_get_priv(xfer);
+ struct udl_cmd_buf *cb;
+ unsigned i;
+
+ switch (USB_GET_STATE(xfer)) {
+ case USB_ST_TRANSFERRED:
+ TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry);
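+		/* FALLTHROUGH */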
+ case USB_ST_SETUP:
+tr_setup:
+ for (i = 0; i != UDL_CMD_MAX_FRAMES; i++) {
+ cb = TAILQ_FIRST(&sc->sc_cmd_buf_pending);
+ if (cb == NULL) {
+ cb = udl_fb_synchronize_locked(sc);
+ if (cb == NULL)
+ break;
+ } else {
+ TAILQ_REMOVE(&sc->sc_cmd_buf_pending, cb, entry);
+ }
+ TAILQ_INSERT_TAIL(phead, cb, entry);
+ usbd_xfer_set_frame_data(xfer, i, cb->buf, cb->off);
+ }
+ if (i != 0) {
+ usbd_xfer_set_frames(xfer, i);
+ usbd_transfer_submit(xfer);
+ }
+ break;
+ default:
+ TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry);
+ if (error != USB_ERR_CANCELLED) {
+			/* try to clear the stall first */
+ usbd_xfer_set_stall(xfer);
+ goto tr_setup;
+ }
+ break;
+ }
+	/* wake up any waiters */
+ cv_signal(&sc->sc_cv);
+}
+
+static int
+udl_power_save(struct udl_softc *sc, int on, int flags)
+{
+ struct udl_cmd_buf *cb;
+
+ /* get new buffer */
+ cb = udl_cmd_buf_alloc(sc, flags);
+ if (cb == NULL)
+ return (EAGAIN);
+
+ DPRINTF("screen %s\n", on ? "ON" : "OFF");
+
+ sc->sc_power_save = on ? 0 : 1;
+
+ if (on)
+ udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON);
+ else
+ udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_OFF);
+
+ udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
+ udl_cmd_buf_send(sc, cb);
+ return (0);
+}
+
+static int
+udl_ctrl_msg(struct udl_softc *sc, uint8_t rt, uint8_t r,
+ uint16_t index, uint16_t value, uint8_t *buf, size_t len)
+{
+ usb_device_request_t req;
+ int error;
+
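+	/*
+	 * Synchronous control transfer; this may sleep and therefore
+	 * must not be called with the driver mutex held.
+	 */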
+ req.bmRequestType = rt;
+ req.bRequest = r;
+ USETW(req.wIndex, index);
+ USETW(req.wValue, value);
+ USETW(req.wLength, len);
+
+ error = usbd_do_request_flags(sc->sc_udev, NULL,
+ &req, buf, 0, NULL, USB_DEFAULT_TIMEOUT);
+
+ DPRINTF("%s\n", usbd_errstr(error));
+
+ return (error);
+}
+
+static int
+udl_poll(struct udl_softc *sc, uint32_t *buf)
+{
+ uint32_t lbuf;
+ int error;
+
+ error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
+ UDL_CTRL_CMD_POLL, 0x0000, 0x0000, (uint8_t *)&lbuf, sizeof(lbuf));
+ if (error == USB_ERR_NORMAL_COMPLETION)
+ *buf = le32toh(lbuf);
+ return (error);
+}
+
+static int
+udl_read_1(struct udl_softc *sc, uint16_t addr, uint8_t *buf)
+{
+ uint8_t lbuf[1];
+ int error;
+
+ error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
+ UDL_CTRL_CMD_READ_1, addr, 0x0000, lbuf, 1);
+ if (error == USB_ERR_NORMAL_COMPLETION)
+ *buf = *(uint8_t *)lbuf;
+ return (error);
+}
+
+static int
+udl_write_1(struct udl_softc *sc, uint16_t addr, uint8_t buf)
+{
+ int error;
+
+ error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE,
+ UDL_CTRL_CMD_WRITE_1, addr, 0x0000, &buf, 1);
+ return (error);
+}
+
+static int
+udl_read_edid(struct udl_softc *sc, uint8_t *buf)
+{
+ uint8_t lbuf[64];
+ uint16_t offset;
+ int error;
+
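+	/*
+	 * The EDID block is 128 bytes.  Each vendor control read
+	 * returns one leading byte (which is skipped) followed by the
+	 * payload, so the block is fetched in three reads carrying
+	 * 63 + 63 + 2 payload bytes.
+	 */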
+ offset = 0;
+
+ error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
+ UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ goto fail;
+ bcopy(lbuf + 1, buf + offset, 63);
+ offset += 63;
+
+ error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
+ UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ goto fail;
+ bcopy(lbuf + 1, buf + offset, 63);
+ offset += 63;
+
+ error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
+ UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 3);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ goto fail;
+ bcopy(lbuf + 1, buf + offset, 2);
+fail:
+ return (error);
+}
+
+static uint8_t
+udl_lookup_mode(uint16_t hdisplay, uint16_t vdisplay, uint8_t hz,
+ uint16_t chip, uint32_t clock)
+{
+ uint8_t idx;
+
+	/*
+	 * First, check whether there is a matching mode with the same
+	 * pixel clock.
+	 */
+ for (idx = 0; idx != UDL_MAX_MODES; idx++) {
+ if ((udl_modes[idx].hdisplay == hdisplay) &&
+ (udl_modes[idx].vdisplay == vdisplay) &&
+ (udl_modes[idx].clock == clock) &&
+ (udl_modes[idx].chip <= chip)) {
+ return (idx);
+ }
+ }
+
+	/*
+	 * If not, check for a matching mode with the same refresh rate.
+	 */
+ for (idx = 0; idx != UDL_MAX_MODES; idx++) {
+ if ((udl_modes[idx].hdisplay == hdisplay) &&
+ (udl_modes[idx].vdisplay == vdisplay) &&
+ (udl_modes[idx].hz == hz) &&
+ (udl_modes[idx].chip <= chip)) {
+ return (idx);
+ }
+ }
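+	/* Nothing matched; "idx" equals UDL_MAX_MODES at this point. */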
+ return (idx);
+}
+
+static void
+udl_select_chip(struct udl_softc *sc, struct usb_attach_arg *uaa)
+{
+ const char *pserial;
+
+ pserial = usb_get_serial(uaa->device);
+
+ sc->sc_chip = DL120;
+
+ if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) &&
+ (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_WSDVI)) {
+
+		/*
+		 * The WS Tech DVI is a DL120 or DL160.  All devices use
+		 * the same revision (0.04), so iSerialNumber must be used
+		 * to determine which chip it is.
+		 */
+
+ if (strlen(pserial) > 7) {
+ if (strncmp(pserial, "0198-13", 7) == 0)
+ sc->sc_chip = DL160;
+ }
+ DPRINTF("iSerialNumber (%s) used to select chip (%d)\n",
+ pserial, sc->sc_chip);
+ }
+ if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) &&
+ (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_SWDVI)) {
+
+		/*
+		 * The SUNWEIT DVI is a DL160, DL125, DL165 or DL195.  The
+		 * major revision distinguishes DL1x0 from DL1x5, and the
+		 * minor revision distinguishes among the DL1x5 chips.
+		 * iSerialNumber does not seem to be unique.
+		 */
+
+ sc->sc_chip = DL160;
+
+ if (uaa->info.bcdDevice >= 0x100) {
+ sc->sc_chip = DL165;
+ if (uaa->info.bcdDevice == 0x104)
+ sc->sc_chip = DL195;
+ if (uaa->info.bcdDevice == 0x108)
+ sc->sc_chip = DL125;
+ }
+ DPRINTF("bcdDevice (%02x) used to select chip (%d)\n",
+ uaa->info.bcdDevice, sc->sc_chip);
+ }
+}
+
+static int
+udl_set_enc_key(struct udl_softc *sc, uint8_t *buf, uint8_t len)
+{
+ int error;
+
+ error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE,
+ UDL_CTRL_CMD_SET_KEY, 0x0000, 0x0000, buf, len);
+ return (error);
+}
+
+static void
+udl_fbmem_alloc(struct udl_softc *sc)
+{
+ uint32_t size;
+
+ size = udl_get_fb_size(sc);
+ size = round_page(size);
+
+ sc->sc_fb_addr = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
+ sc->sc_fb_copy = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
+ sc->sc_fb_size = size;
+}
+
+static void
+udl_cmd_insert_int_1(struct udl_cmd_buf *cb, uint8_t value)
+{
+
+ cb->buf[cb->off] = value;
+ cb->off += 1;
+}
+
+#if 0
+static void
+udl_cmd_insert_int_2(struct udl_cmd_buf *cb, uint16_t value)
+{
+ uint16_t lvalue;
+
+ lvalue = htobe16(value);
+ bcopy(&lvalue, cb->buf + cb->off, 2);
+
+ cb->off += 2;
+}
+
+#endif
+
+static void
+udl_cmd_insert_int_3(struct udl_cmd_buf *cb, uint32_t value)
+{
+ uint32_t lvalue;
+
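+	/*
+	 * Store the 24 least significant bits of "value" in big-endian
+	 * byte order.  The shift direction depends on the host byte
+	 * order because the shift operates on the integer value after
+	 * the htobe32() swap, while bcopy() below copies the first
+	 * three bytes of the in-memory representation.
+	 */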
+#if BYTE_ORDER == BIG_ENDIAN
+ lvalue = htobe32(value) << 8;
+#else
+ lvalue = htobe32(value) >> 8;
+#endif
+ bcopy(&lvalue, cb->buf + cb->off, 3);
+
+ cb->off += 3;
+}
+
+#if 0
+static void
+udl_cmd_insert_int_4(struct udl_cmd_buf *cb, uint32_t value)
+{
+ uint32_t lvalue;
+
+ lvalue = htobe32(value);
+ bcopy(&lvalue, cb->buf + cb->off, 4);
+
+ cb->off += 4;
+}
+
+#endif
+
+static void
+udl_cmd_insert_buf_le16(struct udl_cmd_buf *cb, const uint8_t *buf, uint32_t len)
+{
+ uint32_t x;
+
+ for (x = 0; x != len; x += 2) {
+ /* byte swap from little endian to big endian */
+ cb->buf[cb->off + x + 0] = buf[x + 1];
+ cb->buf[cb->off + x + 1] = buf[x + 0];
+ }
+ cb->off += len;
+}
+
+static void
+udl_cmd_write_reg_1(struct udl_cmd_buf *cb, uint8_t reg, uint8_t val)
+{
+
+ udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
+ udl_cmd_insert_int_1(cb, UDL_BULK_CMD_REG_WRITE_1);
+ udl_cmd_insert_int_1(cb, reg);
+ udl_cmd_insert_int_1(cb, val);
+}
+
+static void
+udl_cmd_write_reg_3(struct udl_cmd_buf *cb, uint8_t reg, uint32_t val)
+{
+
+ udl_cmd_write_reg_1(cb, reg + 0, (val >> 16) & 0xff);
+ udl_cmd_write_reg_1(cb, reg + 1, (val >> 8) & 0xff);
+ udl_cmd_write_reg_1(cb, reg + 2, (val >> 0) & 0xff);
+}
+
+static int
+udl_init_chip(struct udl_softc *sc)
+{
+ uint32_t ui32;
+ uint8_t ui8;
+ int error;
+
+ error = udl_poll(sc, &ui32);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ return (error);
+ DPRINTF("poll=0x%08x\n", ui32);
+
+	/* Some products may also use a later chip version */
+ switch (ui32 & 0xff) {
+ case 0xf1: /* DL1x5 */
+ switch (sc->sc_chip) {
+ case DL120:
+ sc->sc_chip = DL125;
+ break;
+ case DL160:
+ sc->sc_chip = DL165;
+ break;
+ }
+ break;
+ }
+ DPRINTF("chip 0x%04x\n", sc->sc_chip);
+
+ error = udl_read_1(sc, 0xc484, &ui8);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ return (error);
+ DPRINTF("read 0x%02x from 0xc484\n", ui8);
+
+ error = udl_write_1(sc, 0xc41f, 0x01);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ return (error);
+ DPRINTF("write 0x01 to 0xc41f\n");
+
+ error = udl_read_edid(sc, sc->sc_edid);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ return (error);
+ DPRINTF("read EDID\n");
+
+ error = udl_set_enc_key(sc, __DECONST(void *, udl_null_key_1),
+ sizeof(udl_null_key_1));
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ return (error);
+ DPRINTF("set encryption key\n");
+
+ error = udl_write_1(sc, 0xc40b, 0x00);
+ if (error != USB_ERR_NORMAL_COMPLETION)
+ return (error);
+ DPRINTF("write 0x00 to 0xc40b\n");
+
+ return (USB_ERR_NORMAL_COMPLETION);
+}
+
+static void
+udl_init_fb_offsets(struct udl_cmd_buf *cb, uint32_t start16, uint32_t stride16,
+ uint32_t start8, uint32_t stride8)
+{
+ udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00);
+ udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START16, start16);
+ udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE16, stride16);
+ udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START8, start8);
+ udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE8, stride8);
+ udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
+}
+
+static int
+udl_init_resolution(struct udl_softc *sc)
+{
+ const uint32_t max = udl_get_fb_size(sc);
+ const uint8_t *buf = udl_modes[sc->sc_cur_mode].mode;
+ struct udl_cmd_buf *cb;
+ uint32_t delta;
+ uint32_t i;
+ int error;
+
+ /* get new buffer */
+ cb = udl_cmd_buf_alloc(sc, M_WAITOK);
+ if (cb == NULL)
+ return (EAGAIN);
+
+ /* write resolution values and set video memory offsets */
+ udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00);
+ for (i = 0; i < UDL_MODE_SIZE; i++)
+ udl_cmd_write_reg_1(cb, i, buf[i]);
+ udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
+
+ udl_init_fb_offsets(cb, 0x000000, 0x000a00, 0x555555, 0x000500);
+ udl_cmd_buf_send(sc, cb);
+
+ /* fill screen with black color */
+ for (i = 0; i < max; i += delta) {
+ static const uint8_t udl_black[UDL_CMD_MAX_PIXEL_COUNT * 2] __aligned(4);
+
+ delta = max - i;
+ if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2)
+ delta = UDL_CMD_MAX_PIXEL_COUNT * 2;
+ if (i == 0)
+ error = udl_cmd_write_buf_le16(sc, udl_black, i, delta / 2, M_WAITOK);
+ else
+ error = udl_cmd_buf_copy_le16(sc, 0, i, delta / 2, M_WAITOK);
+ if (error)
+ return (error);
+ }
+
+ /* get new buffer */
+ cb = udl_cmd_buf_alloc(sc, M_WAITOK);
+ if (cb == NULL)
+ return (EAGAIN);
+
+ /* show framebuffer content */
+ udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON);
+ udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
+ udl_cmd_buf_send(sc, cb);
+ return (0);
+}
+
+static void
+udl_select_mode(struct udl_softc *sc)
+{
+ struct udl_mode mode;
+ int index = UDL_MAX_MODES;
+ int i;
+
+ /* try to get the preferred mode from EDID */
+ edid_parse(sc->sc_edid, &sc->sc_edid_info);
+#ifdef USB_DEBUG
+ edid_print(&sc->sc_edid_info);
+#endif
+ if (sc->sc_edid_info.edid_preferred_mode != NULL) {
+ mode.hz =
+ (sc->sc_edid_info.edid_preferred_mode->dot_clock * 1000) /
+ (sc->sc_edid_info.edid_preferred_mode->htotal *
+ sc->sc_edid_info.edid_preferred_mode->vtotal);
+ mode.clock =
+ sc->sc_edid_info.edid_preferred_mode->dot_clock / 10;
+ mode.hdisplay =
+ sc->sc_edid_info.edid_preferred_mode->hdisplay;
+ mode.vdisplay =
+ sc->sc_edid_info.edid_preferred_mode->vdisplay;
+ index = udl_lookup_mode(mode.hdisplay, mode.vdisplay, mode.hz,
+ sc->sc_chip, mode.clock);
+ sc->sc_cur_mode = index;
+ } else {
+ DPRINTF("no preferred mode found!\n");
+ }
+
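+	/*
+	 * XXX If EDID carried no preferred mode, "mode" is still
+	 * uninitialized when the DPRINTF below is reached.
+	 */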
+ if (index == UDL_MAX_MODES) {
+ DPRINTF("no mode line found for %dx%d @ %dHz!\n",
+ mode.hdisplay, mode.vdisplay, mode.hz);
+
+ i = 0;
+ while (i < sc->sc_edid_info.edid_nmodes) {
+ mode.hz =
+ (sc->sc_edid_info.edid_modes[i].dot_clock * 1000) /
+ (sc->sc_edid_info.edid_modes[i].htotal *
+ sc->sc_edid_info.edid_modes[i].vtotal);
+ mode.clock =
+ sc->sc_edid_info.edid_modes[i].dot_clock / 10;
+ mode.hdisplay =
+ sc->sc_edid_info.edid_modes[i].hdisplay;
+ mode.vdisplay =
+ sc->sc_edid_info.edid_modes[i].vdisplay;
+ index = udl_lookup_mode(mode.hdisplay, mode.vdisplay,
+ mode.hz, sc->sc_chip, mode.clock);
+ if (index < UDL_MAX_MODES)
+ if ((sc->sc_cur_mode == UDL_MAX_MODES) ||
+ (index > sc->sc_cur_mode))
+ sc->sc_cur_mode = index;
+ i++;
+ }
+ }
+	/*
+	 * If no mode was found, use the default.
+	 */
+ if (sc->sc_cur_mode == UDL_MAX_MODES)
+ sc->sc_cur_mode = udl_lookup_mode(800, 600, 60, sc->sc_chip, 0);
+}
+
+static int
+udl_cmd_write_buf_le16(struct udl_softc *sc, const uint8_t *buf, uint32_t off,
+ uint8_t pixels, int flags)
+{
+ struct udl_cmd_buf *cb;
+
+ cb = udl_cmd_buf_alloc(sc, flags);
+ if (cb == NULL)
+ return (EAGAIN);
+
+ udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
+ udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD);
+ udl_cmd_insert_int_3(cb, off);
+ udl_cmd_insert_int_1(cb, pixels);
+ udl_cmd_insert_buf_le16(cb, buf, 2 * pixels);
+ udl_cmd_buf_send(sc, cb);
+
+ return (0);
+}
+
+static int
+udl_cmd_buf_copy_le16(struct udl_softc *sc, uint32_t src, uint32_t dst,
+ uint8_t pixels, int flags)
+{
+ struct udl_cmd_buf *cb;
+
+ cb = udl_cmd_buf_alloc(sc, flags);
+ if (cb == NULL)
+ return (EAGAIN);
+
+ udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
+ udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_COPY | UDL_BULK_CMD_FB_WORD);
+ udl_cmd_insert_int_3(cb, dst);
+ udl_cmd_insert_int_1(cb, pixels);
+ udl_cmd_insert_int_3(cb, src);
+ udl_cmd_buf_send(sc, cb);
+
+ return (0);
+}
diff --git a/sys/dev/usb/video/udl.h b/sys/dev/usb/video/udl.h
new file mode 100644
index 0000000..1fdae7a
--- /dev/null
+++ b/sys/dev/usb/video/udl.h
@@ -0,0 +1,311 @@
+/* $OpenBSD: udl.h,v 1.21 2013/04/15 09:23:02 mglocker Exp $ */
+/* $FreeBSD$ */
+
+/*
+ * Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _UDL_H_
+#define _UDL_H_
+
+#include <sys/types.h>
+#include <sys/queue.h>
+
+/*
+ * BULK command transfer structure.
+ */
+#define UDL_CMD_MAX_FRAMES 64 /* units */
+#define UDL_CMD_MAX_DATA_SIZE 512 /* bytes */
+#define UDL_CMD_MAX_HEAD_SIZE 16 /* bytes */
+#define UDL_CMD_MAX_PIXEL_COUNT ((UDL_CMD_MAX_DATA_SIZE - UDL_CMD_MAX_HEAD_SIZE) / 2)
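+/* Note: (512 - 16) / 2 = 248 16-bit pixels fit into one command buffer */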
+#define UDL_CMD_MAX_BUFFERS (3 * UDL_CMD_MAX_FRAMES)
+#define UDL_FONT_HEIGHT 16 /* pixels */
+#define UDL_MAX_MODES 25 /* units */
+
+struct udl_cmd_buf {
+ TAILQ_ENTRY(udl_cmd_buf) entry;
+ uint32_t off;
+ uint8_t buf[UDL_CMD_MAX_DATA_SIZE] __aligned(4);
+};
+
+TAILQ_HEAD(udl_cmd_head, udl_cmd_buf);
+
+enum {
+ UDL_BULK_WRITE_0,
+ UDL_BULK_WRITE_1,
+ UDL_N_TRANSFER,
+};
+
+/*
+ * Our per-device structure.
+ */
+struct udl_softc {
+ struct mtx sc_mtx;
+ struct cv sc_cv;
+ struct callout sc_callout;
+ struct usb_xfer *sc_xfer[UDL_N_TRANSFER];
+ struct usb_device *sc_udev;
+ device_t sc_fbdev;
+ struct fb_info sc_fb_info;
+ uint8_t sc_edid[128];
+ struct edid_info sc_edid_info;
+ struct udl_cmd_head sc_xfer_head[2];
+ struct udl_cmd_head sc_cmd_buf_free;
+ struct udl_cmd_head sc_cmd_buf_pending;
+ struct udl_cmd_buf sc_cmd_buf_temp[UDL_CMD_MAX_BUFFERS];
+ uint32_t sc_sync_off;
+ uint32_t sc_fb_size;
+ uint8_t *sc_fb_addr;
+ uint8_t *sc_fb_copy;
+ int sc_def_chip; /* default chip version */
+ int sc_chip;
+#define DLALL 0x0000
+#define DL125 0x0000 /* max 1280x1024, 1440x900 */
+#define DL120 0x0001 /* max 1280x1024, 1440x1050 */
+#define DL160 0x0002 /* max 1600x1200, 1680x1050 */
+#define DL165 0x0003 /* max 1600x1200, 1920x1080 */
+#define DL195 0x0004 /* max 1920x1200, 2048x1152 */
+#define DLMAX 0x0004
+#define DLUNK 0x00ff /* unknown */
+ int sc_def_mode; /* default mode */
+ int sc_cur_mode;
+ uint8_t sc_power_save; /* set if power save is enabled */
+ uint8_t sc_gone;
+};
+
+#define UDL_LOCK(sc) mtx_lock(&(sc)->sc_mtx)
+#define UDL_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx)
+
+/*
+ * Chip commands.
+ */
+#define UDL_CTRL_CMD_READ_EDID 0x02
+#define UDL_CTRL_CMD_WRITE_1 0x03
+#define UDL_CTRL_CMD_READ_1 0x04
+#define UDL_CTRL_CMD_POLL 0x06
+#define UDL_CTRL_CMD_SET_KEY 0x12
+
+#define UDL_BULK_SOC 0xaf /* start of command token */
+
+#define UDL_BULK_CMD_REG_WRITE_1 0x20 /* write 1 byte to register */
+#define UDL_BULK_CMD_EOC 0xa0 /* end of command stack */
+#define UDL_BULK_CMD_DECOMP 0xe0 /* send decompression table */
+
+#define UDL_BULK_CMD_FB_BASE 0x60
+#define UDL_BULK_CMD_FB_WORD 0x08
+#define UDL_BULK_CMD_FB_COMP 0x10
+#define UDL_BULK_CMD_FB_WRITE (UDL_BULK_CMD_FB_BASE | 0x00)
+#define UDL_BULK_CMD_FB_COPY (UDL_BULK_CMD_FB_BASE | 0x02)
+
+/*
+ * Chip registers.
+ */
+#define UDL_REG_ADDR_START16 0x20
+#define UDL_REG_ADDR_STRIDE16 0x23
+#define UDL_REG_ADDR_START8 0x26
+#define UDL_REG_ADDR_STRIDE8 0x29
+
+#define UDL_REG_SCREEN 0x1f
+#define UDL_REG_SCREEN_ON 0x00
+#define UDL_REG_SCREEN_OFF 0x01
+#define UDL_REG_SYNC 0xff
+
+#define UDL_MODE_SIZE 29
+
+/*
+ * Register values for screen resolution initialization.
+ */
+static const uint8_t udl_reg_vals_640x480_60[UDL_MODE_SIZE] = {	/* 25.17 MHz 59.9 Hz
+ * VESA std */
+ 0x00, 0x99, 0x30, 0x26, 0x94, 0x60, 0xa9, 0xce, 0x60, 0x07, 0xb3, 0x0f,
+ 0x79, 0xff, 0xff, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xfc, 0xff, 0xff, 0x01,
+ 0xe0, 0x01, 0x02, 0xab, 0x13
+};
+static const uint8_t udl_reg_vals_640x480_67[UDL_MODE_SIZE] = { /* 30.25 MHz 66.6 Hz MAC
+ * std */
+ 0x00, 0x1d, 0x33, 0x07, 0xb3, 0x60, 0xa9, 0xce, 0x60, 0xb6, 0xa8, 0xff,
+ 0xff, 0xbf, 0x70, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xff, 0xff, 0xf9, 0x01,
+ 0xe0, 0x01, 0x02, 0xa2, 0x17
+};
+static const uint8_t udl_reg_vals_640x480_72[UDL_MODE_SIZE] = {	/* 31.50 MHz 72.8 Hz
+ * VESA std */
+ 0x00, 0x2b, 0xeb, 0x35, 0xd3, 0x0a, 0x95, 0xe6, 0x0e, 0x0f, 0xb5, 0x15,
+ 0x2a, 0xff, 0xff, 0x02, 0x80, 0xcc, 0x1d, 0xff, 0xf9, 0xff, 0xff, 0x01,
+ 0xe0, 0x01, 0x02, 0x9c, 0x18
+};
+static const uint8_t udl_reg_vals_640x480_75[UDL_MODE_SIZE] = {	/* 31.50 MHz 75.7 Hz
+ * VESA std */
+ 0x00, 0xeb, 0xf7, 0xd3, 0x0f, 0x4f, 0x93, 0xfa, 0x47, 0xb5, 0x58, 0xff,
+ 0xff, 0xbf, 0x70, 0x02, 0x80, 0xf4, 0x8f, 0xff, 0xff, 0xff, 0xf9, 0x01,
+ 0xe0, 0x01, 0x02, 0x9c, 0x18
+};
+static const uint8_t udl_reg_vals_800x480_61[UDL_MODE_SIZE] = { /* 33.00 MHz 61.9 Hz */
+ 0x00, 0x20, 0x3c, 0x7a, 0xc9, 0xf2, 0x6c, 0x48, 0xf9, 0x70, 0x53, 0xff,
+ 0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0xf3, 0xff, 0xff, 0xff, 0xf9, 0x01,
+ 0xe0, 0x01, 0x02, 0xc8, 0x19
+};
+static const uint8_t udl_reg_vals_800x600_56[UDL_MODE_SIZE] = { /* 36.00 MHz 56.2 Hz
+ * VESA std */
+ 0x00, 0x65, 0x35, 0x48, 0xf4, 0xf2, 0x6c, 0x19, 0x18, 0xc9, 0x4b, 0xff,
+ 0xff, 0x70, 0x35, 0x03, 0x20, 0x32, 0x31, 0xff, 0xff, 0xff, 0xfc, 0x02,
+ 0x58, 0x01, 0x02, 0x20, 0x1c
+};
+static const uint8_t udl_reg_vals_800x600_60[UDL_MODE_SIZE] = { /* 40.00 MHz 60.3 Hz
+ * VESA std */
+ 0x00, 0x20, 0x3c, 0x7a, 0xc9, 0x93, 0x60, 0xc8, 0xc7, 0x70, 0x53, 0xff,
+ 0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0x8f, 0xff, 0xff, 0xff, 0xf2, 0x02,
+ 0x58, 0x01, 0x02, 0x40, 0x1f
+};
+static const uint8_t udl_reg_vals_800x600_72[UDL_MODE_SIZE] = { /* 50.00 MHz 72.1 Hz
+ * VESA std */
+ 0x00, 0xeb, 0xf7, 0xd1, 0x90, 0x4d, 0x82, 0x23, 0x1f, 0x39, 0xcf, 0xff,
+ 0xff, 0x43, 0x21, 0x03, 0x20, 0x62, 0xc5, 0xff, 0xff, 0xff, 0xca, 0x02,
+ 0x58, 0x01, 0x02, 0x10, 0x27
+};
+static const uint8_t udl_reg_vals_800x600_74[UDL_MODE_SIZE] = { /* 50.00 MHz 74.4 Hz */
+ 0x00, 0xb3, 0x76, 0x39, 0xcf, 0x60, 0xa9, 0xc7, 0xf4, 0x70, 0x53, 0xff,
+ 0xff, 0x35, 0x33, 0x03, 0x20, 0x8f, 0xe9, 0xff, 0xff, 0xff, 0xf9, 0x02,
+ 0x58, 0x01, 0x02, 0x10, 0x27
+};
+static const uint8_t udl_reg_vals_800x600_75[UDL_MODE_SIZE] = { /* 49.50 MHz 75.0 Hz
+ * VESA std */
+ 0x00, 0xb3, 0x76, 0x39, 0xcf, 0xf2, 0x6c, 0x19, 0x18, 0x70, 0x53, 0xff,
+ 0xff, 0x35, 0x33, 0x03, 0x20, 0x32, 0x31, 0xff, 0xff, 0xff, 0xf9, 0x02,
+ 0x58, 0x01, 0x02, 0xac, 0x26
+};
+static const uint8_t udl_reg_vals_1024x768_60[UDL_MODE_SIZE] = { /* 65.00 MHz 60.0 Hz
+ * VESA std */
+ 0x00, 0x36, 0x18, 0xd5, 0x10, 0x60, 0xa9, 0x7b, 0x33, 0xa1, 0x2b, 0x27,
+ 0x32, 0xff, 0xff, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xca, 0xff, 0xff, 0x03,
+ 0x00, 0x04, 0x03, 0xc8, 0x32
+};
+static const uint8_t udl_reg_vals_1024x768_70[UDL_MODE_SIZE] = { /* 75.00 MHz 70.0 Hz
+ * VESA std */
+ 0x00, 0xb4, 0xed, 0x4c, 0x5e, 0x60, 0xa9, 0x7b, 0x33, 0x10, 0x4d, 0xff,
+ 0xff, 0x27, 0x32, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xff, 0xff, 0xca, 0x03,
+ 0x00, 0x04, 0x02, 0x98, 0x3a
+};
+static const uint8_t udl_reg_vals_1024x768_75[UDL_MODE_SIZE] = { /* 78.75 MHz 75.0 Hz
+ * VESA std */
+ 0x00, 0xec, 0xb4, 0xa0, 0x4c, 0x36, 0x0a, 0x07, 0xb3, 0x5e, 0xd5, 0xff,
+ 0xff, 0x0f, 0x79, 0x04, 0x00, 0x0f, 0x66, 0xff, 0xff, 0xff, 0xf9, 0x03,
+ 0x00, 0x04, 0x02, 0x86, 0x3d
+};
+static const uint8_t udl_reg_vals_1280x800_60[UDL_MODE_SIZE] = {	/* 83.46 MHz 59.9 Hz */
+ 0x00, 0xb2, 0x19, 0x34, 0xdf, 0x93, 0x60, 0x30, 0xfb, 0x9f, 0xca, 0xff,
+ 0xff, 0x27, 0x32, 0x05, 0x00, 0x61, 0xf6, 0xff, 0xff, 0xff, 0xf9, 0x03,
+ 0x20, 0x04, 0x02, 0x34, 0x41
+};
+static const uint8_t udl_reg_vals_1280x960_60[UDL_MODE_SIZE] = { /* 108.00 MHz 60.0 Hz
+ * VESA std */
+ 0x00, 0xa6, 0x03, 0x5c, 0x7e, 0x0a, 0x95, 0x48, 0xf4, 0x61, 0xbd, 0xff,
+ 0xff, 0x94, 0x43, 0x05, 0x00, 0x91, 0xe8, 0xff, 0xff, 0xff, 0xf9, 0x03,
+ 0xc0, 0x04, 0x02, 0x60, 0x54
+};
+static const uint8_t udl_reg_vals_1280x1024_60[UDL_MODE_SIZE] = { /* 108.00 MHz 60.0 Hz
+ * VESA std */
+ 0x00, 0x98, 0xf8, 0x0d, 0x57, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff,
+ 0xff, 0x94, 0x43, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04,
+ 0x00, 0x04, 0x02, 0x60, 0x54
+};
+static const uint8_t udl_reg_vals_1280x1024_75[UDL_MODE_SIZE] = { /* 135.00 MHz 75.0 Hz
+ * VESA std */
+ 0x00, 0xce, 0x12, 0x3f, 0x9f, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff,
+ 0xff, 0x32, 0x60, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04,
+ 0x00, 0x04, 0x02, 0x78, 0x69
+};
+static const uint8_t udl_reg_vals_1366x768_60[UDL_MODE_SIZE] = { /* 90 MHz 60.0 Hz */
+ 0x01, 0x19, 0x1e, 0x1f, 0xb0, 0x93, 0x60, 0x40, 0x7b, 0x36, 0xe8, 0x27,
+ 0x32, 0xff, 0xff, 0x05, 0x56, 0x03, 0xd9, 0xff, 0xff, 0xfc, 0xa7, 0x03,
+ 0x00, 0x04, 0x02, 0x9a, 0x42
+};
+static const uint8_t udl_reg_vals_1440x900_60[UDL_MODE_SIZE] = { /* 106.47 MHz 59.9 Hz */
+ 0x00, 0x24, 0xce, 0xe7, 0x72, 0x36, 0x0a, 0x86, 0xca, 0x1c, 0x10, 0xff,
+ 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x0d, 0x94, 0xff, 0xff, 0xff, 0xf9, 0x03,
+ 0x84, 0x04, 0x02, 0x2e, 0x53
+};
+static const uint8_t udl_reg_vals_1440x900_59[UDL_MODE_SIZE] = { /* 106.50 MHz 59.8 Hz */
+ 0x00, 0x24, 0xce, 0xe7, 0x72, 0xd8, 0x2a, 0x1b, 0x28, 0x1c, 0x10, 0xff,
+ 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x36, 0x50, 0xff, 0xff, 0xff, 0xf9, 0x03,
+ 0x84, 0x04, 0x02, 0x34, 0x53
+};
+static const uint8_t udl_reg_vals_1440x900_75[UDL_MODE_SIZE] = { /* 136.49 MHz 75.0 Hz */
+ 0x00, 0x73, 0xa6, 0x14, 0xea, 0x0a, 0x95, 0xca, 0x10, 0x7f, 0x46, 0xff,
+ 0xff, 0x60, 0x3a, 0x05, 0xa0, 0x94, 0x20, 0xff, 0xff, 0xff, 0xf9, 0x03,
+ 0x84, 0x04, 0x02, 0xa2, 0x6a
+};
+static const uint8_t udl_reg_vals_1680x1050_60[UDL_MODE_SIZE] = { /* 147.14 MHz 60.0 Hz */
+ 0x00, 0x53, 0x43, 0xa6, 0x71, 0xc1, 0x52, 0xd9, 0x29, 0x69, 0x9f, 0xff,
+ 0xff, 0xd7, 0xee, 0x06, 0x90, 0xb2, 0x53, 0xff, 0xff, 0xff, 0xf9, 0x04,
+ 0x1a, 0x04, 0x02, 0xf4, 0x72
+};
+static const uint8_t udl_reg_vals_1600x1200_60[UDL_MODE_SIZE] = { /* 162.00 MHz 60.0 Hz
+ * VESA std */
+ 0x00, 0xcf, 0xa4, 0x3c, 0x4e, 0x55, 0x73, 0x71, 0x2b, 0x71, 0x52, 0xff,
+ 0xff, 0xee, 0xca, 0x06, 0x40, 0xe2, 0x57, 0xff, 0xff, 0xff, 0xf9, 0x04,
+ 0xb0, 0x04, 0x02, 0x90, 0x7e
+};
+static const uint8_t udl_reg_vals_1920x1080_60[UDL_MODE_SIZE] = { /* 138.50 MHz 59.9 Hz */
+ 0x00, 0x73, 0xa6, 0x28, 0xb3, 0x54, 0xaa, 0x41, 0x5d, 0x0d, 0x9f, 0x32,
+ 0x60, 0xff, 0xff, 0x07, 0x80, 0x0a, 0xea, 0xff, 0xf9, 0xff, 0xff, 0x04,
+ 0x38, 0x04, 0x02, 0xe0, 0x7c
+};
+
+struct udl_mode {
+ uint16_t hdisplay;
+ uint16_t vdisplay;
+ uint8_t hz;
+ uint16_t chip;
+ uint32_t clock;
+ const uint8_t *mode;
+};
+
+static const struct udl_mode udl_modes[UDL_MAX_MODES] = {
+ {640, 480, 60, DLALL, 2520, udl_reg_vals_640x480_60},
+ {640, 480, 67, DLALL, 3025, udl_reg_vals_640x480_67},
+ {640, 480, 72, DLALL, 3150, udl_reg_vals_640x480_72},
+ {640, 480, 75, DLALL, 3150, udl_reg_vals_640x480_75},
+ {800, 480, 59, DLALL, 5000, udl_reg_vals_800x480_61},
+ {800, 480, 61, DLALL, 3300, udl_reg_vals_800x480_61},
+ {800, 600, 56, DLALL, 3600, udl_reg_vals_800x600_56},
+ {800, 600, 60, DLALL, 4000, udl_reg_vals_800x600_60},
+ {800, 600, 72, DLALL, 5000, udl_reg_vals_800x600_72},
+ {800, 600, 74, DLALL, 5000, udl_reg_vals_800x600_74},
+ {800, 600, 75, DLALL, 4950, udl_reg_vals_800x600_75},
+ {1024, 768, 60, DLALL, 6500, udl_reg_vals_1024x768_60},
+ {1024, 768, 70, DLALL, 7500, udl_reg_vals_1024x768_70},
+ {1024, 768, 75, DLALL, 7850, udl_reg_vals_1024x768_75},
+ {1280, 800, 60, DLALL, 8346, udl_reg_vals_1280x800_60},
+ {1280, 960, 60, DLALL, 10800, udl_reg_vals_1280x960_60},
+ {1280, 1024, 60, DLALL, 10800, udl_reg_vals_1280x1024_60},
+ {1280, 1024, 75, DLALL, 13500, udl_reg_vals_1280x1024_75},
+ {1366, 768, 60, DLALL, 9000, udl_reg_vals_1366x768_60},
+ {1440, 900, 59, DL125, 10650, udl_reg_vals_1440x900_59},
+ {1440, 900, 60, DL125, 10647, udl_reg_vals_1440x900_60},
+ {1440, 900, 75, DL125, 13649, udl_reg_vals_1440x900_75},
+ {1680, 1050, 60, DL160, 14714, udl_reg_vals_1680x1050_60},
+ {1600, 1200, 60, DL160, 16200, udl_reg_vals_1600x1200_60},
+ {1920, 1080, 60, DL165, 13850, udl_reg_vals_1920x1080_60}
+};
+
+/*
+ * Encryption.
+ */
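+/*
+ * Well-known static key.  Setting it is believed to disable the bulk
+ * data scrambling, so that framebuffer data can be sent in the clear
+ * (an assumption inherited from the OpenBSD driver; the mechanism is
+ * not documented by the vendor).
+ */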
+static const uint8_t udl_null_key_1[] = {
+ 0x57, 0xcd, 0xdc, 0xa7, 0x1c, 0x88, 0x5e, 0x15, 0x60, 0xfe, 0xc6, 0x97,
+ 0x16, 0x3d, 0x47, 0xf2
+};
+
+#endif /* _UDL_H_ */
diff --git a/sys/dev/videomode/Makefile.ediddevs b/sys/dev/videomode/Makefile.ediddevs
new file mode 100644
index 0000000..69dc8bc
--- /dev/null
+++ b/sys/dev/videomode/Makefile.ediddevs
@@ -0,0 +1,16 @@
+# $NetBSD: Makefile.ediddevs,v 1.1 2006/05/11 01:49:53 gdamore Exp $
+# $FreeBSD$
+#
+# As per tron@NetBSD.org, the proper procedure is
+#
+# 1.) Change "src/sys/dev/videomode/ediddevs".
+# 2.) Commit "src/sys/dev/videomode/ediddevs".
+# 3.) Execute "make -f Makefile.ediddevs" in "src/sys/dev/videomode".
+# 4.) Commit "src/sys/dev/videomode/ediddevs.h" and
+# "src/sys/dev/videomode/ediddevs_data.h".
+
+.include <bsd.own.mk>
+
+ediddevs.h ediddevs_data.h: ediddevs devlist2h.awk
+ /bin/rm -f ediddevs.h ediddevs_data.h
+ ${TOOL_AWK} -f devlist2h.awk ediddevs
diff --git a/sys/dev/videomode/Makefile.videomode b/sys/dev/videomode/Makefile.videomode
new file mode 100644
index 0000000..27ac06e
--- /dev/null
+++ b/sys/dev/videomode/Makefile.videomode
@@ -0,0 +1,18 @@
+# $NetBSD: Makefile.videomode,v 1.1 2006/03/04 02:34:27 gdamore Exp $
+# $FreeBSD$
+#
+# As per tron@NetBSD.org, the proper procedure is
+#
+# 1.) Change "src/sys/dev/videomode/modelines".
+# 2.) Commit "src/sys/dev/videomode/modelines".
+# 3.) Execute "make -f Makefile.videomode" in "src/sys/dev/videomode".
+# 4.) Commit "src/sys/dev/videomode/videomode.c"
+
+.include <bsd.own.mk>
+
+videomode.c: modelines modelines2c.awk
+ /bin/rm -f videomode.c
+ ${TOOL_AWK} -f modelines2c.awk modelines > videomode.c
+
+test: videomode.c videomode.h test.c
+ ${CC} -I ../../ -o test test.c videomode.c
diff --git a/sys/dev/videomode/devlist2h.awk b/sys/dev/videomode/devlist2h.awk
new file mode 100644
index 0000000..f317da6
--- /dev/null
+++ b/sys/dev/videomode/devlist2h.awk
@@ -0,0 +1,193 @@
+#! /usr/bin/awk -f
+# $NetBSD: devlist2h.awk,v 1.9 2005/12/11 12:21:20 christos Exp $
+# $FreeBSD$
+#
+# Copyright (c) 1995, 1996 Christopher G. Demetriou
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by Christopher G. Demetriou.
+# 4. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+BEGIN {
+ nproducts = nvendors = blanklines = 0
+ dfile="ediddevs_data.h"
+ hfile="ediddevs.h"
+}
+NR == 1 {
+ VERSION = $0
+ gsub("\\$", "", VERSION)
+ gsub(/ $/, "", VERSION)
+
+ printf("/*\t$FreeBSD" "$\t*/\n\n") > dfile
+ printf("/*\n") > dfile
+ printf(" * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.\n") \
+ > dfile
+ printf(" *\n") > dfile
+ printf(" * generated from:\n") > dfile
+ printf(" *\t%s\n", VERSION) > dfile
+ printf(" */\n") > dfile
+
+ printf("/*\t$NetBSD" "$\t*/\n\n") > hfile
+ printf("/*\n") > hfile
+ printf(" * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.\n") \
+ > hfile
+ printf(" *\n") > hfile
+ printf(" * generated from:\n") > hfile
+ printf(" *\t%s\n", VERSION) > hfile
+ printf(" */\n") > hfile
+
+ next
+}
+
+NF > 0 && $1 == "vendor" {
+ nvendors++
+
+ vendorindex[$2] = nvendors; # record index for this name, for later.
+ vendors[nvendors, 1] = $2; # name/ID
+ i = 2; f = 3;
+
+ printf("#define\tEDID_VENDOR_%s\t\"", vendors[nvendors, 1]) > hfile
+
+ # comments
+ oparen = 0
+ while (f <= NF) {
+ if ($f == "#") {
+ printf("(") > hfile
+ oparen = 1
+ f++
+ continue
+ }
+ if (oparen) {
+ printf("%s", $f) > hfile
+ f++
+ continue
+ }
+ vendors[nvendors, i] = $f
+ printf("%s", vendors[nvendors, i]) > hfile
+ if (f < NF)
+ printf(" ") > hfile
+ i++; f++;
+ }
+ if (oparen)
+ printf(")") > hfile
+ printf("\"") > hfile
+ printf("\n") > hfile
+
+ next
+}
+
+NF > 0 && $1 == "product" {
+ nproducts++
+
+ products[nproducts, 1] = $2; # vendor name
+ products[nproducts, 2] = $3; # product id
+ products[nproducts, 3] = $4; # id
+ printf("#define\tEDID_PRODUCT_%s_%s\t%s", products[nproducts, 1],
+ products[nproducts, 2], products[nproducts, 3]) > hfile
+
+ i = 4; f = 5;
+
+ ocomment = oparen = 0
+ if (f <= NF) {
+ printf("\t\t/* ") > hfile
+ ocomment = 1;
+ }
+ while (f <= NF) {
+ if ($f == "#") {
+ printf("(") > hfile
+ oparen = 1
+ f++
+ continue
+ }
+ if (oparen) {
+ printf("%s", $f) > hfile
+ if (f < NF)
+ printf(" ") > hfile
+ f++
+ continue
+ }
+ products[nproducts, i] = $f
+ printf("%s", products[nproducts, i]) > hfile
+ if (f < NF)
+ printf(" ") > hfile
+ i++; f++;
+ }
+ if (oparen)
+ printf(")") > hfile
+ if (ocomment)
+ printf(" */") > hfile
+ printf("\n") > hfile
+
+ next
+}
+{
+ if ($0 == "")
+ blanklines++
+ if (blanklines != 2 && blanklines != 3)
+ print $0 > hfile
+ if (blanklines < 2)
+ print $0 > dfile
+}
+END {
+ # print out the match tables
+
+ printf("\n") > dfile
+ printf("const struct edid_vendor edid_vendors[] = {\n") > dfile
+
+ for (i = 1; i <= nvendors; i++) {
+ printf("\t{") > dfile
+ printf(" \"%s\", EDID_VENDOR_%s", vendors[i, 1], \
+ vendors[i, 1]) > dfile
+ printf(" },\n") > dfile
+ }
+ printf("};\n") > dfile
+ printf("const int edid_nvendors = %d;\n", nvendors) > dfile
+
+ printf("\n") > dfile
+
+ printf("const struct edid_product edid_products[] = {\n") > dfile
+ for (i = 1; i <= nproducts; i++) {
+ printf("\t{\n") > dfile
+ printf("\t \"%s\", EDID_PRODUCT_%s_%s,\n", \
+ products[i, 1], products[i, 1], products[i, 2]) > dfile
+ printf("\t \"") > dfile
+ j = 4
+ needspace = 0
+ while ((i, j) in products) {
+ if (needspace)
+ printf(" ") > dfile
+ printf("%s", products[i, j]) > dfile
+ needspace = 1
+ j++
+ }
+ printf("\",\n") > dfile
+ printf("\t},\n") > dfile
+ }
+ printf("};\n") > dfile
+ printf("const int edid_nproducts = %d;\n", nproducts) >dfile
+
+ close(dfile)
+ close(hfile)
+}
diff --git a/sys/dev/videomode/edid.c b/sys/dev/videomode/edid.c
new file mode 100644
index 0000000..5bd7ee3
--- /dev/null
+++ b/sys/dev/videomode/edid.c
@@ -0,0 +1,647 @@
+/* $NetBSD: edid.c,v 1.12 2013/02/08 16:35:10 skrll Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/libkern.h>
+#include <dev/videomode/videomode.h>
+#include <dev/videomode/ediddevs.h>
+#include <dev/videomode/edidreg.h>
+#include <dev/videomode/edidvar.h>
+#include <dev/videomode/vesagtf.h>
+
+#define EDIDVERBOSE 1
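+/* Integer division, rounded to the nearest whole number */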
+#define DIVIDE(x,y) (((x) + ((y) / 2)) / (y))
+
+/* These are in reverse established timing order */
+static const char *_edid_modes[] = {
+ "1280x1024x75",
+ "1024x768x75",
+ "1024x768x70",
+ "1024x768x60",
+ "1024x768x87i",
+ "832x624x74", /* rounding error, 74.55 Hz aka "832x624x75" */
+ "800x600x75",
+ "800x600x72",
+ "800x600x60",
+ "800x600x56",
+ "640x480x75",
+ "640x480x72",
+ "640x480x67",
+ "640x480x60",
+ "720x400x87", /* rounding error, 87.85 Hz aka "720x400x88" */
+ "720x400x70",
+};
+
+#ifdef EDIDVERBOSE
+struct edid_vendor {
+ const char *vendor;
+ const char *name;
+};
+
+struct edid_product {
+ const char *vendor;
+ uint16_t product;
+ const char *name;
+};
+
+#include <dev/videomode/ediddevs_data.h>
+#endif /* EDIDVERBOSE */
+
+static const char *
+edid_findvendor(const char *vendor)
+{
+#ifdef EDIDVERBOSE
+ int n;
+
+ for (n = 0; n < edid_nvendors; n++)
+ if (memcmp(edid_vendors[n].vendor, vendor, 3) == 0)
+ return edid_vendors[n].name;
+#endif
+ return NULL;
+}
+
+static const char *
+edid_findproduct(const char *vendor, uint16_t product)
+{
+#ifdef EDIDVERBOSE
+ int n;
+
+ for (n = 0; n < edid_nproducts; n++)
+ if (edid_products[n].product == product &&
+ memcmp(edid_products[n].vendor, vendor, 3) == 0)
+ return edid_products[n].name;
+#endif /* EDIDVERBOSE */
+	return NULL;
+}
+
+static void
+edid_strchomp(char *ptr)
+{
+ for (;;) {
+ switch (*ptr) {
+ case '\0':
+ return;
+ case '\r':
+ case '\n':
+ *ptr = '\0';
+ return;
+ }
+ ptr++;
+ }
+}
+
+int
+edid_is_valid(uint8_t *d)
+{
+ int sum = 0, i;
+ uint8_t sig[8] = EDID_SIGNATURE;
+
+ if (memcmp(d, sig, 8) != 0)
+ return EINVAL;
+
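+	/* All 128 bytes of the base block must sum to zero, modulo 256. */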
+ for (i = 0; i < 128; i++)
+ sum += d[i];
+ if ((sum & 0xff) != 0)
+ return EINVAL;
+
+ return 0;
+}
+
+void
+edid_print(struct edid_info *edid)
+{
+ int i;
+
+ if (edid == NULL)
+ return;
+ printf("Vendor: [%s] %s\n", edid->edid_vendor, edid->edid_vendorname);
+ printf("Product: [%04X] %s\n", edid->edid_product,
+ edid->edid_productname);
+ printf("Serial number: %s\n", edid->edid_serial);
+ printf("Manufactured %d Week %d\n",
+ edid->edid_year, edid->edid_week);
+ printf("EDID Version %d.%d\n", edid->edid_version,
+ edid->edid_revision);
+ printf("EDID Comment: %s\n", edid->edid_comment);
+
+ printf("Video Input: %x\n", edid->edid_video_input);
+ if (edid->edid_video_input & EDID_VIDEO_INPUT_DIGITAL) {
+ printf("\tDigital");
+ if (edid->edid_video_input & EDID_VIDEO_INPUT_DFP1_COMPAT)
+ printf(" (DFP 1.x compatible)");
+ printf("\n");
+ } else {
+ printf("\tAnalog\n");
+ switch (EDID_VIDEO_INPUT_LEVEL(edid->edid_video_input)) {
+ case 0:
+ printf("\t-0.7, 0.3V\n");
+ break;
+ case 1:
+ printf("\t-0.714, 0.286V\n");
+ break;
+ case 2:
+ printf("\t-1.0, 0.4V\n");
+ break;
+ case 3:
+ printf("\t-0.7, 0.0V\n");
+ break;
+ }
+ if (edid->edid_video_input & EDID_VIDEO_INPUT_BLANK_TO_BLACK)
+ printf("\tBlank-to-black setup\n");
+ if (edid->edid_video_input & EDID_VIDEO_INPUT_SEPARATE_SYNCS)
+			printf("\tSeparate syncs\n");
+ if (edid->edid_video_input & EDID_VIDEO_INPUT_COMPOSITE_SYNC)
+ printf("\tComposite sync\n");
+ if (edid->edid_video_input & EDID_VIDEO_INPUT_SYNC_ON_GRN)
+ printf("\tSync on green\n");
+ if (edid->edid_video_input & EDID_VIDEO_INPUT_SERRATION)
+ printf("\tSerration vsync\n");
+ }
+
+ printf("Gamma: %d.%02d\n",
+ edid->edid_gamma / 100, edid->edid_gamma % 100);
+
+ printf("Max Size: %d cm x %d cm\n",
+ edid->edid_max_hsize, edid->edid_max_vsize);
+
+ printf("Features: %x\n", edid->edid_features);
+ if (edid->edid_features & EDID_FEATURES_STANDBY)
+ printf("\tDPMS standby\n");
+ if (edid->edid_features & EDID_FEATURES_SUSPEND)
+ printf("\tDPMS suspend\n");
+ if (edid->edid_features & EDID_FEATURES_ACTIVE_OFF)
+ printf("\tDPMS active-off\n");
+ switch (EDID_FEATURES_DISP_TYPE(edid->edid_features)) {
+ case EDID_FEATURES_DISP_TYPE_MONO:
+ printf("\tMonochrome\n");
+ break;
+ case EDID_FEATURES_DISP_TYPE_RGB:
+ printf("\tRGB\n");
+ break;
+ case EDID_FEATURES_DISP_TYPE_NON_RGB:
+ printf("\tMulticolor\n");
+ break;
+ case EDID_FEATURES_DISP_TYPE_UNDEFINED:
+ printf("\tUndefined monitor type\n");
+ break;
+ }
+ if (edid->edid_features & EDID_FEATURES_STD_COLOR)
+ printf("\tStandard color space\n");
+ if (edid->edid_features & EDID_FEATURES_PREFERRED_TIMING)
+ printf("\tPreferred timing\n");
+ if (edid->edid_features & EDID_FEATURES_DEFAULT_GTF)
+ printf("\tDefault GTF supported\n");
+
+ printf("Chroma Info:\n");
+ printf("\tRed X: 0.%03d\n", edid->edid_chroma.ec_redx);
+ printf("\tRed Y: 0.%03d\n", edid->edid_chroma.ec_redy);
+ printf("\tGrn X: 0.%03d\n", edid->edid_chroma.ec_greenx);
+ printf("\tGrn Y: 0.%03d\n", edid->edid_chroma.ec_greeny);
+ printf("\tBlu X: 0.%03d\n", edid->edid_chroma.ec_bluex);
+ printf("\tBlu Y: 0.%03d\n", edid->edid_chroma.ec_bluey);
+ printf("\tWht X: 0.%03d\n", edid->edid_chroma.ec_whitex);
+ printf("\tWht Y: 0.%03d\n", edid->edid_chroma.ec_whitey);
+
+ if (edid->edid_have_range) {
+ printf("Range:\n");
+ printf("\tHorizontal: %d - %d kHz\n",
+ edid->edid_range.er_min_hfreq,
+ edid->edid_range.er_max_hfreq);
+ printf("\tVertical: %d - %d Hz\n",
+ edid->edid_range.er_min_vfreq,
+ edid->edid_range.er_max_vfreq);
+ printf("\tMax Dot Clock: %d MHz\n",
+ edid->edid_range.er_max_clock);
+ if (edid->edid_range.er_have_gtf2) {
+ printf("\tGTF2 hfreq: %d\n",
+ edid->edid_range.er_gtf2_hfreq);
+ printf("\tGTF2 C: %d\n", edid->edid_range.er_gtf2_c);
+ printf("\tGTF2 M: %d\n", edid->edid_range.er_gtf2_m);
+ printf("\tGTF2 J: %d\n", edid->edid_range.er_gtf2_j);
+ printf("\tGTF2 K: %d\n", edid->edid_range.er_gtf2_k);
+ }
+ }
+ printf("Video modes:\n");
+ for (i = 0; i < edid->edid_nmodes; i++) {
+ printf("\t%dx%d @ %dHz",
+ edid->edid_modes[i].hdisplay,
+ edid->edid_modes[i].vdisplay,
+ DIVIDE(DIVIDE(edid->edid_modes[i].dot_clock * 1000,
+ edid->edid_modes[i].htotal), edid->edid_modes[i].vtotal));
+ printf(" (%d %d %d %d %d %d %d",
+ edid->edid_modes[i].dot_clock,
+ edid->edid_modes[i].hsync_start,
+ edid->edid_modes[i].hsync_end,
+ edid->edid_modes[i].htotal,
+ edid->edid_modes[i].vsync_start,
+ edid->edid_modes[i].vsync_end,
+ edid->edid_modes[i].vtotal);
+ printf(" %s%sH %s%sV)\n",
+ edid->edid_modes[i].flags & VID_PHSYNC ? "+" : "",
+ edid->edid_modes[i].flags & VID_NHSYNC ? "-" : "",
+ edid->edid_modes[i].flags & VID_PVSYNC ? "+" : "",
+ edid->edid_modes[i].flags & VID_NVSYNC ? "-" : "");
+ }
+ if (edid->edid_preferred_mode)
+ printf("Preferred mode: %dx%d @ %dHz\n",
+ edid->edid_preferred_mode->hdisplay,
+ edid->edid_preferred_mode->vdisplay,
+ DIVIDE(DIVIDE(edid->edid_preferred_mode->dot_clock * 1000,
+ edid->edid_preferred_mode->htotal),
+ edid->edid_preferred_mode->vtotal));
+
+ printf("Number of extension blocks: %d\n", edid->edid_ext_block_count);
+}
+
+static const struct videomode *
+edid_mode_lookup_list(const char *name)
+{
+ int i;
+
+ for (i = 0; i < videomode_count; i++)
+ if (strcmp(name, videomode_list[i].name) == 0)
+ return &videomode_list[i];
+ return NULL;
+}
+
+static struct videomode *
+edid_search_mode(struct edid_info *edid, const struct videomode *mode)
+{
+ int refresh, i;
+
+ refresh = DIVIDE(DIVIDE(mode->dot_clock * 1000,
+ mode->htotal), mode->vtotal);
+ for (i = 0; i < edid->edid_nmodes; i++) {
+ if (mode->hdisplay == edid->edid_modes[i].hdisplay &&
+ mode->vdisplay == edid->edid_modes[i].vdisplay &&
+ refresh == DIVIDE(DIVIDE(
+ edid->edid_modes[i].dot_clock * 1000,
+ edid->edid_modes[i].htotal), edid->edid_modes[i].vtotal)) {
+ return &edid->edid_modes[i];
+ }
+ }
+ return NULL;
+}
+
+static int
+edid_std_timing(uint8_t *data, struct videomode *vmp)
+{
+ unsigned x, y, f;
+ const struct videomode *lookup;
+ char name[80];
+
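+	/* Skip unused standard timing slots (filler byte patterns). */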
+ if ((data[0] == 1 && data[1] == 1) ||
+ (data[0] == 0 && data[1] == 0) ||
+ (data[0] == 0x20 && data[1] == 0x20))
+ return 0;
+
+ x = EDID_STD_TIMING_HRES(data);
+ switch (EDID_STD_TIMING_RATIO(data)) {
+ case EDID_STD_TIMING_RATIO_16_10:
+ y = x * 10 / 16;
+ break;
+ case EDID_STD_TIMING_RATIO_4_3:
+ y = x * 3 / 4;
+ break;
+ case EDID_STD_TIMING_RATIO_5_4:
+ y = x * 4 / 5;
+ break;
+ case EDID_STD_TIMING_RATIO_16_9:
+ default:
+ y = x * 9 / 16;
+ break;
+ }
+ f = EDID_STD_TIMING_VFREQ(data);
+
+ /* first try to lookup the mode as a DMT timing */
+ snprintf(name, sizeof(name), "%dx%dx%d", x, y, f);
+ if ((lookup = edid_mode_lookup_list(name)) != NULL) {
+ *vmp = *lookup;
+ } else {
+ /* failing that, calculate it using gtf */
+ /*
+ * Hmm. I'm not using alternate GTF timings, which
+ * could, in theory, be present.
+ */
+ vesagtf_mode(x, y, f, vmp);
+ }
+ return 1;
+}
+
+static int
+edid_det_timing(uint8_t *data, struct videomode *vmp)
+{
+ unsigned hactive, hblank, hsyncwid, hsyncoff;
+ unsigned vactive, vblank, vsyncwid, vsyncoff;
+ uint8_t flags;
+
+ flags = EDID_DET_TIMING_FLAGS(data);
+
+ /* we don't support stereo modes (for now) */
+ if (flags & (EDID_DET_TIMING_FLAG_STEREO |
+ EDID_DET_TIMING_FLAG_STEREO_MODE))
+ return 0;
+
+ vmp->dot_clock = EDID_DET_TIMING_DOT_CLOCK(data) / 1000;
+
+ hactive = EDID_DET_TIMING_HACTIVE(data);
+ hblank = EDID_DET_TIMING_HBLANK(data);
+ hsyncwid = EDID_DET_TIMING_HSYNC_WIDTH(data);
+ hsyncoff = EDID_DET_TIMING_HSYNC_OFFSET(data);
+
+ vactive = EDID_DET_TIMING_VACTIVE(data);
+ vblank = EDID_DET_TIMING_VBLANK(data);
+ vsyncwid = EDID_DET_TIMING_VSYNC_WIDTH(data);
+ vsyncoff = EDID_DET_TIMING_VSYNC_OFFSET(data);
+
+ /* Borders are contained within the blank areas. */
+
+ vmp->hdisplay = hactive;
+ vmp->htotal = hactive + hblank;
+ vmp->hsync_start = hactive + hsyncoff;
+ vmp->hsync_end = vmp->hsync_start + hsyncwid;
+
+ vmp->vdisplay = vactive;
+ vmp->vtotal = vactive + vblank;
+ vmp->vsync_start = vactive + vsyncoff;
+ vmp->vsync_end = vmp->vsync_start + vsyncwid;
+
+ vmp->flags = 0;
+
+ if (flags & EDID_DET_TIMING_FLAG_INTERLACE)
+ vmp->flags |= VID_INTERLACE;
+ if (flags & EDID_DET_TIMING_FLAG_HSYNC_POSITIVE)
+ vmp->flags |= VID_PHSYNC;
+ else
+ vmp->flags |= VID_NHSYNC;
+
+ if (flags & EDID_DET_TIMING_FLAG_VSYNC_POSITIVE)
+ vmp->flags |= VID_PVSYNC;
+ else
+ vmp->flags |= VID_NVSYNC;
+
+ return 1;
+}
+
+static void
+edid_block(struct edid_info *edid, uint8_t *data)
+{
+ int i;
+ struct videomode mode, *exist_mode;
+
+ if (EDID_BLOCK_IS_DET_TIMING(data)) {
+ if (!edid_det_timing(data, &mode))
+ return;
+ /* Does this mode already exist? */
+ exist_mode = edid_search_mode(edid, &mode);
+ if (exist_mode != NULL) {
+ *exist_mode = mode;
+ if (edid->edid_preferred_mode == NULL)
+ edid->edid_preferred_mode = exist_mode;
+ } else {
+ edid->edid_modes[edid->edid_nmodes] = mode;
+ if (edid->edid_preferred_mode == NULL)
+ edid->edid_preferred_mode =
+ &edid->edid_modes[edid->edid_nmodes];
+ edid->edid_nmodes++;
+ }
+ return;
+ }
+
+ switch (EDID_BLOCK_TYPE(data)) {
+ case EDID_DESC_BLOCK_TYPE_SERIAL:
+ memcpy(edid->edid_serial, data + EDID_DESC_ASCII_DATA_OFFSET,
+ EDID_DESC_ASCII_DATA_LEN);
+ edid->edid_serial[sizeof(edid->edid_serial) - 1] = 0;
+ break;
+
+ case EDID_DESC_BLOCK_TYPE_ASCII:
+ memcpy(edid->edid_comment, data + EDID_DESC_ASCII_DATA_OFFSET,
+ EDID_DESC_ASCII_DATA_LEN);
+ edid->edid_comment[sizeof(edid->edid_comment) - 1] = 0;
+ break;
+
+ case EDID_DESC_BLOCK_TYPE_RANGE:
+ edid->edid_have_range = 1;
+ edid->edid_range.er_min_vfreq = EDID_DESC_RANGE_MIN_VFREQ(data);
+ edid->edid_range.er_max_vfreq = EDID_DESC_RANGE_MAX_VFREQ(data);
+ edid->edid_range.er_min_hfreq = EDID_DESC_RANGE_MIN_HFREQ(data);
+ edid->edid_range.er_max_hfreq = EDID_DESC_RANGE_MAX_HFREQ(data);
+ edid->edid_range.er_max_clock = EDID_DESC_RANGE_MAX_CLOCK(data);
+ if (!EDID_DESC_RANGE_HAVE_GTF2(data))
+ break;
+ edid->edid_range.er_have_gtf2 = 1;
+ edid->edid_range.er_gtf2_hfreq =
+ EDID_DESC_RANGE_GTF2_HFREQ(data);
+ edid->edid_range.er_gtf2_c = EDID_DESC_RANGE_GTF2_C(data);
+ edid->edid_range.er_gtf2_m = EDID_DESC_RANGE_GTF2_M(data);
+ edid->edid_range.er_gtf2_j = EDID_DESC_RANGE_GTF2_J(data);
+ edid->edid_range.er_gtf2_k = EDID_DESC_RANGE_GTF2_K(data);
+ break;
+
+ case EDID_DESC_BLOCK_TYPE_NAME:
+ /* copy the product name into place */
+ memcpy(edid->edid_productname,
+ data + EDID_DESC_ASCII_DATA_OFFSET,
+ EDID_DESC_ASCII_DATA_LEN);
+ break;
+
+ case EDID_DESC_BLOCK_TYPE_STD_TIMING:
+ data += EDID_DESC_STD_TIMING_START;
+ for (i = 0; i < EDID_DESC_STD_TIMING_COUNT; i++) {
+ if (edid_std_timing(data, &mode)) {
+ /* Does this mode already exist? */
+ exist_mode = edid_search_mode(edid, &mode);
+ if (exist_mode == NULL) {
+ edid->edid_modes[edid->edid_nmodes] =
+ mode;
+ edid->edid_nmodes++;
+ }
+ }
+ data += 2;
+ }
+ break;
+
+ case EDID_DESC_BLOCK_TYPE_COLOR_POINT:
+ /* XXX: not implemented yet */
+ break;
+ }
+}
+
+/*
+ * Parse an EDID data block.  Returns 0 on success, or -1 if the data
+ * does not pass EDID signature and checksum validation.
+ */
+int
+edid_parse(uint8_t *data, struct edid_info *edid)
+{
+ uint16_t manfid, estmodes;
+ const struct videomode *vmp;
+ int i;
+ const char *name;
+ int max_dotclock = 0;
+ int mhz;
+
+ if (edid_is_valid(data) != 0)
+ return -1;
+
+ /* get product identification */
+ manfid = EDID_VENDOR_ID(data);
+ edid->edid_vendor[0] = EDID_MANFID_0(manfid);
+ edid->edid_vendor[1] = EDID_MANFID_1(manfid);
+ edid->edid_vendor[2] = EDID_MANFID_2(manfid);
+ edid->edid_vendor[3] = 0; /* null terminate for convenience */
+
+ edid->edid_product = data[EDID_OFFSET_PRODUCT_ID] +
+ (data[EDID_OFFSET_PRODUCT_ID + 1] << 8);
+
+ name = edid_findvendor(edid->edid_vendor);
+ if (name != NULL)
+ strlcpy(edid->edid_vendorname, name,
+ sizeof(edid->edid_vendorname));
+ else
+ edid->edid_vendorname[0] = '\0';
+
+ name = edid_findproduct(edid->edid_vendor, edid->edid_product);
+ if (name != NULL)
+ strlcpy(edid->edid_productname, name,
+ sizeof(edid->edid_productname));
+ else
+ edid->edid_productname[0] = '\0';
+
+ snprintf(edid->edid_serial, sizeof(edid->edid_serial), "%08x",
+ EDID_SERIAL_NUMBER(data));
+
+ edid->edid_week = EDID_WEEK(data);
+ edid->edid_year = EDID_YEAR(data);
+
+ /* get edid revision */
+ edid->edid_version = EDID_VERSION(data);
+ edid->edid_revision = EDID_REVISION(data);
+
+ edid->edid_video_input = EDID_VIDEO_INPUT(data);
+ edid->edid_max_hsize = EDID_MAX_HSIZE(data);
+ edid->edid_max_vsize = EDID_MAX_VSIZE(data);
+
+ edid->edid_gamma = EDID_GAMMA(data);
+ edid->edid_features = EDID_FEATURES(data);
+
+ edid->edid_chroma.ec_redx = EDID_CHROMA_REDX(data);
+	edid->edid_chroma.ec_redy = EDID_CHROMA_REDY(data);
+ edid->edid_chroma.ec_greenx = EDID_CHROMA_GREENX(data);
+ edid->edid_chroma.ec_greeny = EDID_CHROMA_GREENY(data);
+ edid->edid_chroma.ec_bluex = EDID_CHROMA_BLUEX(data);
+ edid->edid_chroma.ec_bluey = EDID_CHROMA_BLUEY(data);
+ edid->edid_chroma.ec_whitex = EDID_CHROMA_WHITEX(data);
+ edid->edid_chroma.ec_whitey = EDID_CHROMA_WHITEY(data);
+
+ edid->edid_ext_block_count = EDID_EXT_BLOCK_COUNT(data);
+
+ /* lookup established modes */
+ edid->edid_nmodes = 0;
+ edid->edid_preferred_mode = NULL;
+ estmodes = EDID_EST_TIMING(data);
+	/* Iterate in established timing order */
+ for (i = 15; i >= 0; i--) {
+ if (estmodes & (1 << i)) {
+ vmp = edid_mode_lookup_list(_edid_modes[i]);
+ if (vmp != NULL) {
+ edid->edid_modes[edid->edid_nmodes] = *vmp;
+ edid->edid_nmodes++;
+ }
+#ifdef DIAGNOSTIC
+ else
+ printf("no data for est. mode %s\n",
+ _edid_modes[i]);
+#endif
+ }
+ }
+
+ /* do standard timing section */
+ for (i = 0; i < EDID_STD_TIMING_COUNT; i++) {
+ struct videomode mode, *exist_mode;
+ if (edid_std_timing(data + EDID_OFFSET_STD_TIMING + i * 2,
+ &mode)) {
+ /* Does this mode already exist? */
+ exist_mode = edid_search_mode(edid, &mode);
+ if (exist_mode == NULL) {
+ edid->edid_modes[edid->edid_nmodes] = mode;
+ edid->edid_nmodes++;
+ }
+ }
+ }
+
+ /* do detailed timings and descriptors */
+ for (i = 0; i < EDID_BLOCK_COUNT; i++) {
+ edid_block(edid, data + EDID_OFFSET_DESC_BLOCK +
+ i * EDID_BLOCK_SIZE);
+ }
+
+ edid_strchomp(edid->edid_vendorname);
+ edid_strchomp(edid->edid_productname);
+ edid_strchomp(edid->edid_serial);
+ edid_strchomp(edid->edid_comment);
+
+ /*
+	 * XXX: Some monitors lie about their maximum supported dot clock
+	 * by claiming to support modes which need a higher dot clock
+	 * than the stated maximum.  For sanity's sake we bump it to the
+	 * highest dot clock we find in the list of supported modes.
+ */
+ for (i = 0; i < edid->edid_nmodes; i++)
+ if (edid->edid_modes[i].dot_clock > max_dotclock)
+ max_dotclock = edid->edid_modes[i].dot_clock;
+ if (bootverbose) {
+ printf("edid: max_dotclock according to supported modes: %d\n",
+ max_dotclock);
+ }
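+	/* Round the dot clock up to whole MHz (dot_clock is in kHz). */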
+ mhz = (max_dotclock + 999) / 1000;
+
+ if (edid->edid_have_range) {
+ if (mhz > edid->edid_range.er_max_clock)
+ edid->edid_range.er_max_clock = mhz;
+ } else
+ edid->edid_range.er_max_clock = mhz;
+
+ return 0;
+}
+
diff --git a/sys/dev/videomode/ediddevs b/sys/dev/videomode/ediddevs
new file mode 100644
index 0000000..0029704
--- /dev/null
+++ b/sys/dev/videomode/ediddevs
@@ -0,0 +1,106 @@
+$NetBSD: ediddevs,v 1.2 2007/03/07 18:47:13 macallan Exp $
+$FreeBSD$
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Use "make -f Makefile.ediddevs" to regenerate ediddevs.h and ediddevs_data.h
+ */
+
+/*
+ * List of known EDID monitor vendors
+ *
+ * These are standard PNP ids, managed (apparently) by Microsoft.
+ * It is likely that this list is grossly incomplete.
+ */
+vendor AAC AcerView
+vendor AOC AOC
+vendor APP Apple Computer
+vendor AST AST Research
+vendor CPL Compal
+vendor CPQ Compaq
+vendor CTX CTX
+vendor DEC DEC
+vendor DEL Dell
+vendor DPC Delta
+vendor DWE Daewoo
+vendor EIZ EIZO
+vendor ELS ELSA
+vendor EPI Envision
+vendor FCM Funai
+vendor FUJ Fujitsu
+vendor GSM LG Electronics
+vendor GWY Gateway 2000
+vendor HEI Hyundai
+vendor HIT Hitachi
+vendor HSL Hansol
+vendor HTC Hitachi/Nissei
+vendor HWP HP
+vendor IBM IBM
+vendor ICL Fujitsu ICL
+vendor IVM Iiyama
+vendor KDS Korea Data Systems
+vendor MEI Panasonic
+vendor MEL Mitsubishi Electronics
+vendor NAN Nanao
+vendor NEC NEC
+vendor NOK Nokia Data
+vendor PHL Philips
+vendor REL Relisys
+vendor SAM Samsung
+vendor SGI SGI
+vendor SNY Sony
+vendor SRC Shamrock
+vendor SUN Sun Microsystems
+vendor TAT Tatung
+vendor TOS Toshiba
+vendor TSB Toshiba
+vendor VSC ViewSonic
+vendor ZCM Zenith
+
+/*
+ * List of known products, grouped and sorted by vendor.
+ *
+ * EDID version 1.3 requires that monitors expose the monitor name with
+ * the ASCII descriptor type 0xFC, so for monitors using that block, this
+ * information is redundant, and there is no point in listing them here,
+ * unless it is desired to have a symbolic macro to detect the monitor in
+ * special handling code or somesuch.
+ */
+
+/* Dell - this exists for now as a sample. I don't have one of these. */
+product DEL ULTRASCAN14XE_REVA 0x139A Ultrascan 14XE
+product DEL ULTRASCAN14XE_REVB 0x139B Ultrascan 14XE
+
+/* ViewSonic */
+product VSC 17GS 0x0c00 17GS
+product VSC 17PS 0x0c0f 17PS
diff --git a/sys/dev/videomode/ediddevs.h b/sys/dev/videomode/ediddevs.h
new file mode 100644
index 0000000..b383c8a
--- /dev/null
+++ b/sys/dev/videomode/ediddevs.h
@@ -0,0 +1,91 @@
+/* $FreeBSD$ */
+
+/*
+ * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.
+ *
+ * generated from:
+ * NetBSD: ediddevs,v 1.1 2006/05/11 01:49:53 gdamore Exp
+ */
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define EDID_VENDOR_AAC "AcerView"
+#define EDID_VENDOR_AOC "AOC"
+#define EDID_VENDOR_APP "Apple Computer"
+#define EDID_VENDOR_AST "AST Research"
+#define EDID_VENDOR_CPL "Compal"
+#define EDID_VENDOR_CPQ "Compaq"
+#define EDID_VENDOR_CTX "CTX"
+#define EDID_VENDOR_DEC "DEC"
+#define EDID_VENDOR_DEL "Dell"
+#define EDID_VENDOR_DPC "Delta"
+#define EDID_VENDOR_DWE "Daewoo"
+#define EDID_VENDOR_EIZ "EIZO"
+#define EDID_VENDOR_ELS "ELSA"
+#define EDID_VENDOR_EPI "Envision"
+#define EDID_VENDOR_FCM "Funai"
+#define EDID_VENDOR_FUJ "Fujitsu"
+#define EDID_VENDOR_GSM "LG Electronics"
+#define EDID_VENDOR_GWY "Gateway 2000"
+#define EDID_VENDOR_HEI "Hyundai"
+#define EDID_VENDOR_HIT "Hitachi"
+#define EDID_VENDOR_HSL "Hansol"
+#define EDID_VENDOR_HTC "Hitachi/Nissei"
+#define EDID_VENDOR_HWP "HP"
+#define EDID_VENDOR_IBM "IBM"
+#define EDID_VENDOR_ICL "Fujitsu ICL"
+#define EDID_VENDOR_IVM "Iiyama"
+#define EDID_VENDOR_KDS "Korea Data Systems"
+#define EDID_VENDOR_MEI "Panasonic"
+#define EDID_VENDOR_MEL "Mitsubishi Electronics"
+#define EDID_VENDOR_NAN "Nanao"
+#define EDID_VENDOR_NEC "NEC"
+#define EDID_VENDOR_NOK "Nokia Data"
+#define EDID_VENDOR_PHL "Philips"
+#define EDID_VENDOR_REL "Relisys"
+#define EDID_VENDOR_SAM "Samsung"
+#define EDID_VENDOR_SGI "SGI"
+#define EDID_VENDOR_SNY "Sony"
+#define EDID_VENDOR_SRC "Shamrock"
+#define EDID_VENDOR_SUN "Sun Microsystems"
+#define EDID_VENDOR_TAT "Tatung"
+#define EDID_VENDOR_TOS "Toshiba"
+#define EDID_VENDOR_TSB "Toshiba"
+#define EDID_VENDOR_VSC "ViewSonic"
+#define EDID_VENDOR_ZCM "Zenith"
+
+/* Dell - this exists for now as a sample. I don't have one of these. */
+#define EDID_PRODUCT_DEL_ULTRASCAN14XE_REVA 0x139A /* Ultrascan 14XE */
+#define EDID_PRODUCT_DEL_ULTRASCAN14XE_REVB 0x139B /* Ultrascan 14XE */
+
+/* ViewSonic */
+#define EDID_PRODUCT_VSC_17GS 0x0c00 /* 17GS */
+#define EDID_PRODUCT_VSC_17PS 0x0c0f /* 17PS */
diff --git a/sys/dev/videomode/ediddevs_data.h b/sys/dev/videomode/ediddevs_data.h
new file mode 100644
index 0000000..e0ad80a
--- /dev/null
+++ b/sys/dev/videomode/ediddevs_data.h
@@ -0,0 +1,107 @@
+/* $FreeBSD$ */
+
+/*
+ * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.
+ *
+ * generated from:
+ * NetBSD: ediddevs,v 1.1 2006/05/11 01:49:53 gdamore Exp
+ */
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+const struct edid_vendor edid_vendors[] = {
+ { "AAC", EDID_VENDOR_AAC },
+ { "AOC", EDID_VENDOR_AOC },
+ { "APP", EDID_VENDOR_APP },
+ { "AST", EDID_VENDOR_AST },
+ { "CPL", EDID_VENDOR_CPL },
+ { "CPQ", EDID_VENDOR_CPQ },
+ { "CTX", EDID_VENDOR_CTX },
+ { "DEC", EDID_VENDOR_DEC },
+ { "DEL", EDID_VENDOR_DEL },
+ { "DPC", EDID_VENDOR_DPC },
+ { "DWE", EDID_VENDOR_DWE },
+ { "EIZ", EDID_VENDOR_EIZ },
+ { "ELS", EDID_VENDOR_ELS },
+ { "EPI", EDID_VENDOR_EPI },
+ { "FCM", EDID_VENDOR_FCM },
+ { "FUJ", EDID_VENDOR_FUJ },
+ { "GSM", EDID_VENDOR_GSM },
+ { "GWY", EDID_VENDOR_GWY },
+ { "HEI", EDID_VENDOR_HEI },
+ { "HIT", EDID_VENDOR_HIT },
+ { "HSL", EDID_VENDOR_HSL },
+ { "HTC", EDID_VENDOR_HTC },
+ { "HWP", EDID_VENDOR_HWP },
+ { "IBM", EDID_VENDOR_IBM },
+ { "ICL", EDID_VENDOR_ICL },
+ { "IVM", EDID_VENDOR_IVM },
+ { "KDS", EDID_VENDOR_KDS },
+ { "MEI", EDID_VENDOR_MEI },
+ { "MEL", EDID_VENDOR_MEL },
+ { "NAN", EDID_VENDOR_NAN },
+ { "NEC", EDID_VENDOR_NEC },
+ { "NOK", EDID_VENDOR_NOK },
+ { "PHL", EDID_VENDOR_PHL },
+ { "REL", EDID_VENDOR_REL },
+ { "SAM", EDID_VENDOR_SAM },
+ { "SGI", EDID_VENDOR_SGI },
+ { "SNY", EDID_VENDOR_SNY },
+ { "SRC", EDID_VENDOR_SRC },
+ { "SUN", EDID_VENDOR_SUN },
+ { "TAT", EDID_VENDOR_TAT },
+ { "TOS", EDID_VENDOR_TOS },
+ { "TSB", EDID_VENDOR_TSB },
+ { "VSC", EDID_VENDOR_VSC },
+ { "ZCM", EDID_VENDOR_ZCM },
+};
+const int edid_nvendors = 44;
+
+const struct edid_product edid_products[] = {
+ {
+ "DEL", EDID_PRODUCT_DEL_ULTRASCAN14XE_REVA,
+ "Ultrascan 14XE",
+ },
+ {
+ "DEL", EDID_PRODUCT_DEL_ULTRASCAN14XE_REVB,
+ "Ultrascan 14XE",
+ },
+ {
+ "VSC", EDID_PRODUCT_VSC_17GS,
+ "17GS",
+ },
+ {
+ "VSC", EDID_PRODUCT_VSC_17PS,
+ "17PS",
+ },
+};
+const int edid_nproducts = 4;
diff --git a/sys/dev/videomode/edidreg.h b/sys/dev/videomode/edidreg.h
new file mode 100644
index 0000000..29b0466
--- /dev/null
+++ b/sys/dev/videomode/edidreg.h
@@ -0,0 +1,256 @@
+/* $NetBSD: edidreg.h,v 1.3 2011/03/30 18:49:56 jdc Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _DEV_VIDEOMODE_EDIDREG_H
+#define _DEV_VIDEOMODE_EDIDREG_H
+
+#define EDID_OFFSET_SIGNATURE 0x00
+#define EDID_OFFSET_MANUFACTURER_ID 0x08
+#define EDID_OFFSET_PRODUCT_ID 0x0a
+#define EDID_OFFSET_SERIAL_NUMBER 0x0c
+#define EDID_OFFSET_MANUFACTURE_WEEK 0x10
+#define EDID_OFFSET_MANUFACTURE_YEAR 0x11
+#define EDID_OFFSET_VERSION 0x12
+#define EDID_OFFSET_REVISION 0x13
+#define EDID_OFFSET_VIDEO_INPUT 0x14
+#define EDID_OFFSET_MAX_HSIZE 0x15 /* in cm */
+#define EDID_OFFSET_MAX_VSIZE 0x16
+#define EDID_OFFSET_GAMMA 0x17
+#define EDID_OFFSET_FEATURE 0x18
+#define EDID_OFFSET_CHROMA 0x19
+#define EDID_OFFSET_EST_TIMING_1 0x23
+#define EDID_OFFSET_EST_TIMING_2 0x24
+#define EDID_OFFSET_MFG_TIMING 0x25
+#define EDID_OFFSET_STD_TIMING 0x26
+#define EDID_OFFSET_DESC_BLOCK 0x36
+
+#define EDID_SIGNATURE { 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0 }
+
+/* assume x is 16-bit value */
+#define EDID_VENDOR_ID(ptr) ((((ptr)[8]) << 8) + ptr[9])
+#define EDID_MANFID_0(x) ((((x) >> 10) & 0x1f) + '@')
+#define EDID_MANFID_1(x) ((((x) >> 5) & 0x1f) + '@')
+#define EDID_MANFID_2(x) ((((x) >> 0) & 0x1f) + '@')
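+/*
+ * Illustrative example (not in the original): the three manufacturer
+ * letters are packed as 5-bit values biased by '@', so "DEL" encodes
+ * as (4 << 10) | (5 << 5) | 12 = 0x10ac, and EDID_MANFID_0(0x10ac)
+ * recovers 'D'.
+ */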
+
+/* relative to edid block */
+#define EDID_PRODUCT_ID(ptr) (((ptr)[10]) | (((ptr)[11]) << 8))
+#define EDID_SERIAL_NUMBER(ptr) (((ptr)[12] << 24) + \
+ ((ptr)[13] << 16) + \
+ ((ptr)[14] << 8) + \
+ (ptr)[15])
+
+/* relative to edid block */
+#define EDID_WEEK(ptr) ((ptr)[16])
+#define EDID_YEAR(ptr) (((ptr)[17]) + 1990)
+
+#define EDID_VERSION(ptr) ((ptr)[18])
+#define EDID_REVISION(ptr) ((ptr)[19])
+
+#define EDID_VIDEO_INPUT(ptr) ((ptr)[20])
+#define EDID_VIDEO_INPUT_DIGITAL 0x80
+/* if INPUT_BIT_DIGITAL set */
+#define EDID_VIDEO_INPUT_DFP1_COMPAT 0x01
+/* if INPUT_BIT_DIGITAL not set */
+#define EDID_VIDEO_INPUT_BLANK_TO_BLACK 0x10
+#define EDID_VIDEO_INPUT_SEPARATE_SYNCS 0x08
+#define EDID_VIDEO_INPUT_COMPOSITE_SYNC 0x04
+#define EDID_VIDEO_INPUT_SYNC_ON_GRN 0x02
+#define EDID_VIDEO_INPUT_SERRATION 0x01
+#define EDID_VIDEO_INPUT_LEVEL(x) (((x) & 0x60) >> 5)
+/* the level bits select the analog signal level standard (video,sync volts) */
+/* 0 = 0.700,0.300  1 = 0.714,0.286  2 = 1.000,0.400  3 = 0.700,0.000 */
+
+/* relative to edid block */
+#define EDID_MAX_HSIZE(ptr) ((ptr)[21]) /* cm */
+#define EDID_MAX_VSIZE(ptr) ((ptr)[22]) /* cm */
+/* gamma is scaled by 100 (avoid fp), e.g. 213 == 2.13 */
+#define _GAMMA(x) ((x) == 0xff ? 100 : ((x) + 100))
+#define EDID_GAMMA(ptr) _GAMMA(ptr[23])
+
+#define EDID_FEATURES(ptr) ((ptr)[24])
+#define EDID_FEATURES_STANDBY 0x80
+#define EDID_FEATURES_SUSPEND 0x40
+#define EDID_FEATURES_ACTIVE_OFF 0x20
+#define EDID_FEATURES_DISP_TYPE(x) (((x) & 0x18) >> 3)
+#define EDID_FEATURES_DISP_TYPE_MONO 0
+#define EDID_FEATURES_DISP_TYPE_RGB 1
+#define EDID_FEATURES_DISP_TYPE_NON_RGB 2
+#define EDID_FEATURES_DISP_TYPE_UNDEFINED 3
+#define EDID_FEATURES_STD_COLOR 0x04
+#define EDID_FEATURES_PREFERRED_TIMING 0x02
+#define EDID_FEATURES_DEFAULT_GTF 0x01
+
+/* chroma values 0.0 - 0.999 scaled as 0-999 */
+#define _CHLO(byt, shft) (((byt) >> (shft)) & 0x3)
+#define _CHHI(byt) ((byt) << 2)
+#define _CHHILO(ptr, l, s, h) (_CHLO((ptr)[l], s) | _CHHI((ptr)[h]))
+#define _CHROMA(ptr, l, s, h) ((_CHHILO(ptr, l, s, h) * 1000) / 1024)
+
+#define EDID_CHROMA_REDX(ptr) (_CHROMA(ptr, 25, 6, 27))
+#define EDID_CHROMA_REDY(ptr) (_CHROMA(ptr, 25, 4, 28))
+#define EDID_CHROMA_GREENX(ptr) (_CHROMA(ptr, 25, 2, 29))
+#define EDID_CHROMA_GREENY(ptr) (_CHROMA(ptr, 25, 0, 30))
+#define EDID_CHROMA_BLUEX(ptr) (_CHROMA(ptr, 26, 6, 31))
+#define EDID_CHROMA_BLUEY(ptr) (_CHROMA(ptr, 26, 4, 32))
+#define EDID_CHROMA_WHITEX(ptr) (_CHROMA(ptr, 26, 2, 33))
+#define EDID_CHROMA_WHITEY(ptr) (_CHROMA(ptr, 26, 0, 34))
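+
+/*
+ * Worked example (illustrative): with (ptr)[27] == 0xa0 and bits 7-6
+ * of (ptr)[25] == 2, EDID_CHROMA_REDX() assembles the 10-bit value
+ * (0xa0 << 2) | 2 = 642 and scales it to 642 * 1000 / 1024 = 626,
+ * i.e. a red x chromaticity of 0.626.
+ */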
+
+/* relative to edid block */
+#define EDID_EST_TIMING(ptr) (((ptr)[35] << 8) | (ptr)[36])
+#define EDID_EST_TIMING_720_400_70 0x8000 /* 720x400 @ 70Hz */
+#define EDID_EST_TIMING_720_400_88 0x4000 /* 720x400 @ 88Hz */
+#define EDID_EST_TIMING_640_480_60 0x2000 /* 640x480 @ 60Hz */
+#define EDID_EST_TIMING_640_480_67 0x1000 /* 640x480 @ 67Hz */
+#define EDID_EST_TIMING_640_480_72 0x0800 /* 640x480 @ 72Hz */
+#define EDID_EST_TIMING_640_480_75 0x0400 /* 640x480 @ 75Hz */
+#define EDID_EST_TIMING_800_600_56 0x0200 /* 800x600 @ 56Hz */
+#define EDID_EST_TIMING_800_600_60 0x0100 /* 800x600 @ 60Hz */
+#define EDID_EST_TIMING_800_600_72 0x0080 /* 800x600 @ 72Hz */
+#define EDID_EST_TIMING_800_600_75 0x0040 /* 800x600 @ 75Hz */
+#define EDID_EST_TIMING_832_624_75 0x0020 /* 832x624 @ 75Hz */
+#define EDID_EST_TIMING_1024_768_87I 0x0010 /* 1024x768i @ 87Hz */
+#define EDID_EST_TIMING_1024_768_60 0x0008 /* 1024x768 @ 60Hz */
+#define EDID_EST_TIMING_1024_768_70 0x0004 /* 1024x768 @ 70Hz */
+#define EDID_EST_TIMING_1024_768_75 0x0002 /* 1024x768 @ 75Hz */
+#define EDID_EST_TIMING_1280_1024_75 0x0001 /* 1280x1024 @ 75Hz */
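+
+/*
+ * Illustrative decode (not in the original): an EDID_EST_TIMING()
+ * value of 0x2128 claims 640x480@60, 800x600@60, 832x624@75 and
+ * 1024x768@60.
+ */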
+
+/*
+ * N.B.: ptr is relative to standard timing block - used for standard timing
+ * descriptors as well as standard timings section of edid!
+ */
+#define EDID_STD_TIMING_HRES(ptr) ((((ptr)[0]) * 8) + 248)
+#define EDID_STD_TIMING_VFREQ(ptr) ((((ptr)[1]) & 0x3f) + 60)
+#define EDID_STD_TIMING_RATIO(ptr) ((ptr)[1] & 0xc0)
+#define EDID_STD_TIMING_RATIO_16_10 0x00
+#define EDID_STD_TIMING_RATIO_4_3 0x40
+#define EDID_STD_TIMING_RATIO_5_4 0x80
+#define EDID_STD_TIMING_RATIO_16_9 0xc0
+
+#define EDID_STD_TIMING_SIZE 16
+#define EDID_STD_TIMING_COUNT 8
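+
+/*
+ * Illustrative decode (not in the original): the byte pair
+ * { 0x61, 0x4f } gives HRES = 0x61 * 8 + 248 = 1024, RATIO 0x40 (4:3,
+ * hence 768 lines) and VFREQ = (0x4f & 0x3f) + 60 = 75, i.e.
+ * 1024x768@75.
+ */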
+
+/*
+ * N.B.: ptr is relative to descriptor block start
+ */
+#define EDID_BLOCK_SIZE 18
+#define EDID_BLOCK_COUNT 4
+
+/* detailed timing block.... what a mess */
+#define EDID_BLOCK_IS_DET_TIMING(ptr) ((ptr)[0] | (ptr)[1])
+
+#define EDID_DET_TIMING_DOT_CLOCK(ptr) (((ptr)[0] | ((ptr)[1] << 8)) * 10000)
+#define _HACT_LO(ptr) ((ptr)[2])
+#define _HBLK_LO(ptr) ((ptr)[3])
+#define _HACT_HI(ptr) (((ptr)[4] & 0xf0) << 4)
+#define _HBLK_HI(ptr) (((ptr)[4] & 0x0f) << 8)
+#define EDID_DET_TIMING_HACTIVE(ptr) (_HACT_LO(ptr) | _HACT_HI(ptr))
+#define EDID_DET_TIMING_HBLANK(ptr) (_HBLK_LO(ptr) | _HBLK_HI(ptr))
+#define _VACT_LO(ptr) ((ptr)[5])
+#define _VBLK_LO(ptr) ((ptr)[6])
+#define _VACT_HI(ptr) (((ptr)[7] & 0xf0) << 4)
+#define _VBLK_HI(ptr) (((ptr)[7] & 0x0f) << 8)
+#define EDID_DET_TIMING_VACTIVE(ptr) (_VACT_LO(ptr) | _VACT_HI(ptr))
+#define EDID_DET_TIMING_VBLANK(ptr) (_VBLK_LO(ptr) | _VBLK_HI(ptr))
+#define _HOFF_LO(ptr) ((ptr)[8])
+#define _HWID_LO(ptr) ((ptr)[9])
+#define _VOFF_LO(ptr) ((ptr)[10] >> 4)
+#define _VWID_LO(ptr) ((ptr)[10] & 0xf)
+#define _HOFF_HI(ptr) (((ptr)[11] & 0xc0) << 2)
+#define _HWID_HI(ptr) (((ptr)[11] & 0x30) << 4)
+#define _VOFF_HI(ptr) (((ptr)[11] & 0x0c) << 2)
+#define _VWID_HI(ptr) (((ptr)[11] & 0x03) << 4)
+#define EDID_DET_TIMING_HSYNC_OFFSET(ptr) (_HOFF_LO(ptr) | _HOFF_HI(ptr))
+#define EDID_DET_TIMING_HSYNC_WIDTH(ptr) (_HWID_LO(ptr) | _HWID_HI(ptr))
+#define EDID_DET_TIMING_VSYNC_OFFSET(ptr) (_VOFF_LO(ptr) | _VOFF_HI(ptr))
+#define EDID_DET_TIMING_VSYNC_WIDTH(ptr) (_VWID_LO(ptr) | _VWID_HI(ptr))
+#define _HSZ_LO(ptr) ((ptr)[12])
+#define _VSZ_LO(ptr) ((ptr)[13])
+#define _HSZ_HI(ptr) (((ptr)[14] & 0xf0) << 4)
+#define _VSZ_HI(ptr) (((ptr)[14] & 0x0f) << 8)
+#define EDID_DET_TIMING_HSIZE(ptr) (_HSZ_LO(ptr) | _HSZ_HI(ptr))
+#define EDID_DET_TIMING_VSIZE(ptr) (_VSZ_LO(ptr) | _VSZ_HI(ptr))
+#define EDID_DET_TIMING_HBORDER(ptr) ((ptr)[15])
+#define EDID_DET_TIMING_VBORDER(ptr) ((ptr)[16])
+#define EDID_DET_TIMING_FLAGS(ptr) ((ptr)[17])
+#define EDID_DET_TIMING_FLAG_INTERLACE 0x80
+#define EDID_DET_TIMING_FLAG_STEREO 0x60 /* stereo or not */
+#define EDID_DET_TIMING_FLAG_SYNC_SEPARATE 0x18
+#define EDID_DET_TIMING_FLAG_VSYNC_POSITIVE 0x04
+#define EDID_DET_TIMING_FLAG_HSYNC_POSITIVE 0x02
+#define EDID_DET_TIMING_FLAG_STEREO_MODE 0x01 /* stereo mode */
+
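+/*
+ * Worked example (illustrative): a 74.25 MHz detailed timing stores
+ * 74250000 / 10000 = 7425 = 0x1d01 little-endian, so (ptr)[0] == 0x01
+ * and (ptr)[1] == 0x1d; and with (ptr)[2] == 0x00, (ptr)[4] == 0x5f,
+ * EDID_DET_TIMING_HACTIVE() yields 0 | ((0x5f & 0xf0) << 4) = 1280.
+ */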
+
+/* N.B.: these tests assume that we already checked for detailed timing! */
+#define EDID_BLOCK_TYPE(ptr) ((ptr)[3])
+
+#define EDID_DESC_BLOCK_SIZE 18
+#define EDID_DESC_BLOCK_TYPE_SERIAL 0xFF
+#define EDID_DESC_BLOCK_TYPE_ASCII 0xFE
+#define EDID_DESC_BLOCK_TYPE_RANGE 0xFD
+#define EDID_DESC_BLOCK_TYPE_NAME 0xFC
+#define EDID_DESC_BLOCK_TYPE_COLOR_POINT 0xFB
+#define EDID_DESC_BLOCK_TYPE_STD_TIMING 0xFA
+
+/* used for descriptors 0xFF, 0xFE, and 0xFC */
+#define EDID_DESC_ASCII_DATA_OFFSET 5
+#define EDID_DESC_ASCII_DATA_LEN 13
+
+#define EDID_DESC_RANGE_MIN_VFREQ(ptr) ((ptr)[5]) /* Hz */
+#define EDID_DESC_RANGE_MAX_VFREQ(ptr) ((ptr)[6]) /* Hz */
+#define EDID_DESC_RANGE_MIN_HFREQ(ptr) ((ptr)[7]) /* kHz */
+#define EDID_DESC_RANGE_MAX_HFREQ(ptr) ((ptr)[8]) /* kHz */
+#define EDID_DESC_RANGE_MAX_CLOCK(ptr) (((ptr)[9]) * 10) /* MHz */
+#define EDID_DESC_RANGE_HAVE_GTF2(ptr) (((ptr)[10]) == 0x02)
+#define EDID_DESC_RANGE_GTF2_HFREQ(ptr) (((ptr)[12]) * 2)
+#define EDID_DESC_RANGE_GTF2_C(ptr) (((ptr)[13]) / 2)
+#define EDID_DESC_RANGE_GTF2_M(ptr) ((ptr)[14] + ((ptr)[15] << 8))
+#define EDID_DESC_RANGE_GTF2_K(ptr) ((ptr)[16])
+#define EDID_DESC_RANGE_GTF2_J(ptr) ((ptr)[17] / 2)
+
+#define EDID_DESC_COLOR_WHITEX(ptr)
+#define EDID_DESC_COLOR_WHITE_INDEX_1(ptr) ((ptr)[5])
+#define EDID_DESC_COLOR_WHITEX_1(ptr) _CHROMA(ptr, 6, 2, 7)
+#define EDID_DESC_COLOR_WHITEY_1(ptr) _CHROMA(ptr, 6, 0, 8)
+#define EDID_DESC_COLOR_GAMMA_1(ptr) _GAMMA(ptr[9])
+#define EDID_DESC_COLOR_WHITE_INDEX_2(ptr) ((ptr)[10])
+#define EDID_DESC_COLOR_WHITEX_2(ptr) _CHROMA(ptr, 11, 2, 12)
+#define EDID_DESC_COLOR_WHITEY_2(ptr) _CHROMA(ptr, 11, 0, 13)
+#define EDID_DESC_COLOR_GAMMA_2(ptr) _GAMMA(ptr[14])
+
+#define EDID_DESC_STD_TIMING_START 5
+#define EDID_DESC_STD_TIMING_COUNT 6
+
+#define EDID_EXT_BLOCK_COUNT(ptr) ((ptr)[126])
+
+#endif /* _DEV_VIDEOMODE_EDIDREG_H */
diff --git a/sys/dev/videomode/edidvar.h b/sys/dev/videomode/edidvar.h
new file mode 100644
index 0000000..da1211b
--- /dev/null
+++ b/sys/dev/videomode/edidvar.h
@@ -0,0 +1,96 @@
+/* $NetBSD: edidvar.h,v 1.2 2006/05/11 19:05:41 gdamore Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _DEV_VIDEOMODE_EDIDVAR_H
+#define _DEV_VIDEOMODE_EDIDVAR_H
+
+struct edid_chroma {
+ uint16_t ec_redx;
+ uint16_t ec_redy;
+ uint16_t ec_greenx;
+ uint16_t ec_greeny;
+ uint16_t ec_bluex;
+ uint16_t ec_bluey;
+ uint16_t ec_whitex;
+ uint16_t ec_whitey;
+};
+
+struct edid_range {
+ uint16_t er_min_vfreq; /* Hz */
+ uint16_t er_max_vfreq; /* Hz */
+ uint16_t er_min_hfreq; /* kHz */
+ uint16_t er_max_hfreq; /* kHz */
+ uint16_t er_max_clock; /* MHz */
+ int er_have_gtf2;
+ uint16_t er_gtf2_hfreq;
+ uint16_t er_gtf2_c;
+ uint16_t er_gtf2_m;
+ uint16_t er_gtf2_k;
+ uint16_t er_gtf2_j;
+};
+
+struct edid_info {
+ uint8_t edid_vendor[4];
+ char edid_vendorname[16];
+ char edid_productname[16];
+ char edid_comment[16];
+ char edid_serial[16];
+ uint16_t edid_product;
+ uint8_t edid_version;
+ uint8_t edid_revision;
+ int edid_year;
+ int edid_week;
+	uint8_t edid_video_input;	/* see edidreg.h */
+ uint8_t edid_max_hsize; /* in cm */
+ uint8_t edid_max_vsize; /* in cm */
+ uint8_t edid_gamma;
+ uint8_t edid_features;
+ uint8_t edid_ext_block_count;
+
+ int edid_have_range;
+ struct edid_range edid_range;
+
+ struct edid_chroma edid_chroma;
+
+ /* parsed modes */
+ struct videomode *edid_preferred_mode;
+ int edid_nmodes;
+ struct videomode edid_modes[64];
+};
+
+int edid_is_valid(uint8_t *);
+int edid_parse(uint8_t *, struct edid_info *);
+void edid_print(struct edid_info *);
+
+#endif /* _DEV_VIDEOMODE_EDIDVAR_H */
diff --git a/sys/dev/videomode/modelines b/sys/dev/videomode/modelines
new file mode 100644
index 0000000..147f6a5
--- /dev/null
+++ b/sys/dev/videomode/modelines
@@ -0,0 +1,181 @@
+// $NetBSD: modelines,v 1.8 2011/03/21 19:28:37 jdc Exp $
+// $FreeBSD$
+//
+// This file was imported from XFree86, and is made of the contents of both
+// the vesamodes and extramodes files. As a result these should correspond
+// to the same default modes compiled into XFree86.
+//
+// Default modes distilled from
+// "VESA and Industry Standards and Guide for Computer Display Monitor
+// Timing", version 1.0, revision 0.8, adopted September 17, 1998.
+//
+// $XFree86: xc/programs/Xserver/hw/xfree86/etc/vesamodes,v 1.3 1999/11/16 03:28:03 tsi Exp $
+// $XFree86: xc/programs/Xserver/hw/xfree86/etc/extramodes,v 1.5 2002/06/05 19:43:05 dawes Exp $
+//
+//
+// Use "make -f Makefile.videomode" to regenerate videomode.c
+//
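+//
+// Each ModeLine below has the form (the field layout consumed by
+// modelines2c.awk):
+//
+//	ModeLine "name" dotclock-MHz hdisp hsyncstart hsyncend htotal
+//		vdisp vsyncstart vsyncend vtotal [+/-]hsync [+/-]vsync [flags]
+//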
+
+# 640x350 @ 85Hz (VESA) hsync: 37.9kHz
+ModeLine "640x350" 31.5 640 672 736 832 350 382 385 445 +hsync -vsync
+
+# 640x400 @ 85Hz (VESA) hsync: 37.9kHz
+ModeLine "640x400" 31.5 640 672 736 832 400 401 404 445 -hsync +vsync
+
+# 720x400 @ 70Hz (EDID established timing) hsync: 31.47kHz
+ModeLine "720x400" 28.32 720 738 846 900 400 412 414 449 -hsync +vsync
+
+# 720x400 @ 85Hz (VESA) hsync: 37.9kHz
+ModeLine "720x400" 35.5 720 756 828 936 400 401 404 446 -hsync +vsync
+
+# 720x400 @ 88Hz (EDID established timing) hsync: 39.44kHz
+ModeLine "720x400" 35.5 720 738 846 900 400 421 423 449 -hsync -vsync
+
+# 640x480 @ 60Hz (Industry standard) hsync: 31.5kHz
+ModeLine "640x480" 25.175 640 656 752 800 480 490 492 525 -hsync -vsync
+
+# 640x480 @ 72Hz (VESA) hsync: 37.9kHz
+ModeLine "640x480" 31.5 640 664 704 832 480 489 492 520 -hsync -vsync
+
+# 640x480 @ 75Hz (VESA) hsync: 37.5kHz
+ModeLine "640x480" 31.5 640 656 720 840 480 481 484 500 -hsync -vsync
+
+# 640x480 @ 85Hz (VESA) hsync: 43.3kHz
+ModeLine "640x480" 36.0 640 696 752 832 480 481 484 509 -hsync -vsync
+
+# 800x600 @ 56Hz (VESA) hsync: 35.2kHz
+ModeLine "800x600" 36.0 800 824 896 1024 600 601 603 625 +hsync +vsync
+
+# 800x600 @ 60Hz (VESA) hsync: 37.9kHz
+ModeLine "800x600" 40.0 800 840 968 1056 600 601 605 628 +hsync +vsync
+
+# 800x600 @ 72Hz (VESA) hsync: 48.1kHz
+ModeLine "800x600" 50.0 800 856 976 1040 600 637 643 666 +hsync +vsync
+
+# 800x600 @ 75Hz (VESA) hsync: 46.9kHz
+ModeLine "800x600" 49.5 800 816 896 1056 600 601 604 625 +hsync +vsync
+
+# 800x600 @ 85Hz (VESA) hsync: 53.7kHz
+ModeLine "800x600" 56.25 800 832 896 1048 600 601 604 631 +hsync +vsync
+
+# 1024x768i @ 43Hz (industry standard) hsync: 35.5kHz
+ModeLine "1024x768" 44.9 1024 1032 1208 1264 768 768 776 817 +hsync +vsync Interlace
+
+# 1024x768 @ 60Hz (VESA) hsync: 48.4kHz
+ModeLine "1024x768" 65.0 1024 1048 1184 1344 768 771 777 806 -hsync -vsync
+
+# 1024x768 @ 70Hz (VESA) hsync: 56.5kHz
+ModeLine "1024x768" 75.0 1024 1048 1184 1328 768 771 777 806 -hsync -vsync
+
+# 1024x768 @ 75Hz (VESA) hsync: 60.0kHz
+ModeLine "1024x768" 78.75 1024 1040 1136 1312 768 769 772 800 +hsync +vsync
+
+# 1024x768 @ 85Hz (VESA) hsync: 68.7kHz
+ModeLine "1024x768" 94.5 1024 1072 1168 1376 768 769 772 808 +hsync +vsync
+
+# 1024x768 @ 89Hz (non-standard) hsync: 72.0kHz
+ModeLine "1024x768" 100 1024 1108 1280 1408 768 768 780 796 +hsync +vsync
+
+# 1152x864 @ 75Hz (VESA) hsync: 67.5kHz
+ModeLine "1152x864" 108.0 1152 1216 1344 1600 864 865 868 900 +hsync +vsync
+
+# 1280x768 @ 75Hz (non-standard) hsync: 60.6kHz
+ModeLine "1280x768" 105.64 1280 1312 1712 1744 768 782 792 807 -hsync +vsync
+
+# 1280x960 @ 60Hz (VESA) hsync: 60.0kHz
+ModeLine "1280x960" 108.0 1280 1376 1488 1800 960 961 964 1000 +hsync +vsync
+
+# 1280x960 @ 85Hz (VESA) hsync: 85.9kHz
+ModeLine "1280x960" 148.5 1280 1344 1504 1728 960 961 964 1011 +hsync +vsync
+
+# 1280x1024 @ 60Hz (VESA) hsync: 64.0kHz
+ModeLine "1280x1024" 108.0 1280 1328 1440 1688 1024 1025 1028 1066 +hsync +vsync
+
+# 1280x1024 @ 70Hz (non-standard) hsync: 74.0kHz
+ModeLine "1280x1024" 126.0 1280 1328 1440 1688 1024 1025 1028 1066 +hsync +vsync
+
+# 1280x1024 @ 75Hz (VESA) hsync: 80.0kHz
+ModeLine "1280x1024" 135.0 1280 1296 1440 1688 1024 1025 1028 1066 +hsync +vsync
+
+# 1280x1024 @ 85Hz (VESA) hsync: 91.1kHz
+ModeLine "1280x1024" 157.5 1280 1344 1504 1728 1024 1025 1028 1072 +hsync +vsync
+
+# 1600x1200 @ 60Hz (VESA) hsync: 75.0kHz
+ModeLine "1600x1200" 162.0 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync
+
+# 1600x1200 @ 65Hz (VESA) hsync: 81.3kHz
+ModeLine "1600x1200" 175.5 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync
+
+# 1600x1200 @ 70Hz (VESA) hsync: 87.5kHz
+ModeLine "1600x1200" 189.0 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync
+
+# 1600x1200 @ 75Hz (VESA) hsync: 93.8kHz
+ModeLine "1600x1200" 202.5 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync
+
+# 1600x1200 @ 85Hz (VESA) hsync: 106.3kHz
+ModeLine "1600x1200" 229.5 1600 1664 1856 2160 1200 1201 1204 1250 +hsync +vsync
+
+# 1680x1050 @ 60.00Hz (GTF) hsync: 65.22 kHz; pclk: 147.14 MHz
+ModeLine "1680x1050" 147.14 1680 1784 1968 2256 1050 1051 1054 1087 +hsync +vsync
+
+# 1792x1344 @ 60Hz (VESA) hsync: 83.6kHz
+ModeLine "1792x1344" 204.8 1792 1920 2120 2448 1344 1345 1348 1394 -hsync +vsync
+
+# 1792x1344 @ 75Hz (VESA) hsync: 106.3kHz
+ModeLine "1792x1344" 261.0 1792 1888 2104 2456 1344 1345 1348 1417 -hsync +vsync
+
+# 1856x1392 @ 60Hz (VESA) hsync: 86.3kHz
+ModeLine "1856x1392" 218.3 1856 1952 2176 2528 1392 1393 1396 1439 -hsync +vsync
+
+# 1856x1392 @ 75Hz (VESA) hsync: 112.5kHz
+ModeLine "1856x1392" 288.0 1856 1984 2208 2560 1392 1393 1396 1500 -hsync +vsync
+
+# 1920x1440 @ 60Hz (VESA) hsync: 90.0kHz
+ModeLine "1920x1440" 234.0 1920 2048 2256 2600 1440 1441 1444 1500 -hsync +vsync
+
+# 1920x1440 @ 75Hz (VESA) hsync: 112.5kHz
+ModeLine "1920x1440" 297.0 1920 2064 2288 2640 1440 1441 1444 1500 -hsync +vsync
+
+
+//
+// Extra modes to include as default modes in the X server.
+//
+// $XFree86: xc/programs/Xserver/hw/xfree86/etc/extramodes,v 1.5 2002/06/05 19:43:05 dawes Exp $
+//
+
+# 832x624 @ 75Hz (74.55Hz) (fix if the official/Apple spec is different) hsync: 49.725kHz
+ModeLine "832x624" 57.284 832 864 928 1152 624 625 628 667 -Hsync -Vsync
+
+# 1152x768 @ 54.8Hz (Titanium PowerBook) hsync: 44.2kHz
+ModeLine "1152x768" 64.995 1152 1178 1314 1472 768 771 777 806 +hsync +vsync
+
+# 1400x1050 @ 60Hz (VESA GTF) hsync: 65.5kHz
+ModeLine "1400x1050" 122.0 1400 1488 1640 1880 1050 1052 1064 1082 +hsync +vsync
+
+# 1400x1050 @ 75Hz (VESA GTF) hsync: 82.2kHz
+ModeLine "1400x1050" 155.8 1400 1464 1784 1912 1050 1052 1064 1090 +hsync +vsync
+
+# 1600x1024 @ 60Hz (SGI 1600SW) hsync: 64.0kHz
+Modeline "1600x1024" 106.910 1600 1620 1640 1670 1024 1027 1030 1067 -hsync -vsync
+
+# 1920x1440 @ 85Hz (VESA GTF) hsync: 128.5kHz
+Modeline "1920x1440" 341.35 1920 2072 2288 2656 1440 1441 1444 1512 -hsync +vsync
+
+# 2048x1536 @ 60Hz (VESA GTF) hsync: 95.3kHz
+Modeline "2048x1536" 266.95 2048 2200 2424 2800 1536 1537 1540 1589 -hsync +vsync
+
+# 2048x1536 @ 75Hz (VESA GTF) hsync: 120.2kHz
+Modeline "2048x1536" 340.48 2048 2216 2440 2832 1536 1537 1540 1603 -hsync +vsync
+
+# 2048x1536 @ 85Hz (VESA GTF) hsync: 137.0kHz
+Modeline "2048x1536" 388.04 2048 2216 2440 2832 1536 1537 1540 1612 -hsync +vsync
+
+//
+// some Sun-specific modes
+//
+
+# 1152x900 @ 66Hz - default on most SBus graphics devices
+ModeLine "1152x900" 94.50 1152 1192 1320 1528 900 902 906 937 -hsync -vsync
+
+# 1152x900 @ 76Hz
+ModeLine "1152x900" 105.56 1152 1168 1280 1472 900 902 906 943 -hsync -vsync
diff --git a/sys/dev/videomode/modelines2c.awk b/sys/dev/videomode/modelines2c.awk
new file mode 100644
index 0000000..9b95c3f
--- /dev/null
+++ b/sys/dev/videomode/modelines2c.awk
@@ -0,0 +1,150 @@
+#! /usr/bin/awk -f
+# $NetBSD: modelines2c.awk,v 1.4 2006/10/26 23:19:50 bjh21 Exp $
+# $FreeBSD$
+#
+# Copyright (c) 2006 Itronix Inc.
+# All rights reserved.
+#
+# Written by Garrett D'Amore for Itronix Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of Itronix Inc. may not be used to endorse
+# or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+BEGIN {
+ nmodes = 0;
+}
+
+NR == 1 {
+ split($0,v,"$");
+
+ VERSION=v[2];
+
+ printf("/*\t$NetBSD" "$\t*/\n\n");
+ printf("/*\n") ;
+ printf(" * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.\n");
+ printf(" *\n");
+ printf(" * generated from:\n");
+ printf(" *\t%s\n", VERSION);
+ printf(" */\n\n");
+
+ printf("#include <sys/cdefs.h>\n");
+ printf("__KERNEL_RCSID(0, \"$NetBSD" "$\");\n\n");
+
+ printf("#include <dev/videomode/videomode.h>\n\n");
+
+ printf("/*\n");
+ printf(" * These macros help the modelines below fit on one line.\n");
+ printf(" */\n");
+ printf("#define HP VID_PHSYNC\n");
+ printf("#define HN VID_NHSYNC\n");
+ printf("#define VP VID_PVSYNC\n");
+ printf("#define VN VID_NVSYNC\n");
+ printf("#define I VID_INTERLACE\n");
+ printf("#define DS VID_DBLSCAN\n");
+ printf("\n");
+
+ printf("#define M(nm,hr,vr,clk,hs,he,ht,vs,ve,vt,f) \\\n");
+ printf("\t{ clk, hr, hs, he, ht, vr, vs, ve, vt, f, nm } \n\n");
+
+ printf("const struct videomode videomode_list[] = {\n");
+
+ next
+}
+
+(/^ModeLine/) {
+ dotclock = $3;
+
+ hdisplay = $4;
+ hsyncstart = $5;
+ hsyncend = $6;
+ htotal = $7;
+
+ vdisplay = $8;
+ vsyncstart = $9;
+ vsyncend = $10;
+ vtotal = $11;
+
+ macro = "MODE";
+ iflag = "";
+ iflags = "";
+ hflags = "HP";
+ vflags = "VP";
+
+ if ($12 ~ "^-")
+ hflags = "HN";
+
+ if ($13 ~ "^-")
+ vflags = "VN";
+
+ ifactor=1.0;
+ if ($14 ~ "[Ii][Nn][Tt][Ee][Rr][Ll][Aa][Cc][Ee]") {
+ iflag = "i";
+ iflags = "|I";
+ ifactor = 2.0;
+ }
+
+ # We truncate the vrefresh figure, but some mode descriptions rely
+ # on rounding, so we can't win here. Adding an additional .1
+ # compensates to some extent.
+
+ hrefresh= (dotclock * 1000000) / htotal;
+ vrefresh= int(((hrefresh * ifactor) / vtotal) + .1);
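+
+	# Worked example (illustrative): for the 25.175 MHz 640x480 mode,
+	# hrefresh = 25175000 / 800 = 31468.75 and
+	# vrefresh = int((31468.75 / 525) + .1) = int(60.04) = 60.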
+
+ modestr = sprintf("%dx%dx%d%s", hdisplay, vdisplay, vrefresh, iflag);
+
+# printf("/* %dx%d%s refresh %d Hz, hsync %d kHz */\n",
+# hdisplay, vdisplay, iflag, vrefresh, hrefresh/1000);
+ printf("M(\"%s\",%d,%d,%d,%d,%d,%d,%d,%d,%d,%s),\n",
+ modestr,
+ hdisplay, vdisplay, dotclock * 1000,
+ hsyncstart, hsyncend, htotal,
+ vsyncstart, vsyncend, vtotal, hflags "|" vflags iflags);
+
+ modestr = sprintf("%dx%dx%d%s",
+ hdisplay/2 , vdisplay/2, vrefresh, iflag);
+
+ dmodes[nmodes]=sprintf("M(\"%s\",%d,%d,%d,%d,%d,%d,%d,%d,%d,%s),",
+ modestr,
+ hdisplay/2, vdisplay/2, dotclock * 1000 / 2,
+ hsyncstart/2, hsyncend/2, htotal/2,
+ vsyncstart/2, vsyncend/2, vtotal/2,
+ hflags "|" vflags "|DS" iflags);
+
+ nmodes = nmodes + 1
+
+}
+
+END {
+
+ printf("\n/* Derived Double Scan Modes */\n\n");
+
+ for ( i = 0; i < nmodes; i++ )
+ {
+ print dmodes[i];
+ }
+
+ printf("};\n\n");
+ printf("const int videomode_count = %d;\n", nmodes);
+}
diff --git a/sys/dev/videomode/pickmode.c b/sys/dev/videomode/pickmode.c
new file mode 100644
index 0000000..66dea82
--- /dev/null
+++ b/sys/dev/videomode/pickmode.c
@@ -0,0 +1,205 @@
+/* $NetBSD: pickmode.c,v 1.3 2011/04/09 18:22:31 jdc Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2006 The NetBSD Foundation
+ * All rights reserved.
+ *
+ * this code was contributed to The NetBSD Foundation by Michael Lorenz
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE NETBSD FOUNDATION BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/libkern.h>
+#include <dev/videomode/videomode.h>
+#include "opt_videomode.h"
+
+#ifdef PICKMODE_DEBUG
+#define DPRINTF printf
+#else
+#define DPRINTF while (0) printf
+#endif
+
+const struct videomode *
+pick_mode_by_dotclock(int width, int height, int dotclock)
+{
+ const struct videomode *this, *best = NULL;
+ int i;
+
+ DPRINTF("%s: looking for %d x %d at up to %d kHz\n", __func__, width,
+ height, dotclock);
+ for (i = 0; i < videomode_count; i++) {
+ this = &videomode_list[i];
+ if ((this->hdisplay != width) || (this->vdisplay != height) ||
+ (this->dot_clock > dotclock))
+ continue;
+ if (best != NULL) {
+ if (this->dot_clock > best->dot_clock)
+ best = this;
+ } else
+ best = this;
+ }
+ if (best != NULL)
+ DPRINTF("found %s\n", best->name);
+
+ return best;
+}
+
+const struct videomode *
+pick_mode_by_ref(int width, int height, int refresh)
+{
+ const struct videomode *this, *best = NULL;
+ int mref, closest = 1000, i, diff;
+
+ DPRINTF("%s: looking for %d x %d at up to %d Hz\n", __func__, width,
+ height, refresh);
+ for (i = 0; i < videomode_count; i++) {
+
+ this = &videomode_list[i];
+ mref = this->dot_clock * 1000 / (this->htotal * this->vtotal);
+ diff = abs(mref - refresh);
+ if ((this->hdisplay != width) || (this->vdisplay != height))
+ continue;
+ DPRINTF("%s in %d hz, diff %d\n", this->name, mref, diff);
+ if (best != NULL) {
+ if (diff < closest) {
+ best = this;
+ closest = diff;
+ }
+ } else {
+ best = this;
+ closest = diff;
+ }
+ }
+ if (best != NULL)
+ DPRINTF("found %s %d\n", best->name, best->dot_clock);
+
+ return best;
+}
+
+static inline void
+swap_modes(struct videomode *left, struct videomode *right)
+{
+ struct videomode temp;
+
+ temp = *left;
+ *left = *right;
+ *right = temp;
+}
+
+/*
+ * Sort modes by refresh rate, aspect ratio (*), then resolution.
+ * Preferred mode or largest mode is first in the list and other modes
+ * are sorted on closest match to that mode.
+ * (*) Note that the aspect ratio calculation treats "close" aspect ratios
+ * (within 12.5%) as the same for this purpose.
+ */
+#define DIVIDE(x, y) (((x) + ((y) / 2)) / (y))
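+/*
+ * E.g. (illustrative): for the 1152x900 Sun mode (94500 kHz dot clock,
+ * 1528x937 totals), DIVIDE(DIVIDE(94500 * 1000, 1528), 937) ==
+ * DIVIDE(61846, 937) == 66 Hz.
+ */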
+void
+sort_modes(struct videomode *modes, struct videomode **preferred, int nmodes)
+{
+ int aspect, refresh, hbest, vbest, abest, atemp, rbest, rtemp;
+ int i, j;
+ struct videomode *mtemp = NULL;
+
+ if (nmodes < 2)
+ return;
+
+ if (*preferred != NULL) {
+ /* Put the preferred mode first in the list */
+ aspect = (*preferred)->hdisplay * 100 / (*preferred)->vdisplay;
+ refresh = DIVIDE(DIVIDE((*preferred)->dot_clock * 1000,
+ (*preferred)->htotal), (*preferred)->vtotal);
+ if (*preferred != modes) {
+ swap_modes(*preferred, modes);
+ *preferred = modes;
+ }
+ } else {
+ /*
+ * Find the largest horizontal and vertical mode and put that
+ * first in the list. Preferred refresh rate is taken from
+ * the first mode of this size.
+ */
+ hbest = 0;
+ vbest = 0;
+ for (i = 0; i < nmodes; i++) {
+ if (modes[i].hdisplay > hbest) {
+ hbest = modes[i].hdisplay;
+ vbest = modes[i].vdisplay;
+ mtemp = &modes[i];
+ } else if (modes[i].hdisplay == hbest &&
+ modes[i].vdisplay > vbest) {
+ vbest = modes[i].vdisplay;
+ mtemp = &modes[i];
+ }
+ }
+ aspect = mtemp->hdisplay * 100 / mtemp->vdisplay;
+ refresh = DIVIDE(DIVIDE(mtemp->dot_clock * 1000,
+ mtemp->htotal), mtemp->vtotal);
+ if (mtemp != modes)
+ swap_modes(mtemp, modes);
+ }
+
+ /* Sort other modes by refresh rate, aspect ratio, then resolution */
+ for (j = 1; j < nmodes - 1; j++) {
+ rbest = 1000;
+ abest = 1000;
+ hbest = 0;
+ vbest = 0;
+ for (i = j; i < nmodes; i++) {
+ rtemp = abs(refresh -
+ DIVIDE(DIVIDE(modes[i].dot_clock * 1000,
+ modes[i].htotal), modes[i].vtotal));
+ atemp = (modes[i].hdisplay * 100 / modes[i].vdisplay);
+ if (rtemp < rbest) {
+ rbest = rtemp;
+ mtemp = &modes[i];
+ }
+ if (rtemp == rbest) {
+ /* Treat "close" aspect ratios as identical */
+ if (abs(abest - atemp) > (abest / 8) &&
+ abs(aspect - atemp) < abs(aspect - abest)) {
+ abest = atemp;
+ mtemp = &modes[i];
+ }
+ if (atemp == abest ||
+ abs(abest - atemp) <= (abest / 8)) {
+ if (modes[i].hdisplay > hbest) {
+ hbest = modes[i].hdisplay;
+ mtemp = &modes[i];
+ }
+ if (modes[i].hdisplay == hbest &&
+ modes[i].vdisplay > vbest) {
+ vbest = modes[i].vdisplay;
+ mtemp = &modes[i];
+ }
+ }
+ }
+ }
+ if (mtemp != &modes[j])
+ swap_modes(mtemp, &modes[j]);
+ }
+}
diff --git a/sys/dev/videomode/test.c b/sys/dev/videomode/test.c
new file mode 100644
index 0000000..d51ffdd
--- /dev/null
+++ b/sys/dev/videomode/test.c
@@ -0,0 +1,26 @@
+/* $FreeBSD$ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "videomode.h"
+
+int
+main(int argc, char **argv)
+{
+ int i, j;
+
+ for (i = 1; i < argc ; i++) {
+ for (j = 0; j < videomode_count; j++) {
+ if (strcmp(videomode_list[j].name, argv[i]) == 0) {
+ printf("dotclock for mode %s = %d, flags %x\n",
+ argv[i],
+ videomode_list[j].dot_clock,
+ videomode_list[j].flags);
+ break;
+ }
+ }
+ if (j == videomode_count) {
+ printf("dotclock for mode %s not found\n", argv[i]);
+ }
+ }
+
+	return (0);
+}
diff --git a/sys/dev/videomode/vesagtf.c b/sys/dev/videomode/vesagtf.c
new file mode 100644
index 0000000..7164ae4
--- /dev/null
+++ b/sys/dev/videomode/vesagtf.c
@@ -0,0 +1,703 @@
+/* $NetBSD: vesagtf.c,v 1.2 2013/09/15 15:56:07 martin Exp $ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This was derived from a userland GTF program supplied by NVIDIA.
+ * NVIDIA's original boilerplate follows.
+ *
+ * Note that I have heavily modified the program for use in the EDID
+ * kernel code for NetBSD, including removing the use of floating
+ * point operations and making significant adjustments to minimize
+ * error propagation while operating with integer only math.
+ *
+ * This has required the use of 64-bit integers in a few places, but
+ * the upshot is that for a calculation of 1920x1200x85 (as an
+ * example), the error deviates by only ~.004% relative to the
+ * floating point version. This error is *well* within VESA
+ * tolerances.
+ */
+
+/*
+ * Copyright (c) 2001, Andy Ritger aritger@nvidia.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * o Neither the name of NVIDIA nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
+ * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *
+ * This program is based on the Generalized Timing Formula(GTF TM)
+ * Standard Version: 1.0, Revision: 1.0
+ *
+ * The GTF Document contains the following Copyright information:
+ *
+ * Copyright (c) 1994, 1995, 1996 - Video Electronics Standards
+ * Association. Duplication of this document within VESA member
+ * companies for review purposes is permitted. All other rights
+ * reserved.
+ *
+ * While every precaution has been taken in the preparation
+ * of this standard, the Video Electronics Standards Association and
+ * its contributors assume no responsibility for errors or omissions,
+ * and make no warranties, expressed or implied, of functionality
+ * of suitability for any purpose. The sample code contained within
+ * this standard may be used without restriction.
+ *
+ *
+ *
+ * The GTF EXCEL(TM) SPREADSHEET, a sample (and the definitive)
+ * implementation of the GTF Timing Standard, is available at:
+ *
+ * ftp://ftp.vesa.org/pub/GTF/GTF_V1R1.xls
+ *
+ *
+ *
+ * This program takes a desired resolution and vertical refresh rate,
+ * and computes mode timings according to the GTF Timing Standard.
+ * These mode timings can then be formatted as an XFree86 modeline
+ * or a mode description for use by fbset(8).
+ *
+ *
+ *
+ * NOTES:
+ *
+ * The GTF allows for computation of "margins" (the visible border
+ * surrounding the addressable video); on most non-overscan type
+ * systems, the margin period is zero. I've implemented the margin
+ * computations but not enabled it because 1) I don't really have
+ * any experience with this, and 2) neither XFree86 modelines nor
+ * fbset fb.modes provide an obvious way for margin timings to be
+ * included in their mode descriptions (needs more investigation).
+ *
+ * The GTF provides for computation of interlaced mode timings;
+ * I've implemented the computations but not enabled them, yet.
+ * I should probably enable and test this at some point.
+ *
+ *
+ *
+ * TODO:
+ *
+ * o Add support for interlaced modes.
+ *
+ * o Implement the other portions of the GTF: compute mode timings
+ * given either the desired pixel clock or the desired horizontal
+ * frequency.
+ *
+ * o It would be nice if this were more general purpose to do things
+ * outside the scope of the GTF: like generate double scan mode
+ * timings, for example.
+ *
+ * o Printing digits to the right of the decimal point when the
+ * digits are 0 annoys me.
+ *
+ * o Error checking.
+ *
+ */
+
+
+#ifdef _KERNEL
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <dev/videomode/videomode.h>
+#include <dev/videomode/vesagtf.h>
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include "videomode.h"
+#include "vesagtf.h"
+
+void print_xf86_mode(struct videomode *m);
+#endif
+
+#define CELL_GRAN 8 /* assumed character cell granularity */
+
+/* C' and M' are part of the Blanking Duty Cycle computation */
+/*
+ * #define C_PRIME (((C - J) * K/256.0) + J)
+ * #define M_PRIME (K/256.0 * M)
+ */
+
+/*
+ * C' and M' multiplied by 256 to give integer math. Make sure to
+ * scale results using these back down, appropriately.
+ */
+#define C_PRIME256(p) (((p->C - p->J) * p->K) + (p->J * 256))
+#define M_PRIME256(p) (p->K * p->M)
+
+#define DIVIDE(x,y) (((x) + ((y) / 2)) / (y))
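+
+/*
+ * Illustrative check (not in the original source): with the standard
+ * GTF constants C = 40, J = 20, K = 128 and M = 600, C_PRIME256()
+ * yields ((40 - 20) * 128) + (20 * 256) = 7680 (256 * 30, i.e. C' = 30)
+ * and M_PRIME256() yields 128 * 600 = 76800 (256 * 300, i.e. M' = 300).
+ * DIVIDE() rounds to the nearest integer, e.g. DIVIDE(7, 2) == 4.
+ */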
+
+/*
+ * print_value() - print the result of the named computation; this is
+ * useful when comparing against the GTF EXCEL spreadsheet.
+ */
+
+#ifdef GTFDEBUG
+
+static void
+print_value(int n, const char *name, unsigned val)
+{
+ printf("%2d: %-27s: %u\n", n, name, val);
+}
+#else
+#define print_value(n, name, val)
+#endif
+
+
+/*
+ * vert_refresh() - as defined by the GTF Timing Standard, compute the
+ * Stage 1 Parameters using the vertical refresh frequency. In other
+ * words: input a desired resolution and desired refresh rate, and
+ * output the GTF mode timings.
+ *
+ * XXX All the code is in place to compute interlaced modes, but I don't
+ * feel like testing it right now.
+ *
+ * XXX margin computations are implemented but not tested (nor used by
+ * XFree86 or fbset mode descriptions, from what I can tell).
+ */
+
+void
+vesagtf_mode_params(unsigned h_pixels, unsigned v_lines, unsigned freq,
+ struct vesagtf_params *params, int flags, struct videomode *vmp)
+{
+ unsigned v_field_rqd;
+ unsigned top_margin;
+ unsigned bottom_margin;
+ unsigned interlace;
+ uint64_t h_period_est;
+ unsigned vsync_plus_bp;
+ unsigned v_back_porch __unused;
+ unsigned total_v_lines;
+ uint64_t v_field_est;
+ uint64_t h_period;
+ unsigned v_field_rate;
+ unsigned v_frame_rate __unused;
+ unsigned left_margin;
+ unsigned right_margin;
+ unsigned total_active_pixels;
+ uint64_t ideal_duty_cycle;
+ unsigned h_blank;
+ unsigned total_pixels;
+ unsigned pixel_freq;
+
+ unsigned h_sync;
+ unsigned h_front_porch;
+ unsigned v_odd_front_porch_lines;
+
+#ifdef GTFDEBUG
+ unsigned h_freq;
+#endif
+
+ /* 1. In order to give correct results, the number of horizontal
+ * pixels requested is first processed to ensure that it is divisible
+ * by the character size, by rounding it to the nearest character
+ * cell boundary:
+ *
+ * [H PIXELS RND] = ((ROUND([H PIXELS]/[CELL GRAN RND],0))*[CELLGRAN RND])
+ */
+
+ h_pixels = DIVIDE(h_pixels, CELL_GRAN) * CELL_GRAN;
+
+ print_value(1, "[H PIXELS RND]", h_pixels);
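+
+	/*
+	 * E.g. (illustrative): h_pixels = 1366 becomes
+	 * DIVIDE(1366, 8) * 8 = 171 * 8 = 1368.
+	 */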
+
+
+ /* 2. If interlace is requested, the number of vertical lines assumed
+ * by the calculation must be halved, as the computation calculates
+ * the number of vertical lines per field. In either case, the
+ * number of lines is rounded to the nearest integer.
+ *
+ * [V LINES RND] = IF([INT RQD?]="y", ROUND([V LINES]/2,0),
+ * ROUND([V LINES],0))
+ */
+
+ v_lines = (flags & VESAGTF_FLAG_ILACE) ? DIVIDE(v_lines, 2) : v_lines;
+
+ print_value(2, "[V LINES RND]", v_lines);
+
+
+ /* 3. Find the frame rate required:
+ *
+ * [V FIELD RATE RQD] = IF([INT RQD?]="y", [I/P FREQ RQD]*2,
+ * [I/P FREQ RQD])
+ */
+
+ v_field_rqd = (flags & VESAGTF_FLAG_ILACE) ? (freq * 2) : (freq);
+
+ print_value(3, "[V FIELD RATE RQD]", v_field_rqd);
+
+
+ /* 4. Find number of lines in Top margin:
+ * 5. Find number of lines in Bottom margin:
+ *
+ * [TOP MARGIN (LINES)] = IF([MARGINS RQD?]="Y",
+ * ROUND(([MARGIN%]/100*[V LINES RND]),0),
+ * 0)
+ *
+ * Ditto for bottom margin. Note that instead of %, we use PPT, which
+ * is parts per thousand. This helps us with integer math.
+ */
+
+ top_margin = bottom_margin = (flags & VESAGTF_FLAG_MARGINS) ?
+ DIVIDE(v_lines * params->margin_ppt, 1000) : 0;
+
+ print_value(4, "[TOP MARGIN (LINES)]", top_margin);
+ print_value(5, "[BOT MARGIN (LINES)]", bottom_margin);
+
+
+ /* 6. If interlace is required, then set variable [INTERLACE]=0.5:
+ *
+ * [INTERLACE]=(IF([INT RQD?]="y",0.5,0))
+ *
+ * To make this integer friendly, we use some special hacks in step
+ * 7 below. Please read those comments to understand why I am using
+ * a whole number of 1.0 instead of 0.5 here.
+ */
+ interlace = (flags & VESAGTF_FLAG_ILACE) ? 1 : 0;
+
+ print_value(6, "[2*INTERLACE]", interlace);
+
+
+ /* 7. Estimate the Horizontal period
+ *
+ * [H PERIOD EST] = ((1/[V FIELD RATE RQD]) - [MIN VSYNC+BP]/1000000) /
+ * ([V LINES RND] + (2*[TOP MARGIN (LINES)]) +
+ * [MIN PORCH RND]+[INTERLACE]) * 1000000
+ *
+ * To make it integer friendly, we pre-multiply the 1000000 to get to
+ * usec. This gives us:
+ *
+ * [H PERIOD EST] = ((1000000/[V FIELD RATE RQD]) - [MIN VSYNC+BP]) /
+ * ([V LINES RND] + (2 * [TOP MARGIN (LINES)]) +
+ * [MIN PORCH RND]+[INTERLACE])
+ *
+ * The other problem is that the interlace value is wrong. To get
+ * the interlace to a whole number, we multiply both the numerator and
+ * divisor by 2, so we can use a value of either 1 or 0 for the interlace
+ * factor.
+ *
+ * This gives us:
+ *
+ * [H PERIOD EST] = ((2*((1000000/[V FIELD RATE RQD]) - [MIN VSYNC+BP])) /
+ * (2*([V LINES RND] + (2*[TOP MARGIN (LINES)]) +
+ * [MIN PORCH RND]) + [2*INTERLACE]))
+ *
+ * Finally we multiply by another 1000, to get value in picosec.
+ * Why picosec? To minimize rounding errors. Gotta love integer
+ * math and error propagation.
+ */
+
+ h_period_est = DIVIDE(((DIVIDE(2000000000000ULL, v_field_rqd)) -
+ (2000000 * params->min_vsbp)),
+ ((2 * (v_lines + (2 * top_margin) + params->min_porch)) + interlace));
+
+ print_value(7, "[H PERIOD EST (ps)]", h_period_est);
+
+
+ /* 8. Find the number of lines in V sync + back porch:
+ *
+ * [V SYNC+BP] = ROUND(([MIN VSYNC+BP]/[H PERIOD EST]),0)
+ *
+ * But recall that h_period_est is in psec. So multiply by 1000000.
+ */
+
+ vsync_plus_bp = DIVIDE(params->min_vsbp * 1000000, h_period_est);
+
+ print_value(8, "[V SYNC+BP]", vsync_plus_bp);
+
+
+ /* 9. Find the number of lines in V back porch alone:
+ *
+ * [V BACK PORCH] = [V SYNC+BP] - [V SYNC RND]
+ *
+ * XXX is "[V SYNC RND]" a typo? should be [V SYNC RQD]?
+ */
+
+ v_back_porch = vsync_plus_bp - params->vsync_rqd;
+
+ print_value(9, "[V BACK PORCH]", v_back_porch);
+
+
+ /* 10. Find the total number of lines in Vertical field period:
+ *
+ * [TOTAL V LINES] = [V LINES RND] + [TOP MARGIN (LINES)] +
+ * [BOT MARGIN (LINES)] + [V SYNC+BP] + [INTERLACE] +
+ * [MIN PORCH RND]
+ */
+
+ total_v_lines = v_lines + top_margin + bottom_margin + vsync_plus_bp +
+ interlace + params->min_porch;
+
+ print_value(10, "[TOTAL V LINES]", total_v_lines);
+
+
+ /* 11. Estimate the Vertical field frequency:
+ *
+ * [V FIELD RATE EST] = 1 / [H PERIOD EST] / [TOTAL V LINES] * 1000000
+ *
+ * Again, we want to pre multiply by 10^9 to convert for nsec, thereby
+ * making it usable in integer math.
+ *
+ * So we get:
+ *
+ * [V FIELD RATE EST] = 1000000000 / [H PERIOD EST] / [TOTAL V LINES]
+ *
+ * This is all scaled to get the result in uHz. Again, we're trying to
+ * minimize error propagation.
+ */
+ v_field_est = DIVIDE(DIVIDE(1000000000000000ULL, h_period_est),
+ total_v_lines);
+
+ print_value(11, "[V FIELD RATE EST(uHz)]", v_field_est);
+
+
+ /* 12. Find the actual horizontal period:
+ *
+	 * [H PERIOD] = [H PERIOD EST] / ([V FIELD RATE RQD] / [V FIELD RATE EST])
+	 *
+	 * [V FIELD RATE EST] is in mHz, so [V FIELD RATE RQD] (Hz) is
+	 * scaled by 1000 below to match.
+	 */
+
+ h_period = DIVIDE(h_period_est * v_field_est, v_field_rqd * 1000);
+
+ print_value(12, "[H PERIOD(ps)]", h_period);
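+
+	/*
+	 * For 640x480@60: ~33506600 * 60050 / 60000 is roughly
+	 * 33,534,500 ps (~33.53 usec), the corrected line period.
+	 */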
+
+
+ /* 13. Find the actual Vertical field frequency:
+ *
+ * [V FIELD RATE] = 1 / [H PERIOD] / [TOTAL V LINES] * 1000000
+ *
+	 * Since [H PERIOD] is already in psec, this becomes:
+	 *
+	 * [V FIELD RATE] = 10^12 / ([H PERIOD] * [TOTAL V LINES])
+	 *
+	 * which comes out directly in Hz.  Gotta love it.
+ */
+
+ v_field_rate = DIVIDE(1000000000000ULL, h_period * total_v_lines);
+
+ print_value(13, "[V FIELD RATE]", v_field_rate);
+
+
+ /* 14. Find the Vertical frame frequency:
+ *
+ * [V FRAME RATE] = (IF([INT RQD?]="y", [V FIELD RATE]/2, [V FIELD RATE]))
+ *
+	 * N.B. that the result here is in Hz, like [V FIELD RATE].
+ */
+
+ v_frame_rate = (flags & VESAGTF_FLAG_ILACE) ?
+ v_field_rate / 2 : v_field_rate;
+
+ print_value(14, "[V FRAME RATE]", v_frame_rate);
+
+
+ /* 15. Find number of pixels in left margin:
+ * 16. Find number of pixels in right margin:
+ *
+ * [LEFT MARGIN (PIXELS)] = (IF( [MARGINS RQD?]="Y",
+ * (ROUND( ([H PIXELS RND] * [MARGIN%] / 100 /
+ * [CELL GRAN RND]),0)) * [CELL GRAN RND],
+ * 0))
+ *
+ * Again, we deal with margin percentages as PPT (parts per thousand).
+ * And the calculations for left and right are the same.
+ */
+
+ left_margin = right_margin = (flags & VESAGTF_FLAG_MARGINS) ?
+ DIVIDE(DIVIDE(h_pixels * params->margin_ppt, 1000),
+ CELL_GRAN) * CELL_GRAN : 0;
+
+ print_value(15, "[LEFT MARGIN (PIXELS)]", left_margin);
+ print_value(16, "[RIGHT MARGIN (PIXELS)]", right_margin);
+
+
+ /* 17. Find total number of active pixels in image and left and right
+ * margins:
+ *
+ * [TOTAL ACTIVE PIXELS] = [H PIXELS RND] + [LEFT MARGIN (PIXELS)] +
+ * [RIGHT MARGIN (PIXELS)]
+ */
+
+ total_active_pixels = h_pixels + left_margin + right_margin;
+
+ print_value(17, "[TOTAL ACTIVE PIXELS]", total_active_pixels);
+
+
+ /* 18. Find the ideal blanking duty cycle from the blanking duty cycle
+ * equation:
+ *
+ * [IDEAL DUTY CYCLE] = [C'] - ([M']*[H PERIOD]/1000)
+ *
+ * However, we have modified values for [C'] as [256*C'] and
+ * [M'] as [256*M']. Again the idea here is to get good scaling.
+ * We use 256 as the factor to make the math fast.
+ *
+ * Note that this means that we have to scale it appropriately in
+ * later calculations.
+ *
+ * The ending result is that our ideal_duty_cycle is 256000x larger
+ * than the duty cycle used by VESA. But again, this reduces error
+ * propagation.
+ */
+
+ ideal_duty_cycle =
+ ((C_PRIME256(params) * 1000) -
+ (M_PRIME256(params) * h_period / 1000000));
+
+ print_value(18, "[IDEAL DUTY CYCLE]", ideal_duty_cycle);
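+
+	/*
+	 * With the default M/C/K/J above and the usual GTF definitions of
+	 * C' and M' (C' = 30, M' = 300, so C_PRIME256 = 7680 and
+	 * M_PRIME256 = 76800), the running 640x480@60 example gives
+	 * 7680 * 1000 - 76800 * ~33534500 / 1000000, about 5104550,
+	 * i.e. an ideal duty cycle of ~19.9% times 256000.
+	 */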
+
+
+ /* 19. Find the number of pixels in the blanking time to the nearest
+ * double character cell:
+ *
+ * [H BLANK (PIXELS)] = (ROUND(([TOTAL ACTIVE PIXELS] *
+ * [IDEAL DUTY CYCLE] /
+ * (100-[IDEAL DUTY CYCLE]) /
+ * (2*[CELL GRAN RND])), 0))
+ * * (2*[CELL GRAN RND])
+ *
+ * Of course, we adjust to make this rounding work in integer math.
+ */
+
+ h_blank = DIVIDE(DIVIDE(total_active_pixels * ideal_duty_cycle,
+ (256000 * 100ULL) - ideal_duty_cycle),
+ 2 * CELL_GRAN) * (2 * CELL_GRAN);
+
+ print_value(19, "[H BLANK (PIXELS)]", h_blank);
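+
+	/*
+	 * For 640x480@60: 640 * ~5104550 / (25600000 - ~5104550) is about
+	 * 159 pixels, which rounds to 10 double character cells, i.e.
+	 * h_blank = 160 and a total width of 800 pixels.
+	 */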
+
+
+ /* 20. Find total number of pixels:
+ *
+ * [TOTAL PIXELS] = [TOTAL ACTIVE PIXELS] + [H BLANK (PIXELS)]
+ */
+
+ total_pixels = total_active_pixels + h_blank;
+
+ print_value(20, "[TOTAL PIXELS]", total_pixels);
+
+
+ /* 21. Find pixel clock frequency:
+ *
+ * [PIXEL FREQ] = [TOTAL PIXELS] / [H PERIOD]
+ *
+	 * We calculate this in kHz rather than MHz, to get a value that
+	 * is usable with integer math.  Recall that [H PERIOD] is in
+	 * psec, so it is first scaled down to nsec.
+ */
+
+ pixel_freq = DIVIDE(total_pixels * 1000000, DIVIDE(h_period, 1000));
+
+ print_value(21, "[PIXEL FREQ]", pixel_freq);
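+
+	/*
+	 * For 640x480@60: 800 * 1000000 / 33535 (h_period rounded to
+	 * nsec) is about 23856, i.e. a 23.86 MHz dot clock in kHz.
+	 */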
+
+
+ /* 22. Find horizontal frequency:
+ *
+ * [H FREQ] = 1000 / [H PERIOD]
+ *
+ * I've ifdef'd this out, because we don't need it for any of
+ * our calculations.
+	 * With [H PERIOD] in psec, 10^9 / [H PERIOD] gives the
+	 * result in kHz.
+ */
+
+#ifdef GTFDEBUG
+ h_freq = 1000000000 / h_period;
+
+ print_value(22, "[H FREQ]", h_freq);
+#endif
+
+
+
+	/*
+	 * Stage 1 computations are now complete; I should really pass
+	 * the results to another function and do the Stage 2
+	 * computations, but I only need a few more values so I'll just
+	 * append the computations here for now.
+	 */
+
+
+
+ /* 17. Find the number of pixels in the horizontal sync period:
+ *
+	 * [H SYNC (PIXELS)] = (ROUND(([H SYNC%] / 100 * [TOTAL PIXELS] /
+	 *	[CELL GRAN RND]),0)) * [CELL GRAN RND]
+	 *
+	 * Rewriting for integer math:
+	 *
+	 * [H SYNC (PIXELS)] = (ROUND(([H SYNC%] * [TOTAL PIXELS] / 100 /
+	 *	[CELL GRAN RND]),0)) * [CELL GRAN RND]
+ */
+
+ h_sync = DIVIDE(((params->hsync_pct * total_pixels) / 100), CELL_GRAN) *
+ CELL_GRAN;
+
+ print_value(17, "[H SYNC (PIXELS)]", h_sync);
+
+
+ /* 18. Find the number of pixels in the horizontal front porch period:
+ *
+ * [H FRONT PORCH (PIXELS)] = ([H BLANK (PIXELS)]/2)-[H SYNC (PIXELS)]
+ *
+ * Note that h_blank is always an even number of characters (i.e.
+ * h_blank % (CELL_GRAN * 2) == 0)
+ */
+
+ h_front_porch = (h_blank / 2) - h_sync;
+
+ print_value(18, "[H FRONT PORCH (PIXELS)]", h_front_porch);
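+
+	/*
+	 * For 640x480@60 with the usual cell granularity of 8:
+	 * h_sync = 8 * 800 / 100 = 64 pixels (already a multiple of the
+	 * granularity), and h_front_porch = 160 / 2 - 64 = 16 pixels.
+	 */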
+
+
+ /* 36. Find the number of lines in the odd front porch period:
+ *
+ * [V ODD FRONT PORCH(LINES)]=([MIN PORCH RND]+[INTERLACE])
+ *
+ * Adjusting for the fact that the interlace is scaled:
+ *
+ * [V ODD FRONT PORCH(LINES)]=(([MIN PORCH RND] * 2) + [2*INTERLACE]) / 2
+ */
+
+ v_odd_front_porch_lines = ((2 * params->min_porch) + interlace) / 2;
+
+ print_value(36, "[V ODD FRONT PORCH(LINES)]", v_odd_front_porch_lines);
+
+
+ /* finally, pack the results in the mode struct */
+
+ vmp->hsync_start = h_pixels + h_front_porch;
+ vmp->hsync_end = vmp->hsync_start + h_sync;
+ vmp->htotal = total_pixels;
+ vmp->hdisplay = h_pixels;
+
+ vmp->vsync_start = v_lines + v_odd_front_porch_lines;
+ vmp->vsync_end = vmp->vsync_start + params->vsync_rqd;
+ vmp->vtotal = total_v_lines;
+ vmp->vdisplay = v_lines;
+
+ vmp->dot_clock = pixel_freq;
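+
+	/*
+	 * For the running 640x480@60 example this packs up as the widely
+	 * published GTF result: a 23856 kHz dot clock with horizontal
+	 * timings 640 656 720 800 and vertical timings 480 481 484 497.
+	 */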
+
+}
+
+void
+vesagtf_mode(unsigned x, unsigned y, unsigned refresh, struct videomode *vmp)
+{
+ struct vesagtf_params params;
+
+ params.margin_ppt = VESAGTF_MARGIN_PPT;
+ params.min_porch = VESAGTF_MIN_PORCH;
+ params.vsync_rqd = VESAGTF_VSYNC_RQD;
+ params.hsync_pct = VESAGTF_HSYNC_PCT;
+ params.min_vsbp = VESAGTF_MIN_VSBP;
+ params.M = VESAGTF_M;
+ params.C = VESAGTF_C;
+ params.K = VESAGTF_K;
+ params.J = VESAGTF_J;
+
+ vesagtf_mode_params(x, y, refresh, &params, 0, vmp);
+}
+
+/*
+ * The tidbit here is so that you can compile this file as a
+ * standalone user program to generate X11 modelines using VESA GTF.
+ * This also allows for testing of the code itself, without
+ * necessitating a full kernel recompile.
+ */
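+
+/*
+ * For example (a sketch; include paths and flags may need adjusting):
+ *
+ *	cc -o vesagtf vesagtf.c && ./vesagtf 640 480 60
+ */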
+
+/* print_xf86_mode() - print the XFree86 modeline, given mode timings. */
+
+#ifndef _KERNEL
+void
+print_xf86_mode (struct videomode *vmp)
+{
+ float vf, hf;
+
+	hf = 1000.0 * vmp->dot_clock / vmp->htotal;	/* horizontal rate in Hz */
+	vf = 1.0 * hf / vmp->vtotal;			/* vertical rate in Hz */
+
+ printf("\n");
+ printf(" # %dx%d @ %.2f Hz (GTF) hsync: %.2f kHz; pclk: %.2f MHz\n",
+ vmp->hdisplay, vmp->vdisplay, vf, hf, vmp->dot_clock / 1000.0);
+
+ printf(" Modeline \"%dx%d_%.2f\" %.2f"
+ " %d %d %d %d"
+ " %d %d %d %d"
+ " -HSync +Vsync\n\n",
+ vmp->hdisplay, vmp->vdisplay, vf, (vmp->dot_clock / 1000.0),
+ vmp->hdisplay, vmp->hsync_start, vmp->hsync_end, vmp->htotal,
+ vmp->vdisplay, vmp->vsync_start, vmp->vsync_end, vmp->vtotal);
+}
+
+int
+main (int argc, char *argv[])
+{
+ struct videomode m;
+
+ if (argc != 4) {
+ printf("usage: %s x y refresh\n", argv[0]);
+ exit(1);
+ }
+
+ vesagtf_mode(atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), &m);
+
+ print_xf86_mode(&m);
+
+	return 0;
+}
+#endif
diff --git a/sys/dev/videomode/vesagtf.h b/sys/dev/videomode/vesagtf.h
new file mode 100644
index 0000000..ea9dae4
--- /dev/null
+++ b/sys/dev/videomode/vesagtf.h
@@ -0,0 +1,86 @@
+/* $NetBSD$ */
+/* $FreeBSD$ */
+
+/*-
+ * Copyright (c) 2006 Itronix Inc.
+ * All rights reserved.
+ *
+ * Written by Garrett D'Amore for Itronix Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of Itronix Inc. may not be used to endorse
+ * or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ITRONIX INC. ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL ITRONIX INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _DEV_VIDEOMODE_VESAGTF_H
+#define _DEV_VIDEOMODE_VESAGTF_H
+
+/*
+ * Use VESA GTF formula to generate a monitor mode, given resolution and
+ * refresh rates.
+ */
+
+struct vesagtf_params {
+	unsigned margin_ppt;	/* h/v margin size, percent * 10,
+				 * think parts-per-thousand */
+	unsigned min_porch;	/* minimum vertical front porch (lines) */
+ unsigned vsync_rqd; /* width of vsync in lines */
+ unsigned hsync_pct; /* hsync as % of total width */
+ unsigned min_vsbp; /* minimum vsync + back porch (usec) */
+ unsigned M; /* blanking formula gradient */
+ unsigned C; /* blanking formula offset */
+ unsigned K; /* blanking formula scaling factor */
+ unsigned J; /* blanking formula scaling factor */
+};
+
+/*
+ * Default values to use for params.
+ */
+#define VESAGTF_MARGIN_PPT 18 /* 1.8% */
+#define VESAGTF_MIN_PORCH 1 /* minimum front porch */
+#define VESAGTF_VSYNC_RQD 3 /* vsync width in lines */
+#define VESAGTF_HSYNC_PCT 8 /* width of hsync % of total line */
+#define VESAGTF_MIN_VSBP 550 /* min vsync + back porch (usec) */
+#define VESAGTF_M 600 /* blanking formula gradient */
+#define VESAGTF_C 40 /* blanking formula offset */
+#define VESAGTF_K 128 /* blanking formula scaling factor */
+#define VESAGTF_J 20 /* blanking formula scaling factor */
+
+/*
+ * Use VESA GTF formula to generate monitor timings. Assumes default
+ * GTF parameters, non-interlaced, and no margins.
+ */
+void vesagtf_mode(unsigned x, unsigned y, unsigned refresh,
+ struct videomode *);
+
+/*
+ * A more complete version, in case we ever want to use alternate GTF
+ * parameters. EDID 1.3 allows for "secondary GTF parameters".
+ */
+void vesagtf_mode_params(unsigned x, unsigned y, unsigned refresh,
+ struct vesagtf_params *, int flags, struct videomode *);
+
+#define VESAGTF_FLAG_ILACE 0x0001 /* use interlace */
+#define VESAGTF_FLAG_MARGINS 0x0002 /* use margins */
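+
+/*
+ * Example use (a sketch, not part of the original API docs): build an
+ * interlaced, margined mode from the default parameter set:
+ *
+ *	struct vesagtf_params p = { VESAGTF_MARGIN_PPT, VESAGTF_MIN_PORCH,
+ *	    VESAGTF_VSYNC_RQD, VESAGTF_HSYNC_PCT, VESAGTF_MIN_VSBP,
+ *	    VESAGTF_M, VESAGTF_C, VESAGTF_K, VESAGTF_J };
+ *	struct videomode vm;
+ *
+ *	vesagtf_mode_params(1024, 768, 87, &p,
+ *	    VESAGTF_FLAG_ILACE | VESAGTF_FLAG_MARGINS, &vm);
+ */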
+
+#endif /* _DEV_VIDEOMODE_VESAGTF_H */
diff --git a/sys/dev/videomode/videomode.c b/sys/dev/videomode/videomode.c
new file mode 100644
index 0000000..a1c7f0a
--- /dev/null
+++ b/sys/dev/videomode/videomode.c
@@ -0,0 +1,130 @@
+/* $FreeBSD$ */
+
+/*
+ * THIS FILE AUTOMATICALLY GENERATED. DO NOT EDIT.
+ *
+ * generated from:
+ * NetBSD: modelines,v 1.9 2011/03/30 18:45:04 jdc Exp
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/module.h>
+#include <dev/videomode/videomode.h>
+
+MODULE_VERSION(videomode, 1);
+
+/*
+ * These macros help the modelines below fit on one line.
+ */
+#define HP VID_PHSYNC
+#define HN VID_NHSYNC
+#define VP VID_PVSYNC
+#define VN VID_NVSYNC
+#define I VID_INTERLACE
+#define DS VID_DBLSCAN
+
+#define M(nm,hr,vr,clk,hs,he,ht,vs,ve,vt,f) \
+ { clk, hr, hs, he, ht, vr, vs, ve, vt, f, nm }
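+
+/*
+ * For example, M("640x480x60",640,480,25175,656,752,800,490,492,525,HN|VN)
+ * expands to { 25175, 640, 656, 752, 800, 480, 490, 492, 525, HN|VN,
+ * "640x480x60" }, matching the field order of struct videomode.
+ */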
+
+const struct videomode videomode_list[] = {
+M("640x350x85",640,350,31500,672,736,832,382,385,445,HP|VN),
+M("640x400x85",640,400,31500,672,736,832,401,404,445,HN|VP),
+M("720x400x70",720,400,28320,738,846,900,412,414,449,HN|VP),
+M("720x400x85",720,400,35500,756,828,936,401,404,446,HN|VP),
+M("720x400x87",720,400,35500,738,846,900,421,423,449,HN|VN),
+M("640x480x60",640,480,25175,656,752,800,490,492,525,HN|VN),
+M("640x480x72",640,480,31500,664,704,832,489,492,520,HN|VN),
+M("640x480x75",640,480,31500,656,720,840,481,484,500,HN|VN),
+M("640x480x85",640,480,36000,696,752,832,481,484,509,HN|VN),
+M("800x600x56",800,600,36000,824,896,1024,601,603,625,HP|VP),
+M("800x600x60",800,600,40000,840,968,1056,601,605,628,HP|VP),
+M("800x600x72",800,600,50000,856,976,1040,637,643,666,HP|VP),
+M("800x600x75",800,600,49500,816,896,1056,601,604,625,HP|VP),
+M("800x600x85",800,600,56250,832,896,1048,601,604,631,HP|VP),
+M("1024x768x87i",1024,768,44900,1032,1208,1264,768,776,817,HP|VP|I),
+M("1024x768x60",1024,768,65000,1048,1184,1344,771,777,806,HN|VN),
+M("1024x768x70",1024,768,75000,1048,1184,1328,771,777,806,HN|VN),
+M("1024x768x75",1024,768,78750,1040,1136,1312,769,772,800,HP|VP),
+M("1024x768x85",1024,768,94500,1072,1168,1376,769,772,808,HP|VP),
+M("1024x768x89",1024,768,100000,1108,1280,1408,768,780,796,HP|VP),
+M("1152x864x75",1152,864,108000,1216,1344,1600,865,868,900,HP|VP),
+M("1280x768x75",1280,768,105640,1312,1712,1744,782,792,807,HN|VP),
+M("1280x960x60",1280,960,108000,1376,1488,1800,961,964,1000,HP|VP),
+M("1280x960x85",1280,960,148500,1344,1504,1728,961,964,1011,HP|VP),
+M("1280x1024x60",1280,1024,108000,1328,1440,1688,1025,1028,1066,HP|VP),
+M("1280x1024x70",1280,1024,126000,1328,1440,1688,1025,1028,1066,HP|VP),
+M("1280x1024x75",1280,1024,135000,1296,1440,1688,1025,1028,1066,HP|VP),
+M("1280x1024x85",1280,1024,157500,1344,1504,1728,1025,1028,1072,HP|VP),
+M("1600x1200x60",1600,1200,162000,1664,1856,2160,1201,1204,1250,HP|VP),
+M("1600x1200x65",1600,1200,175500,1664,1856,2160,1201,1204,1250,HP|VP),
+M("1600x1200x70",1600,1200,189000,1664,1856,2160,1201,1204,1250,HP|VP),
+M("1600x1200x75",1600,1200,202500,1664,1856,2160,1201,1204,1250,HP|VP),
+M("1600x1200x85",1600,1200,229500,1664,1856,2160,1201,1204,1250,HP|VP),
+M("1680x1050x60",1680,1050,147140,1784,1968,2256,1051,1054,1087,HP|VP),
+M("1792x1344x60",1792,1344,204800,1920,2120,2448,1345,1348,1394,HN|VP),
+M("1792x1344x75",1792,1344,261000,1888,2104,2456,1345,1348,1417,HN|VP),
+M("1856x1392x60",1856,1392,218300,1952,2176,2528,1393,1396,1439,HN|VP),
+M("1856x1392x75",1856,1392,288000,1984,2208,2560,1393,1396,1500,HN|VP),
+M("1920x1440x60",1920,1440,234000,2048,2256,2600,1441,1444,1500,HN|VP),
+M("1920x1440x75",1920,1440,297000,2064,2288,2640,1441,1444,1500,HN|VP),
+M("832x624x74",832,624,57284,864,928,1152,625,628,667,HN|VN),
+M("1152x768x54",1152,768,64995,1178,1314,1472,771,777,806,HP|VP),
+M("1400x1050x60",1400,1050,122000,1488,1640,1880,1052,1064,1082,HP|VP),
+M("1400x1050x74",1400,1050,155800,1464,1784,1912,1052,1064,1090,HP|VP),
+M("1152x900x66",1152,900,94500,1192,1320,1528,902,906,937,HN|VN),
+M("1152x900x76",1152,900,105560,1168,1280,1472,902,906,943,HN|VN),
+
+/* Derived Double Scan Modes */
+
+M("320x175x85",320,175,15750,336,368,416,191,192,222,HP|VN|DS),
+M("320x200x85",320,200,15750,336,368,416,200,202,222,HN|VP|DS),
+M("360x200x70",360,200,14160,369,423,450,206,207,224,HN|VP|DS),
+M("360x200x85",360,200,17750,378,414,468,200,202,223,HN|VP|DS),
+M("360x200x87",360,200,17750,369,423,450,210,211,224,HN|VN|DS),
+M("320x240x60",320,240,12587,328,376,400,245,246,262,HN|VN|DS),
+M("320x240x72",320,240,15750,332,352,416,244,246,260,HN|VN|DS),
+M("320x240x75",320,240,15750,328,360,420,240,242,250,HN|VN|DS),
+M("320x240x85",320,240,18000,348,376,416,240,242,254,HN|VN|DS),
+M("400x300x56",400,300,18000,412,448,512,300,301,312,HP|VP|DS),
+M("400x300x60",400,300,20000,420,484,528,300,302,314,HP|VP|DS),
+M("400x300x72",400,300,25000,428,488,520,318,321,333,HP|VP|DS),
+M("400x300x75",400,300,24750,408,448,528,300,302,312,HP|VP|DS),
+M("400x300x85",400,300,28125,416,448,524,300,302,315,HP|VP|DS),
+M("512x384x87i",512,384,22450,516,604,632,384,388,408,HP|VP|DS|I),
+M("512x384x60",512,384,32500,524,592,672,385,388,403,HN|VN|DS),
+M("512x384x70",512,384,37500,524,592,664,385,388,403,HN|VN|DS),
+M("512x384x75",512,384,39375,520,568,656,384,386,400,HP|VP|DS),
+M("512x384x85",512,384,47250,536,584,688,384,386,404,HP|VP|DS),
+M("512x384x89",512,384,50000,554,640,704,384,390,398,HP|VP|DS),
+M("576x432x75",576,432,54000,608,672,800,432,434,450,HP|VP|DS),
+M("640x384x75",640,384,52820,656,856,872,391,396,403,HN|VP|DS),
+M("640x480x60",640,480,54000,688,744,900,480,482,500,HP|VP|DS),
+M("640x480x85",640,480,74250,672,752,864,480,482,505,HP|VP|DS),
+M("640x512x60",640,512,54000,664,720,844,512,514,533,HP|VP|DS),
+M("640x512x70",640,512,63000,664,720,844,512,514,533,HP|VP|DS),
+M("640x512x75",640,512,67500,648,720,844,512,514,533,HP|VP|DS),
+M("640x512x85",640,512,78750,672,752,864,512,514,536,HP|VP|DS),
+M("800x600x60",800,600,81000,832,928,1080,600,602,625,HP|VP|DS),
+M("800x600x65",800,600,87750,832,928,1080,600,602,625,HP|VP|DS),
+M("800x600x70",800,600,94500,832,928,1080,600,602,625,HP|VP|DS),
+M("800x600x75",800,600,101250,832,928,1080,600,602,625,HP|VP|DS),
+M("800x600x85",800,600,114750,832,928,1080,600,602,625,HP|VP|DS),
+M("840x525x60",840,525,73570,892,984,1128,525,527,543,HP|VP|DS),
+M("896x672x60",896,672,102400,960,1060,1224,672,674,697,HN|VP|DS),
+M("896x672x75",896,672,130500,944,1052,1228,672,674,708,HN|VP|DS),
+M("928x696x60",928,696,109150,976,1088,1264,696,698,719,HN|VP|DS),
+M("928x696x75",928,696,144000,992,1104,1280,696,698,750,HN|VP|DS),
+M("960x720x60",960,720,117000,1024,1128,1300,720,722,750,HN|VP|DS),
+M("960x720x75",960,720,148500,1032,1144,1320,720,722,750,HN|VP|DS),
+M("416x312x74",416,312,28642,432,464,576,312,314,333,HN|VN|DS),
+M("576x384x54",576,384,32497,589,657,736,385,388,403,HP|VP|DS),
+M("700x525x60",700,525,61000,744,820,940,526,532,541,HP|VP|DS),
+M("700x525x74",700,525,77900,732,892,956,526,532,545,HP|VP|DS),
+M("576x450x66",576,450,47250,596,660,764,451,453,468,HN|VN|DS),
+M("576x450x76",576,450,52780,584,640,736,451,453,471,HN|VN|DS),
+};
+
+const int videomode_count = 46;
diff --git a/sys/dev/videomode/videomode.h b/sys/dev/videomode/videomode.h
new file mode 100644
index 0000000..b223da8
--- /dev/null
+++ b/sys/dev/videomode/videomode.h
@@ -0,0 +1,74 @@
+/* $NetBSD: videomode.h,v 1.2 2010/05/04 21:17:10 macallan Exp $ */
+/* $FreeBSD$ */
+
+/*
+ * Copyright (c) 2001, 2002 Bang Jun-Young
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _DEV_VIDEOMODE_H
+#define _DEV_VIDEOMODE_H
+
+struct videomode {
+ int dot_clock; /* Dot clock frequency in kHz. */
+ int hdisplay;
+ int hsync_start;
+ int hsync_end;
+ int htotal;
+ int vdisplay;
+ int vsync_start;
+ int vsync_end;
+ int vtotal;
+ int flags; /* Video mode flags; see below. */
+ const char *name;
+};
+
+/*
+ * Video mode flags.
+ */
+
+#define VID_PHSYNC 0x0001
+#define VID_NHSYNC 0x0002
+#define VID_PVSYNC 0x0004
+#define VID_NVSYNC 0x0008
+#define VID_INTERLACE 0x0010
+#define VID_DBLSCAN 0x0020
+#define VID_CSYNC 0x0040
+#define VID_PCSYNC 0x0080
+#define VID_NCSYNC 0x0100
+#define VID_HSKEW 0x0200
+#define VID_BCAST 0x0400
+#define VID_PIXMUX 0x1000
+#define VID_DBLCLK 0x2000
+#define VID_CLKDIV2 0x4000
+
+extern const struct videomode videomode_list[];
+extern const int videomode_count;
+
+const struct videomode *pick_mode_by_dotclock(int, int, int);
+const struct videomode *pick_mode_by_ref(int, int, int);
+void sort_modes(struct videomode *, struct videomode **, int);
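+
+/*
+ * Example (the argument meaning is our reading of the prototypes, not
+ * documented here): find the list entry closest to 1024x768 at a
+ * 65 MHz dot clock:
+ *
+ *	const struct videomode *vm;
+ *
+ *	vm = pick_mode_by_dotclock(1024, 768, 65000);
+ */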
+
+#endif /* _DEV_VIDEOMODE_H */
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index f0acf22..9627849 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -2518,6 +2518,7 @@ vt_upgrade(struct vt_device *vd)
{
struct vt_window *vw;
unsigned int i;
+ int register_handlers;
if (!vty_enabled(VTY_VT))
return;
@@ -2546,6 +2547,7 @@ vt_upgrade(struct vt_device *vd)
if (vd->vd_curwindow == NULL)
vd->vd_curwindow = vd->vd_windows[VT_CONSWINDOW];
+ register_handlers = 0;
if (!(vd->vd_flags & VDF_ASYNC)) {
/* Attach keyboard. */
vt_allocate_keyboard(vd);
@@ -2557,18 +2559,21 @@ vt_upgrade(struct vt_device *vd)
vd->vd_flags |= VDF_ASYNC;
callout_reset(&vd->vd_timer, hz / VT_TIMERFREQ, vt_timer, vd);
vd->vd_timer_armed = 1;
+ register_handlers = 1;
+ }
+
+ VT_UNLOCK(vd);
+
+ /* Refill settings with new sizes. */
+ vt_resize(vd);
+ if (register_handlers) {
/* Register suspend/resume handlers. */
EVENTHANDLER_REGISTER(power_suspend_early, vt_suspend_handler,
vd, EVENTHANDLER_PRI_ANY);
EVENTHANDLER_REGISTER(power_resume, vt_resume_handler, vd,
EVENTHANDLER_PRI_ANY);
}
-
- VT_UNLOCK(vd);
-
- /* Refill settings with new sizes. */
- vt_resize(vd);
}
static void
diff --git a/sys/dev/wpi/if_wpi.c b/sys/dev/wpi/if_wpi.c
index 52846dd..87e8b8b 100644
--- a/sys/dev/wpi/if_wpi.c
+++ b/sys/dev/wpi/if_wpi.c
@@ -42,7 +42,7 @@ __FBSDID("$FreeBSD$");
*
* A similar thing happens with the tx rings. The difference is the firmware
* stop processing buffers once the queue is full and until confirmation
- * of a successful transmition (tx_intr) has occurred.
+ * of a successful transmission (tx_done) has occurred.
*
* The command ring operates in the same manner as the tx queues.
*
@@ -447,6 +447,8 @@ wpi_attach(device_t dev)
ic->ic_cryptocaps =
IEEE80211_CRYPTO_AES_CCM;
+ ic->ic_flags |= IEEE80211_F_DATAPAD;
+
/*
* Read in the eeprom and also setup the channels for
* net80211. We don't set the rates as net80211 does this for us
@@ -1378,8 +1380,7 @@ wpi_read_eeprom_band(struct wpi_softc *sc, int n)
"adding chan %d (%dMHz) flags=0x%x maxpwr=%d passive=%d,"
" offset %d\n", chan, c->ic_freq,
channels[i].flags, sc->maxpwr[chan],
- (c->ic_flags & IEEE80211_CHAN_PASSIVE) != 0,
- ic->ic_nchans);
+ IEEE80211_IS_CHAN_PASSIVE(c), ic->ic_nchans);
}
}
@@ -1695,8 +1696,7 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc,
if (stat->len > WPI_STAT_MAXLEN) {
device_printf(sc->sc_dev, "invalid RX statistic header\n");
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return;
+ goto fail1;
}
bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);
@@ -1714,23 +1714,20 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc,
if ((flags & WPI_RX_NOERROR) != WPI_RX_NOERROR) {
DPRINTF(sc, WPI_DEBUG_RECV, "%s: RX flags error %x\n",
__func__, flags);
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return;
+ goto fail1;
}
/* Discard frames that are too short. */
if (len < sizeof (*wh)) {
DPRINTF(sc, WPI_DEBUG_RECV, "%s: frame too short: %d\n",
__func__, len);
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return;
+ goto fail1;
}
m1 = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
if (m1 == NULL) {
DPRINTF(sc, WPI_DEBUG_ANY, "%s: no mbuf to restock ring\n",
__func__);
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return;
+ goto fail1;
}
bus_dmamap_unload(ring->data_dmat, data->map);
@@ -1752,8 +1749,7 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc,
ring->desc[ring->cur] = htole32(paddr);
bus_dmamap_sync(ring->data_dmat, ring->desc_dma.map,
BUS_DMASYNC_PREWRITE);
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return;
+ goto fail1;
}
m = data->m;
@@ -1777,18 +1773,14 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc,
if ((wh->i_fc[1] & IEEE80211_FC1_PROTECTED) &&
!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
cip != NULL && cip->ic_cipher == IEEE80211_CIPHER_AES_CCM) {
- if ((flags & WPI_RX_CIPHER_MASK) != WPI_RX_CIPHER_CCMP) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- m_freem(m);
- return;
- }
+ if ((flags & WPI_RX_CIPHER_MASK) != WPI_RX_CIPHER_CCMP)
+ goto fail2;
+
/* Check whether decryption was successful or not. */
if ((flags & WPI_RX_DECRYPT_MASK) != WPI_RX_DECRYPT_OK) {
DPRINTF(sc, WPI_DEBUG_RECV,
"CCMP decryption failed 0x%x\n", flags);
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- m_freem(m);
- return;
+ goto fail2;
}
m->m_flags |= M_WEP;
}
@@ -1817,6 +1809,13 @@ wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc,
(void)ieee80211_input_all(ic, m, stat->rssi, -WPI_RSSI_OFFSET);
WPI_LOCK(sc);
+
+ return;
+
+fail2: ieee80211_free_node(ni);
+ m_freem(m);
+
+fail1: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
static void
@@ -1836,6 +1835,7 @@ wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc)
struct mbuf *m;
struct ieee80211_node *ni;
struct ieee80211vap *vap;
+ int ackfailcnt = stat->ackfailcnt;
int status = le32toh(stat->status);
KASSERT(data->ni != NULL, ("no node"));
@@ -1844,7 +1844,7 @@ wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc)
DPRINTF(sc, WPI_DEBUG_XMIT, "%s: "
"qid %d idx %d retries %d btkillcnt %d rate %x duration %d "
- "status %x\n", __func__, desc->qid, desc->idx, stat->ackfailcnt,
+ "status %x\n", __func__, desc->qid, desc->idx, ackfailcnt,
stat->btkillcnt, stat->rate, le32toh(stat->duration), status);
/* Unmap and free mbuf. */
@@ -1861,11 +1861,11 @@ wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc)
if ((status & 0xff) != 1) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
ieee80211_ratectl_tx_complete(vap, ni,
- IEEE80211_RATECTL_TX_FAILURE, &stat->ackfailcnt, NULL);
+ IEEE80211_RATECTL_TX_FAILURE, &ackfailcnt, NULL);
} else {
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
ieee80211_ratectl_tx_complete(vap, ni,
- IEEE80211_RATECTL_TX_SUCCESS, &stat->ackfailcnt, NULL);
+ IEEE80211_RATECTL_TX_SUCCESS, &ackfailcnt, NULL);
}
ieee80211_tx_complete(ni, m, (status & 0xff) != 1);
@@ -1931,10 +1931,7 @@ wpi_notif_intr(struct wpi_softc *sc)
hw = le32toh(sc->shared->next);
hw = (hw == 0) ? WPI_RX_RING_COUNT - 1 : hw - 1;
- if (sc->rxq.cur == hw)
- return;
-
- do {
+ while (sc->rxq.cur != hw) {
sc->rxq.cur = (sc->rxq.cur + 1) % WPI_RX_RING_COUNT;
struct wpi_rx_data *data = &sc->rxq.data[sc->rxq.cur];
@@ -2020,10 +2017,10 @@ wpi_notif_intr(struct wpi_softc *sc)
BUS_DMASYNC_POSTREAD);
uint32_t *status = (uint32_t *)(desc + 1);
-#ifdef WPI_DEBUG
+
DPRINTF(sc, WPI_DEBUG_STATE, "state changed to %x\n",
le32toh(*status));
-#endif
+
if (le32toh(*status) & 1) {
ieee80211_runtask(ic, &sc->sc_radiooff_task);
return;
@@ -2061,7 +2058,7 @@ wpi_notif_intr(struct wpi_softc *sc)
break;
}
}
- } while (sc->rxq.cur != hw);
+ }
/* Tell the firmware what we have processed. */
wpi_update_rx_ring(sc);
@@ -2081,15 +2078,15 @@ wpi_wakeup_intr(struct wpi_softc *sc)
/* Wakeup RX and TX rings. */
if (sc->rxq.update) {
- wpi_update_rx_ring(sc);
sc->rxq.update = 0;
+ wpi_update_rx_ring(sc);
}
for (qid = 0; qid < WPI_NTXQUEUES; qid++) {
struct wpi_tx_ring *ring = &sc->txq[qid];
if (ring->update) {
- wpi_update_tx_ring(sc, ring);
ring->update = 0;
+ wpi_update_tx_ring(sc, ring);
}
}
@@ -2227,6 +2224,8 @@ done:
static int
wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf)
{
+ struct ifnet *ifp = sc->sc_ifp;
+ struct ieee80211com *ic = ifp->if_l2com;
struct ieee80211_frame *wh;
struct wpi_tx_cmd *cmd;
struct wpi_tx_data *data;
@@ -2234,23 +2233,16 @@ wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf)
struct wpi_tx_ring *ring;
struct mbuf *m1;
bus_dma_segment_t *seg, segs[WPI_MAX_SCATTER];
- u_int hdrlen;
- int error, i, nsegs, pad, totlen;
+ int error, i, hdrspace, nsegs, totlen;
WPI_LOCK_ASSERT(sc);
DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__);
wh = mtod(buf->m, struct ieee80211_frame *);
- hdrlen = ieee80211_anyhdrsize(wh);
+ hdrspace = ieee80211_anyhdrspace(ic, wh);
totlen = buf->m->m_pkthdr.len;
- if (hdrlen & 3) {
- /* First segment length must be a multiple of 4. */
- pad = 4 - (hdrlen & 3);
- } else
- pad = 0;
-
ring = &sc->txq[buf->ac];
desc = &ring->desc[ring->cur];
data = &ring->data[ring->cur];
@@ -2265,8 +2257,8 @@ wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf)
memcpy(cmd->data, buf->data, buf->size);
/* Save and trim IEEE802.11 header. */
- memcpy((uint8_t *)(cmd->data + buf->size), wh, hdrlen);
- m_adj(buf->m, hdrlen);
+ memcpy((uint8_t *)(cmd->data + buf->size), wh, hdrspace);
+ m_adj(buf->m, hdrspace);
error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, buf->m,
segs, &nsegs, BUS_DMA_NOWAIT);
@@ -2304,10 +2296,10 @@ wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf)
__func__, ring->qid, ring->cur, totlen, nsegs);
/* Fill TX descriptor. */
- desc->nsegs = WPI_PAD32(totlen + pad) << 4 | (1 + nsegs);
+ desc->nsegs = WPI_PAD32(totlen) << 4 | (1 + nsegs);
/* First DMA segment is used by the TX command. */
desc->segs[0].addr = htole32(data->cmd_paddr);
- desc->segs[0].len = htole32(4 + buf->size + hdrlen + pad);
+ desc->segs[0].len = htole32(4 + buf->size + hdrspace);
/* Other DMA segments are for data payload. */
seg = &segs[0];
for (i = 1; i <= nsegs; i++) {
@@ -2353,9 +2345,10 @@ wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
uint32_t flags;
uint16_t qos;
uint8_t tid, type;
- int ac, error, rate, ismcast, totlen;
+ int ac, error, rate, ismcast, hdrlen, totlen;
wh = mtod(m, struct ieee80211_frame *);
+ hdrlen = ieee80211_anyhdrsize(wh);
type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
@@ -2399,12 +2392,12 @@ wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
/* 802.11 header may have moved. */
wh = mtod(m, struct ieee80211_frame *);
}
- totlen = m->m_pkthdr.len;
+ totlen = m->m_pkthdr.len - (hdrlen & 3);
if (ieee80211_radiotap_active_vap(vap)) {
struct wpi_tx_radiotap_header *tap = &sc->sc_txtap;
- tap->wt_flags = 0;
+ tap->wt_flags = IEEE80211_RADIOTAP_F_DATAPAD;
tap->wt_rate = rate;
if (k != NULL)
tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP;
@@ -2420,6 +2413,9 @@ wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
flags |= WPI_TX_NEED_ACK;
}
+ if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG)
+ flags |= WPI_TX_MORE_FRAG; /* Cannot happen yet. */
+
/* Check if frame must be protected using RTS/CTS or CTS-to-self. */
if (!ismcast) {
/* NB: Group frames are sent using CCK in 802.11b/g. */
@@ -2518,11 +2514,12 @@ wpi_tx_data_raw(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni,
struct wpi_buf tx_data;
uint32_t flags;
uint8_t type;
- int ac, rate, totlen;
+ int ac, rate, hdrlen, totlen;
wh = mtod(m, struct ieee80211_frame *);
+ hdrlen = ieee80211_anyhdrsize(wh);
type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
- totlen = m->m_pkthdr.len;
+ totlen = m->m_pkthdr.len - (hdrlen & 3);
ac = params->ibp_pri & 3;
@@ -2544,6 +2541,8 @@ wpi_tx_data_raw(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni,
tap->wt_flags = 0;
tap->wt_rate = rate;
+ if (params->ibp_flags & IEEE80211_BPF_DATAPAD)
+ tap->wt_flags |= IEEE80211_RADIOTAP_F_DATAPAD;
ieee80211_radiotap_tx(vap, m);
}
@@ -3434,31 +3433,28 @@ wpi_config(struct wpi_softc *sc)
if (IEEE80211_IS_CHAN_2GHZ(ic->ic_curchan))
sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ);
+ sc->rxon.filter = WPI_FILTER_MULTICAST;
switch (ic->ic_opmode) {
case IEEE80211_M_STA:
sc->rxon.mode = WPI_MODE_STA;
- sc->rxon.filter = htole32(WPI_FILTER_MULTICAST);
break;
case IEEE80211_M_IBSS:
sc->rxon.mode = WPI_MODE_IBSS;
- sc->rxon.filter = htole32(WPI_FILTER_BEACON |
- WPI_FILTER_MULTICAST);
+ sc->rxon.filter |= WPI_FILTER_BEACON;
break;
/* XXX workaround for passive channels selection */
case IEEE80211_M_AHDEMO:
- sc->rxon.filter = htole32(WPI_FILTER_MULTICAST);
- /* FALLTHROUGH */
case IEEE80211_M_HOSTAP:
sc->rxon.mode = WPI_MODE_HOSTAP;
break;
case IEEE80211_M_MONITOR:
sc->rxon.mode = WPI_MODE_MONITOR;
- sc->rxon.filter = htole32(WPI_FILTER_MULTICAST);
break;
default:
device_printf(sc->sc_dev, "unknown opmode %d\n", ic->ic_opmode);
return EINVAL;
}
+ sc->rxon.filter = htole32(sc->rxon.filter);
wpi_set_promisc(sc);
sc->rxon.cck_mask = 0x0f; /* not yet negotiated */
sc->rxon.ofdm_mask = 0xff; /* not yet negotiated */
@@ -3677,7 +3673,7 @@ wpi_scan(struct wpi_softc *sc, struct ieee80211_channel *c)
} else
hdr->crc_threshold = WPI_SCAN_CRC_TH_NEVER;
- if (!(c->ic_flags & IEEE80211_CHAN_PASSIVE))
+ if (!IEEE80211_IS_CHAN_PASSIVE(c))
chan->flags |= WPI_CHAN_ACTIVE;
/*
@@ -3702,7 +3698,7 @@ wpi_scan(struct wpi_softc *sc, struct ieee80211_channel *c)
chan->rf_gain = 0x28;
DPRINTF(sc, WPI_DEBUG_SCAN, "Scanning %u Passive: %d\n",
- chan->chan, (c->ic_flags & IEEE80211_CHAN_PASSIVE) ? 1 : 0);
+ chan->chan, IEEE80211_IS_CHAN_PASSIVE(c));
hdr->nchan++;
chan++;
@@ -3838,11 +3834,13 @@ wpi_update_beacon(struct ieee80211vap *vap, int item)
struct wpi_softc *sc = ifp->if_softc;
int error;
+ WPI_LOCK(sc);
if ((error = wpi_setup_beacon(sc, ni)) != 0) {
device_printf(sc->sc_dev,
"%s: could not update beacon frame, error %d", __func__,
error);
}
+ WPI_UNLOCK(sc);
}
static int
@@ -3939,6 +3937,8 @@ wpi_run(struct wpi_softc *sc, struct ieee80211vap *vap)
/* Enable power-saving mode if requested by user. */
if (vap->iv_flags & IEEE80211_F_PMGTON)
(void)wpi_set_pslevel(sc, 0, 3, 1);
+ else
+ (void)wpi_set_pslevel(sc, 0, 0, 1);
DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__);
@@ -4384,6 +4384,8 @@ wpi_apm_init(struct wpi_softc *sc)
DELAY(20);
/* Disable L1-Active. */
wpi_prph_setbits(sc, WPI_APMG_PCI_STT, WPI_APMG_PCI_STT_L1A_DIS);
+ /* ??? */
+ wpi_prph_clrbits(sc, WPI_APMG_PS, 0x00000E00);
wpi_nic_unlock(sc);
return 0;
diff --git a/sys/dev/wpi/if_wpireg.h b/sys/dev/wpi/if_wpireg.h
index cfd8a09..24dcb2d 100644
--- a/sys/dev/wpi/if_wpireg.h
+++ b/sys/dev/wpi/if_wpireg.h
@@ -470,6 +470,7 @@ struct wpi_cmd_data {
#define WPI_TX_FULL_TXOP (1 << 7)
#define WPI_TX_BT_DISABLE (1 << 12) /* bluetooth coexistence */
#define WPI_TX_AUTO_SEQ (1 << 13)
+#define WPI_TX_MORE_FRAG (1 << 14)
#define WPI_TX_INSERT_TSTAMP (1 << 16)
uint8_t plcp;
@@ -964,11 +965,6 @@ static const char * const wpi_fw_errmsg[] = {
"FATAL_ERROR"
};
-/* XXX description for some error codes (error data). */
-/* 0x00000074 - wrong totlen field */
-/* 0x000003B3 - powersave error */
-/* 0x00000447 - wrong channel selected */
-
#define WPI_READ(sc, reg) \
bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (reg))
diff --git a/sys/dev/xen/xenstore/xenstore_dev.c b/sys/dev/xen/xenstore/xenstore_dev.c
index 54b5e82..80396fd 100644
--- a/sys/dev/xen/xenstore/xenstore_dev.c
+++ b/sys/dev/xen/xenstore/xenstore_dev.c
@@ -77,7 +77,11 @@ static int
xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
int error;
- struct xs_dev_data *u = dev->si_drv1;
+ struct xs_dev_data *u;
+
+ error = devfs_get_cdevpriv((void **)&u);
+ if (error != 0)
+ return (error);
while (u->read_prod == u->read_cons) {
error = tsleep(u, PCATCH, "xsdread", hz/10);
@@ -115,11 +119,15 @@ static int
xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
int error;
- struct xs_dev_data *u = dev->si_drv1;
+ struct xs_dev_data *u;
struct xs_dev_transaction *trans;
void *reply;
int len = uio->uio_resid;
+ error = devfs_get_cdevpriv((void **)&u);
+ if (error != 0)
+ return (error);
+
if ((len + u->len) > sizeof(u->u.buffer))
return (EINVAL);
@@ -177,25 +185,10 @@ xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
return (error);
}
-static int
-xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
-{
- struct xs_dev_data *u;
-
-#if 0 /* XXX figure out if equiv needed */
- nonseekable_open(inode, filp);
-#endif
- u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO);
- LIST_INIT(&u->transactions);
- dev->si_drv1 = u;
-
- return (0);
-}
-
-static int
-xs_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+static void
+xs_dev_dtor(void *arg)
{
- struct xs_dev_data *u = dev->si_drv1;
+ struct xs_dev_data *u = arg;
struct xs_dev_transaction *trans, *tmp;
LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) {
@@ -205,7 +198,21 @@ xs_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
}
free(u, M_XENSTORE);
- return (0);
+}
+
+static int
+xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ struct xs_dev_data *u;
+ int error;
+
+ u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO);
+ LIST_INIT(&u->transactions);
+ error = devfs_set_cdevpriv(u, xs_dev_dtor);
+ if (error != 0)
+ free(u, M_XENSTORE);
+
+ return (error);
}
static struct cdevsw xs_dev_cdevsw = {
@@ -213,7 +220,6 @@ static struct cdevsw xs_dev_cdevsw = {
.d_read = xs_dev_read,
.d_write = xs_dev_write,
.d_open = xs_dev_open,
- .d_close = xs_dev_close,
.d_name = "xs_dev",
};
@@ -262,8 +268,8 @@ xs_dev_attach(device_t dev)
{
struct cdev *xs_cdev;
- xs_cdev = make_dev(&xs_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400,
- "xen/xenstore");
+ xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL,
+ UID_ROOT, GID_WHEEL, 0400, "xen/xenstore");
if (xs_cdev == NULL)
return (EINVAL);
diff --git a/sys/fs/ext2fs/ext2_htree.c b/sys/fs/ext2fs/ext2_htree.c
index 70a2f47..c847aa4 100644
--- a/sys/fs/ext2fs/ext2_htree.c
+++ b/sys/fs/ext2fs/ext2_htree.c
@@ -861,7 +861,7 @@ ext2_htree_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry,
ext2_htree_split_dirblock((char *)bp->b_data, newdirblock, blksize,
fs->e3fs_hash_seed, hash_version, &split_hash, entry);
cursize = roundup(ip->i_size, blksize);
- dirsize = roundup(ip->i_size, blksize) + blksize;
+ dirsize = cursize + blksize;
blknum = dirsize / blksize - 1;
/* Add index entry for the new directory block */
diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c
index 6879e42..b82a41e 100644
--- a/sys/fs/ext2fs/ext2_vfsops.c
+++ b/sys/fs/ext2fs/ext2_vfsops.c
@@ -355,7 +355,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es,
}
fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
- fs->e2fs_itpg = fs->e2fs_ipg /fs->e2fs_ipb;
+ fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;
/* s_resuid / s_resgid ? */
fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock +
EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs);
@@ -365,7 +365,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es,
fs->e2fs_gd = malloc(db_count * fs->e2fs_bsize,
M_EXT2MNT, M_WAITOK);
fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
- sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK);
+ sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);
/*
* Adjust logic_sb_block.
@@ -379,6 +379,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es,
fsbtodb(fs, logic_sb_block + i + 1 ),
fs->e2fs_bsize, NOCRED, &bp);
if (error) {
+ free(fs->e2fs_contigdirs, M_EXT2MNT);
free(fs->e2fs_gd, M_EXT2MNT);
brelse(bp);
return (error);
@@ -390,11 +391,11 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es,
brelse(bp);
bp = NULL;
}
+ /* Initialization for the ext2 Orlov allocator variant. */
fs->e2fs_total_dir = 0;
- for (i=0; i < fs->e2fs_gcount; i++){
+ for (i = 0; i < fs->e2fs_gcount; i++)
fs->e2fs_total_dir += fs->e2fs_gd[i].ext2bgd_ndirs;
- fs->e2fs_contigdirs[i] = 0;
- }
+
if (es->e2fs_rev == E2FS_REV0 ||
!EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
fs->e2fs_maxfilesize = 0x7fffffff;
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 32b9540..a80de54 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -1555,6 +1555,7 @@ cpususpend_handler(void)
cpu_ops.cpu_resume();
/* Resume MCA and local APIC */
+ lapic_xapic_mode();
mca_resume();
lapic_setup(0);
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 53abf92..4df182a 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1401,7 +1401,8 @@ each_writable_segment(td, func, closure)
object = backing_object;
}
ignore_entry = object->type != OBJT_DEFAULT &&
- object->type != OBJT_SWAP && object->type != OBJT_VNODE;
+ object->type != OBJT_SWAP && object->type != OBJT_VNODE &&
+ object->type != OBJT_PHYS;
VM_OBJECT_RUNLOCK(object);
if (ignore_entry)
continue;
diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c
index d58302a..d65ba5a 100644
--- a/sys/kern/kern_procctl.c
+++ b/sys/kern/kern_procctl.c
@@ -160,7 +160,7 @@ static int
reap_status(struct thread *td, struct proc *p,
struct procctl_reaper_status *rs)
{
- struct proc *reap, *p2;
+ struct proc *reap, *p2, *first_p;
sx_assert(&proctree_lock, SX_LOCKED);
bzero(rs, sizeof(*rs));
@@ -176,8 +176,10 @@ reap_status(struct thread *td, struct proc *p,
rs->rs_descendants = 0;
rs->rs_children = 0;
if (!LIST_EMPTY(&reap->p_reaplist)) {
- KASSERT(!LIST_EMPTY(&reap->p_children), ("no children"));
- rs->rs_pid = LIST_FIRST(&reap->p_children)->p_pid;
+ first_p = LIST_FIRST(&reap->p_children);
+ if (first_p == NULL)
+ first_p = LIST_FIRST(&reap->p_reaplist);
+ rs->rs_pid = first_p->p_pid;
LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
if (proc_realparent(p2) == reap)
rs->rs_children++;
@@ -239,13 +241,13 @@ reap_kill(struct thread *td, struct proc *p, struct procctl_reaper_kill *rk)
int error, error1;
sx_assert(&proctree_lock, SX_LOCKED);
- PROC_UNLOCK(p);
if (IN_CAPABILITY_MODE(td))
return (ECAPMODE);
if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG)
return (EINVAL);
if ((rk->rk_flags & ~REAPER_KILL_CHILDREN) != 0)
return (EINVAL);
+ PROC_UNLOCK(p);
reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
ksiginfo_init(&ksi);
ksi.ksi_signo = rk->rk_sig;
diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c
index 321e4ae..086c4a9 100644
--- a/sys/kern/subr_taskqueue.c
+++ b/sys/kern/subr_taskqueue.c
@@ -571,8 +571,9 @@ taskqueue_swi_giant_run(void *dummy)
static int
_taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
- cpuset_t *mask, const char *ktname)
+ cpuset_t *mask, const char *name, va_list ap)
{
+ char ktname[MAXCOMLEN + 1];
struct thread *td;
struct taskqueue *tq;
int i, error;
@@ -580,6 +581,7 @@ _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
if (count <= 0)
return (EINVAL);
+ vsnprintf(ktname, sizeof(ktname), name, ap);
tq = *tqp;
tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE,
@@ -635,27 +637,35 @@ int
taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
const char *name, ...)
{
- char ktname[MAXCOMLEN + 1];
va_list ap;
+ int error;
va_start(ap, name);
- vsnprintf(ktname, sizeof(ktname), name, ap);
+ error = _taskqueue_start_threads(tqp, count, pri, NULL, name, ap);
va_end(ap);
-
- return (_taskqueue_start_threads(tqp, count, pri, NULL, ktname));
+ return (error);
}
int
-taskqueue_start_threads_pinned(struct taskqueue **tqp, int count, int pri,
- int cpu_id, const char *name, ...)
+taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int pri,
+ cpuset_t *mask, const char *name, ...)
{
- char ktname[MAXCOMLEN + 1];
va_list ap;
- cpuset_t mask;
+ int error;
va_start(ap, name);
- vsnprintf(ktname, sizeof(ktname), name, ap);
+ error = _taskqueue_start_threads(tqp, count, pri, mask, name, ap);
va_end(ap);
+ return (error);
+}
+
+int
+taskqueue_start_threads_pinned(struct taskqueue **tqp, int count, int pri,
+ int cpu_id, const char *name, ...)
+{
+ cpuset_t mask;
+ va_list ap;
+ int error;
/*
* In case someone passes in NOCPU, just fall back to the
@@ -666,8 +676,11 @@ taskqueue_start_threads_pinned(struct taskqueue **tqp, int count, int pri,
CPU_SET(cpu_id, &mask);
}
- return (_taskqueue_start_threads(tqp, count, pri,
- cpu_id == NOCPU ? NULL : &mask, ktname));
+ va_start(ap, name);
+ error = _taskqueue_start_threads(tqp, count, pri,
+ cpu_id == NOCPU ? NULL : &mask, name, ap);
+ va_end(ap);
+ return (error);
}
static inline void
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index 7a5f624..bab8f50 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -120,6 +120,18 @@ CTASSERT(sizeof(struct struct_m_ext) == 28);
#endif
/*
+ * Assert that the queue(3) macros produce code of the same size as an old
+ * plain pointer does.
+ */
+#ifdef INVARIANTS
+static struct mbuf m_assertbuf;
+CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next));
+CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next));
+CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt));
+CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt));
+#endif
+
+/*
* m_get2() allocates minimum mbuf that would fit "size" argument.
*/
struct mbuf *
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 189a30f..b897e05 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -3439,11 +3439,9 @@ soisdisconnecting(struct socket *so)
SOCKBUF_LOCK(&so->so_rcv);
so->so_state &= ~SS_ISCONNECTING;
so->so_state |= SS_ISDISCONNECTING;
- so->so_rcv.sb_state |= SBS_CANTRCVMORE;
- sorwakeup_locked(so);
+ socantrcvmore_locked(so);
SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_state |= SBS_CANTSENDMORE;
- sowwakeup_locked(so);
+ socantsendmore_locked(so);
wakeup(&so->so_timeo);
}
@@ -3458,12 +3456,10 @@ soisdisconnected(struct socket *so)
SOCKBUF_LOCK(&so->so_rcv);
so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_state |= SS_ISDISCONNECTED;
- so->so_rcv.sb_state |= SBS_CANTRCVMORE;
- sorwakeup_locked(so);
+ socantrcvmore_locked(so);
SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_state |= SBS_CANTSENDMORE;
sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
- sowwakeup_locked(so);
+ socantsendmore_locked(so);
wakeup(&so->so_timeo);
}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 912863e..cfa8f45 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -122,6 +122,10 @@ static unsigned long numvnodes;
SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0,
"Number of vnodes in existence");
+static u_long vnodes_created;
+SYSCTL_ULONG(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created,
+ 0, "Number of vnodes created by getnewvnode");
+
/*
* Conversion tables for conversion from vnode types to inode formats
* and back.
@@ -156,6 +160,10 @@ static int vlru_allow_cache_src;
SYSCTL_INT(_vfs, OID_AUTO, vlru_allow_cache_src, CTLFLAG_RW,
&vlru_allow_cache_src, 0, "Allow vlru to reclaim source vnode");
+static u_long recycles_count;
+SYSCTL_ULONG(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, 0,
+ "Number of vnodes recycled to avoid exceding kern.maxvnodes");
+
/*
* Various variables used for debugging the new implementation of
* reassignbuf().
@@ -788,6 +796,7 @@ vlrureclaim(struct mount *mp)
}
KASSERT((vp->v_iflag & VI_DOOMED) == 0,
("VI_DOOMED unexpectedly detected in vlrureclaim()"));
+ atomic_add_long(&recycles_count, 1);
vgonel(vp);
VOP_UNLOCK(vp, 0);
vdropl(vp);
@@ -988,8 +997,10 @@ vtryrecycle(struct vnode *vp)
__func__, vp);
return (EBUSY);
}
- if ((vp->v_iflag & VI_DOOMED) == 0)
+ if ((vp->v_iflag & VI_DOOMED) == 0) {
+ atomic_add_long(&recycles_count, 1);
vgonel(vp);
+ }
VOP_UNLOCK(vp, LK_INTERLOCK);
vn_finished_write(vnmp);
return (0);
@@ -1093,6 +1104,7 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
atomic_add_long(&numvnodes, 1);
mtx_unlock(&vnode_free_list_mtx);
alloc:
+ atomic_add_long(&vnodes_created, 1);
vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
/*
* Setup locks.
@@ -3191,6 +3203,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max);
db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed);
+ db_printf(" mnt_lockref = %d\n", mp->mnt_lockref);
db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes);
db_printf(" mnt_secondary_accwrites = %d\n",
mp->mnt_secondary_accwrites);
diff --git a/sys/mips/conf/AR71XX_BASE b/sys/mips/conf/AR71XX_BASE
index c94dd32..357f965 100644
--- a/sys/mips/conf/AR71XX_BASE
+++ b/sys/mips/conf/AR71XX_BASE
@@ -60,7 +60,6 @@ device ar71xx_pci
options IEEE80211_DEBUG
options IEEE80211_ALQ
options IEEE80211_SUPPORT_MESH
-# This option is currently broken for if_ath_tx.
options IEEE80211_SUPPORT_TDMA
options IEEE80211_AMPDU_AGE
device wlan # 802.11 support
diff --git a/sys/modules/cxgbe/if_cxgbe/Makefile b/sys/modules/cxgbe/if_cxgbe/Makefile
index 32347f4..a66e45a 100644
--- a/sys/modules/cxgbe/if_cxgbe/Makefile
+++ b/sys/modules/cxgbe/if_cxgbe/Makefile
@@ -26,4 +26,3 @@ SRCS+= t4_tracer.c
CFLAGS+= -I${CXGBE}
.include <bsd.kmod.mk>
-CFLAGS+= ${GCC_MS_EXTENSIONS}
diff --git a/sys/modules/drm2/radeonkms/Makefile b/sys/modules/drm2/radeonkms/Makefile
index 076b825..1e674a7 100644
--- a/sys/modules/drm2/radeonkms/Makefile
+++ b/sys/modules/drm2/radeonkms/Makefile
@@ -106,6 +106,6 @@ SRCS += \
iicbus_if.h \
pci_if.h
-CFLAGS += -I${.CURDIR}/../../../dev/drm2/radeon ${GCC_MS_EXTENSIONS}
+CFLAGS += -I${.CURDIR}/../../../dev/drm2/radeon
.include <bsd.kmod.mk>
diff --git a/sys/modules/ibcore/Makefile b/sys/modules/ibcore/Makefile
index fea77b9..60ec8dd 100644
--- a/sys/modules/ibcore/Makefile
+++ b/sys/modules/ibcore/Makefile
@@ -4,8 +4,8 @@
KMOD= ibcore
SRCS= addr.c iwcm.c sa_query.c ucma.c uverbs_cmd.c \
- agent.c local_sa.c multicast.c smi.c ud_header.c uverbs_main.c \
- mad.c notice.c umem.c uverbs_marshall.c \
+ agent.c multicast.c smi.c ud_header.c uverbs_main.c \
+ mad.c peer_mem.c umem.c uverbs_marshall.c \
cache.c device.c packer.c sysfs.c user_mad.c verbs.c \
cm.c fmr_pool.c mad_rmpp.c ucm.c cma.c \
vnode_if.h device_if.h bus_if.h pci_if.h \
@@ -18,6 +18,6 @@ CFLAGS+= -DINET6 -DINET
.include <bsd.kmod.mk>
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith
CWARNFLAGS.cm.c= -Wno-unused-function
diff --git a/sys/modules/ipoib/Makefile b/sys/modules/ipoib/Makefile
index d634e37..ba8c6d3 100644
--- a/sys/modules/ipoib/Makefile
+++ b/sys/modules/ipoib/Makefile
@@ -15,4 +15,4 @@ CFLAGS+= -DINET6 -DINET
.include <bsd.kmod.mk>
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith
diff --git a/sys/modules/mlx4/Makefile b/sys/modules/mlx4/Makefile
index d989ff0..cd2b177 100644
--- a/sys/modules/mlx4/Makefile
+++ b/sys/modules/mlx4/Makefile
@@ -15,7 +15,6 @@ CFLAGS+= -I${.CURDIR}/../../ofed/include/
.include <bsd.kmod.mk>
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
-CFLAGS+= -fms-extensions
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith
CWARNFLAGS.mcg.c= -Wno-unused
diff --git a/sys/modules/mlx4ib/Makefile b/sys/modules/mlx4ib/Makefile
index 57592bc..7bfcb4a 100644
--- a/sys/modules/mlx4ib/Makefile
+++ b/sys/modules/mlx4ib/Makefile
@@ -6,14 +6,14 @@ KMOD= mlx4ib
SRCS= device_if.h bus_if.h vnode_if.h pci_if.h \
opt_inet.h opt_inet6.h \
alias_GUID.c mcg.c sysfs.c ah.c cq.c \
+ mlx4_exp.c \
doorbell.c mad.c main.c mr.c qp.c srq.c wc.c cm.c
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
CFLAGS+= -I${.CURDIR}/../../ofed/include/
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
CFLAGS+= -DINET6 -DINET
-CFLAGS+= -fms-extensions
.include <bsd.kmod.mk>
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith
diff --git a/sys/modules/mlxen/Makefile b/sys/modules/mlxen/Makefile
index 258cf7e..02f7776 100644
--- a/sys/modules/mlxen/Makefile
+++ b/sys/modules/mlxen/Makefile
@@ -8,8 +8,7 @@ SRCS += en_rx.c en_tx.c utils.c
SRCS += opt_inet.h opt_inet6.h
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4
CFLAGS+= -I${.CURDIR}/../../ofed/include/
-CFLAGS+= -fms-extensions
.include <bsd.kmod.mk>
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith
diff --git a/sys/modules/mthca/Makefile b/sys/modules/mthca/Makefile
index 25daedb..31fcebd 100644
--- a/sys/modules/mthca/Makefile
+++ b/sys/modules/mthca/Makefile
@@ -14,4 +14,4 @@ CFLAGS+= -I${.CURDIR}/../../ofed/include
.include <bsd.kmod.mk>
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith
diff --git a/sys/modules/usb/udl/Makefile b/sys/modules/usb/udl/Makefile
new file mode 100644
index 0000000..5323883
--- /dev/null
+++ b/sys/modules/usb/udl/Makefile
@@ -0,0 +1,12 @@
+# $FreeBSD$
+
+S= ${.CURDIR}/../../..
+
+.PATH: $S/dev/usb/video
+
+KMOD= udl
+SRCS= opt_bus.h opt_usb.h fb_if.h device_if.h \
+ bus_if.h usb_if.h vnode_if.h usbdevs.h \
+ udl.c
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/videomode/Makefile b/sys/modules/videomode/Makefile
new file mode 100644
index 0000000..9b09aae
--- /dev/null
+++ b/sys/modules/videomode/Makefile
@@ -0,0 +1,40 @@
+#
+# $FreeBSD$
+#
+# Copyright (c) 2015 Hans Petter Selasky. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+
+S= ${.CURDIR}/../..
+
+.PATH: $S/dev/videomode
+
+KMOD= videomode
+SRCS= bus_if.h device_if.h vnode_if.h \
+ opt_bus.h opt_ddb.h opt_videomode.h \
+ videomode.c \
+ edid.c \
+ pickmode.c \
+ vesagtf.c
+
+.include <bsd.kmod.mk>
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 019bbf5..753854d 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -228,7 +228,7 @@ struct bridge_softc {
static VNET_DEFINE(struct mtx, bridge_list_mtx);
#define V_bridge_list_mtx VNET(bridge_list_mtx)
-eventhandler_tag bridge_detach_cookie = NULL;
+static eventhandler_tag bridge_detach_cookie;
int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
@@ -538,6 +538,7 @@ vnet_bridge_uninit(const void *unused __unused)
{
if_clone_detach(V_bridge_cloner);
+ V_bridge_cloner = NULL;
BRIDGE_LIST_LOCK_DESTROY();
}
VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
@@ -1797,7 +1798,13 @@ bridge_ifdetach(void *arg __unused, struct ifnet *ifp)
if (ifp->if_flags & IFF_RENAMING)
return;
-
+ if (V_bridge_cloner == NULL) {
+ /*
+ * This detach handler can be called after
+ * vnet_bridge_uninit(). Just return in that case.
+ */
+ return;
+ }
/* Check if the interface is a bridge member */
if (sc != NULL) {
BRIDGE_LOCK(sc);
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 78a4e8a..d14f6bf 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1567,13 +1567,18 @@ void pf_free_rule(struct pf_rule *);
#ifdef INET
int pf_test(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
#endif /* INET */
#ifdef INET6
int pf_test6(int, struct ifnet *, struct mbuf **, struct inpcb *);
+int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
+ struct pf_pdesc *);
void pf_poolmask(struct pf_addr *, struct pf_addr*,
struct pf_addr *, struct pf_addr *, u_int8_t);
void pf_addr_inc(struct pf_addr *, sa_family_t);
+int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *);
#endif /* INET6 */
u_int32_t pf_new_isn(struct pf_state *);
@@ -1589,10 +1594,6 @@ int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t);
void pf_normalize_init(void);
void pf_normalize_cleanup(void);
-int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, u_short *,
- struct pf_pdesc *);
-int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, u_short *,
- struct pf_pdesc *);
int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, int, int, void *,
struct pf_pdesc *);
void pf_normalize_tcp_cleanup(struct pf_state *);
@@ -1669,6 +1670,8 @@ int pfi_clear_flags(const char *, int);
int pf_match_tag(struct mbuf *, struct pf_rule *, int *, int);
int pf_tag_packet(struct mbuf *, struct pf_pdesc *, int);
+int pf_addr_cmp(struct pf_addr *, struct pf_addr *,
+ sa_family_t);
void pf_qid2qname(u_int32_t, char *);
VNET_DECLARE(struct pf_kstatus, pf_status);
diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c
index 92d0715..1e9e692 100644
--- a/sys/netinet6/frag6.c
+++ b/sys/netinet6/frag6.c
@@ -541,27 +541,16 @@ insert:
*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
#endif
- /* Delete frag6 header */
- if (m->m_len >= offset + sizeof(struct ip6_frag)) {
- /* This is the only possible case with !PULLDOWN_TEST */
- ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
- offset);
- m->m_data += sizeof(struct ip6_frag);
- m->m_len -= sizeof(struct ip6_frag);
- } else {
- /* this comes with no copy if the boundary is on cluster */
- if ((t = m_split(m, offset, M_NOWAIT)) == NULL) {
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
+ frag6_remque(q6);
+ V_frag6_nfrags -= q6->ip6q_nfrag;
#ifdef MAC
- mac_ip6q_destroy(q6);
+ mac_ip6q_destroy(q6);
#endif
- free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
- goto dropfrag;
- }
- m_adj(t, sizeof(struct ip6_frag));
- m_cat(m, t);
+ free(q6, M_FTABLE);
+ V_frag6_nfragpackets--;
+
+ goto dropfrag;
}
/*
@@ -775,3 +764,27 @@ frag6_drain(void)
IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
+
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
+{
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct mbuf *t;
+
+ /* Delete frag6 header. */
+ if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+ /* This is the only possible case with !PULLDOWN_TEST. */
+ bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
+ offset);
+ m->m_data += sizeof(struct ip6_frag);
+ m->m_len -= sizeof(struct ip6_frag);
+ } else {
+		/* This comes with no copy if the split point lies on a cluster boundary. */
+ if ((t = m_split(m, offset, wait)) == NULL)
+ return (ENOMEM);
+ m_adj(t, sizeof(struct ip6_frag));
+ m_cat(m, t);
+ }
+
+ return (0);
+}
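
The new ip6_deletefraghdr() factors out logic frag6_input() used to carry
inline: it strips the fragment header either by sliding the IPv6 header
forward within the first mbuf or, when the header does not fit there, by
splitting and rejoining the chain. A hedged caller sketch, assuming the
fragment header sits at offset "offset" into chain m:

	/* Sketch: strip the fragment header from a reassembled chain. */
	if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
		/* m_split() failed; the chain is untouched on error. */
		m_freem(m);
		return (ENOMEM);
	}
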
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
index a3474d3..a94d797 100644
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -212,6 +212,65 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
*(u_short *)(m->m_data + offset) = csum;
}
+int
+ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
+ int mtu)
+{
+ struct mbuf *m, **mnext, *m_frgpart;
+ struct ip6_hdr *ip6, *mhip6;
+ struct ip6_frag *ip6f;
+ int off;
+ int error;
+ int tlen = m0->m_pkthdr.len;
+ uint32_t id = htonl(ip6_randomid());
+
+ m = m0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ mnext = &m->m_nextpkt;
+
+ for (off = hlen; off < tlen; off += mtu) {
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (!m) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m->m_flags = m0->m_flags & M_COPYFLAGS;
+ *mnext = m;
+ mnext = &m->m_nextpkt;
+ m->m_data += max_linkhdr;
+ mhip6 = mtod(m, struct ip6_hdr *);
+ *mhip6 = *ip6;
+ m->m_len = sizeof(*mhip6);
+ error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+ if (error) {
+ IP6STAT_INC(ip6s_odropped);
+ return (error);
+ }
+ ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
+ if (off + mtu >= tlen)
+ mtu = tlen - off;
+ else
+ ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
+ mhip6->ip6_plen = htons((u_short)(mtu + hlen +
+ sizeof(*ip6f) - sizeof(struct ip6_hdr)));
+ if ((m_frgpart = m_copy(m0, off, mtu)) == 0) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m_cat(m, m_frgpart);
+ m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f);
+ m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
+ m->m_pkthdr.rcvif = NULL;
+ ip6f->ip6f_reserved = 0;
+ ip6f->ip6f_ident = id;
+ ip6f->ip6f_nxt = nextproto;
+ IP6STAT_INC(ip6s_ofragments);
+ in6_ifstat_inc(ifp, ifs6_out_fragcreat);
+ }
+
+ return (0);
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@@ -236,11 +295,11 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
struct ifnet **ifpp, struct inpcb *inp)
{
- struct ip6_hdr *ip6, *mhip6;
+ struct ip6_hdr *ip6;
struct ifnet *ifp, *origifp;
struct mbuf *m = m0;
struct mbuf *mprev = NULL;
- int hlen, tlen, len, off;
+ int hlen, tlen, len;
struct route_in6 ip6route;
struct rtentry *rt = NULL;
struct sockaddr_in6 *dst, src_sa, dst_sa;
@@ -901,9 +960,6 @@ passout:
in6_ifstat_inc(ifp, ifs6_out_fragfail);
goto bad;
} else {
- struct mbuf **mnext, *m_frgpart;
- struct ip6_frag *ip6f;
- u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
/*
@@ -937,8 +993,6 @@ passout:
m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
}
#endif
- mnext = &m->m_nextpkt;
-
/*
* Change the next header field of the last header in the
* unfragmentable part.
@@ -963,47 +1017,8 @@ passout:
* chain.
*/
m0 = m;
- for (off = hlen; off < tlen; off += len) {
- m = m_gethdr(M_NOWAIT, MT_DATA);
- if (!m) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m->m_flags = m0->m_flags & M_COPYFLAGS;
- *mnext = m;
- mnext = &m->m_nextpkt;
- m->m_data += max_linkhdr;
- mhip6 = mtod(m, struct ip6_hdr *);
- *mhip6 = *ip6;
- m->m_len = sizeof(*mhip6);
- error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
- if (error) {
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
- if (off + len >= tlen)
- len = tlen - off;
- else
- ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
- mhip6->ip6_plen = htons((u_short)(len + hlen +
- sizeof(*ip6f) - sizeof(struct ip6_hdr)));
- if ((m_frgpart = m_copy(m0, off, len)) == 0) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m_cat(m, m_frgpart);
- m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
- m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
- m->m_pkthdr.rcvif = NULL;
- ip6f->ip6f_reserved = 0;
- ip6f->ip6f_ident = id;
- ip6f->ip6f_nxt = nextproto;
- IP6STAT_INC(ip6s_ofragments);
- in6_ifstat_inc(ifp, ifs6_out_fragcreat);
- }
+ if ((error = ip6_fragment(ifp, m, hlen, nextproto, len)))
+ goto sendorfree;
in6_ifstat_inc(ifp, ifs6_out_fragok);
}
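
ip6_fragment() keeps the calling convention of the loop it replaces: the
fragments are linked off m0->m_nextpkt while m0 itself still holds the
unfragmented original, so the caller owns both. A condensed sketch of
consuming that chain, modeled on the sendorfree path here and on
pf_refragment6() later in this merge (declarations assumed; the
ip6_forward() call is illustrative, not prescriptive):

	struct mbuf *m, *t;
	int error;

	error = ip6_fragment(ifp, m0, hlen, nextproto, mtu);
	m = m0->m_nextpkt;
	m0->m_nextpkt = NULL;
	if (error == 0)
		m_freem(m0);		/* the original is no longer needed */
	for (; m != NULL; m = t) {
		t = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (error == 0)
			ip6_forward(m, 0);	/* illustrative output path */
		else
			m_freem(m);
	}
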
diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h
index 155a607..54c5c66 100644
--- a/sys/netinet6/ip6_var.h
+++ b/sys/netinet6/ip6_var.h
@@ -388,6 +388,8 @@ int ip6_setpktopts(struct mbuf *, struct ip6_pktopts *,
void ip6_clearpktopts(struct ip6_pktopts *, int);
struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
int ip6_optlen(struct inpcb *);
+int ip6_deletefraghdr(struct mbuf *, int, int);
+int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int);
int route6_input(struct mbuf **, int *, int);
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 15667a6..b32288b 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -362,6 +362,45 @@ VNET_DEFINE(void *, pf_swi_cookie);
VNET_DEFINE(uint32_t, pf_hashseed);
#define V_pf_hashseed VNET(pf_hashseed)
+int
+pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
+{
+
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ if (a->addr32[0] > b->addr32[0])
+ return (1);
+ if (a->addr32[0] < b->addr32[0])
+ return (-1);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ if (a->addr32[3] > b->addr32[3])
+ return (1);
+ if (a->addr32[3] < b->addr32[3])
+ return (-1);
+ if (a->addr32[2] > b->addr32[2])
+ return (1);
+ if (a->addr32[2] < b->addr32[2])
+ return (-1);
+ if (a->addr32[1] > b->addr32[1])
+ return (1);
+ if (a->addr32[1] < b->addr32[1])
+ return (-1);
+ if (a->addr32[0] > b->addr32[0])
+ return (1);
+ if (a->addr32[0] < b->addr32[0])
+ return (-1);
+ break;
+#endif /* INET6 */
+ default:
+ panic("%s: unknown address family %u", __func__, af);
+ }
+ return (0);
+}
+
static __inline uint32_t
pf_hashkey(struct pf_state_key *sk)
{
@@ -5460,7 +5499,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
goto bad;
if (oifp != ifp) {
- if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
+ if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS)
goto bad;
else if (m0 == NULL)
goto done;
@@ -6018,15 +6057,20 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
struct pfi_kif *kif;
u_short action, reason = 0, log = 0;
struct mbuf *m = *m0, *n = NULL;
+ struct m_tag *mtag;
struct ip6_hdr *h = NULL;
struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr;
struct pf_state *s = NULL;
struct pf_ruleset *ruleset = NULL;
struct pf_pdesc pd;
int off, terminal = 0, dirndx, rh_cnt = 0;
+ int fwdir = dir;
M_ASSERTPKTHDR(m);
+ if (ifp != m->m_pkthdr.rcvif)
+ fwdir = PF_FWD;
+
if (!V_pf_status.running)
return (PF_PASS);
@@ -6388,6 +6432,11 @@ done:
if (s)
PF_STATE_UNLOCK(s);
+ /* If reassembled packet passed, create new fragments. */
+ if (action == PF_PASS && *m0 && fwdir == PF_FWD &&
+ (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL)
+ action = pf_refragment6(ifp, m0, mtag);
+
return (action);
}
#endif /* INET6 */
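
pf_addr_cmp() gives pf a memcmp()-style total order over pf_addr for both
families. The IPv6 arm compares addr32[3] first; the word order is arbitrary,
which is fine for its use here, since a red-black tree comparator only needs
a consistent ordering. Its consumer in this merge is pf_frag_compare() in the
pf_norm.c changes below, which chains it like this:

	if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
		return (diff);
	if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
		return (diff);
	return (0);
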
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index 96f638e..ac0e0fb 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -43,7 +43,7 @@
#endif
#endif
-enum { PF_INOUT, PF_IN, PF_OUT };
+enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD };
enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT,
PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER };
enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT,
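
PF_FWD marks packets on the IPv6 forwarding path. pf_route6() passes it
explicitly (see the pf.c hunk above), and pf_test6() also infers it when
invoked on an interface other than the one the packet arrived on:

	int fwdir = dir;

	/* Output interface differs from input: forwarding path. */
	if (ifp != m->m_pkthdr.rcvif)
		fwdir = PF_FWD;
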
diff --git a/sys/netpfil/pf/pf_mtag.h b/sys/netpfil/pf/pf_mtag.h
index baff00a..3aacb2e 100644
--- a/sys/netpfil/pf/pf_mtag.h
+++ b/sys/netpfil/pf/pf_mtag.h
@@ -39,6 +39,7 @@
#define PF_TAG_TRANSLATE_LOCALHOST 0x04
#define PF_PACKET_LOOPED 0x08
#define PF_FASTFWD_OURS_PRESENT 0x10
+#define PF_REASSEMBLED 0x20
struct pf_mtag {
void *hdr; /* saved hdr pos in mbuf, for ECN */
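
PF_REASSEMBLED doubles as an mbuf tag type: after IPv6 reassembly, the
metadata needed for later refragmentation rides on the mbuf itself. Both
ends of the handshake, condensed from the pf_norm.c and pf.c hunks of this
merge:

	/* Producer, in pf_reassemble6(): stash the metadata. */
	mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag),
	    M_NOWAIT);
	if (mtag == NULL)
		goto fail;
	ftag = (struct pf_fragment_tag *)(mtag + 1);
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_extoff = extoff;
	ftag->ft_maxlen = maxlen;
	m_tag_prepend(m, mtag);

	/* Consumer, in pf_test6(): refragment on the forward path. */
	if (action == PF_PASS && *m0 && fwdir == PF_FWD &&
	    (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL)
		action = pf_refragment6(ifp, m0, mtag);
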
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index fb30331..aed2a55 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -1,5 +1,6 @@
/*-
* Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
@@ -57,38 +59,51 @@ __FBSDID("$FreeBSD$");
#endif /* INET6 */
struct pf_frent {
- LIST_ENTRY(pf_frent) fr_next;
- union {
- struct {
- struct ip *_fr_ip;
- struct mbuf *_fr_m;
- } _frag;
- struct {
- uint16_t _fr_off;
- uint16_t _fr_end;
- } _cache;
- } _u;
+ TAILQ_ENTRY(pf_frent) fr_next;
+ struct mbuf *fe_m;
+	uint16_t	fe_hdrlen;	/* ipv4 header length with ip options
+					   or ipv6 base, extension, fragment headers */
+ uint16_t fe_extoff; /* last extension header offset or 0 */
+ uint16_t fe_len; /* fragment length */
+ uint16_t fe_off; /* fragment offset */
+ uint16_t fe_mff; /* more fragment flag */
+};
+
+struct pf_fragment_cmp {
+ struct pf_addr frc_src;
+ struct pf_addr frc_dst;
+ uint32_t frc_id;
+ sa_family_t frc_af;
+ uint8_t frc_proto;
+ uint8_t frc_direction;
};
-#define fr_ip _u._frag._fr_ip
-#define fr_m _u._frag._fr_m
-#define fr_off _u._cache._fr_off
-#define fr_end _u._cache._fr_end
struct pf_fragment {
+ struct pf_fragment_cmp fr_key;
+#define fr_src fr_key.frc_src
+#define fr_dst fr_key.frc_dst
+#define fr_id fr_key.frc_id
+#define fr_af fr_key.frc_af
+#define fr_proto fr_key.frc_proto
+#define fr_direction fr_key.frc_direction
+
RB_ENTRY(pf_fragment) fr_entry;
TAILQ_ENTRY(pf_fragment) frag_next;
- struct in_addr fr_src;
- struct in_addr fr_dst;
- u_int8_t fr_p; /* protocol of this fragment */
- u_int8_t fr_flags; /* status flags */
-#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */
-#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */
-#define PFFRAG_DROP 0x0004 /* Drop all fragments */
+ uint8_t fr_flags; /* status flags */
+#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */
+#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */
+#define PFFRAG_DROP 0x0004 /* Drop all fragments */
#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER))
- u_int16_t fr_id; /* fragment id for reassemble */
- u_int16_t fr_max; /* fragment data max */
- u_int32_t fr_timeout;
- LIST_HEAD(, pf_frent) fr_queue;
+ uint16_t fr_max; /* fragment data max */
+ uint32_t fr_timeout;
+ uint16_t fr_maxlen; /* maximum length of single fragment */
+ TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
+};
+
+struct pf_fragment_tag {
+ uint16_t ft_hdrlen; /* header length of reassembled pkt */
+ uint16_t ft_extoff; /* last extension header offset or 0 */
+ uint16_t ft_maxlen; /* maximum fragment payload length */
};
static struct mtx pf_frag_mtx;
@@ -119,25 +134,32 @@ static int pf_frag_compare(struct pf_fragment *,
static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
-/* Private prototypes */
-static void pf_free_fragment(struct pf_fragment *);
-static void pf_remove_fragment(struct pf_fragment *);
-static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
- struct tcphdr *, int, sa_family_t);
+static void pf_free_fragment(struct pf_fragment *);
+static void pf_remove_fragment(struct pf_fragment *);
+static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
+ struct tcphdr *, int, sa_family_t);
+struct pf_frent *pf_create_fragment(u_short *);
+static struct pf_fragment *
+ pf_find_fragment(struct pf_fragment_cmp *key,
+ struct pf_frag_tree *tree);
+static struct pf_fragment *
+ pf_fillup_fragment(struct pf_fragment_cmp *, struct pf_frent *, u_short *);
+static int pf_isfull_fragment(struct pf_fragment *);
+struct mbuf *pf_join_fragment(struct pf_fragment *);
#ifdef INET
-static void pf_ip2key(struct pf_fragment *, struct ip *);
-static void pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
- u_int8_t);
-static void pf_flush_fragments(void);
-static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *);
-static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **,
- struct pf_frent *, int);
-static struct mbuf *pf_fragcache(struct mbuf **, struct ip*,
- struct pf_fragment **, int, int, int *);
-#endif /* INET */
+static void pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t);
+static void pf_flush_fragments(void);
+static int pf_reassemble(struct mbuf **, struct ip *, int, u_short *);
+static struct mbuf *
+ pf_fragcache(struct mbuf **, struct ip*, struct pf_fragment **,
+ int, int, int *);
+#endif /* INET */
#ifdef INET6
-static void pf_scrub_ip6(struct mbuf **, u_int8_t);
-#endif
+static int pf_reassemble6(struct mbuf **, struct ip6_hdr *,
+ struct ip6_frag *, uint16_t, uint16_t, int, u_short *);
+static void pf_scrub_ip6(struct mbuf **, uint8_t);
+#endif /* INET6 */
+
#define DPFPRINTF(x) do { \
if (V_pf_status.debug >= PF_DEBUG_MISC) { \
printf("%s: ", __func__); \
@@ -145,6 +167,20 @@ static void pf_scrub_ip6(struct mbuf **, u_int8_t);
} \
} while(0)
+#ifdef INET
+static void
+pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key)
+{
+
+ key->frc_src.v4 = ip->ip_src;
+ key->frc_dst.v4 = ip->ip_dst;
+ key->frc_af = AF_INET;
+ key->frc_proto = ip->ip_p;
+ key->frc_id = ip->ip_id;
+ key->frc_direction = dir;
+}
+#endif /* INET */
+
void
pf_normalize_init(void)
{
@@ -184,18 +220,16 @@ pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
int diff;
- if ((diff = a->fr_id - b->fr_id))
+ if ((diff = a->fr_id - b->fr_id) != 0)
return (diff);
- else if ((diff = a->fr_p - b->fr_p))
+ if ((diff = a->fr_proto - b->fr_proto) != 0)
+ return (diff);
+ if ((diff = a->fr_af - b->fr_af) != 0)
+ return (diff);
+ if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
+ return (diff);
+ if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
return (diff);
- else if (a->fr_src.s_addr < b->fr_src.s_addr)
- return (-1);
- else if (a->fr_src.s_addr > b->fr_src.s_addr)
- return (1);
- else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
- return (-1);
- else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
- return (1);
return (0);
}
@@ -270,23 +304,23 @@ pf_free_fragment(struct pf_fragment *frag)
/* Free all fragments */
if (BUFFER_FRAGMENTS(frag)) {
- for (frent = LIST_FIRST(&frag->fr_queue); frent;
- frent = LIST_FIRST(&frag->fr_queue)) {
- LIST_REMOVE(frent, fr_next);
+ for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
+ frent = TAILQ_FIRST(&frag->fr_queue)) {
+ TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
- m_freem(frent->fr_m);
+ m_freem(frent->fe_m);
uma_zfree(V_pf_frent_z, frent);
}
} else {
- for (frent = LIST_FIRST(&frag->fr_queue); frent;
- frent = LIST_FIRST(&frag->fr_queue)) {
- LIST_REMOVE(frent, fr_next);
+ for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
+ frent = TAILQ_FIRST(&frag->fr_queue)) {
+ TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
- KASSERT((LIST_EMPTY(&frag->fr_queue) ||
- LIST_FIRST(&frag->fr_queue)->fr_off >
- frent->fr_end),
- ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
- " frent->fr_end): %s", __func__));
+ KASSERT((TAILQ_EMPTY(&frag->fr_queue) ||
+ TAILQ_FIRST(&frag->fr_queue)->fe_off >
+ frent->fe_len),
+ ("! (TAILQ_EMPTY() || TAILQ_FIRST()->fe_off >"
+ " frent->fe_len): %s", __func__));
uma_zfree(V_pf_frent_z, frent);
}
@@ -296,26 +330,14 @@ pf_free_fragment(struct pf_fragment *frag)
}
#ifdef INET
-static void
-pf_ip2key(struct pf_fragment *key, struct ip *ip)
-{
- key->fr_p = ip->ip_p;
- key->fr_id = ip->ip_id;
- key->fr_src.s_addr = ip->ip_src.s_addr;
- key->fr_dst.s_addr = ip->ip_dst.s_addr;
-}
-
static struct pf_fragment *
-pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
+pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
{
- struct pf_fragment key;
struct pf_fragment *frag;
PF_FRAG_ASSERT();
- pf_ip2key(&key, ip);
-
- frag = RB_FIND(pf_frag_tree, tree, &key);
+ frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
if (frag != NULL) {
/* XXX Are we sure we want to update the timeout? */
frag->fr_timeout = time_uptime;
@@ -351,210 +373,421 @@ pf_remove_fragment(struct pf_fragment *frag)
}
}
-#ifdef INET
-#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
-static struct mbuf *
-pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
- struct pf_frent *frent, int mff)
+struct pf_frent *
+pf_create_fragment(u_short *reason)
{
- struct mbuf *m = *m0, *m2;
- struct pf_frent *frea, *next;
- struct pf_frent *frep = NULL;
- struct ip *ip = frent->fr_ip;
- int hlen = ip->ip_hl << 2;
- u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
- u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
- u_int16_t max = ip_len + off;
+ struct pf_frent *frent;
PF_FRAG_ASSERT();
- KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
- ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
- /* Strip off ip header */
- m->m_data += hlen;
- m->m_len -= hlen;
+ frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
+ if (frent == NULL) {
+ pf_flush_fragments();
+ frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
+ if (frent == NULL) {
+ REASON_SET(reason, PFRES_MEMORY);
+ return (NULL);
+ }
+ }
- /* Create a new reassembly queue for this packet */
- if (*frag == NULL) {
- *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
- if (*frag == NULL) {
+ return (frent);
+}
+
+struct pf_fragment *
+pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
+ u_short *reason)
+{
+ struct pf_frent *after, *next, *prev;
+ struct pf_fragment *frag;
+ uint16_t total;
+
+ PF_FRAG_ASSERT();
+
+ /* No empty fragments. */
+ if (frent->fe_len == 0) {
+ DPFPRINTF(("bad fragment: len 0"));
+ goto bad_fragment;
+ }
+
+ /* All fragments are 8 byte aligned. */
+ if (frent->fe_mff && (frent->fe_len & 0x7)) {
+ DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len));
+ goto bad_fragment;
+ }
+
+ /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */
+ if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
+ DPFPRINTF(("bad fragment: max packet %d",
+ frent->fe_off + frent->fe_len));
+ goto bad_fragment;
+ }
+
+ DPFPRINTF((key->frc_af == AF_INET ?
+ "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
+ key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len));
+
+ /* Fully buffer all of the fragments in this fragment queue. */
+ frag = pf_find_fragment(key, &V_pf_frag_tree);
+
+ /* Create a new reassembly queue for this packet. */
+ if (frag == NULL) {
+ frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
+ if (frag == NULL) {
pf_flush_fragments();
- *frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
- if (*frag == NULL)
+ frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
+ if (frag == NULL) {
+ REASON_SET(reason, PFRES_MEMORY);
goto drop_fragment;
+ }
}
- (*frag)->fr_flags = 0;
- (*frag)->fr_max = 0;
- (*frag)->fr_src = frent->fr_ip->ip_src;
- (*frag)->fr_dst = frent->fr_ip->ip_dst;
- (*frag)->fr_p = frent->fr_ip->ip_p;
- (*frag)->fr_id = frent->fr_ip->ip_id;
- (*frag)->fr_timeout = time_uptime;
- LIST_INIT(&(*frag)->fr_queue);
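+		/*
+		 * struct pf_fragment begins with a pf_fragment_cmp, so a
+		 * single struct assignment fills in the whole lookup key.
+		 */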
+ *(struct pf_fragment_cmp *)frag = *key;
+		frag->fr_timeout = time_uptime;
+ frag->fr_maxlen = frent->fe_len;
+ TAILQ_INIT(&frag->fr_queue);
+
+ RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag);
+ TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
- RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
- TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);
+ /* We do not have a previous fragment. */
+ TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
- /* We do not have a previous fragment */
- frep = NULL;
- goto insert;
+ return (frag);
}
- /*
- * Find a fragment after the current one:
- * - off contains the real shifted offset.
- */
- LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
- if (FR_IP_OFF(frea) > off)
+ KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
+
+ /* Remember maximum fragment len for refragmentation. */
+ if (frent->fe_len > frag->fr_maxlen)
+ frag->fr_maxlen = frent->fe_len;
+
+ /* Maximum data we have seen already. */
+ total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+ TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+
+	/* Non-terminal fragments must have the more-fragments flag set. */
+ if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
+ goto bad_fragment;
+
+ /* Check if we saw the last fragment already. */
+ if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
+ if (frent->fe_off + frent->fe_len > total ||
+ (frent->fe_off + frent->fe_len == total && frent->fe_mff))
+ goto bad_fragment;
+ } else {
+ if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
+ goto bad_fragment;
+ }
+
+ /* Find a fragment after the current one. */
+ prev = NULL;
+ TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
+ if (after->fe_off > frent->fe_off)
break;
- frep = frea;
+ prev = after;
}
- KASSERT((frep != NULL || frea != NULL),
- ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
+ KASSERT(prev != NULL || after != NULL,
+ ("prev != NULL || after != NULL"));
- if (frep != NULL &&
- FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
- 4 > off)
- {
- u_int16_t precut;
+ if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
+ uint16_t precut;
- precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
- frep->fr_ip->ip_hl * 4 - off;
- if (precut >= ip_len)
- goto drop_fragment;
- m_adj(frent->fr_m, precut);
- DPFPRINTF(("overlap -%d\n", precut));
- /* Enforce 8 byte boundaries */
- ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
- off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
- ip_len -= precut;
- ip->ip_len = htons(ip_len);
- }
-
- for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
- frea = next)
- {
- u_int16_t aftercut;
-
- aftercut = ip_len + off - FR_IP_OFF(frea);
- DPFPRINTF(("adjust overlap %d\n", aftercut));
- if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
- * 4)
- {
- frea->fr_ip->ip_len =
- htons(ntohs(frea->fr_ip->ip_len) - aftercut);
- frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
- (aftercut >> 3));
- m_adj(frea->fr_m, aftercut);
+ precut = prev->fe_off + prev->fe_len - frent->fe_off;
+ if (precut >= frent->fe_len)
+ goto bad_fragment;
+ DPFPRINTF(("overlap -%d", precut));
+ m_adj(frent->fe_m, precut);
+ frent->fe_off += precut;
+ frent->fe_len -= precut;
+ }
+
+ for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
+ after = next) {
+ uint16_t aftercut;
+
+ aftercut = frent->fe_off + frent->fe_len - after->fe_off;
+ DPFPRINTF(("adjust overlap %d", aftercut));
+ if (aftercut < after->fe_len) {
+ m_adj(after->fe_m, aftercut);
+ after->fe_off += aftercut;
+ after->fe_len -= aftercut;
break;
}
- /* This fragment is completely overlapped, lose it */
- next = LIST_NEXT(frea, fr_next);
- m_freem(frea->fr_m);
- LIST_REMOVE(frea, fr_next);
- uma_zfree(V_pf_frent_z, frea);
+ /* This fragment is completely overlapped, lose it. */
+ next = TAILQ_NEXT(after, fr_next);
+ m_freem(after->fe_m);
+ TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
+ uma_zfree(V_pf_frent_z, after);
}
- insert:
- /* Update maximum data size */
- if ((*frag)->fr_max < max)
- (*frag)->fr_max = max;
- /* This is the last segment */
- if (!mff)
- (*frag)->fr_flags |= PFFRAG_SEENLAST;
-
- if (frep == NULL)
- LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
+ if (prev == NULL)
+ TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
else
- LIST_INSERT_AFTER(frep, frent, fr_next);
+ TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+
+ return (frag);
+
+bad_fragment:
+ REASON_SET(reason, PFRES_FRAG);
+drop_fragment:
+ uma_zfree(V_pf_frent_z, frent);
+ return (NULL);
+}
+
+static int
+pf_isfull_fragment(struct pf_fragment *frag)
+{
+ struct pf_frent *frent, *next;
+ uint16_t off, total;
/* Check if we are completely reassembled */
- if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
- return (NULL);
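+	/* The queue is offset-ordered; MF still set on the last entry
+	 * means the final fragment has not arrived yet. */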
+ if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
+ return (0);
+
+ /* Maximum data we have seen already */
+ total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+ TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
/* Check if we have all the data */
off = 0;
- for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
- next = LIST_NEXT(frep, fr_next);
-
- off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
- if (off < (*frag)->fr_max &&
- (next == NULL || FR_IP_OFF(next) != off))
- {
- DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
- off, next == NULL ? -1 : FR_IP_OFF(next),
- (*frag)->fr_max));
- return (NULL);
+ for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
+ next = TAILQ_NEXT(frent, fr_next);
+
+ off += frent->fe_len;
+ if (off < total && (next == NULL || next->fe_off != off)) {
+ DPFPRINTF(("missing fragment at %d, next %d, total %d",
+ off, next == NULL ? -1 : next->fe_off, total));
+ return (0);
}
}
- DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
- if (off < (*frag)->fr_max)
- return (NULL);
+ DPFPRINTF(("%d < %d?", off, total));
+ if (off < total)
+ return (0);
+ KASSERT(off == total, ("off == total"));
- /* We have all the data */
- frent = LIST_FIRST(&(*frag)->fr_queue);
- KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
- if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
- DPFPRINTF(("drop: too big: %d\n", off));
- pf_free_fragment(*frag);
- *frag = NULL;
- return (NULL);
- }
- next = LIST_NEXT(frent, fr_next);
+ return (1);
+}
+
+struct mbuf *
+pf_join_fragment(struct pf_fragment *frag)
+{
+ struct mbuf *m, *m2;
+ struct pf_frent *frent, *next;
+
+ frent = TAILQ_FIRST(&frag->fr_queue);
+ next = TAILQ_NEXT(frent, fr_next);
- /* Magic from ip_input */
- ip = frent->fr_ip;
- m = frent->fr_m;
+ /* Magic from ip_input. */
+ m = frent->fe_m;
m2 = m->m_next;
m->m_next = NULL;
m_cat(m, m2);
uma_zfree(V_pf_frent_z, frent);
for (frent = next; frent != NULL; frent = next) {
- next = LIST_NEXT(frent, fr_next);
+ next = TAILQ_NEXT(frent, fr_next);
- m2 = frent->fr_m;
+ m2 = frent->fe_m;
+ /* Strip off ip header. */
+ m_adj(m2, frent->fe_hdrlen);
uma_zfree(V_pf_frent_z, frent);
- m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
- m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
m_cat(m, m2);
}
- while (m->m_pkthdr.csum_data & 0xffff0000)
- m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
- (m->m_pkthdr.csum_data >> 16);
- ip->ip_src = (*frag)->fr_src;
- ip->ip_dst = (*frag)->fr_dst;
+ /* Remove from fragment queue. */
+ pf_remove_fragment(frag);
- /* Remove from fragment queue */
- pf_remove_fragment(*frag);
- *frag = NULL;
+ return (m);
+}
+
+#ifdef INET
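+/*
+ * Caller holds PF_FRAG_LOCK(); contrast pf_reassemble6() below, which
+ * takes and drops the lock itself.
+ */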
+static int
+pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason)
+{
+ struct mbuf *m = *m0;
+ struct pf_frent *frent;
+ struct pf_fragment *frag;
+ struct pf_fragment_cmp key;
+ uint16_t total, hdrlen;
+
+ /* Get an entry for the fragment queue */
+ if ((frent = pf_create_fragment(reason)) == NULL)
+ return (PF_DROP);
+
+ frent->fe_m = m;
+ frent->fe_hdrlen = ip->ip_hl << 2;
+ frent->fe_extoff = 0;
+ frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
+ frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
+ frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
+
+ pf_ip2key(ip, dir, &key);
+
+ if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+ return (PF_DROP);
- hlen = ip->ip_hl << 2;
- ip->ip_len = htons(off + hlen);
- m->m_len += hlen;
- m->m_data -= hlen;
+ /* The mbuf is part of the fragment entry, no direct free or access */
+ m = *m0 = NULL;
+
+ if (!pf_isfull_fragment(frag))
+ return (PF_PASS); /* drop because *m0 is NULL, no error */
+
+ /* We have all the data */
+ frent = TAILQ_FIRST(&frag->fr_queue);
+ KASSERT(frent != NULL, ("frent != NULL"));
+ total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+ TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+ hdrlen = frent->fe_hdrlen;
+
+ m = *m0 = pf_join_fragment(frag);
+ frag = NULL;
- /* some debugging cruft by sklower, below, will go away soon */
- /* XXX this should be done elsewhere */
if (m->m_flags & M_PKTHDR) {
int plen = 0;
- for (m2 = m; m2; m2 = m2->m_next)
- plen += m2->m_len;
+ for (m = *m0; m; m = m->m_next)
+ plen += m->m_len;
+ m = *m0;
m->m_pkthdr.len = plen;
}
+ ip = mtod(m, struct ip *);
+ ip->ip_len = htons(hdrlen + total);
+ ip->ip_off &= ~(IP_MF|IP_OFFMASK);
+
+ if (hdrlen + total > IP_MAXPACKET) {
+ DPFPRINTF(("drop: too big: %d", total));
+ ip->ip_len = 0;
+ REASON_SET(reason, PFRES_SHORT);
+ /* PF_DROP requires a valid mbuf *m0 in pf_test() */
+ return (PF_DROP);
+ }
+
DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
- return (m);
+ return (PF_PASS);
+}
- drop_fragment:
- /* Oops - fail safe - drop packet */
- uma_zfree(V_pf_frent_z, frent);
- m_freem(m);
- return (NULL);
+#ifdef INET6
+static int
+pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
+ uint16_t hdrlen, uint16_t extoff, int dir, u_short *reason)
+{
+ struct mbuf *m = *m0;
+ struct pf_frent *frent;
+ struct pf_fragment *frag;
+ struct pf_fragment_cmp key;
+ struct m_tag *mtag;
+ struct pf_fragment_tag *ftag;
+ int off;
+ uint16_t total, maxlen;
+ uint8_t proto;
+
+ PF_FRAG_LOCK();
+
+ /* Get an entry for the fragment queue. */
+ if ((frent = pf_create_fragment(reason)) == NULL) {
+ PF_FRAG_UNLOCK();
+ return (PF_DROP);
+ }
+
+ frent->fe_m = m;
+ frent->fe_hdrlen = hdrlen;
+ frent->fe_extoff = extoff;
+ frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
+ frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
+ frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
+
+ key.frc_src.v6 = ip6->ip6_src;
+ key.frc_dst.v6 = ip6->ip6_dst;
+ key.frc_af = AF_INET6;
+ /* Only the first fragment's protocol is relevant. */
+ key.frc_proto = 0;
+ key.frc_id = fraghdr->ip6f_ident;
+ key.frc_direction = dir;
+
+ if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+ PF_FRAG_UNLOCK();
+ return (PF_DROP);
+ }
+
+ /* The mbuf is part of the fragment entry, no direct free or access. */
+ m = *m0 = NULL;
+
+ if (!pf_isfull_fragment(frag)) {
+ PF_FRAG_UNLOCK();
+ return (PF_PASS); /* Drop because *m0 is NULL, no error. */
+ }
+
+ /* We have all the data. */
+ extoff = frent->fe_extoff;
+ maxlen = frag->fr_maxlen;
+ frent = TAILQ_FIRST(&frag->fr_queue);
+ KASSERT(frent != NULL, ("frent != NULL"));
+ total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+ TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
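+	/* Header length up to, but not including, the fragment header,
+	 * which ip6_deletefraghdr() strips just below. */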
+ hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
+
+ m = *m0 = pf_join_fragment(frag);
+ frag = NULL;
+
+ PF_FRAG_UNLOCK();
+
+ /* Take protocol from first fragment header. */
+ m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off);
+ KASSERT(m, ("%s: short mbuf chain", __func__));
+ proto = *(mtod(m, caddr_t) + off);
+ m = *m0;
+
+ /* Delete frag6 header */
+ if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0)
+ goto fail;
+
+ if (m->m_flags & M_PKTHDR) {
+ int plen = 0;
+ for (m = *m0; m; m = m->m_next)
+ plen += m->m_len;
+ m = *m0;
+ m->m_pkthdr.len = plen;
+ }
+
+ if ((mtag = m_tag_get(PF_REASSEMBLED, sizeof(struct pf_fragment_tag),
+ M_NOWAIT)) == NULL)
+ goto fail;
+ ftag = (struct pf_fragment_tag *)(mtag + 1);
+ ftag->ft_hdrlen = hdrlen;
+ ftag->ft_extoff = extoff;
+ ftag->ft_maxlen = maxlen;
+ m_tag_prepend(m, mtag);
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
+ if (extoff) {
+ /* Write protocol into next field of last extension header. */
+ m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
+ &off);
+ KASSERT(m, ("%s: short mbuf chain", __func__));
+ *(mtod(m, char *) + off) = proto;
+ m = *m0;
+ } else
+ ip6->ip6_nxt = proto;
+
+ if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
+ DPFPRINTF(("drop: too big: %d", total));
+ ip6->ip6_plen = 0;
+ REASON_SET(reason, PFRES_SHORT);
+ /* PF_DROP requires a valid mbuf *m0 in pf_test6(). */
+ return (PF_DROP);
+ }
+
+ DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen)));
+ return (PF_PASS);
+
+fail:
+ REASON_SET(reason, PFRES_MEMORY);
+ /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */
+ return (PF_DROP);
}
+#endif /* INET6 */
static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
@@ -591,16 +824,15 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
(*frag)->fr_flags = PFFRAG_NOBUFFER;
(*frag)->fr_max = 0;
- (*frag)->fr_src = h->ip_src;
- (*frag)->fr_dst = h->ip_dst;
- (*frag)->fr_p = h->ip_p;
+ (*frag)->fr_src.v4 = h->ip_src;
+ (*frag)->fr_dst.v4 = h->ip_dst;
(*frag)->fr_id = h->ip_id;
(*frag)->fr_timeout = time_uptime;
- cur->fr_off = off;
- cur->fr_end = max;
- LIST_INIT(&(*frag)->fr_queue);
- LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
+ cur->fe_off = off;
+ cur->fe_len = max; /* TODO: fe_len = max - off ? */
+ TAILQ_INIT(&(*frag)->fr_queue);
+ TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
@@ -615,8 +847,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
* - off contains the real shifted offset.
*/
frp = NULL;
- LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
- if (fra->fr_off > off)
+ TAILQ_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
+ if (fra->fe_off > off)
break;
frp = fra;
}
@@ -627,18 +859,18 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
if (frp != NULL) {
int precut;
- precut = frp->fr_end - off;
+ precut = frp->fe_len - off;
if (precut >= ip_len) {
/* Fragment is entirely a duplicate */
DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
- h->ip_id, frp->fr_off, frp->fr_end, off, max));
+ h->ip_id, frp->fe_off, frp->fe_len, off, max));
goto drop_fragment;
}
if (precut == 0) {
/* They are adjacent. Fixup cache entry */
DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
- h->ip_id, frp->fr_off, frp->fr_end, off, max));
- frp->fr_end = max;
+ h->ip_id, frp->fe_off, frp->fe_len, off, max));
+ frp->fe_len = max;
} else if (precut > 0) {
/* The first part of this payload overlaps with a
* fragment that has already been passed.
@@ -648,13 +880,13 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
*/
DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
- h->ip_id, precut, frp->fr_off, frp->fr_end, off,
+ h->ip_id, precut, frp->fe_off, frp->fe_len, off,
max));
off += precut;
max -= precut;
/* Update the previous frag to encompass this one */
- frp->fr_end = max;
+ frp->fe_len = max;
if (!drop) {
/* XXX Optimization opportunity
@@ -702,16 +934,16 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
/* There is a gap between fragments */
DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
- h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
+ h->ip_id, -precut, frp->fe_off, frp->fe_len, off,
max));
cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
if (cur == NULL)
goto no_mem;
- cur->fr_off = off;
- cur->fr_end = max;
- LIST_INSERT_AFTER(frp, cur, fr_next);
+ cur->fe_off = off;
+ cur->fe_len = max;
+ TAILQ_INSERT_AFTER(&(*frag)->fr_queue, frp, cur, fr_next);
}
}
@@ -719,19 +951,19 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
int aftercut;
int merge = 0;
- aftercut = max - fra->fr_off;
+ aftercut = max - fra->fe_off;
if (aftercut == 0) {
/* Adjacent fragments */
DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
- h->ip_id, off, max, fra->fr_off, fra->fr_end));
- fra->fr_off = off;
+ h->ip_id, off, max, fra->fe_off, fra->fe_len));
+ fra->fe_off = off;
merge = 1;
} else if (aftercut > 0) {
/* Need to chop off the tail of this fragment */
DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
- h->ip_id, aftercut, off, max, fra->fr_off,
- fra->fr_end));
- fra->fr_off = off;
+ h->ip_id, aftercut, off, max, fra->fe_off,
+ fra->fe_len));
+ fra->fe_off = off;
max -= aftercut;
merge = 1;
@@ -756,42 +988,42 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
} else if (frp == NULL) {
/* There is a gap between fragments */
DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
- h->ip_id, -aftercut, off, max, fra->fr_off,
- fra->fr_end));
+ h->ip_id, -aftercut, off, max, fra->fe_off,
+ fra->fe_len));
cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
if (cur == NULL)
goto no_mem;
- cur->fr_off = off;
- cur->fr_end = max;
- LIST_INSERT_BEFORE(fra, cur, fr_next);
+ cur->fe_off = off;
+ cur->fe_len = max;
+ TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
}
/* Need to glue together two separate fragment descriptors */
if (merge) {
- if (cur && fra->fr_off <= cur->fr_end) {
+ if (cur && fra->fe_off <= cur->fe_len) {
/* Need to merge in a previous 'cur' */
DPFPRINTF(("fragcache[%d]: adjacent(merge "
"%d-%d) %d-%d (%d-%d)\n",
- h->ip_id, cur->fr_off, cur->fr_end, off,
- max, fra->fr_off, fra->fr_end));
- fra->fr_off = cur->fr_off;
- LIST_REMOVE(cur, fr_next);
+ h->ip_id, cur->fe_off, cur->fe_len, off,
+ max, fra->fe_off, fra->fe_len));
+ fra->fe_off = cur->fe_off;
+ TAILQ_REMOVE(&(*frag)->fr_queue, cur, fr_next);
uma_zfree(V_pf_frent_z, cur);
cur = NULL;
- } else if (frp && fra->fr_off <= frp->fr_end) {
+ } else if (frp && fra->fe_off <= frp->fe_len) {
/* Need to merge in a modified 'frp' */
KASSERT((cur == NULL), ("cur != NULL: %s",
__FUNCTION__));
DPFPRINTF(("fragcache[%d]: adjacent(merge "
"%d-%d) %d-%d (%d-%d)\n",
- h->ip_id, frp->fr_off, frp->fr_end, off,
- max, fra->fr_off, fra->fr_end));
- fra->fr_off = frp->fr_off;
- LIST_REMOVE(frp, fr_next);
+ h->ip_id, frp->fe_off, frp->fe_len, off,
+ max, fra->fe_off, fra->fe_len));
+ fra->fe_off = frp->fe_off;
+ TAILQ_REMOVE(&(*frag)->fr_queue, frp, fr_next);
uma_zfree(V_pf_frent_z, frp);
frp = NULL;
@@ -820,8 +1052,8 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
/* Check if we are completely reassembled */
if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
- LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 &&
- LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) {
+ TAILQ_FIRST(&(*frag)->fr_queue)->fe_off == 0 &&
+ TAILQ_FIRST(&(*frag)->fr_queue)->fe_len == (*frag)->fr_max) {
/* Remove from fragment queue */
DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
(*frag)->fr_max));
@@ -859,14 +1091,85 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
return (NULL);
}
+#ifdef INET6
+int
+pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag)
+{
+ struct mbuf *m = *m0, *t;
+ struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1);
+ struct pf_pdesc pd;
+ uint16_t hdrlen, extoff, maxlen;
+ uint8_t proto;
+ int error, action;
+
+ hdrlen = ftag->ft_hdrlen;
+ extoff = ftag->ft_extoff;
+ maxlen = ftag->ft_maxlen;
+ m_tag_delete(m, mtag);
+ mtag = NULL;
+ ftag = NULL;
+
+ if (extoff) {
+ int off;
+
+ /* Use protocol from next field of last extension header */
+ m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
+ &off);
+ KASSERT((m != NULL), ("pf_refragment6: short mbuf chain"));
+ proto = *(mtod(m, caddr_t) + off);
+ *(mtod(m, char *) + off) = IPPROTO_FRAGMENT;
+ m = *m0;
+ } else {
+ struct ip6_hdr *hdr;
+
+ hdr = mtod(m, struct ip6_hdr *);
+ proto = hdr->ip6_nxt;
+ hdr->ip6_nxt = IPPROTO_FRAGMENT;
+ }
+
+ /*
+ * Maxlen may be less than 8 if there was only a single
+ * fragment. As it was fragmented before, add a fragment
+ * header also for a single fragment. If total or maxlen
+ * is less than 8, ip6_fragment() will return EMSGSIZE and
+ * we drop the packet.
+ */
+ error = ip6_fragment(ifp, m, hdrlen, proto, maxlen);
+ m = (*m0)->m_nextpkt;
+ (*m0)->m_nextpkt = NULL;
+ if (error == 0) {
+ /* The first mbuf contains the unfragmented packet. */
+ m_freem(*m0);
+ *m0 = NULL;
+ action = PF_PASS;
+ } else {
+ /* Drop expects an mbuf to free. */
+ DPFPRINTF(("refragment error %d", error));
+ action = PF_DROP;
+ }
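+	/* Detach and dispatch each fragment; t caches the successor
+	 * before m_nextpkt is cleared. */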
+ for (t = m; m; m = t) {
+ t = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ memset(&pd, 0, sizeof(pd));
+ pd.pf_mtag = pf_find_mtag(m);
+ if (error == 0)
+ ip6_forward(m, 0);
+ else
+ m_freem(m);
+ }
+
+ return (action);
+}
+#endif /* INET6 */
+
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
struct pf_pdesc *pd)
{
struct mbuf *m = *m0;
struct pf_rule *r;
- struct pf_frent *frent;
struct pf_fragment *frag = NULL;
+ struct pf_fragment_cmp key;
struct ip *h = mtod(m, struct ip *);
int mff = (ntohs(h->ip_off) & IP_MF);
int hlen = h->ip_hl << 2;
@@ -875,6 +1178,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
int ip_len;
int ip_off;
int tag = -1;
+ int verdict;
PF_RULES_RASSERT();
@@ -959,28 +1263,24 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
/* Fully buffer all of the fragments */
PF_FRAG_LOCK();
- frag = pf_find_fragment(h, &V_pf_frag_tree);
+
+ pf_ip2key(h, dir, &key);
+ frag = pf_find_fragment(&key, &V_pf_frag_tree);
/* Check if we saw the last fragment already */
if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
max > frag->fr_max)
goto bad;
- /* Get an entry for the fragment queue */
- frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
- if (frent == NULL) {
- PF_FRAG_UNLOCK();
- REASON_SET(reason, PFRES_MEMORY);
- return (PF_DROP);
- }
- frent->fr_ip = h;
- frent->fr_m = m;
-
/* Might return a completely reassembled mbuf, or NULL */
DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
- *m0 = m = pf_reassemble(m0, &frag, frent, mff);
+ verdict = pf_reassemble(m0, h, dir, reason);
PF_FRAG_UNLOCK();
+ if (verdict != PF_PASS)
+ return (PF_DROP);
+
+ m = *m0;
if (m == NULL)
return (PF_DROP);
@@ -1003,7 +1303,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
}
PF_FRAG_LOCK();
- frag = pf_find_fragment(h, &V_pf_cache_tree);
+ pf_ip2key(h, dir, &key);
+ frag = pf_find_fragment(&key, &V_pf_cache_tree);
/* Check if we saw the last fragment already */
if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
@@ -1096,13 +1397,13 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
struct mbuf *m = *m0;
struct pf_rule *r;
struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
+ int extoff;
int off;
struct ip6_ext ext;
struct ip6_opt opt;
struct ip6_opt_jumbo jumbo;
struct ip6_frag frag;
u_int32_t jumbolen = 0, plen;
- u_int16_t fragoff = 0;
int optend;
int ooff;
u_int8_t proto;
@@ -1146,6 +1447,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
goto drop;
+ extoff = 0;
off = sizeof(struct ip6_hdr);
proto = h->ip6_nxt;
terminal = 0;
@@ -1160,6 +1462,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
NULL, AF_INET6))
goto shortpkt;
+ extoff = off;
if (proto == IPPROTO_AH)
off += (ext.ip6e_len + 2) * 4;
else
@@ -1170,6 +1473,7 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
NULL, AF_INET6))
goto shortpkt;
+ extoff = off;
optend = off + (ext.ip6e_len + 1) * 8;
ooff = off + sizeof(ext);
do {
@@ -1233,18 +1537,27 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
return (PF_PASS);
fragment:
- if (ntohs(h->ip6_plen) == 0 || jumbolen)
- goto drop;
+ /* Jumbo payload packets cannot be fragmented. */
plen = ntohs(h->ip6_plen);
+ if (plen == 0 || jumbolen)
+ goto drop;
+ if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
+ goto shortpkt;
if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
goto shortpkt;
- fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
- if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
- goto badfrag;
- /* do something about it */
- /* remember to set pd->flags |= PFDESC_IP_REAS */
+ /* Offset now points to data portion. */
+ off += sizeof(frag);
+
+ /* Returns PF_DROP or *m0 is NULL or completely reassembled mbuf. */
+ if (pf_reassemble6(m0, h, &frag, off, extoff, dir, reason) != PF_PASS)
+ return (PF_DROP);
+ m = *m0;
+ if (m == NULL)
+ return (PF_DROP);
+
+ pd->flags |= PFDESC_IP_REAS;
return (PF_PASS);
shortpkt:
@@ -1260,13 +1573,6 @@ pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
1);
return (PF_DROP);
-
- badfrag:
- REASON_SET(reason, PFRES_FRAG);
- if (r != NULL && r->log)
- PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
- 1);
- return (PF_DROP);
}
#endif /* INET6 */
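
Taken together, the pf_norm.c rewrite (adapted from OpenBSD, judging by the
added Bluhm copyright) reduces reassembly to three family-independent
helpers: pf_fillup_fragment() inserts a pf_frent into the offset-ordered
queue for its key and trims overlaps, pf_isfull_fragment() checks the queue
for holes, and pf_join_fragment() concatenates the mbufs and frees the
entries. A condensed skeleton of the flow with error paths elided (IPv4
variant; the IPv6 one adds its own locking and the PF_REASSEMBLED tag):

	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);
	/* ... fill in frent->fe_* from the header ... */
	pf_ip2key(ip, dir, &key);
	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
		return (PF_DROP);
	*m0 = NULL;			/* the queue owns the mbuf now */
	if (!pf_isfull_fragment(frag))
		return (PF_PASS);	/* wait for more fragments */
	*m0 = pf_join_fragment(frag);	/* complete datagram */
	return (PF_PASS);
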
diff --git a/sys/ofed/drivers/infiniband/core/Makefile b/sys/ofed/drivers/infiniband/core/Makefile
deleted file mode 100644
index f646040..0000000
--- a/sys/ofed/drivers/infiniband/core/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
-user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
-
-obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
- ib_cm.o iw_cm.o $(infiniband-y)
-obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
- $(user_access-y)
-
-ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
- device.o fmr_pool.o cache.o
-ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
-
-ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
-
-ib_sa-y := sa_query.o multicast.o notice.o local_sa.o
-
-ib_cm-y := cm.o
-
-iw_cm-y := iwcm.o
-
-rdma_cm-y := cma.o
-
-rdma_ucm-y := ucma.o
-
-ib_addr-y := addr.o
-
-ib_umad-y := user_mad.o
-
-ib_ucm-y := ucm.o
-
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
diff --git a/sys/ofed/drivers/infiniband/core/addr.c b/sys/ofed/drivers/infiniband/core/addr.c
index c3d5b4f..e85b554 100644
--- a/sys/ofed/drivers/infiniband/core/addr.c
+++ b/sys/ofed/drivers/infiniband/core/addr.c
@@ -69,6 +69,7 @@ static LIST_HEAD(req_list);
static struct delayed_work work;
static struct workqueue_struct *addr_wq;
+static struct rdma_addr_client self;
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
@@ -89,19 +90,6 @@ void rdma_addr_unregister_client(struct rdma_addr_client *client)
}
EXPORT_SYMBOL(rdma_addr_unregister_client);
-#ifdef __linux__
-int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
- const unsigned char *dst_dev_addr)
-{
- dev_addr->dev_type = dev->type;
- memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
- if (dst_dev_addr)
- memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
- dev_addr->bound_dev_if = dev->ifindex;
- return 0;
-}
-#else
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
const unsigned char *dst_dev_addr)
{
@@ -119,10 +107,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
dev_addr->bound_dev_if = dev->if_index;
return 0;
}
-#endif
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+ u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
@@ -137,33 +125,21 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
}
switch (addr->sa_family) {
-#ifdef INET
case AF_INET:
- dev = ip_dev_find(NULL,
+ dev = ip_dev_find(&init_net,
((struct sockaddr_in *) addr)->sin_addr.s_addr);
if (!dev)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
-#endif
#if defined(INET6)
case AF_INET6:
-#ifdef __linux__
- read_lock(&dev_base_lock);
- for_each_netdev(&init_net, dev) {
- if (ipv6_chk_addr(&init_net,
- &((struct sockaddr_in6 *) addr)->sin6_addr,
- dev, 1)) {
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- break;
- }
- }
- read_unlock(&dev_base_lock);
-#else
{
struct sockaddr_in6 *sin6;
struct ifaddr *ifa;
@@ -179,12 +155,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
break;
}
ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
ifa_free(ifa);
break;
}
#endif
- break;
-#endif
}
return ret;
}
@@ -218,127 +194,6 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-#ifdef __linux__
-static int addr4_resolve(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
-{
- __be32 src_ip = src_in->sin_addr.s_addr;
- __be32 dst_ip = dst_in->sin_addr.s_addr;
- struct flowi fl;
- struct rtable *rt;
- struct neighbour *neigh;
- int ret;
-
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = dst_ip;
- fl.nl_u.ip4_u.saddr = src_ip;
- fl.oif = addr->bound_dev_if;
-
- ret = ip_route_output_key(&init_net, &rt, &fl);
- if (ret)
- goto out;
-
- src_in->sin_family = AF_INET;
- src_in->sin_addr.s_addr = rt->rt_src;
-
- if (rt->idev->dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
-
- /* If the device does ARP internally, return 'done' */
- if (rt->idev->dev->flags & IFF_NOARP) {
- rdma_copy_addr(addr, rt->idev->dev, NULL);
- goto put;
- }
-
- neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
- if (!neigh || !(neigh->nud_state & NUD_VALID)) {
- neigh_event_send(rt->u.dst.neighbour, NULL);
- ret = -ENODATA;
- if (neigh)
- goto release;
- goto put;
- }
-
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
-release:
- neigh_release(neigh);
-put:
- ip_rt_put(rt);
-out:
- return ret;
-}
-
-#if defined(INET6)
-static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
-{
- struct flowi fl;
- struct neighbour *neigh;
- struct dst_entry *dst;
- int ret;
-
- memset(&fl, 0, sizeof fl);
- ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
- ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
- fl.oif = addr->bound_dev_if;
-
- dst = ip6_route_output(&init_net, NULL, &fl);
- if ((ret = dst->error))
- goto put;
-
- if (ipv6_addr_any(&fl.fl6_src)) {
- ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
- &fl.fl6_dst, 0, &fl.fl6_src);
- if (ret)
- goto put;
-
- src_in->sin6_family = AF_INET6;
- ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
- }
-
- if (dst->dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
-
- /* If the device does ARP internally, return 'done' */
- if (dst->dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, dst->dev, NULL);
- goto put;
- }
-
- neigh = dst->neighbour;
- if (!neigh || !(neigh->nud_state & NUD_VALID)) {
- neigh_event_send(dst->neighbour, NULL);
- ret = -ENODATA;
- goto put;
- }
-
- ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
-put:
- dst_release(dst);
- return ret;
-}
-#else
-static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
-{
- return -EADDRNOTAVAIL;
-}
-#endif
-
-#else
-#include <netinet/if_ether.h>
-
static int addr_resolve(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
@@ -354,7 +209,6 @@ static int addr_resolve(struct sockaddr *src_in,
int bcast;
int is_gw = 0;
int error = 0;
-
/*
* Determine whether the address is unicast, multicast, or broadcast
* and whether the source interface is valid.
@@ -382,8 +236,7 @@ static int addr_resolve(struct sockaddr *src_in,
port = sin->sin_port;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
- } else
- src_in = NULL;
+ }
break;
#endif
#ifdef INET6
@@ -406,7 +259,7 @@ static int addr_resolve(struct sockaddr *src_in,
* If we have a source address to use look it up first and verify
* that it is a local interface.
*/
- if (src_in) {
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
ifa = ifa_ifwithaddr(src_in);
if (sin)
sin->sin_port = port;
@@ -436,15 +289,20 @@ static int addr_resolve(struct sockaddr *src_in,
* correct interface pointer and unlock the route.
*/
if (multi || bcast) {
- if (ifp == NULL)
+ if (ifp == NULL) {
ifp = rte->rt_ifp;
+ /* rt_ifa holds the route answer source address */
+ ifa = rte->rt_ifa;
+ }
RTFREE_LOCKED(rte);
} else if (ifp && ifp != rte->rt_ifp) {
RTFREE_LOCKED(rte);
return -ENETUNREACH;
} else {
- if (ifp == NULL)
+ if (ifp == NULL) {
ifp = rte->rt_ifp;
+ ifa = rte->rt_ifa;
+ }
RT_UNLOCK(rte);
}
mcast:
@@ -459,6 +317,8 @@ mcast:
error = rdma_copy_addr(addr, ifp,
LLADDR((struct sockaddr_dl *)llsa));
free(llsa, M_IFMADDR);
+ if (error == 0)
+ memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
return error;
}
/*
@@ -472,7 +332,7 @@ mcast:
#endif
#ifdef INET6
case AF_INET6:
- error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst,NULL);
+ error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, NULL);
break;
#endif
default:
@@ -480,15 +340,15 @@ mcast:
error = -EINVAL;
}
RTFREE(rte);
- if (error == 0)
+ if (error == 0) {
+ memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
return rdma_copy_addr(addr, ifp, edst);
+ }
if (error == EWOULDBLOCK)
return -ENODATA;
return -error;
}
-#endif
-
static void process_req(struct work_struct *work)
{
struct addr_req *req, *temp_req;
@@ -602,20 +462,94 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);
+struct resolve_cb_context {
+ struct rdma_dev_addr *addr;
+ struct completion comp;
+};
+
+static void resolve_cb(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *addr, void *context)
+{
+	memcpy(((struct resolve_cb_context *)context)->addr, addr,
+	    sizeof(struct rdma_dev_addr));
+ complete(&((struct resolve_cb_context *)context)->comp);
+}
+
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
+ u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ struct resolve_cb_context ctx;
+ struct net_device *dev;
+
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+
+
+ ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+ if (ret)
+ return ret;
+
+ ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+ if (ret)
+ return ret;
+
+ memset(&dev_addr, 0, sizeof(dev_addr));
+
+ ctx.addr = &dev_addr;
+ init_completion(&ctx.comp);
+ ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
+ &dev_addr, 1000, resolve_cb, &ctx);
+ if (ret)
+ return ret;
+
+ wait_for_completion(&ctx.comp);
+
+ memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
+ dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } gid_addr;
+
+ ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+
+ if (ret)
+ return ret;
+ memset(&dev_addr, 0, sizeof(dev_addr));
+ ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
+ if (ret)
+ return ret;
+
+ memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
+
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
if (event == NETEVENT_NEIGH_UPDATE) {
-#ifdef __linux__
- struct neighbour *neigh = ctx;
-
- if (neigh->nud_state & NUD_VALID) {
set_timeout(jiffies);
}
-#else
- set_timeout(jiffies);
-#endif
- }
return 0;
}
@@ -631,11 +565,13 @@ static int __init addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
+ rdma_addr_register_client(&self);
return 0;
}
static void __exit addr_cleanup(void)
{
+ rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}
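
The new rdma_addr_find_dmac_by_grh() above turns the asynchronous
rdma_resolve_ip() interface into a blocking call: the callback copies the
resolved device address into caller-owned storage and signals a completion
that the caller waits on. A minimal sketch of the pattern, assuming the
callback signature shown in the diff (resolve_sync() and its parameters are
illustrative; &self is the module's registered rdma client):

    struct resolve_cb_context {
            struct rdma_dev_addr *addr;
            struct completion comp;
    };

    static void resolve_cb(int status, struct sockaddr *src_addr,
        struct rdma_dev_addr *addr, void *context)
    {
            struct resolve_cb_context *ctx = context;

            /* Hand the resolved address to the waiting caller. */
            memcpy(ctx->addr, addr, sizeof(*ctx->addr));
            complete(&ctx->comp);
    }

    /* Illustrative caller: resolve synchronously with a 1000 ms timeout. */
    static int resolve_sync(struct sockaddr *src, struct sockaddr *dst)
    {
            struct rdma_dev_addr dev_addr;
            struct resolve_cb_context ctx = { .addr = &dev_addr };
            int ret;

            memset(&dev_addr, 0, sizeof(dev_addr));
            init_completion(&ctx.comp);
            ret = rdma_resolve_ip(&self, src, dst, &dev_addr, 1000,
                resolve_cb, &ctx);
            if (ret == 0)
                    wait_for_completion(&ctx.comp);
            return ret;
    }
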
diff --git a/sys/ofed/drivers/infiniband/core/cache.c b/sys/ofed/drivers/infiniband/core/cache.c
index 660bff5..d11e7c2 100644
--- a/sys/ofed/drivers/infiniband/core/cache.c
+++ b/sys/ofed/drivers/infiniband/core/cache.c
@@ -76,19 +76,21 @@ int ib_get_cached_gid(struct ib_device *device,
{
struct ib_gid_cache *cache;
unsigned long flags;
- int ret = 0;
+ int ret = -EINVAL;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.gid_cache[port_num - start_port(device)];
+ if (device->cache.gid_cache) {
+ cache = device->cache.gid_cache[port_num - start_port(device)];
- if (index < 0 || index >= cache->table_len)
- ret = -EINVAL;
- else
- *gid = cache->table[index];
+ if (cache && index >= 0 && index < cache->table_len) {
+ *gid = cache->table[index];
+ ret = 0;
+ }
+ }
read_unlock_irqrestore(&device->cache.lock, flags);
@@ -111,22 +113,24 @@ int ib_find_cached_gid(struct ib_device *device,
*index = -1;
read_lock_irqsave(&device->cache.lock, flags);
-
+ if (!device->cache.gid_cache)
+ goto out;
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
cache = device->cache.gid_cache[p];
+ if (!cache)
+ continue;
for (i = 0; i < cache->table_len; ++i) {
if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
*port_num = p + start_port(device);
if (index)
*index = i;
ret = 0;
- goto found;
+ goto out;
}
}
}
-found:
+out:
read_unlock_irqrestore(&device->cache.lock, flags);
-
return ret;
}
EXPORT_SYMBOL(ib_find_cached_gid);
@@ -138,19 +142,21 @@ int ib_get_cached_pkey(struct ib_device *device,
{
struct ib_pkey_cache *cache;
unsigned long flags;
- int ret = 0;
+ int ret = -EINVAL;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - start_port(device)];
+ if (device->cache.pkey_cache) {
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
- if (index < 0 || index >= cache->table_len)
- ret = -EINVAL;
- else
- *pkey = cache->table[index];
+ if (cache && index >= 0 && index < cache->table_len) {
+ *pkey = cache->table[index];
+ ret = 0;
+ }
+ }
read_unlock_irqrestore(&device->cache.lock, flags);
@@ -167,41 +173,93 @@ int ib_find_cached_pkey(struct ib_device *device,
unsigned long flags;
int i;
int ret = -ENOENT;
+ int partial_ix = -1;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
+ *index = -1;
+
read_lock_irqsave(&device->cache.lock, flags);
+ if (!device->cache.pkey_cache)
+ goto out;
+
cache = device->cache.pkey_cache[port_num - start_port(device)];
+ if (!cache)
+ goto out;
+
+ for (i = 0; i < cache->table_len; ++i)
+ if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+ if (cache->table[i] & 0x8000) {
+ *index = i;
+ ret = 0;
+ break;
+ } else
+ partial_ix = i;
+ }
+
+ if (ret && partial_ix >= 0) {
+ *index = partial_ix;
+ ret = 0;
+ }
+out:
+ read_unlock_irqrestore(&device->cache.lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_find_cached_pkey);
+
+int ib_find_exact_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ u16 pkey,
+ u16 *index)
+{
+ struct ib_pkey_cache *cache;
+ unsigned long flags;
+ int i;
+ int ret = -ENOENT;
+
+ if (port_num < start_port(device) || port_num > end_port(device))
+ return -EINVAL;
*index = -1;
+ read_lock_irqsave(&device->cache.lock, flags);
+
+ if (!device->cache.pkey_cache)
+ goto out;
+
+ cache = device->cache.pkey_cache[port_num - start_port(device)];
+ if (!cache)
+ goto out;
+
for (i = 0; i < cache->table_len; ++i)
- if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+ if (cache->table[i] == pkey) {
*index = i;
ret = 0;
break;
}
-
+out:
read_unlock_irqrestore(&device->cache.lock, flags);
-
return ret;
}
-EXPORT_SYMBOL(ib_find_cached_pkey);
+EXPORT_SYMBOL(ib_find_exact_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
u8 port_num,
u8 *lmc)
{
unsigned long flags;
- int ret = 0;
+ int ret = -EINVAL;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- *lmc = device->cache.lmc_cache[port_num - start_port(device)];
+ if (device->cache.lmc_cache) {
+ *lmc = device->cache.lmc_cache[port_num - start_port(device)];
+ ret = 0;
+ }
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
@@ -217,6 +275,10 @@ static void ib_cache_update(struct ib_device *device,
int i;
int ret;
+ if (!(device->cache.pkey_cache && device->cache.gid_cache &&
+ device->cache.lmc_cache))
+ return;
+
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
return;
@@ -309,7 +371,7 @@ static void ib_cache_event(struct ib_event_handler *handler,
INIT_WORK(&work->work, ib_cache_task);
work->device = event->device;
work->port_num = event->element.port_num;
- schedule_work(&work->work);
+ queue_work(ib_wq, &work->work);
}
}
}
@@ -362,14 +424,21 @@ err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
+ device->cache.pkey_cache = NULL;
+ device->cache.gid_cache = NULL;
+ device->cache.lmc_cache = NULL;
}
static void ib_cache_cleanup_one(struct ib_device *device)
{
int p;
+ if (!(device->cache.pkey_cache && device->cache.gid_cache &&
+ device->cache.lmc_cache))
+ return;
+
ib_unregister_event_handler(&device->cache.event_handler);
- flush_scheduled_work();
+ flush_workqueue(ib_wq);
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
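
The reworked ib_find_cached_pkey() above implements the IBA membership rule
for P_Keys: the low 15 bits name the partition and bit 15 distinguishes full
from limited membership, so a full-member entry wins and a limited-member
entry is used only as a fallback. A condensed, self-contained sketch of the
matching rule (the table/len parameters are illustrative):

    /* Return the index of the best match for pkey, or -1 if none. */
    static int find_pkey(const u16 *table, int len, u16 pkey)
    {
            int i, partial_ix = -1;

            for (i = 0; i < len; ++i) {
                    if ((table[i] & 0x7fff) != (pkey & 0x7fff))
                            continue;       /* different partition */
                    if (table[i] & 0x8000)
                            return i;       /* full membership wins */
                    partial_ix = i;         /* remember a limited match */
            }
            return partial_ix;
    }
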
diff --git a/sys/ofed/drivers/infiniband/core/cm.c b/sys/ofed/drivers/infiniband/core/cm.c
index 3d2794d..07f6e08 100644
--- a/sys/ofed/drivers/infiniband/core/cm.c
+++ b/sys/ofed/drivers/infiniband/core/cm.c
@@ -36,16 +36,19 @@
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
+#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
+#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/string.h>
+#include <linux/etherdevice.h>
#include <asm/atomic-long.h>
@@ -57,16 +60,10 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
-#define PFX "ib_cm: "
-
-/*
- * Limit CM message timeouts to something reasonable:
- * 8 seconds per message, with up to 15 retries
- */
-static int max_timeout = 21;
-module_param(max_timeout, int, 0644);
-MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
- "(default=21, or ~8 seconds)");
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);
@@ -189,6 +186,8 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
+ u8 valid;
+ u8 smac[ETH_ALEN];
};
struct cm_work {
@@ -358,6 +357,23 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
+int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = container_of(id, struct cm_id_private, id);
+
+ if (smac != NULL)
+ memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));
+
+ if (alt_smac != NULL)
+ memcpy(cm_id_priv->alt_av.smac, alt_smac,
+ sizeof(cm_id_priv->alt_av.smac));
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_update_cm_av);
+
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
struct cm_device *cm_dev;
@@ -388,6 +404,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
+ memcpy(av->smac, path->smac, sizeof(av->smac));
+
+ av->valid = 1;
return 0;
}
@@ -402,7 +421,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
next_id, &id);
if (!ret)
- next_id = ((unsigned) id + 1) & MAX_ID_MASK;
+ next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
spin_unlock_irqrestore(&cm.lock, flags);
} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
@@ -794,11 +813,11 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
}
}
-static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id, gfp_t flags)
{
struct cm_timewait_info *timewait_info;
- timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
+ timewait_info = kzalloc(sizeof *timewait_info, flags);
if (!timewait_info)
return ERR_PTR(-ENOMEM);
@@ -902,6 +921,8 @@ retest:
break;
case IB_CM_ESTABLISHED:
spin_unlock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+ break;
ib_send_cm_dreq(cm_id, NULL, 0);
goto retest;
case IB_CM_DREQ_SENT:
@@ -1021,33 +1042,24 @@ static void cm_format_req(struct cm_req_msg *req_msg,
req_msg->service_id = param->service_id;
req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
- cm_req_set_resp_res(req_msg, param->responder_resources);
cm_req_set_init_depth(req_msg, param->initiator_depth);
cm_req_set_remote_resp_timeout(req_msg,
param->remote_cm_response_timeout);
- if (param->remote_cm_response_timeout > (u8) max_timeout) {
- printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
- "%d, decreasing\n", param->remote_cm_response_timeout,
- max_timeout);
- cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
- }
cm_req_set_qp_type(req_msg, param->qp_type);
cm_req_set_flow_ctrl(req_msg, param->flow_control);
cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
cm_req_set_local_resp_timeout(req_msg,
param->local_cm_response_timeout);
- if (param->local_cm_response_timeout > (u8) max_timeout) {
- printk(KERN_WARNING PFX "req local_cm_response_timeout %d > "
- "%d, decreasing\n", param->local_cm_response_timeout,
- max_timeout);
- cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout);
- }
- cm_req_set_retry_count(req_msg, param->retry_count);
req_msg->pkey = param->primary_path->pkey;
cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
- cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+
+ if (param->qp_type != IB_QPT_XRC_INI) {
+ cm_req_set_resp_res(req_msg, param->responder_resources);
+ cm_req_set_retry_count(req_msg, param->retry_count);
+ cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
cm_req_set_srq(req_msg, param->srq);
+ }
if (pri_path->hop_limit <= 1) {
req_msg->primary_local_lid = pri_path->slid;
@@ -1105,7 +1117,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
if (!param->primary_path)
return -EINVAL;
- if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
+ if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
+ param->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
if (param->private_data &&
@@ -1137,38 +1150,34 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_IDLE) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
- id.local_id);
+ id.local_id,
+ GFP_ATOMIC);
if (IS_ERR(cm_id_priv->timewait_info)) {
- ret = PTR_ERR(cm_id_priv->timewait_info);
- goto out;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return (PTR_ERR(cm_id_priv->timewait_info));
}
ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
- if (ret)
- goto error1;
- if (param->alternate_path) {
+ if (!ret && param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path,
&cm_id_priv->alt_av);
- if (ret)
+ }
+ if (ret) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
goto error1;
}
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
cm_id->service_id = param->service_id;
cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = cm_convert_to_ms(
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
param->remote_cm_response_timeout);
- if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
- printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n",
- cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout));
- cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
- }
cm_id_priv->max_cm_retries = param->max_cm_retries;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
@@ -1201,9 +1210,11 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
-error2: cm_free_msg(cm_id_priv->msg);
-error1: kfree(cm_id_priv->timewait_info);
-out: return ret;
+error2:
+ cm_free_msg(cm_id_priv->msg);
+error1:
+ kfree(cm_id_priv->timewait_info);
+ return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
@@ -1556,7 +1567,8 @@ static int cm_req_handler(struct cm_work *work)
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
- id.local_id);
+ id.local_id,
+ GFP_KERNEL);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
goto destroy;
@@ -1579,6 +1591,10 @@ static int cm_req_handler(struct cm_work *work)
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
+
+ /* Workaround: path in req_msg doesn't contain MAC, take it from wc */
+ memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, 6);
+ work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1600,13 +1616,6 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->tid = req_msg->hdr.tid;
cm_id_priv->timeout_ms = cm_convert_to_ms(
cm_req_get_local_resp_timeout(req_msg));
- if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) {
- printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, "
- "decreasing used timeout_ms\n",
- cm_req_get_local_resp_timeout(req_msg), max_timeout);
- cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
- }
-
cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
@@ -1638,18 +1647,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
rep_msg->local_comm_id = cm_id_priv->id.local_id;
rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
rep_msg->resp_resources = param->responder_resources;
- rep_msg->initiator_depth = param->initiator_depth;
cm_rep_set_target_ack_delay(rep_msg,
cm_id_priv->av.port->cm_dev->ack_delay);
cm_rep_set_failover(rep_msg, param->failover_accepted);
- cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
- cm_rep_set_srq(rep_msg, param->srq);
rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+ if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
+ rep_msg->initiator_depth = param->initiator_depth;
+ cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
+ cm_rep_set_srq(rep_msg, param->srq);
+ cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+ } else {
+ cm_rep_set_srq(rep_msg, 1);
+ cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+ }
+
if (param->private_data && param->private_data_len)
memcpy(rep_msg->private_data, param->private_data,
param->private_data_len);
@@ -1672,6 +1687,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
+ pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -1697,7 +1713,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
- cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
+ cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
@@ -1738,6 +1754,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
+ pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto error;
}
@@ -1768,7 +1785,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
EXPORT_SYMBOL(ib_send_cm_rtu);
-static void cm_format_rep_event(struct cm_work *work)
+static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
{
struct cm_rep_msg *rep_msg;
struct ib_cm_rep_event_param *param;
@@ -1777,7 +1794,7 @@ static void cm_format_rep_event(struct cm_work *work)
param = &work->cm_event.param.rep_rcvd;
param->remote_ca_guid = rep_msg->local_ca_guid;
param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
- param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
+ param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
param->responder_resources = rep_msg->initiator_depth;
param->initiator_depth = rep_msg->resp_resources;
@@ -1842,10 +1859,11 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
+ pr_debug("no cm_id_priv\n");
return -EINVAL;
}
- cm_format_rep_event(work);
+ cm_format_rep_event(work, cm_id_priv->qp_type);
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
@@ -1855,12 +1873,13 @@ static int cm_rep_handler(struct cm_work *work)
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state);
goto error;
}
cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
- cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+ cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
spin_lock(&cm.lock);
/* Check for duplicate REP. */
@@ -1868,6 +1887,7 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ pr_debug("Failed to insert remote id\n");
goto error;
}
/* Check for a stale connection. */
@@ -1881,13 +1901,14 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ pr_debug("Stale connection.\n");
goto error;
}
spin_unlock(&cm.lock);
cm_id_priv->id.state = IB_CM_REP_RCVD;
cm_id_priv->id.remote_id = rep_msg->local_comm_id;
- cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+ cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
cm_id_priv->initiator_depth = rep_msg->resp_resources;
cm_id_priv->responder_resources = rep_msg->initiator_depth;
cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
@@ -2021,10 +2042,15 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_ESTABLISHED) {
+ pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto out;
}
+ if (cm_id->lap_state == IB_CM_LAP_SENT ||
+ cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret) {
cm_enter_timewait(cm_id_priv);
@@ -2086,6 +2112,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
if (cm_id->state != IB_CM_DREQ_RCVD) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
kfree(data);
+ pr_debug("cm_id->state(%d) != IB_CM_DREQ_RCVD\n", cm_id->state);
return -EINVAL;
}
@@ -2151,6 +2178,7 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
+ pr_debug("no cm_id_priv\n");
return -EINVAL;
}
@@ -2166,6 +2194,10 @@ static int cm_dreq_handler(struct cm_work *work)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ break;
case IB_CM_MRA_REP_RCVD:
break;
case IB_CM_TIMEWAIT:
@@ -2187,6 +2219,7 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
+ pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2290,6 +2323,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
cm_enter_timewait(cm_id_priv);
break;
default:
+ pr_debug("cm_id->state: 0x%x\n", cm_id->state);
ret = -EINVAL;
goto out;
}
@@ -2386,11 +2420,21 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
+ cm_enter_timewait(cm_id_priv);
+ break;
case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
+ cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+ cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
break;
+ }
+ /* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
+ pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
goto out;
}
@@ -2453,6 +2497,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
break;
}
default:
+ pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
goto error1;
}
@@ -2518,12 +2563,6 @@ static int cm_mra_handler(struct cm_work *work)
cm_mra_get_service_timeout(mra_msg);
timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
cm_convert_to_ms(cm_id_priv->av.timeout);
- if (timeout > cm_convert_to_ms(max_timeout)) {
- printk(KERN_WARNING PFX "calculated mra timeout %d > %d, "
- "decreasing used timeout_ms\n", timeout,
- cm_convert_to_ms(max_timeout));
- timeout = cm_convert_to_ms(max_timeout);
- }
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
@@ -2560,6 +2599,7 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
/* fall through */
default:
+ pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
goto out;
}
@@ -2746,7 +2786,8 @@ static int cm_lap_handler(struct cm_work *work)
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
- cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+ if (cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av))
+ goto unlock;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -2938,6 +2979,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
if (ret)
goto out;
@@ -2945,12 +2989,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
cm_id->service_id = param->service_id;
cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = param->timeout_ms;
- if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
- printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, "
- "decreasing used timeout_ms\n", param->timeout_ms,
- cm_convert_to_ms(max_timeout));
- cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
- }
cm_id_priv->max_cm_retries = param->max_cm_retries;
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret)
@@ -2961,21 +2999,19 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
- spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state == IB_CM_IDLE)
ret = ib_post_send_mad(msg, NULL);
else
ret = -EINVAL;
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
goto out;
}
cm_id->state = IB_CM_SIDR_REQ_SENT;
cm_id_priv->msg = msg;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);
@@ -3038,6 +3074,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
goto out; /* No match. */
}
atomic_inc(&cur_cm_id_priv->refcount);
+ atomic_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
@@ -3302,6 +3339,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
+ pr_debug("work->cm_event.event: 0x%x\n", work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3332,6 +3370,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
+ pr_debug("cm_id->state: 0x%x\n", cm_id->state);
ret = -EINVAL;
break;
}
@@ -3494,6 +3533,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3520,10 +3560,36 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if (!cm_id_priv->av.valid)
+ return -EINVAL;
+ if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
+ qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
+ *qp_attr_mask |= IB_QP_VID;
+ }
+ if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
+ memcpy(qp_attr->smac, cm_id_priv->av.smac,
+ sizeof(qp_attr->smac));
+ *qp_attr_mask |= IB_QP_SMAC;
+ }
+ if (cm_id_priv->alt_av.valid) {
+ if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
+ qp_attr->alt_vlan_id =
+ cm_id_priv->alt_av.ah_attr.vlan_id;
+ *qp_attr_mask |= IB_QP_ALT_VID;
+ }
+ if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
+ memcpy(qp_attr->alt_smac,
+ cm_id_priv->alt_av.smac,
+ sizeof(qp_attr->alt_smac));
+ *qp_attr_mask |= IB_QP_ALT_SMAC;
+ }
+ }
+
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
- if (cm_id_priv->qp_type == IB_QPT_RC) {
+ if (cm_id_priv->qp_type == IB_QPT_RC ||
+ cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER;
qp_attr->max_dest_rd_atomic =
@@ -3540,6 +3606,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3568,15 +3635,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
- if (cm_id_priv->qp_type == IB_QPT_RC) {
- *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY |
+ switch (cm_id_priv->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_XRC_INI:
+ *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC;
- qp_attr->timeout = cm_id_priv->av.timeout;
qp_attr->retry_cnt = cm_id_priv->retry_count;
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
- qp_attr->max_rd_atomic =
- cm_id_priv->initiator_depth;
+ qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+ /* fall through */
+ case IB_QPT_XRC_TGT:
+ *qp_attr_mask |= IB_QP_TIMEOUT;
+ qp_attr->timeout = cm_id_priv->av.timeout;
+ break;
+ default:
+ break;
}
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
@@ -3593,6 +3666,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@@ -3619,6 +3693,7 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
break;
default:
+ pr_debug("qp_attr->qp_state: 0x%x\n", qp_attr->qp_state);
ret = -EINVAL;
break;
}
@@ -3649,7 +3724,7 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
atomic_long_read(&group->counter[cm_attr->index]));
}
-static struct sysfs_ops cm_counter_ops = {
+static const struct sysfs_ops cm_counter_ops = {
.show = cm_show_counter
};
@@ -3670,8 +3745,17 @@ static struct kobj_type cm_port_obj_type = {
.release = cm_release_port_obj
};
+static char *cm_devnode(struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
struct class cm_class = {
+ .owner = THIS_MODULE,
.name = "infiniband_cm",
+ .devnode = cm_devnode,
};
EXPORT_SYMBOL(cm_class);
@@ -3745,7 +3829,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
"%s", ib_device->name);
- if (!cm_dev->device) {
+ if (IS_ERR(cm_dev->device)) {
kfree(cm_dev);
return;
}
@@ -3846,28 +3930,33 @@ static int __init ib_cm_init(void)
cm.remote_sidr_table = RB_ROOT;
idr_init(&cm.local_id_table);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
- idr_pre_get(&cm.local_id_table, GFP_KERNEL);
+ if (!idr_pre_get(&cm.local_id_table, GFP_KERNEL))
+ return -ENOMEM;
INIT_LIST_HEAD(&cm.timewait_list);
ret = class_register(&cm_class);
- if (ret)
- return -ENOMEM;
+ if (ret) {
+ ret = -ENOMEM;
+ goto error1;
+ }
cm.wq = create_workqueue("ib_cm");
if (!cm.wq) {
ret = -ENOMEM;
- goto error1;
+ goto error2;
}
ret = ib_register_client(&cm_client);
if (ret)
- goto error2;
+ goto error3;
return 0;
-error2:
+error3:
destroy_workqueue(cm.wq);
-error1:
+error2:
class_unregister(&cm_class);
+error1:
+ idr_destroy(&cm.local_id_table);
return ret;
}
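
The ib_cm_init() changes above also tighten the error path into the usual
kernel unwind idiom: one goto label per acquired resource, released in
reverse order of acquisition, now including idr_destroy() for the local id
table. The shape, reduced to a sketch with placeholder steps (every name
below is illustrative):

    static bool setup_a(void);              /* stands in for idr_pre_get() */
    static int  setup_b(void), setup_c(void);
    static void teardown_a(void), teardown_b(void);

    static int init_example(void)
    {
            int ret;

            if (!setup_a())
                    return -ENOMEM;
            ret = setup_b();
            if (ret)
                    goto error1;
            ret = setup_c();
            if (ret)
                    goto error2;
            return 0;
    error2:
            teardown_b();                   /* undo in reverse order */
    error1:
            teardown_a();
            return ret;
    }
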
diff --git a/sys/ofed/drivers/infiniband/core/cm_msgs.h b/sys/ofed/drivers/infiniband/core/cm_msgs.h
index 7e63c08..be068f4 100644
--- a/sys/ofed/drivers/infiniband/core/cm_msgs.h
+++ b/sys/ofed/drivers/infiniband/core/cm_msgs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
*
@@ -44,18 +44,6 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
-#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
-#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
-#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
-#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
-#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
-#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
-#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
-#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
-#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
-#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
-
enum cm_msg_sequence {
CM_MSG_SEQUENCE_REQ,
CM_MSG_SEQUENCE_LAP,
@@ -86,7 +74,7 @@ struct cm_req_msg {
__be16 pkey;
/* path MTU:4, RDC exists:1, RNR retry count:3. */
u8 offset50;
- /* max CM Retries:4, SRQ:1, rsvd:3 */
+ /* max CM Retries:4, SRQ:1, extended transport type:3 */
u8 offset51;
__be16 primary_local_lid;
@@ -175,6 +163,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
switch(transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
+ case 3:
+ switch (req_msg->offset51 & 0x7) {
+ case 1: return IB_QPT_XRC_TGT;
+ default: return 0;
+ }
default: return 0;
}
}
@@ -188,6 +181,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
req_msg->offset40) &
0xFFFFFFF9) | 0x2);
break;
+ case IB_QPT_XRC_INI:
+ req_msg->offset40 = cpu_to_be32((be32_to_cpu(
+ req_msg->offset40) &
+ 0xFFFFFFF9) | 0x6);
+ req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+ break;
default:
req_msg->offset40 = cpu_to_be32(be32_to_cpu(
req_msg->offset40) &
@@ -527,6 +526,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
(be32_to_cpu(rep_msg->offset12) & 0x000000FF));
}
+static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
+{
+ return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
+}
+
+static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
+{
+ rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
+ (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
+}
+
+static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
+{
+ return (qp_type == IB_QPT_XRC_INI) ?
+ cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
+}
+
static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
@@ -771,6 +787,7 @@ struct cm_apr_msg {
u8 info_length;
u8 ap_status;
+ __be16 rsvd;
u8 info[IB_CM_APR_INFO_LENGTH];
u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
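
The new cm_rep_get_qpn()/cm_rep_set_local_eecn() accessors above depend on
the REP wire layout packing a 24-bit QPN or EECN into the top three bytes of
a 32-bit big-endian word. Stripped of the byte-order conversion the real
accessors perform, the packing reduces to this host-order sketch (names are
illustrative):

    /* The top 24 bits carry the QPN/EECN; the low byte is a separate field. */
    static inline u32 get_qpn24(u32 word)
    {
            return word >> 8;
    }

    static inline u32 set_qpn24(u32 word, u32 qpn)
    {
            return (qpn << 8) | (word & 0xFF);      /* preserve the low byte */
    }
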
diff --git a/sys/ofed/drivers/infiniband/core/cma.c b/sys/ofed/drivers/infiniband/core/cma.c
index 318beb1..d2064b6 100644
--- a/sys/ofed/drivers/infiniband/core/cma.c
+++ b/sys/ofed/drivers/infiniband/core/cma.c
@@ -40,6 +40,10 @@
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <net/route.h>
#include <net/tcp.h>
#include <net/ipv6.h>
@@ -55,28 +59,47 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
-static int tavor_quirk = 0;
-module_param_named(tavor_quirk, tavor_quirk, int, 0644);
-MODULE_PARM_DESC(tavor_quirk, "Tavor performance quirk: limit MTU to 1K if > 0");
-
-int unify_tcp_port_space = 1;
-module_param(unify_tcp_port_space, int, 0644);
-MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port "
- "space allocation (default=1)");
-
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
-#define IBOE_PACKET_LIFETIME 18
+#define CMA_IBOE_PACKET_LIFETIME 18
static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
module_param_named(cma_response_timeout, cma_response_timeout, int, 0644);
-MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT default=20");
+MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT (default=20)");
static int def_prec2sl = 3;
module_param_named(def_prec2sl, def_prec2sl, int, 0644);
MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. Valid values 0 - 7");
+static int debug_level = 0;
+#define cma_pr(level, priv, format, arg...) \
+ printk(level "CMA: %p: %s: " format, ((struct rdma_id_priv *) priv), __func__, ## arg)
+
+#define cma_dbg(priv, format, arg...) \
+ do { if (debug_level) cma_pr(KERN_DEBUG, priv, format, ## arg); } while (0)
+
+#define cma_warn(priv, format, arg...) \
+ cma_pr(KERN_WARNING, priv, format, ## arg)
+
+#define CMA_GID_FMT "%2.2x%2.2x:%2.2x%2.2x"
+#define CMA_GID_RAW_ARG(gid) ((u8 *)(gid))[12],\
+ ((u8 *)(gid))[13],\
+ ((u8 *)(gid))[14],\
+ ((u8 *)(gid))[15]
+
+#define CMA_GID_ARG(gid) CMA_GID_RAW_ARG((gid).raw)
+#define cma_debug_path(priv, pfx, p) \
+ cma_dbg(priv, pfx "sgid=" CMA_GID_FMT ",dgid=" \
+ CMA_GID_FMT "\n", CMA_GID_ARG(p.sgid), \
+ CMA_GID_ARG(p.dgid))
+
+#define cma_debug_gid(priv, g) \
+ cma_dbg(priv, "gid=" CMA_GID_FMT "\n", CMA_GID_ARG(g))
+
+module_param_named(debug_level, debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "debug level (default=0)");
+
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);
@@ -92,13 +115,12 @@ static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
+static struct workqueue_struct *cma_free_wq;
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
static DEFINE_IDR(ipoib_ps);
-#if defined(INET)
-static int next_port;
-#endif
+static DEFINE_IDR(ib_ps);
struct cma_device {
struct list_head list;
@@ -108,26 +130,16 @@ struct cma_device {
struct list_head id_list;
};
-enum cma_state {
- CMA_IDLE,
- CMA_ADDR_QUERY,
- CMA_ADDR_RESOLVED,
- CMA_ROUTE_QUERY,
- CMA_ROUTE_RESOLVED,
- CMA_CONNECT,
- CMA_DISCONNECT,
- CMA_ADDR_BOUND,
- CMA_LISTEN,
- CMA_DEVICE_REMOVAL,
- CMA_DESTROYING
-};
-
struct rdma_bind_list {
struct idr *ps;
struct hlist_head owners;
unsigned short port;
};
+enum {
+ CMA_OPTION_AFONLY,
+};
+
/*
 * Device removal can occur at any time, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -138,7 +150,7 @@ struct rdma_id_private {
struct rdma_cm_id id;
struct rdma_bind_list *bind_list;
- struct socket *sock;
+ struct socket *sock;
struct hlist_node node;
struct list_head list; /* listen_any_list or cma_device.list */
struct list_head listen_list; /* per device listens */
@@ -146,13 +158,15 @@ struct rdma_id_private {
struct list_head mc_list;
int internal_id;
- enum cma_state state;
+ enum rdma_cm_state state;
spinlock_t lock;
+ spinlock_t cm_lock;
struct mutex qp_mutex;
struct completion comp;
atomic_t refcount;
struct mutex handler_mutex;
+ struct work_struct work; /* garbage coll */
int backlog;
int timeout_ms;
@@ -166,8 +180,16 @@ struct rdma_id_private {
u32 seq_num;
u32 qkey;
u32 qp_num;
+ pid_t owner;
+ u32 options;
u8 srq;
u8 tos;
+ u8 reuseaddr;
+ u8 afonly;
+ int qp_timeout;
+ /* cache for mc record params */
+ struct ib_sa_mcmember_rec rec;
+ int is_valid_rec;
};
struct cma_multicast {
@@ -184,8 +206,8 @@ struct cma_multicast {
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
- enum cma_state old_state;
- enum cma_state new_state;
+ enum rdma_cm_state old_state;
+ enum rdma_cm_state new_state;
struct rdma_cm_event event;
};
@@ -236,7 +258,7 @@ struct sdp_hah {
#define CMA_VERSION 0x00
#define SDP_MAJ_VERSION 0x2
-static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
+static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
unsigned long flags;
int ret;
@@ -248,7 +270,7 @@ static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
}
static int cma_comp_exch(struct rdma_id_private *id_priv,
- enum cma_state comp, enum cma_state exch)
+ enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
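
cma_comp_exch(), whose state type changes above, is a lock-protected
compare-and-swap on the id's state: the transition is applied only while the
id is still in the expected state, which is how racing callbacks detect that
a destroy or another transition beat them. A sketch of the idiom, assuming
the lock and state fields shown elsewhere in this file:

    static int comp_exch_sketch(struct rdma_id_private *id_priv,
        enum rdma_cm_state comp, enum rdma_cm_state exch)
    {
            unsigned long flags;
            int ret;

            spin_lock_irqsave(&id_priv->lock, flags);
            ret = (id_priv->state == comp);
            if (ret)
                    id_priv->state = exch;  /* only from the expected state */
            spin_unlock_irqrestore(&id_priv->lock, flags);
            return ret;
    }
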
@@ -260,11 +282,11 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
return ret;
}
-static enum cma_state cma_exch(struct rdma_id_private *id_priv,
- enum cma_state exch)
+static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
+ enum rdma_cm_state exch)
{
unsigned long flags;
- enum cma_state old;
+ enum rdma_cm_state old;
spin_lock_irqsave(&id_priv->lock, flags);
old = id_priv->state;
@@ -298,11 +320,6 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
}
-static inline int cma_is_ud_ps(enum rdma_port_space ps)
-{
- return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
-}
-
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -328,11 +345,13 @@ static inline void release_mc(struct kref *kref)
kfree(mc);
}
-static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+static void cma_release_dev(struct rdma_id_private *id_priv)
{
+ mutex_lock(&lock);
list_del(&id_priv->list);
cma_deref_dev(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ mutex_unlock(&lock);
}
static int cma_set_qkey(struct rdma_id_private *id_priv)
@@ -361,36 +380,71 @@ static int cma_set_qkey(struct rdma_id_private *id_priv)
return ret;
}
+static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
+{
+ int i;
+ int err;
+ struct ib_port_attr props;
+ union ib_gid tmp;
+
+ err = ib_query_port(device, port_num, &props);
+ if (err)
+ return 1;
+
+ for (i = 0; i < props.gid_tbl_len; ++i) {
+ err = ib_query_gid(device, port_num, i, &tmp);
+ if (err)
+ return 1;
+ if (!memcmp(&tmp, gid, sizeof tmp))
+ return 0;
+ }
+
+ return -EAGAIN;
+}
+
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
- union ib_gid gid;
+ union ib_gid gid, iboe_gid;
int ret = -ENODEV;
+ u8 port;
+ enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
+ IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
- if (dev_addr->dev_type != ARPHRD_INFINIBAND) {
- iboe_addr_get_sgid(dev_addr, &gid);
- list_for_each_entry(cma_dev, &dev_list, list) {
- ret = ib_find_cached_gid(cma_dev->device, &gid,
- &id_priv->id.port_num, NULL);
- if (!ret)
- goto out;
- }
- }
+ if (dev_ll != IB_LINK_LAYER_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ mutex_lock(&lock);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
list_for_each_entry(cma_dev, &dev_list, list) {
- ret = ib_find_cached_gid(cma_dev->device, &gid,
- &id_priv->id.port_num, NULL);
- if (!ret)
+ for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
+ if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+ ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+ else
+ ret = find_gid_port(cma_dev->device, &gid, port);
+
+ if (!ret) {
+ id_priv->id.port_num = port;
+ goto out;
+ } else if (ret == 1)
break;
}
+ }
+ }
out:
if (!ret)
cma_attach_to_dev(id_priv, cma_dev);
+ mutex_unlock(&lock);
return ret;
}
@@ -401,7 +455,7 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
}
static int cma_disable_callback(struct rdma_id_private *id_priv,
- enum cma_state state)
+ enum rdma_cm_state state)
{
mutex_lock(&id_priv->handler_mutex);
if (id_priv->state != state) {
@@ -411,13 +465,9 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,
return 0;
}
-static int cma_has_cm_dev(struct rdma_id_private *id_priv)
-{
- return (id_priv->id.device && id_priv->cm_id.ib);
-}
-
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps)
+ void *context, enum rdma_port_space ps,
+ enum ib_qp_type qp_type)
{
struct rdma_id_private *id_priv;
@@ -425,11 +475,14 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- id_priv->state = CMA_IDLE;
+ id_priv->owner = curthread->td_proc->p_pid;
+ id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
+ id_priv->id.qp_type = qp_type;
spin_lock_init(&id_priv->lock);
+ spin_lock_init(&id_priv->cm_lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
atomic_set(&id_priv->refcount, 1);
@@ -496,7 +549,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
if (IS_ERR(qp))
return PTR_ERR(qp);
- if (cma_is_ud_ps(id_priv->id.ps))
+ if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
@@ -530,6 +583,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
+ union ib_gid sgid;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
@@ -551,6 +605,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
goto out;
+ ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
+ qp_attr.ah_attr.grh.sgid_index, &sgid);
+ if (ret)
+ goto out;
+
+ if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
+ == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
+ == IB_LINK_LAYER_ETHERNET) {
+ ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
+
+ if (ret)
+ goto out;
+ }
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
@@ -579,6 +647,12 @@ static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
if (conn_param)
qp_attr.max_rd_atomic = conn_param->initiator_depth;
+
+ if (id_priv->qp_timeout && id_priv->id.qp->qp_type == IB_QPT_RC) {
+ qp_attr.timeout = id_priv->qp_timeout;
+ qp_attr_mask |= IB_QP_TIMEOUT;
+ }
+
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
mutex_unlock(&id_priv->qp_mutex);
@@ -624,7 +698,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
qp_attr->port_num = id_priv->id.port_num;
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
- if (cma_is_ud_ps(id_priv->id.ps)) {
+ if (id_priv->id.qp_type == IB_QPT_UD) {
ret = cma_set_qkey(id_priv);
if (ret)
return ret;
@@ -647,7 +721,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
id_priv = container_of(id, struct rdma_id_private, id);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
+ if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
else
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
@@ -656,6 +730,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
qp_attr->rq_psn = id_priv->seq_num;
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
if (!id_priv->cm_id.iw) {
qp_attr->qp_access_flags = 0;
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
@@ -701,6 +776,21 @@ static inline int cma_any_addr(struct sockaddr *addr)
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
+static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
+{
+ if (src->sa_family != dst->sa_family)
+ return -1;
+
+ switch (src->sa_family) {
+ case AF_INET:
+ return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
+ ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+ default:
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
+ &((struct sockaddr_in6 *) dst)->sin6_addr);
+ }
+}
+
static inline __be16 cma_port(struct sockaddr *addr)
{
if (addr->sa_family == AF_INET)
@@ -831,16 +921,16 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv)
}
static void cma_cancel_operation(struct rdma_id_private *id_priv,
- enum cma_state state)
+ enum rdma_cm_state state)
{
switch (state) {
- case CMA_ADDR_QUERY:
+ case RDMA_CM_ADDR_QUERY:
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
- case CMA_ROUTE_QUERY:
+ case RDMA_CM_ROUTE_QUERY:
cma_cancel_route(id_priv);
break;
- case CMA_LISTEN:
+ case RDMA_CM_LISTEN:
if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
&& !id_priv->cma_dev)
cma_cancel_listens(id_priv);
@@ -852,20 +942,21 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
static void cma_release_port(struct rdma_id_private *id_priv)
{
- struct rdma_bind_list *bind_list = id_priv->bind_list;
-
- if (!bind_list)
- return;
+ struct rdma_bind_list *bind_list;
mutex_lock(&lock);
+ bind_list = id_priv->bind_list;
+ if (!bind_list) {
+ mutex_unlock(&lock);
+ return;
+ }
hlist_del(&id_priv->node);
+ id_priv->bind_list = NULL;
if (hlist_empty(&bind_list->owners)) {
idr_remove(bind_list->ps, bind_list->port);
kfree(bind_list);
}
mutex_unlock(&lock);
- if (id_priv->sock)
- sock_release(id_priv->sock);
}
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
@@ -889,46 +980,66 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
}
}
}
+static void __rdma_free(struct work_struct *work)
+{
+ struct rdma_id_private *id_priv;
+ id_priv = container_of(work, struct rdma_id_private, work);
+
+ wait_for_completion(&id_priv->comp);
+
+ if (id_priv->internal_id)
+ cma_deref_id(id_priv->id.context);
+
+ kfree(id_priv->id.route.path_rec);
+ kfree(id_priv);
+}
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
- enum cma_state state;
+ enum rdma_cm_state state;
+ unsigned long flags;
+ struct ib_cm_id *ib;
id_priv = container_of(id, struct rdma_id_private, id);
- state = cma_exch(id_priv, CMA_DESTROYING);
+ state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
- mutex_lock(&lock);
+ /*
+ * Wait for any active callback to finish. New callbacks will find
+ * the id_priv state set to destroying and abort.
+ */
+ mutex_lock(&id_priv->handler_mutex);
+ mutex_unlock(&id_priv->handler_mutex);
+
if (id_priv->cma_dev) {
- mutex_unlock(&lock);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
- ib_destroy_cm_id(id_priv->cm_id.ib);
+ spin_lock_irqsave(&id_priv->cm_lock, flags);
+ if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) {
+ ib = id_priv->cm_id.ib;
+ id_priv->cm_id.ib = NULL;
+ spin_unlock_irqrestore(&id_priv->cm_lock, flags);
+ ib_destroy_cm_id(ib);
+ } else
+ spin_unlock_irqrestore(&id_priv->cm_lock, flags);
break;
case RDMA_TRANSPORT_IWARP:
- if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+ case RDMA_TRANSPORT_SCIF:
+ if (id_priv->cm_id.iw)
iw_destroy_cm_id(id_priv->cm_id.iw);
break;
default:
break;
}
cma_leave_mc_groups(id_priv);
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
+ cma_release_dev(id_priv);
}
- mutex_unlock(&lock);
cma_release_port(id_priv);
cma_deref_id(id_priv);
- wait_for_completion(&id_priv->comp);
-
- if (id_priv->internal_id)
- cma_deref_id(id_priv->id.context);
-
- kfree(id_priv->id.route.path_rec);
- kfree(id_priv);
+ INIT_WORK(&id_priv->work, __rdma_free);
+ queue_work(cma_free_wq, &id_priv->work);
}
EXPORT_SYMBOL(rdma_destroy_id);
@@ -944,6 +1055,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
if (ret)
goto reject;
+ cma_dbg(id_priv, "sending RTU\n");
ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
@@ -951,6 +1063,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
return 0;
reject:
cma_modify_qp_err(id_priv);
+ cma_dbg(id_priv, "sending REJ\n");
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
@@ -987,11 +1100,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
int ret = 0;
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- cma_disable_callback(id_priv, CMA_CONNECT)) ||
+ cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- cma_disable_callback(id_priv, CMA_DISCONNECT)))
+ cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
return 0;
-
memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_REQ_ERROR:
@@ -1020,7 +1132,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT; /* fall through */
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
- if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
+ if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT))
goto out;
event.event = RDMA_CM_EVENT_DISCONNECTED;
break;
@@ -1047,7 +1160,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
@@ -1070,12 +1183,12 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (cma_get_net_info(ib_event->private_data, listen_id->ps,
&ip_ver, &port, &src, &dst))
- goto err;
+ return NULL;
id = rdma_create_id(listen_id->event_handler, listen_id->context,
- listen_id->ps);
+ listen_id->ps, ib_event->param.req_rcvd.qp_type);
if (IS_ERR(id))
- goto err;
+ return NULL;
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
@@ -1085,7 +1198,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
GFP_KERNEL);
if (!rt->path_rec)
- goto destroy_id;
+ goto err;
rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
if (rt->num_paths == 2)
@@ -1094,22 +1207,21 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
- ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+ ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
} else {
ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
- &rt->addr.dev_addr);
+ &rt->addr.dev_addr, NULL);
if (ret)
- goto destroy_id;
+ goto err;
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->state = CMA_CONNECT;
+ id_priv->state = RDMA_CM_CONNECT;
return id_priv;
-destroy_id:
- rdma_destroy_id(id);
err:
+ rdma_destroy_id(id);
return NULL;
}
@@ -1124,7 +1236,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
int ret;
id = rdma_create_id(listen_id->event_handler, listen_id->context,
- listen_id->ps);
+ listen_id->ps, IB_QPT_UD);
if (IS_ERR(id))
return NULL;
@@ -1138,13 +1250,13 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
- &id->route.addr.dev_addr);
+ &id->route.addr.dev_addr, NULL);
if (ret)
goto err;
}
id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->state = CMA_CONNECT;
+ id_priv->state = RDMA_CM_CONNECT;
return id_priv;
err:
rdma_destroy_id(id);
@@ -1166,20 +1278,43 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.qp_num = req_data->remote_qpn;
}
+static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+{
+ return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
+ (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
+ ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
+ (id->qp_type == IB_QPT_UD)) ||
+ (!id->qp_type));
+}
+
static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int offset, ret;
+ u8 smac[ETH_ALEN];
+ u8 alt_smac[ETH_ALEN];
+ u8 *psmac = smac;
+ u8 *palt_smac = alt_smac;
+ int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
+ RDMA_TRANSPORT_IB) &&
+ (rdma_port_get_link_layer(cm_id->device,
+ ib_event->param.req_rcvd.port) ==
+ IB_LINK_LAYER_ETHERNET));
+ int is_sidr = 0;
listen_id = cm_id->context;
- if (cma_disable_callback(listen_id, CMA_LISTEN))
+ if (!cma_check_req_qp_type(&listen_id->id, ib_event))
+ return -EINVAL;
+
+ if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
return -ECONNABORTED;
memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id->id.ps);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
- if (cma_is_ud_ps(listen_id->id.ps)) {
+ if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
+ is_sidr = 1;
conn_id = cma_new_udp_id(&listen_id->id, ib_event);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
@@ -1191,45 +1326,69 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
}
if (!conn_id) {
ret = -ENOMEM;
- goto out;
+ goto err1;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- mutex_lock(&lock);
ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
if (ret)
- goto release_conn_id;
+ goto err2;
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
- if (!ret) {
+ if (ret)
+ goto err3;
+
+ if (is_iboe && !is_sidr) {
+ if (ib_event->param.req_rcvd.primary_path != NULL)
+ rdma_addr_find_smac_by_sgid(
+ &ib_event->param.req_rcvd.primary_path->sgid,
+ psmac, NULL);
+ else
+ psmac = NULL;
+ if (ib_event->param.req_rcvd.alternate_path != NULL)
+ rdma_addr_find_smac_by_sgid(
+ &ib_event->param.req_rcvd.alternate_path->sgid,
+ palt_smac, NULL);
+ else
+ palt_smac = NULL;
+ }
/*
* Acquire mutex to prevent user executing rdma_destroy_id()
* while we're accessing the cm_id.
*/
mutex_lock(&lock);
- if (cma_comp(conn_id, CMA_CONNECT) &&
- !cma_is_ud_ps(conn_id->id.ps))
+ if (is_iboe && !is_sidr)
+ ib_update_cm_av(cm_id, psmac, palt_smac);
+ if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) {
+ cma_dbg(container_of(&conn_id->id, struct rdma_id_private, id), "sending MRA\n");
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ }
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
- goto out;
- }
+ mutex_unlock(&listen_id->handler_mutex);
+ cma_deref_id(conn_id);
+ return 0;
+err3:
+ cma_deref_id(conn_id);
/* Destroy the CM ID by returning a non-zero value. */
conn_id->cm_id.ib = NULL;
-
-release_conn_id:
- cma_exch(conn_id, CMA_DESTROYING);
+err2:
+ cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(&conn_id->id);
-
-out:
+err1:
mutex_unlock(&listen_id->handler_mutex);
+ if (conn_id)
+ rdma_destroy_id(&conn_id->id);
return ret;
}
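The error-path rework above also introduces a reference-count bracket around the user upcall: conn_id is pinned with atomic_inc() before event_handler() runs and released with cma_deref_id() afterwards, so a concurrent rdma_destroy_id() from another thread cannot free it mid-call. A minimal sketch of that pattern, with hypothetical obj/handler names standing in for the rdma_cm structures:

    #include <linux/atomic.h>
    #include <linux/slab.h>

    struct obj {
            atomic_t refcount;
    };

    static void obj_deref(struct obj *o)
    {
            if (atomic_dec_and_test(&o->refcount))
                    kfree(o);               /* last reference gone */
    }

    static int deliver_event(struct obj *o, int (*handler)(struct obj *))
    {
            int ret;

            atomic_inc(&o->refcount);       /* pin across the upcall */
            ret = handler(o);               /* may request destruction */
            obj_deref(o);                   /* drop our pin; o may be freed */
            return ret;
    }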
@@ -1244,9 +1403,7 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
struct cma_hdr *cma_data, *cma_mask;
struct sdp_hh *sdp_data, *sdp_mask;
__be32 ip4_addr;
-#ifdef INET6
struct in6_addr ip6_addr;
-#endif
memset(compare, 0, sizeof *compare);
cma_data = (void *) compare->data;
@@ -1260,33 +1417,39 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
if (ps == RDMA_PS_SDP) {
sdp_set_ip_ver(sdp_data, 4);
sdp_set_ip_ver(sdp_mask, 0xF);
- sdp_data->dst_addr.ip4.addr = ip4_addr;
- sdp_mask->dst_addr.ip4.addr = htonl(~0);
+ if (!cma_any_addr(addr)) {
+ sdp_data->dst_addr.ip4.addr = ip4_addr;
+ sdp_mask->dst_addr.ip4.addr = htonl(~0);
+ }
} else {
cma_set_ip_ver(cma_data, 4);
cma_set_ip_ver(cma_mask, 0xF);
- cma_data->dst_addr.ip4.addr = ip4_addr;
- cma_mask->dst_addr.ip4.addr = htonl(~0);
+ if (!cma_any_addr(addr)) {
+ cma_data->dst_addr.ip4.addr = ip4_addr;
+ cma_mask->dst_addr.ip4.addr = htonl(~0);
+ }
}
break;
-#ifdef INET6
case AF_INET6:
ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
if (ps == RDMA_PS_SDP) {
sdp_set_ip_ver(sdp_data, 6);
sdp_set_ip_ver(sdp_mask, 0xF);
- sdp_data->dst_addr.ip6 = ip6_addr;
- memset(&sdp_mask->dst_addr.ip6, 0xFF,
- sizeof sdp_mask->dst_addr.ip6);
+ if (!cma_any_addr(addr)) {
+ sdp_data->dst_addr.ip6 = ip6_addr;
+ memset(&sdp_mask->dst_addr.ip6, 0xFF,
+ sizeof(sdp_mask->dst_addr.ip6));
+ }
} else {
cma_set_ip_ver(cma_data, 6);
cma_set_ip_ver(cma_mask, 0xF);
- cma_data->dst_addr.ip6 = ip6_addr;
- memset(&cma_mask->dst_addr.ip6, 0xFF,
- sizeof cma_mask->dst_addr.ip6);
+ if (!cma_any_addr(addr)) {
+ cma_data->dst_addr.ip6 = ip6_addr;
+ memset(&cma_mask->dst_addr.ip6, 0xFF,
+ sizeof(cma_mask->dst_addr.ip6));
+ }
}
break;
-#endif
default:
break;
}
@@ -1299,7 +1462,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
struct sockaddr_in *sin;
int ret = 0;
- if (cma_disable_callback(id_priv, CMA_CONNECT))
+ if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
memset(&event, 0, sizeof event);
@@ -1315,6 +1478,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
switch ((int)iw_event->status) {
case 0:
event.event = RDMA_CM_EVENT_ESTABLISHED;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
break;
case -ECONNRESET:
case -ECONNREFUSED:
@@ -1330,6 +1495,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
break;
case IW_CM_EVENT_ESTABLISHED:
event.event = RDMA_CM_EVENT_ESTABLISHED;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
break;
default:
BUG_ON(1);
@@ -1342,7 +1509,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
@@ -1364,22 +1531,22 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct ib_device_attr attr;
listen_id = cm_id->context;
- if (cma_disable_callback(listen_id, CMA_LISTEN))
+ if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
return -ECONNABORTED;
/* Create a new RDMA id for the new IW CM ID */
new_cm_id = rdma_create_id(listen_id->id.event_handler,
listen_id->id.context,
- RDMA_PS_TCP);
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
}
conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- conn_id->state = CMA_CONNECT;
+ conn_id->state = RDMA_CM_CONNECT;
- dev = ip_dev_find(NULL, iw_event->local_addr.sin_addr.s_addr);
+ dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
if (!dev) {
ret = -EADDRNOTAVAIL;
mutex_unlock(&conn_id->handler_mutex);
@@ -1393,9 +1560,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
}
- mutex_lock(&lock);
ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1422,19 +1587,27 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
- event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
- event.param.conn.responder_resources = attr.max_qp_rd_atom;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
+
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
- cma_exch(conn_id, CMA_DESTROYING);
+ cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
goto out;
}
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
out:
if (dev)
@@ -1447,17 +1620,19 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
{
struct ib_cm_compare_data compare_data;
struct sockaddr *addr;
+ struct ib_cm_id *id;
__be64 svc_id;
int ret;
- id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
- id_priv);
- if (IS_ERR(id_priv->cm_id.ib))
- return PTR_ERR(id_priv->cm_id.ib);
+ id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv);
+ if (IS_ERR(id))
+ return PTR_ERR(id);
+
+ id_priv->cm_id.ib = id;
addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
svc_id = cma_get_service_id(id_priv->id.ps, addr);
- if (cma_any_addr(addr))
+ if (cma_any_addr(addr) && !id_priv->afonly)
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
else {
cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
@@ -1476,13 +1651,16 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
int ret;
struct sockaddr_in *sin;
+ struct iw_cm_id *id;
- id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+ id = iw_create_cm_id(id_priv->id.device,
id_priv->sock,
iw_conn_req_handler,
id_priv);
- if (IS_ERR(id_priv->cm_id.iw))
- return PTR_ERR(id_priv->cm_id.iw);
+ if (IS_ERR(id))
+ return PTR_ERR(id);
+
+ id_priv->cm_id.iw = id;
sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
id_priv->cm_id.iw->local_addr = *sin;
@@ -1514,13 +1692,14 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
struct rdma_cm_id *id;
int ret;
- id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
+ id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
+ id_priv->id.qp_type);
if (IS_ERR(id))
return;
dev_id_priv = container_of(id, struct rdma_id_private, id);
- dev_id_priv->state = CMA_ADDR_BOUND;
+ dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
@@ -1528,11 +1707,11 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
+ dev_id_priv->afonly = id_priv->afonly;
ret = rdma_listen(id, id_priv->backlog);
if (ret)
- printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
- "listening on device %s\n", ret, cma_dev->device->name);
+ cma_warn(id_priv, "cma_listen_on_dev, error %d, listening on device %s\n", ret, cma_dev->device->name);
}
static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -1546,58 +1725,23 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-int rdma_listen(struct rdma_cm_id *id, int backlog)
+void rdma_set_service_type(struct rdma_cm_id *id, int tos)
{
struct rdma_id_private *id_priv;
- int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == CMA_IDLE) {
- ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
- ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
- if (ret)
- return ret;
- }
-
- if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
- return -EINVAL;
-
- id_priv->backlog = backlog;
- if (id->device) {
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- ret = cma_ib_listen(id_priv);
- if (ret)
- goto err;
- break;
- case RDMA_TRANSPORT_IWARP:
- ret = cma_iw_listen(id_priv, backlog);
- if (ret)
- goto err;
- break;
- default:
- ret = -ENOSYS;
- goto err;
- }
- } else
- cma_listen_on_all(id_priv);
-
- return 0;
-err:
- id_priv->backlog = 0;
- cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
- return ret;
+ id_priv->tos = (u8) tos;
}
-EXPORT_SYMBOL(rdma_listen);
+EXPORT_SYMBOL(rdma_set_service_type);
-void rdma_set_service_type(struct rdma_cm_id *id, int tos)
+void rdma_set_timeout(struct rdma_cm_id *id, int timeout)
{
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->tos = (u8) tos;
+ id_priv->qp_timeout = (u8) timeout;
}
-EXPORT_SYMBOL(rdma_set_service_type);
+EXPORT_SYMBOL(rdma_set_timeout);
static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
void *context)
@@ -1611,8 +1755,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
route->num_paths = 1;
*route->path_rec = *path_rec;
} else {
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ADDR_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
}
@@ -1650,11 +1794,6 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
}
- if (tavor_quirk) {
- path_rec.mtu_selector = IB_SA_LT;
- path_rec.mtu = IB_MTU_2048;
- }
-
id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
id_priv->id.port_num, &path_rec,
comp_mask, timeout_ms,
@@ -1675,7 +1814,7 @@ static void cma_work_handler(struct work_struct *_work)
goto out;
if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
out:
@@ -1693,12 +1832,12 @@ static void cma_ndev_work_handler(struct work_struct *_work)
int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == CMA_DESTROYING ||
- id_priv->state == CMA_DEVICE_REMOVAL)
+ if (id_priv->state == RDMA_CM_DESTROYING ||
+ id_priv->state == RDMA_CM_DEVICE_REMOVAL)
goto out;
if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
destroy = 1;
}
@@ -1722,8 +1861,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ROUTE_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
@@ -1752,19 +1891,21 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_RESOLVED))
return -EINVAL;
- id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
+ id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
+ GFP_KERNEL);
if (!id->route.path_rec) {
ret = -ENOMEM;
goto err;
}
- memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
+ id->route.num_paths = num_paths;
return 0;
err:
- cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
return ret;
}
EXPORT_SYMBOL(rdma_set_ib_paths);
@@ -1779,8 +1920,8 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ROUTE_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
@@ -1800,7 +1941,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
struct net_device *ndev = NULL;
- u16 vid;
+
if (src_addr->sin_family != dst_addr->sin_family)
return -EINVAL;
@@ -1827,10 +1968,15 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- vid = rdma_vlan_dev_vlan_id(ndev);
+ route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
+ memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+ memcpy(route->path_rec->smac, IF_LLADDR(ndev), ndev->if_addrlen);
+
- iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
- iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &route->path_rec->sgid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
+ &route->path_rec->dgid);
route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
@@ -1838,23 +1984,19 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->mtu_selector = IB_SA_EQ;
route->path_rec->sl = tos_to_sl(id_priv->tos);
-#ifdef __linux__
- route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
-#else
route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
-#endif
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
- route->path_rec->packet_life_time = IBOE_PACKET_LIFETIME;
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
}
- work->old_state = CMA_ROUTE_QUERY;
- work->new_state = CMA_ROUTE_RESOLVED;
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
work->event.status = 0;
@@ -1876,7 +2018,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
return -EINVAL;
atomic_inc(&id_priv->refcount);
@@ -1894,6 +2036,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
}
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
ret = cma_resolve_iw_route(id_priv, timeout_ms);
break;
default:
@@ -1905,12 +2048,19 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
return 0;
err:
- cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_route);
+int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type)
+{
+ /* APM is not supported yet */
+ return -EINVAL;
+}
+EXPORT_SYMBOL(rdma_enable_apm);
+
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev;
@@ -1964,34 +2114,26 @@ static void addr_handler(int status, struct sockaddr *src_addr,
memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex);
-
- /*
- * Grab mutex to block rdma_destroy_id() from removing the device while
- * we're trying to acquire it.
- */
- mutex_lock(&lock);
- if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
- mutex_unlock(&lock);
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED))
goto out;
- }
+ memcpy(&id_priv->id.route.addr.src_addr, src_addr,
+ ip_addr_size(src_addr));
if (!status && !id_priv->cma_dev)
status = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
if (status) {
- if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ADDR_BOUND))
goto out;
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = status;
- } else {
- memcpy(&id_priv->id.route.addr.src_addr, src_addr,
- ip_addr_size(src_addr));
+ } else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- }
if (id_priv->id.event_handler(&id_priv->id, &event)) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
cma_deref_id(id_priv);
rdma_destroy_id(&id_priv->id);
@@ -2026,18 +2168,18 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
if (cma_zero_addr(src)) {
dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
if ((src->sa_family = dst->sa_family) == AF_INET) {
- ((struct sockaddr_in *) src)->sin_addr.s_addr =
- ((struct sockaddr_in *) dst)->sin_addr.s_addr;
+ ((struct sockaddr_in *)src)->sin_addr =
+ ((struct sockaddr_in *)dst)->sin_addr;
} else {
- ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
- &((struct sockaddr_in6 *) dst)->sin6_addr);
+ ((struct sockaddr_in6 *)src)->sin6_addr =
+ ((struct sockaddr_in6 *)dst)->sin6_addr;
}
}
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
- work->old_state = CMA_ADDR_QUERY;
- work->new_state = CMA_ADDR_RESOLVED;
+ work->old_state = RDMA_CM_ADDR_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
queue_work(cma_wq, &work->work);
return 0;
@@ -2046,6 +2188,25 @@ err:
return ret;
}
+static int cma_resolve_scif(struct rdma_id_private *id_priv)
+{
+ struct cma_work *work;
+
+ work = kzalloc(sizeof *work, GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+	/* nothing to resolve for SCIF; just complete the query as resolved */
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDR_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+ queue_work(cma_wq, &work->work);
+ return 0;
+}
+
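The new cma_resolve_scif() completes the address query asynchronously: nothing is resolved inline; a cma_work item is queued whose handler performs the ADDR_QUERY to ADDR_RESOLVED transition and reports RDMA_CM_EVENT_ADDR_RESOLVED. A reduced sketch of that defer-to-workqueue idiom, with demo_* names invented for illustration:

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct demo_work {
            struct work_struct work;
            int event;                       /* result to report */
    };

    static void demo_handler(struct work_struct *w)
    {
            struct demo_work *dw = container_of(w, struct demo_work, work);

            /* deliver dw->event to the consumer here */
            kfree(dw);
    }

    static int demo_complete_async(struct workqueue_struct *wq, int event)
    {
            struct demo_work *dw = kzalloc(sizeof(*dw), GFP_KERNEL);

            if (!dw)
                    return -ENOMEM;
            dw->event = event;
            INIT_WORK(&dw->work, demo_handler);
            queue_work(wq, &dw->work);       /* handler runs in wq context */
            return 0;
    }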
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
@@ -2061,11 +2222,12 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
else {
struct sockaddr_in addr_in;
- memset(&addr_in, 0, sizeof addr_in);
- addr_in.sin_family = dst_addr->sa_family;
- addr_in.sin_len = sizeof addr_in;
- return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
+ memset(&addr_in, 0, sizeof addr_in);
+ addr_in.sin_family = dst_addr->sa_family;
+ addr_in.sin_len = sizeof addr_in;
+ return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
}
+
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2075,19 +2237,22 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == CMA_IDLE) {
+ if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
if (ret)
return ret;
}
- if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
return -EINVAL;
atomic_inc(&id_priv->refcount);
memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
if (cma_any_addr(dst_addr))
ret = cma_resolve_loopback(id_priv);
+ else if (id_priv->id.device &&
+ rdma_node_get_transport(id_priv->id.device->node_type) == RDMA_TRANSPORT_SCIF)
+ ret = cma_resolve_scif(id_priv);
else
ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
dst_addr, &id->route.addr.dev_addr,
@@ -2097,12 +2262,51 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
return 0;
err:
- cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
cma_deref_id(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_addr);
+int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
+{
+ struct rdma_id_private *id_priv;
+ unsigned long flags;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ if (id_priv->state == RDMA_CM_IDLE) {
+ id_priv->reuseaddr = reuse;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_set_reuseaddr);
+
+int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
+{
+ struct rdma_id_private *id_priv;
+ unsigned long flags;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
+ id_priv->options |= (1 << CMA_OPTION_AFONLY);
+ id_priv->afonly = afonly;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_set_afonly);
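Both new setters take the id spinlock and check the connection state, so they only succeed before the id has progressed past binding; later calls fail with -EINVAL. A hedged consumer-side sketch (assuming only <rdma/rdma_cm.h>; error values are simply propagated):

    static int setup_listener_opts(struct rdma_cm_id *id)
    {
            int ret;

            ret = rdma_set_reuseaddr(id, 1);   /* legal only in IDLE state */
            if (ret)
                    return ret;
            return rdma_set_afonly(id, 1);     /* IDLE or ADDR_BOUND only */
    }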
+
static void cma_bind_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *id_priv)
{
@@ -2149,126 +2353,100 @@ err1:
static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
{
-#if defined(INET)
- struct rdma_bind_list *bind_list;
- int port, ret, low, high;
-
- bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
- if (!bind_list)
- return -ENOMEM;
-
-retry:
- /* FIXME: add proper port randomization per like inet_csk_get_port */
- do {
- ret = idr_get_new_above(ps, bind_list, next_port, &port);
- } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
-
- if (ret)
- goto err1;
+ static unsigned int last_used_port;
+ int low, high, remaining;
+ unsigned int rover;
inet_get_local_port_range(&low, &high);
- if (port > high) {
- if (next_port != low) {
- idr_remove(ps, port);
- next_port = low;
- goto retry;
+ remaining = (high - low) + 1;
+ rover = random() % remaining + low;
+retry:
+ if (last_used_port != rover &&
+ !idr_find(ps, (unsigned short) rover)) {
+ int ret = cma_alloc_port(ps, id_priv, rover);
+ /*
+	 * Remember the previously used port number in order to avoid
+	 * re-using the same port immediately after it is closed.
+ */
+ if (!ret)
+ last_used_port = rover;
+ if (ret != -EADDRNOTAVAIL)
+ return ret;
}
- ret = -EADDRNOTAVAIL;
- goto err2;
+ if (--remaining) {
+ rover++;
+ if ((rover < low) || (rover > high))
+ rover = low;
+ goto retry;
}
-
- if (port == high)
- next_port = low;
- else
- next_port = port + 1;
-
- bind_list->ps = ps;
- bind_list->port = (unsigned short) port;
- cma_bind_port(bind_list, id_priv);
- return 0;
-err2:
- idr_remove(ps, port);
-err1:
- kfree(bind_list);
- return ret;
-#else
- return -ENOSPC;
-#endif
+ return -EADDRNOTAVAIL;
}
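The rewritten cma_alloc_any_port() replaces the idr retry loop with a random rover: pick a random starting point in the local port range, then probe linearly with wraparound until a free port is found or the whole range has been tried. The same search, reduced to a standalone sketch (is_port_free() is a hypothetical predicate standing in for the idr lookup and bind attempt):

    static int pick_any_port(int low, int high,
                             int (*is_port_free)(unsigned int))
    {
            int remaining = (high - low) + 1;
            unsigned int rover = random() % remaining + low; /* random start */

            while (remaining--) {
                    if (is_port_free(rover))
                            return rover;                    /* claim it */
                    if (++rover > (unsigned int)high)        /* wrap around */
                            rover = low;
            }
            return -EADDRNOTAVAIL;                           /* exhausted */
    }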
-static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+/*
+ * Check that the requested port is available. This is called when trying to
+ * bind to a specific port, or when trying to listen on a bound port. In
+ * the latter case, the provided id_priv may already be on the bind_list, but
+ * we still need to check that it's okay to start listening.
+ */
+static int cma_check_port(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv, uint8_t reuseaddr)
{
struct rdma_id_private *cur_id;
- struct sockaddr_in *sin, *cur_sin;
- struct rdma_bind_list *bind_list;
+ struct sockaddr *addr, *cur_addr;
struct hlist_node *node;
- unsigned short snum;
- sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- snum = ntohs(sin->sin_port);
-#ifdef __linux__
- if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
- return -EACCES;
-#endif
+ addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+ hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+ if (id_priv == cur_id)
+ continue;
- bind_list = idr_find(ps, snum);
- if (!bind_list)
- return cma_alloc_port(ps, id_priv, snum);
+ if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
+ cur_id->reuseaddr)
+ continue;
- /*
- * We don't support binding to any address if anyone is bound to
- * a specific address on the same port.
- */
- if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr))
- return -EADDRNOTAVAIL;
+ cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
+ if (id_priv->afonly && cur_id->afonly &&
+ (addr->sa_family != cur_addr->sa_family))
+ continue;
- hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
- if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr))
+ if (cma_any_addr(addr) || cma_any_addr(cur_addr))
return -EADDRNOTAVAIL;
- cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
- if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
+ if (!cma_addr_cmp(addr, cur_addr))
return -EADDRINUSE;
}
-
- cma_bind_port(bind_list, id_priv);
return 0;
}
-static int cma_get_tcp_port(struct rdma_id_private *id_priv)
+static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
{
+ struct rdma_bind_list *bind_list;
+ unsigned short snum;
int ret;
- int size;
- struct socket *sock;
- ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
- if (ret)
- return ret;
-#ifdef __linux__
- ret = sock->ops->bind(sock,
- (struct sockaddr *) &id_priv->id.route.addr.src_addr,
- ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
-#else
- ret = -sobind(sock,
- (struct sockaddr *)&id_priv->id.route.addr.src_addr,
- curthread);
-#endif
- if (ret) {
- sock_release(sock);
- return ret;
- }
+ snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
- size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
- ret = sock_getname(sock,
- (struct sockaddr *) &id_priv->id.route.addr.src_addr,
- &size, 0);
- if (ret) {
- sock_release(sock);
- return ret;
+ bind_list = idr_find(ps, snum);
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, snum);
+ } else {
+ ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
}
+ return ret;
+}
- id_priv->sock = sock;
- return 0;
+static int cma_bind_listen(struct rdma_id_private *id_priv)
+{
+ struct rdma_bind_list *bind_list = id_priv->bind_list;
+ int ret = 0;
+
+ mutex_lock(&lock);
+ if (bind_list->owners.first->next)
+ ret = cma_check_port(bind_list, id_priv, 0);
+ mutex_unlock(&lock);
+ return ret;
}
static int cma_get_port(struct rdma_id_private *id_priv)
@@ -2282,11 +2460,6 @@ static int cma_get_port(struct rdma_id_private *id_priv)
break;
case RDMA_PS_TCP:
ps = &tcp_ps;
- if (unify_tcp_port_space) {
- ret = cma_get_tcp_port(id_priv);
- if (ret)
- goto out;
- }
break;
case RDMA_PS_UDP:
ps = &udp_ps;
@@ -2294,6 +2467,9 @@ static int cma_get_port(struct rdma_id_private *id_priv)
case RDMA_PS_IPOIB:
ps = &ipoib_ps;
break;
+ case RDMA_PS_IB:
+ ps = &ib_ps;
+ break;
default:
return -EPROTONOSUPPORT;
}
@@ -2304,7 +2480,7 @@ static int cma_get_port(struct rdma_id_private *id_priv)
else
ret = cma_use_port(ps, id_priv);
mutex_unlock(&lock);
-out:
+
return ret;
}
@@ -2318,11 +2494,7 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
return 0;
sin6 = (struct sockaddr_in6 *) addr;
-#ifdef __linux__
- if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
-#else
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) &&
-#endif
!sin6->sin6_scope_id)
return -EINVAL;
@@ -2331,48 +2503,105 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
return 0;
}
+int rdma_listen(struct rdma_cm_id *id, int backlog)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (id_priv->state == RDMA_CM_IDLE) {
+ ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+ ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
+ if (ret)
+ return ret;
+ }
+
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
+ return -EINVAL;
+
+ if (id_priv->reuseaddr) {
+ ret = cma_bind_listen(id_priv);
+ if (ret)
+ goto err;
+ }
+
+ id_priv->backlog = backlog;
+ if (id->device) {
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_ib_listen(id_priv);
+ if (ret)
+ goto err;
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
+ ret = cma_iw_listen(id_priv, backlog);
+ if (ret)
+ goto err;
+ break;
+ default:
+ ret = -ENOSYS;
+ goto err;
+ }
+ } else
+ cma_listen_on_all(id_priv);
+
+ return 0;
+err:
+ id_priv->backlog = 0;
+ cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_listen);
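With rdma_listen() relocated after the port-checking helpers, the expected caller sequence is unchanged. A hedged sketch of a passive-side setup (error handling abbreviated; cb is the application's event handler, and rdma_create_id() uses the new four-argument signature introduced by this patch):

    static int start_listener(struct sockaddr *addr,
                              rdma_cm_event_handler cb, void *ctx)
    {
            struct rdma_cm_id *id;
            int ret;

            id = rdma_create_id(cb, ctx, RDMA_PS_TCP, IB_QPT_RC);
            if (IS_ERR(id))
                    return PTR_ERR(id);
            ret = rdma_bind_addr(id, addr);    /* IDLE -> ADDR_BOUND */
            if (!ret)
                    ret = rdma_listen(id, 10); /* ADDR_BOUND -> LISTEN */
            if (ret)
                    rdma_destroy_id(id);
            return ret;
    }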
+
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
+ int ipv6only;
+ size_t var_size = sizeof(int);
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
return -EAFNOSUPPORT;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
+ if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
return -EINVAL;
ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
if (ret)
goto err1;
+ memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
if (!cma_any_addr(addr)) {
- ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
+ ret = rdma_translate_ip(addr, &id->route.addr.dev_addr, NULL);
if (ret)
goto err1;
- mutex_lock(&lock);
ret = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
if (ret)
goto err1;
}
- memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+ if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
+ if (addr->sa_family == AF_INET)
+ id_priv->afonly = 1;
+#if defined(INET6)
+ else if (addr->sa_family == AF_INET6)
+ id_priv->afonly = kernel_sysctlbyname(&thread0, "net.inet6.ip6.v6only",
+ &ipv6only, &var_size, NULL, 0, NULL, 0);
+#endif
+ }
ret = cma_get_port(id_priv);
if (ret)
goto err2;
return 0;
err2:
- if (id_priv->cma_dev) {
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
- mutex_unlock(&lock);
- }
+ if (id_priv->cma_dev)
+ cma_release_dev(id_priv);
err1:
- cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
}
EXPORT_SYMBOL(rdma_bind_addr);
@@ -2445,7 +2674,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
int ret = 0;
- if (cma_disable_callback(id_priv, CMA_CONNECT))
+ if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
return 0;
memset(&event, 0, sizeof event);
@@ -2491,7 +2720,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return ret;
@@ -2506,10 +2735,14 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
{
struct ib_cm_sidr_req_param req;
struct rdma_route *route;
+ struct ib_cm_id *id;
int ret;
req.private_data_len = sizeof(struct cma_hdr) +
conn_param->private_data_len;
+ if (req.private_data_len < conn_param->private_data_len)
+ return -EINVAL;
+
req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
if (!req.private_data)
return -ENOMEM;
@@ -2523,12 +2756,13 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
if (ret)
goto out;
- id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
- cma_sidr_rep_handler, id_priv);
- if (IS_ERR(id_priv->cm_id.ib)) {
- ret = PTR_ERR(id_priv->cm_id.ib);
+ id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
+ id_priv);
+ if (IS_ERR(id)) {
+ ret = PTR_ERR(id);
goto out;
}
+ id_priv->cm_id.ib = id;
req.path = route->path_rec;
req.service_id = cma_get_service_id(id_priv->id.ps,
@@ -2536,6 +2770,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
req.timeout_ms = 1 << (cma_response_timeout - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
+ cma_dbg(id_priv, "sending SIDR\n");
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -2552,11 +2787,15 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
struct ib_cm_req_param req;
struct rdma_route *route;
void *private_data;
+ struct ib_cm_id *id;
int offset, ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv->id.ps);
req.private_data_len = offset + conn_param->private_data_len;
+ if (req.private_data_len < conn_param->private_data_len)
+ return -EINVAL;
+
private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
if (!private_data)
return -ENOMEM;
@@ -2565,12 +2804,12 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
memcpy(private_data + offset, conn_param->private_data,
conn_param->private_data_len);
- id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
- id_priv);
- if (IS_ERR(id_priv->cm_id.ib)) {
- ret = PTR_ERR(id_priv->cm_id.ib);
+ id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
+ if (IS_ERR(id)) {
+ ret = PTR_ERR(id);
goto out;
}
+ id_priv->cm_id.ib = id;
route = &id_priv->id.route;
ret = cma_format_hdr(private_data, id_priv->id.ps, route);
@@ -2585,22 +2824,23 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.service_id = cma_get_service_id(id_priv->id.ps,
(struct sockaddr *) &route->addr.dst_addr);
req.qp_num = id_priv->qp_num;
- req.qp_type = IB_QPT_RC;
+ req.qp_type = id_priv->id.qp_type;
req.starting_psn = id_priv->seq_num;
req.responder_resources = conn_param->responder_resources;
req.initiator_depth = conn_param->initiator_depth;
req.flow_control = conn_param->flow_control;
- req.retry_count = conn_param->retry_count;
- req.rnr_retry_count = conn_param->rnr_retry_count;
+ req.retry_count = min_t(u8, 7, conn_param->retry_count);
+ req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
req.remote_cm_response_timeout = cma_response_timeout;
req.local_cm_response_timeout = cma_response_timeout;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ cma_dbg(id_priv, "sending REQ\n");
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
- if (ret && !IS_ERR(id_priv->cm_id.ib)) {
- ib_destroy_cm_id(id_priv->cm_id.ib);
+ if (ret && !IS_ERR(id)) {
+ ib_destroy_cm_id(id);
id_priv->cm_id.ib = NULL;
}
@@ -2617,11 +2857,9 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock,
- cma_iw_handler, id_priv);
- if (IS_ERR(cm_id)) {
- ret = PTR_ERR(cm_id);
- goto out;
- }
+ cma_iw_handler, id_priv);
+ if (IS_ERR(cm_id))
+ return PTR_ERR(cm_id);
id_priv->cm_id.iw = cm_id;
@@ -2635,17 +2873,19 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
if (ret)
goto out;
+ if (conn_param) {
iw_param.ord = conn_param->initiator_depth;
iw_param.ird = conn_param->responder_resources;
iw_param.private_data = conn_param->private_data;
iw_param.private_data_len = conn_param->private_data_len;
- if (id_priv->id.qp)
+ iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
+ } else {
+ memset(&iw_param, 0, sizeof iw_param);
iw_param.qpn = id_priv->qp_num;
- else
- iw_param.qpn = conn_param->qp_num;
+ }
ret = iw_cm_connect(cm_id, &iw_param);
out:
- if (ret && !IS_ERR(cm_id)) {
+ if (ret) {
iw_destroy_cm_id(cm_id);
id_priv->cm_id.iw = NULL;
}
@@ -2658,7 +2898,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
if (!id->qp) {
@@ -2668,12 +2908,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (cma_is_ud_ps(id->ps))
+ if (id->qp_type == IB_QPT_UD)
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
ret = cma_connect_iw(id_priv, conn_param);
break;
default:
@@ -2685,7 +2926,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
return 0;
err:
- cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
return ret;
}
EXPORT_SYMBOL(rdma_connect);
@@ -2713,9 +2954,9 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.initiator_depth = conn_param->initiator_depth;
rep.failover_accepted = 0;
rep.flow_control = conn_param->flow_control;
- rep.rnr_retry_count = conn_param->rnr_retry_count;
+ rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
-
+ cma_dbg(id_priv, "sending REP\n");
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
return ret;
@@ -2727,6 +2968,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
+ if (!conn_param)
+ return -EINVAL;
+
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -2762,6 +3006,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
rep.private_data = private_data;
rep.private_data_len = private_data_len;
+ cma_dbg(id_priv, "sending SIDR\n");
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
@@ -2771,7 +3016,9 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, CMA_CONNECT))
+
+ id_priv->owner = curthread->td_proc->p_pid;
+ if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
if (!id->qp && conn_param) {
@@ -2781,16 +3028,23 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (cma_is_ud_ps(id->ps))
+ if (id->qp_type == IB_QPT_UD) {
+ if (conn_param)
ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
conn_param->private_data,
conn_param->private_data_len);
- else if (conn_param)
+ else
+ ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+ NULL, 0);
+ } else {
+ if (conn_param)
ret = cma_accept_ib(id_priv, conn_param);
else
ret = cma_rep_recv(id_priv);
+ }
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
ret = cma_accept_iw(id_priv, conn_param);
break;
default:
@@ -2815,7 +3069,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_has_cm_dev(id_priv))
+ if (!id_priv->cm_id.ib)
return -EINVAL;
switch (id->device->node_type) {
@@ -2837,20 +3091,23 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_has_cm_dev(id_priv))
+ if (!id_priv->cm_id.ib)
return -EINVAL;
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (cma_is_ud_ps(id->ps))
+ if (id->qp_type == IB_QPT_UD)
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
private_data, private_data_len);
- else
+ else {
+ cma_dbg(id_priv, "sending REJ\n");
ret = ib_send_cm_rej(id_priv->cm_id.ib,
IB_CM_REJ_CONSUMER_DEFINED, NULL,
0, private_data, private_data_len);
+ }
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
break;
@@ -2868,7 +3125,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_has_cm_dev(id_priv))
+ if (!id_priv->cm_id.ib)
return -EINVAL;
switch (rdma_node_get_transport(id->device->node_type)) {
@@ -2877,10 +3134,14 @@ int rdma_disconnect(struct rdma_cm_id *id)
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
+ cma_dbg(id_priv, "sending DREQ\n");
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
+ cma_dbg(id_priv, "sending DREP\n");
ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+ }
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
break;
default:
@@ -2897,35 +3158,55 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
struct rdma_id_private *id_priv;
struct cma_multicast *mc = multicast->context;
struct rdma_cm_event event;
+ struct rdma_dev_addr *dev_addr;
int ret;
+ struct net_device *ndev = NULL;
+ u16 vlan;
id_priv = mc->id_priv;
- if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
- cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+ if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
+ cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
return 0;
mutex_lock(&id_priv->qp_mutex);
if (!status && id_priv->id.qp)
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- multicast->rec.mlid);
+ be16_to_cpu(multicast->rec.mlid));
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
event.status = status;
event.param.ud.private_data = mc->context;
+ ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!ndev) {
+ status = -ENODEV;
+ } else {
+ vlan = rdma_vlan_dev_vlan_id(ndev);
+ dev_put(ndev);
+ }
if (!status) {
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
&event.param.ud.ah_attr);
+ event.param.ud.ah_attr.vlan_id = vlan;
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- } else
+ } else {
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ /* mark that the cached record is no longer valid */
+ if (status != -ENETRESET && status != -EAGAIN) {
+ spin_lock(&id_priv->lock);
+ id_priv->is_valid_rec = 0;
+ spin_unlock(&id_priv->lock);
+ }
+ }
+
ret = id_priv->id.event_handler(&id_priv->id, &event);
if (ret) {
- cma_exch(id_priv, CMA_DESTROYING);
+ cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
rdma_destroy_id(&id_priv->id);
return 0;
@@ -2938,20 +3219,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
static void cma_set_mgid(struct rdma_id_private *id_priv,
struct sockaddr *addr, union ib_gid *mgid)
{
-#if defined(INET) || defined(INET6)
unsigned char mc_map[MAX_ADDR_LEN];
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
-#endif
-#ifdef INET
struct sockaddr_in *sin = (struct sockaddr_in *) addr;
-#endif
-#ifdef INET6
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
-#endif
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
-#ifdef INET6
} else if ((addr->sa_family == AF_INET6) &&
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
@@ -2962,14 +3236,11 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
-#endif
-#ifdef INET
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
-#endif
}
}
@@ -2979,13 +3250,26 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct ib_sa_mcmember_rec rec;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
- int ret;
+ int ret = 0;
- ib_addr_get_mgid(dev_addr, &rec.mgid);
- ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
- &rec.mgid, &rec);
- if (ret)
+ ib_addr_get_mgid(dev_addr, &id_priv->rec.mgid);
+
+	/* cache the IPoIB broadcast mcmember record */
+ spin_lock(&id_priv->lock);
+ if (!id_priv->is_valid_rec)
+ ret = ib_sa_get_mcmember_rec(id_priv->id.device,
+ id_priv->id.port_num,
+ &id_priv->rec.mgid,
+ &id_priv->rec);
+ if (ret) {
+ id_priv->is_valid_rec = 0;
+ spin_unlock(&id_priv->lock);
return ret;
+ } else {
+ rec = id_priv->rec;
+ id_priv->is_valid_rec = 1;
+ }
+ spin_unlock(&id_priv->lock);
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -3002,19 +3286,18 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (id_priv->id.ps == RDMA_PS_IPOIB)
comp_mask |= IB_SA_MCMEMBER_REC_RATE |
- IB_SA_MCMEMBER_REC_RATE_SELECTOR;
+ IB_SA_MCMEMBER_REC_RATE_SELECTOR |
+ IB_SA_MCMEMBER_REC_MTU_SELECTOR |
+ IB_SA_MCMEMBER_REC_MTU |
+ IB_SA_MCMEMBER_REC_HOP_LIMIT;
mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
id_priv->id.port_num, &rec,
comp_mask, GFP_KERNEL,
cma_ib_mc_handler, mc);
- if (IS_ERR(mc->multicast.ib))
- return PTR_ERR(mc->multicast.ib);
-
- return 0;
+ return PTR_RET(mc->multicast.ib);
}
-
static void iboe_mcast_work_handler(struct work_struct *work)
{
struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
@@ -3034,9 +3317,9 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
- } else if (addr->sa_family == AF_INET6)
+ } else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
- else {
+ } else {
mgid->raw[0] = 0xff;
mgid->raw[1] = 0x0e;
mgid->raw[2] = 0;
@@ -3087,20 +3370,16 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENODEV;
goto out2;
}
-
mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1;
-#ifdef __linux__
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
-#else
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
-#endif
dev_put(ndev);
if (!mc->multicast.ib->rec.mtu) {
err = -EINVAL;
goto out2;
}
- iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &mc->multicast.ib->rec.port_gid);
work->id = id_priv;
work->mc = mc;
INIT_WORK(&work->work, iboe_mcast_work_handler);
@@ -3124,8 +3403,8 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
- !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
+ !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
mc = kmalloc(sizeof *mc, GFP_KERNEL);
@@ -3165,7 +3444,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
spin_unlock_irq(&id_priv->lock);
kfree(mc);
}
-
return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);
@@ -3185,7 +3463,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
if (id->qp)
ib_detach_mcast(id->qp,
&mc->multicast.ib->rec.mgid,
- mc->multicast.ib->rec.mlid);
+ be16_to_cpu(mc->multicast.ib->rec.mlid));
if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
switch (rdma_port_get_link_layer(id->device, id->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
@@ -3213,17 +3491,10 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
-#ifdef __linux__
- if ((dev_addr->bound_dev_if == ndev->ifindex) &&
- memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
- printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
- ndev->name, &id_priv->id);
-#else
if ((dev_addr->bound_dev_if == ndev->if_index) &&
memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->if_xname, &id_priv->id);
-#endif
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
@@ -3246,7 +3517,8 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
struct rdma_id_private *id_priv;
int ret = NOTIFY_DONE;
-#ifdef __linux__
+/* Bonding-related checks, disabled until bonding support is resolved */
+#if 0
if (dev_net(ndev) != &init_net)
return NOTIFY_DONE;
@@ -3255,10 +3527,9 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
return NOTIFY_DONE;
-#else
+#endif
if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
-#endif
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
@@ -3303,19 +3574,19 @@ static void cma_add_one(struct ib_device *device)
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event;
- enum cma_state state;
+ enum rdma_cm_state state;
int ret = 0;
/* Record that we want to remove the device */
- state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
- if (state == CMA_DESTROYING)
+ state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
+ if (state == RDMA_CM_DESTROYING)
return 0;
cma_cancel_operation(id_priv, state);
mutex_lock(&id_priv->handler_mutex);
/* Check for destruction from another callback. */
- if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
+ if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
goto out;
memset(&event, 0, sizeof event);
@@ -3370,22 +3641,18 @@ static void cma_remove_one(struct ib_device *device)
kfree(cma_dev);
}
-static int cma_init(void)
+static int __init cma_init(void)
{
- int ret;
-#if defined(INET)
- int low, high, remaining;
-
- get_random_bytes(&next_port, sizeof next_port);
- inet_get_local_port_range(&low, &high);
- remaining = (high - low) + 1;
- next_port = ((unsigned int) next_port % remaining) + low;
-#endif
+ int ret = -ENOMEM;
cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
return -ENOMEM;
+ cma_free_wq = create_singlethread_workqueue("rdma_cm_fr");
+ if (!cma_free_wq)
+ goto err1;
+
ib_sa_register_client(&sa_client);
rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
@@ -3393,27 +3660,34 @@ static int cma_init(void)
ret = ib_register_client(&cma_client);
if (ret)
goto err;
+
return 0;
err:
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
+
+ destroy_workqueue(cma_free_wq);
+err1:
destroy_workqueue(cma_wq);
return ret;
}
-static void cma_cleanup(void)
+static void __exit cma_cleanup(void)
{
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
+ flush_workqueue(cma_free_wq);
+ destroy_workqueue(cma_free_wq);
destroy_workqueue(cma_wq);
idr_destroy(&sdp_ps);
idr_destroy(&tcp_ps);
idr_destroy(&udp_ps);
idr_destroy(&ipoib_ps);
+ idr_destroy(&ib_ps);
}
module_init(cma_init);
diff --git a/sys/ofed/drivers/infiniband/core/core_priv.h b/sys/ofed/drivers/infiniband/core/core_priv.h
index 08c4bbb..001bbbe 100644
--- a/sys/ofed/drivers/infiniband/core/core_priv.h
+++ b/sys/ofed/drivers/infiniband/core/core_priv.h
@@ -38,7 +38,8 @@
#include <rdma/ib_verbs.h>
-int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *,
+int ib_device_register_sysfs(struct ib_device *device,
+ int (*port_callback)(struct ib_device *,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
diff --git a/sys/ofed/drivers/infiniband/core/device.c b/sys/ofed/drivers/infiniband/core/device.c
index 98adf48..a7a06d78 100644
--- a/sys/ofed/drivers/infiniband/core/device.c
+++ b/sys/ofed/drivers/infiniband/core/device.c
@@ -37,7 +37,6 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mutex.h>
-#include <linux/workqueue.h>
#include "core_priv.h"
@@ -45,18 +44,15 @@ MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");
-#ifdef __ia64__
-/* workaround for a bug in hp chipset that would cause kernel
- panic when dma resources are exhaused */
-int dma_map_sg_hp_wa = 0;
-#endif
-
struct ib_client_data {
struct list_head list;
struct ib_client *client;
void * data;
};
+struct workqueue_struct *ib_wq;
+EXPORT_SYMBOL_GPL(ib_wq);
+
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
@@ -99,7 +95,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
int i;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
- if (!*(void **) ((u_char *) device + mandatory_table[i].offset)) {
+ if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
device->name, mandatory_table[i].name);
return -EINVAL;
@@ -177,9 +173,14 @@ static int end_port(struct ib_device *device)
*/
struct ib_device *ib_alloc_device(size_t size)
{
+ struct ib_device *dev;
+
BUG_ON(size < sizeof (struct ib_device));
- return kzalloc(size, GFP_KERNEL);
+ dev = kzalloc(size, GFP_KERNEL);
+ spin_lock_init(&dev->cmd_perf_lock);
+
+ return dev;
}
EXPORT_SYMBOL(ib_alloc_device);
@@ -295,8 +296,6 @@ int ib_register_device(struct ib_device *device,
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
- device->ib_uverbs_xrcd_table = RB_ROOT;
- mutex_init(&device->xrcd_table_mutex);
ret = read_port_table_lengths(device);
if (ret) {
@@ -631,6 +630,9 @@ int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
+ if (!device->modify_device)
+ return -ENOSYS;
+
return device->modify_device(device, device_modify_mask,
device_modify);
}
@@ -651,6 +653,9 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
+ if (!device->modify_port)
+ return -ENOSYS;
+
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
@@ -705,18 +710,28 @@ int ib_find_pkey(struct ib_device *device,
{
int ret, i;
u16 tmp_pkey;
+ int partial_ix = -1;
for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
-
if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
- *index = i;
- return 0;
+			/* if there is a full-member pkey, take it */
+ if (tmp_pkey & 0x8000) {
+ *index = i;
+ return 0;
+ }
+ if (partial_ix < 0)
+ partial_ix = i;
}
}
+	/* no full-member pkey found; if a limited-member one exists, take it */
+ if (partial_ix >= 0) {
+ *index = partial_ix;
+ return 0;
+ }
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
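ib_find_pkey() now prefers a full-member P_Key (membership bit 15 set) and falls back to a limited-member match only when no full match exists in the table. The comparison, isolated as a sketch over a plain array:

    /* Low 15 bits identify the partition; bit 15 marks full membership. */
    static int pkey_pick(const u16 *tbl, int len, u16 want, int *index)
    {
            int i, partial = -1;

            for (i = 0; i < len; i++) {
                    if ((tbl[i] & 0x7fff) != (want & 0x7fff))
                            continue;               /* different partition */
                    if (tbl[i] & 0x8000) {          /* full member: best */
                            *index = i;
                            return 0;
                    }
                    if (partial < 0)
                            partial = i;            /* first limited match */
            }
            if (partial >= 0) {
                    *index = partial;               /* settle for limited */
                    return 0;
            }
            return -ENOENT;
    }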
@@ -725,21 +740,29 @@ static int __init ib_core_init(void)
{
int ret;
-#ifdef __ia64__
- if (ia64_platform_is("hpzx1"))
- dma_map_sg_hp_wa = 1;
-#endif
+ ib_wq = create_workqueue("infiniband");
+ if (!ib_wq)
+ return -ENOMEM;
ret = ib_sysfs_setup();
- if (ret)
+ if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+ goto err;
+ }
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
- ib_sysfs_cleanup();
+ goto err_sysfs;
}
+ return 0;
+
+err_sysfs:
+ ib_sysfs_cleanup();
+
+err:
+ destroy_workqueue(ib_wq);
return ret;
}
@@ -748,7 +771,7 @@ static void __exit ib_core_cleanup(void)
ib_cache_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
- flush_scheduled_work();
+ destroy_workqueue(ib_wq);
}
module_init(ib_core_init);
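
The reworked ib_core_init() above follows the usual kernel pattern of unwinding partially completed initialization through goto labels in reverse order. A standalone sketch of the pattern, with hypothetical step names standing in for the workqueue/sysfs/cache steps:

#include <stdio.h>

static int setup_a(void) { return 0; }   /* e.g. create workqueue */
static int setup_b(void) { return 0; }   /* e.g. sysfs setup */
static int setup_c(void) { return -1; }  /* e.g. cache setup, fails here */
static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

static int subsystem_init(void)
{
    int ret;

    if ((ret = setup_a()))
        return ret;
    if ((ret = setup_b()))
        goto err_a;
    if ((ret = setup_c()))
        goto err_b;
    return 0;

err_b:
    undo_b();       /* unwind in reverse order of setup */
err_a:
    undo_a();
    return ret;
}

int main(void) { return subsystem_init() ? 1 : 0; }
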
diff --git a/sys/ofed/drivers/infiniband/core/fmr_pool.c b/sys/ofed/drivers/infiniband/core/fmr_pool.c
index c225833..4ba4c77 100644
--- a/sys/ofed/drivers/infiniband/core/fmr_pool.c
+++ b/sys/ofed/drivers/infiniband/core/fmr_pool.c
@@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/spinlock.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
diff --git a/sys/ofed/drivers/infiniband/core/iwcm.c b/sys/ofed/drivers/infiniband/core/iwcm.c
index 27878a8..14d23cc 100644
--- a/sys/ofed/drivers/infiniband/core/iwcm.c
+++ b/sys/ofed/drivers/infiniband/core/iwcm.c
@@ -40,9 +40,12 @@
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/string.h>
#include <rdma/iw_cm.h>
@@ -507,6 +510,8 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
cm_id->device->iwcm->add_ref(qp);
@@ -566,6 +571,8 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+ wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
cm_id->device->iwcm->add_ref(qp);
@@ -620,17 +627,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
*/
BUG_ON(iw_event->status);
- /*
- * We could be destroying the listening id. If so, ignore this
- * upcall.
- */
- spin_lock_irqsave(&listen_id_priv->lock, flags);
- if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
- spin_unlock_irqrestore(&listen_id_priv->lock, flags);
- goto out;
- }
- spin_unlock_irqrestore(&listen_id_priv->lock, flags);
-
cm_id = iw_create_cm_id(listen_id_priv->id.device,
iw_event->so,
listen_id_priv->id.cm_handler,
@@ -646,6 +642,19 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+ /*
+ * We could be destroying the listening id. If so, ignore this
+ * upcall.
+ */
+ spin_lock_irqsave(&listen_id_priv->lock, flags);
+ if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+ spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+ iw_cm_reject(cm_id, NULL, 0);
+ iw_destroy_cm_id(cm_id);
+ goto out;
+ }
+ spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
ret = alloc_work_entries(cm_id_priv, 3);
if (ret) {
iw_cm_reject(cm_id, NULL, 0);
@@ -723,7 +732,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
*/
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
- if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+ if (iw_event->status == 0) {
cm_id_priv->id.local_addr = iw_event->local_addr;
cm_id_priv->id.remote_addr = iw_event->remote_addr;
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
diff --git a/sys/ofed/drivers/infiniband/core/local_sa.c b/sys/ofed/drivers/infiniband/core/local_sa.c
deleted file mode 100644
index 9b9c60a..0000000
--- a/sys/ofed/drivers/infiniband/core/local_sa.c
+++ /dev/null
@@ -1,1273 +0,0 @@
-/*
- * Copyright (c) 2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/rbtree.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/miscdevice.h>
-#include <linux/random.h>
-
-#include <rdma/ib_cache.h>
-#include <rdma/ib_sa.h>
-#include "sa.h"
-
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("InfiniBand subnet administration caching");
-MODULE_LICENSE("Dual BSD/GPL");
-
-enum {
- SA_DB_MAX_PATHS_PER_DEST = 0x7F,
- SA_DB_MIN_RETRY_TIMER = 4000, /* 4 sec */
- SA_DB_MAX_RETRY_TIMER = 256000 /* 256 sec */
-};
-
-static int set_paths_per_dest(const char *val, struct kernel_param *kp);
-static unsigned long paths_per_dest = 0;
-module_param_call(paths_per_dest, set_paths_per_dest, param_get_ulong,
- &paths_per_dest, 0644);
-MODULE_PARM_DESC(paths_per_dest, "Maximum number of paths to retrieve "
- "to each destination (DGID). Set to 0 "
- "to disable cache.");
-
-static int set_subscribe_inform_info(const char *val, struct kernel_param *kp);
-static char subscribe_inform_info = 1;
-module_param_call(subscribe_inform_info, set_subscribe_inform_info,
- param_get_bool, &subscribe_inform_info, 0644);
-MODULE_PARM_DESC(subscribe_inform_info,
- "Subscribe for SA InformInfo/Notice events.");
-
-static int do_refresh(const char *val, struct kernel_param *kp);
-module_param_call(refresh, do_refresh, NULL, NULL, 0200);
-
-static unsigned long retry_timer = SA_DB_MIN_RETRY_TIMER;
-
-enum sa_db_lookup_method {
- SA_DB_LOOKUP_LEAST_USED,
- SA_DB_LOOKUP_RANDOM
-};
-
-static int set_lookup_method(const char *val, struct kernel_param *kp);
-static int get_lookup_method(char *buf, struct kernel_param *kp);
-static unsigned long lookup_method;
-module_param_call(lookup_method, set_lookup_method, get_lookup_method,
- &lookup_method, 0644);
-MODULE_PARM_DESC(lookup_method, "Method used to return path records when "
- "multiple paths exist to a given destination.");
-
-static void sa_db_add_dev(struct ib_device *device);
-static void sa_db_remove_dev(struct ib_device *device);
-
-static struct ib_client sa_db_client = {
- .name = "local_sa",
- .add = sa_db_add_dev,
- .remove = sa_db_remove_dev
-};
-
-static LIST_HEAD(dev_list);
-static DEFINE_MUTEX(lock);
-static rwlock_t rwlock;
-static struct workqueue_struct *sa_wq;
-static struct ib_sa_client sa_client;
-
-enum sa_db_state {
- SA_DB_IDLE,
- SA_DB_REFRESH,
- SA_DB_DESTROY
-};
-
-struct sa_db_port {
- struct sa_db_device *dev;
- struct ib_mad_agent *agent;
- /* Limit number of outstanding MADs to SA to reduce SA flooding */
- struct ib_mad_send_buf *msg;
- u16 sm_lid;
- u8 sm_sl;
- struct ib_inform_info *in_info;
- struct ib_inform_info *out_info;
- struct rb_root paths;
- struct list_head update_list;
- unsigned long update_id;
- enum sa_db_state state;
- struct work_struct work;
- union ib_gid gid;
- int port_num;
-};
-
-struct sa_db_device {
- struct list_head list;
- struct ib_device *device;
- struct ib_event_handler event_handler;
- int start_port;
- int port_count;
- struct sa_db_port port[0];
-};
-
-struct ib_sa_iterator {
- struct ib_sa_iterator *next;
-};
-
-struct ib_sa_attr_iter {
- struct ib_sa_iterator *iter;
- unsigned long flags;
-};
-
-struct ib_sa_attr_list {
- struct ib_sa_iterator iter;
- struct ib_sa_iterator *tail;
- int update_id;
- union ib_gid gid;
- struct rb_node node;
-};
-
-struct ib_path_rec_info {
- struct ib_sa_iterator iter; /* keep first */
- struct ib_sa_path_rec rec;
- unsigned long lookups;
-};
-
-struct ib_sa_mad_iter {
- struct ib_mad_recv_wc *recv_wc;
- struct ib_mad_recv_buf *recv_buf;
- int attr_size;
- int attr_offset;
- int data_offset;
- int data_left;
- void *attr;
- u8 attr_data[0];
-};
-
-enum sa_update_type {
- SA_UPDATE_FULL,
- SA_UPDATE_ADD,
- SA_UPDATE_REMOVE
-};
-
-struct update_info {
- struct list_head list;
- union ib_gid gid;
- enum sa_update_type type;
-};
-
-struct sa_path_request {
- struct work_struct work;
- struct ib_sa_client *client;
- void (*callback)(int, struct ib_sa_path_rec *, void *);
- void *context;
- struct ib_sa_path_rec path_rec;
-};
-
-static void process_updates(struct sa_db_port *port);
-
-static void free_attr_list(struct ib_sa_attr_list *attr_list)
-{
- struct ib_sa_iterator *cur;
-
- for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) {
- attr_list->iter.next = cur->next;
- kfree(cur);
- }
- attr_list->tail = &attr_list->iter;
-}
-
-static void remove_attr(struct rb_root *root, struct ib_sa_attr_list *attr_list)
-{
- rb_erase(&attr_list->node, root);
- free_attr_list(attr_list);
- kfree(attr_list);
-}
-
-static void remove_all_attrs(struct rb_root *root)
-{
- struct rb_node *node, *next_node;
- struct ib_sa_attr_list *attr_list;
-
- write_lock_irq(&rwlock);
- for (node = rb_first(root); node; node = next_node) {
- next_node = rb_next(node);
- attr_list = rb_entry(node, struct ib_sa_attr_list, node);
- remove_attr(root, attr_list);
- }
- write_unlock_irq(&rwlock);
-}
-
-static void remove_old_attrs(struct rb_root *root, unsigned long update_id)
-{
- struct rb_node *node, *next_node;
- struct ib_sa_attr_list *attr_list;
-
- write_lock_irq(&rwlock);
- for (node = rb_first(root); node; node = next_node) {
- next_node = rb_next(node);
- attr_list = rb_entry(node, struct ib_sa_attr_list, node);
- if (attr_list->update_id != update_id)
- remove_attr(root, attr_list);
- }
- write_unlock_irq(&rwlock);
-}
-
-static struct ib_sa_attr_list *insert_attr_list(struct rb_root *root,
- struct ib_sa_attr_list *attr_list)
-{
- struct rb_node **link = &root->rb_node;
- struct rb_node *parent = NULL;
- struct ib_sa_attr_list *cur_attr_list;
- int cmp;
-
- while (*link) {
- parent = *link;
- cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node);
- cmp = memcmp(&cur_attr_list->gid, &attr_list->gid,
- sizeof attr_list->gid);
- if (cmp < 0)
- link = &(*link)->rb_left;
- else if (cmp > 0)
- link = &(*link)->rb_right;
- else
- return cur_attr_list;
- }
- rb_link_node(&attr_list->node, parent, link);
- rb_insert_color(&attr_list->node, root);
- return NULL;
-}
-
-static struct ib_sa_attr_list *find_attr_list(struct rb_root *root, u8 *gid)
-{
- struct rb_node *node = root->rb_node;
- struct ib_sa_attr_list *attr_list;
- int cmp;
-
- while (node) {
- attr_list = rb_entry(node, struct ib_sa_attr_list, node);
- cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid);
- if (cmp < 0)
- node = node->rb_left;
- else if (cmp > 0)
- node = node->rb_right;
- else
- return attr_list;
- }
- return NULL;
-}
-
-static int insert_attr(struct rb_root *root, unsigned long update_id, void *key,
- struct ib_sa_iterator *iter)
-{
- struct ib_sa_attr_list *attr_list;
- void *err;
-
- write_lock_irq(&rwlock);
- attr_list = find_attr_list(root, key);
- if (!attr_list) {
- write_unlock_irq(&rwlock);
- attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL);
- if (!attr_list)
- return -ENOMEM;
-
- attr_list->iter.next = NULL;
- attr_list->tail = &attr_list->iter;
- attr_list->update_id = update_id;
- memcpy(attr_list->gid.raw, key, sizeof attr_list->gid);
-
- write_lock_irq(&rwlock);
- err = insert_attr_list(root, attr_list);
- if (err) {
- write_unlock_irq(&rwlock);
- kfree(attr_list);
- return PTR_ERR(err);
- }
- } else if (attr_list->update_id != update_id) {
- free_attr_list(attr_list);
- attr_list->update_id = update_id;
- }
-
- attr_list->tail->next = iter;
- iter->next = NULL;
- attr_list->tail = iter;
- write_unlock_irq(&rwlock);
- return 0;
-}
-
-static struct ib_sa_mad_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc)
-{
- struct ib_sa_mad_iter *iter;
- struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
- int attr_size, attr_offset;
-
- attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8;
- attr_size = 64; /* path record length */
- if (attr_offset < attr_size)
- return ERR_PTR(-EINVAL);
-
- iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL);
- if (!iter)
- return ERR_PTR(-ENOMEM);
-
- iter->data_left = mad_recv_wc->mad_len - IB_MGMT_SA_HDR;
- iter->recv_wc = mad_recv_wc;
- iter->recv_buf = &mad_recv_wc->recv_buf;
- iter->attr_offset = attr_offset;
- iter->attr_size = attr_size;
- return iter;
-}
-
-static void ib_sa_iter_free(struct ib_sa_mad_iter *iter)
-{
- kfree(iter);
-}
-
-static void *ib_sa_iter_next(struct ib_sa_mad_iter *iter)
-{
- struct ib_sa_mad *mad;
- int left, offset = 0;
-
- while (iter->data_left >= iter->attr_offset) {
- while (iter->data_offset < IB_MGMT_SA_DATA) {
- mad = (struct ib_sa_mad *) iter->recv_buf->mad;
-
- left = IB_MGMT_SA_DATA - iter->data_offset;
- if (left < iter->attr_size) {
- /* copy first piece of the attribute */
- iter->attr = &iter->attr_data;
- memcpy(iter->attr,
- &mad->data[iter->data_offset], left);
- offset = left;
- break;
- } else if (offset) {
- /* copy the second piece of the attribute */
- memcpy(iter->attr + offset, &mad->data[0],
- iter->attr_size - offset);
- iter->data_offset = iter->attr_size - offset;
- offset = 0;
- } else {
- iter->attr = &mad->data[iter->data_offset];
- iter->data_offset += iter->attr_size;
- }
-
- iter->data_left -= iter->attr_offset;
- goto out;
- }
- iter->data_offset = 0;
- iter->recv_buf = list_entry(iter->recv_buf->list.next,
- struct ib_mad_recv_buf, list);
- }
- iter->attr = NULL;
-out:
- return iter->attr;
-}
-
-/*
- * Copy path records from a received response and insert them into our cache.
- * A path record in the MADs are in network order, packed, and may
- * span multiple MAD buffers, just to make our life hard.
- */
-static void update_path_db(struct sa_db_port *port,
- struct ib_mad_recv_wc *mad_recv_wc,
- enum sa_update_type type)
-{
- struct ib_sa_mad_iter *iter;
- struct ib_path_rec_info *path_info;
- void *attr;
- int ret;
-
- iter = ib_sa_iter_create(mad_recv_wc);
- if (IS_ERR(iter))
- return;
-
- port->update_id += (type == SA_UPDATE_FULL);
-
- while ((attr = ib_sa_iter_next(iter)) &&
- (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) {
-
- ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC);
-
- ret = insert_attr(&port->paths, port->update_id,
- path_info->rec.dgid.raw, &path_info->iter);
- if (ret) {
- kfree(path_info);
- break;
- }
- }
- ib_sa_iter_free(iter);
-
- if (type == SA_UPDATE_FULL)
- remove_old_attrs(&port->paths, port->update_id);
-}
-
-static struct ib_mad_send_buf *get_sa_msg(struct sa_db_port *port,
- struct update_info *update)
-{
- struct ib_ah_attr ah_attr;
- struct ib_mad_send_buf *msg;
-
- msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR,
- IB_MGMT_SA_DATA, GFP_KERNEL);
- if (IS_ERR(msg))
- return NULL;
-
- memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = port->sm_lid;
- ah_attr.sl = port->sm_sl;
- ah_attr.port_num = port->port_num;
-
- msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
- if (IS_ERR(msg->ah)) {
- ib_free_send_mad(msg);
- return NULL;
- }
-
- msg->timeout_ms = retry_timer;
- msg->retries = 0;
- msg->context[0] = port;
- msg->context[1] = update;
- return msg;
-}
-
-static __be64 form_tid(u32 hi_tid)
-{
- static atomic_t tid;
- return cpu_to_be64((((u64) hi_tid) << 32) |
- ((u32) atomic_inc_return(&tid)));
-}
-
-static void format_path_req(struct sa_db_port *port,
- struct update_info *update,
- struct ib_mad_send_buf *msg)
-{
- struct ib_sa_mad *mad = msg->mad;
- struct ib_sa_path_rec path_rec;
-
- mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
- mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
- mad->mad_hdr.method = IB_SA_METHOD_GET_TABLE;
- mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
- mad->mad_hdr.tid = form_tid(msg->mad_agent->hi_tid);
-
- mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH;
-
- path_rec.sgid = port->gid;
- path_rec.numb_path = (u8) paths_per_dest;
-
- if (update->type == SA_UPDATE_ADD) {
- mad->sa_hdr.comp_mask |= IB_SA_PATH_REC_DGID;
- memcpy(&path_rec.dgid, &update->gid, sizeof path_rec.dgid);
- }
-
- ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC);
-}
-
-static int send_query(struct sa_db_port *port,
- struct update_info *update)
-{
- int ret;
-
- port->msg = get_sa_msg(port, update);
- if (!port->msg)
- return -ENOMEM;
-
- format_path_req(port, update, port->msg);
-
- ret = ib_post_send_mad(port->msg, NULL);
- if (ret)
- goto err;
-
- return 0;
-
-err:
- ib_destroy_ah(port->msg->ah);
- ib_free_send_mad(port->msg);
- return ret;
-}
-
-static void add_update(struct sa_db_port *port, u8 *gid,
- enum sa_update_type type)
-{
- struct update_info *update;
-
- update = kmalloc(sizeof *update, GFP_KERNEL);
- if (update) {
- if (gid)
- memcpy(&update->gid, gid, sizeof update->gid);
- update->type = type;
- list_add(&update->list, &port->update_list);
- }
-
- if (port->state == SA_DB_IDLE) {
- port->state = SA_DB_REFRESH;
- process_updates(port);
- }
-}
-
-static void clean_update_list(struct sa_db_port *port)
-{
- struct update_info *update;
-
- while (!list_empty(&port->update_list)) {
- update = list_entry(port->update_list.next,
- struct update_info, list);
- list_del(&update->list);
- kfree(update);
- }
-}
-
-static int notice_handler(int status, struct ib_inform_info *info,
- struct ib_sa_notice *notice)
-{
- struct sa_db_port *port = info->context;
- struct ib_sa_notice_data_gid *gid_data;
- struct ib_inform_info **pinfo;
- enum sa_update_type type;
-
- if (info->trap_number == IB_SA_SM_TRAP_GID_IN_SERVICE) {
- pinfo = &port->in_info;
- type = SA_UPDATE_ADD;
- } else {
- pinfo = &port->out_info;
- type = SA_UPDATE_REMOVE;
- }
-
- mutex_lock(&lock);
- if (port->state == SA_DB_DESTROY || !*pinfo) {
- mutex_unlock(&lock);
- return 0;
- }
-
- if (notice) {
- gid_data = (struct ib_sa_notice_data_gid *)
- &notice->data_details;
- add_update(port, gid_data->gid, type);
- mutex_unlock(&lock);
- } else if (status == -ENETRESET) {
- *pinfo = NULL;
- mutex_unlock(&lock);
- } else {
- if (status)
- *pinfo = ERR_PTR(-EINVAL);
- port->state = SA_DB_IDLE;
- clean_update_list(port);
- mutex_unlock(&lock);
- queue_work(sa_wq, &port->work);
- }
-
- return status;
-}
-
-static int reg_in_info(struct sa_db_port *port)
-{
- int ret = 0;
-
- port->in_info = ib_sa_register_inform_info(&sa_client,
- port->dev->device,
- port->port_num,
- IB_SA_SM_TRAP_GID_IN_SERVICE,
- GFP_KERNEL, notice_handler,
- port);
- if (IS_ERR(port->in_info))
- ret = PTR_ERR(port->in_info);
-
- return ret;
-}
-
-static int reg_out_info(struct sa_db_port *port)
-{
- int ret = 0;
-
- port->out_info = ib_sa_register_inform_info(&sa_client,
- port->dev->device,
- port->port_num,
- IB_SA_SM_TRAP_GID_OUT_OF_SERVICE,
- GFP_KERNEL, notice_handler,
- port);
- if (IS_ERR(port->out_info))
- ret = PTR_ERR(port->out_info);
-
- return ret;
-}
-
-static void unsubscribe_port(struct sa_db_port *port)
-{
- if (port->in_info && !IS_ERR(port->in_info))
- ib_sa_unregister_inform_info(port->in_info);
-
- if (port->out_info && !IS_ERR(port->out_info))
- ib_sa_unregister_inform_info(port->out_info);
-
- port->out_info = NULL;
- port->in_info = NULL;
-
-}
-
-static void cleanup_port(struct sa_db_port *port)
-{
- unsubscribe_port(port);
-
- clean_update_list(port);
- remove_all_attrs(&port->paths);
-}
-
-static int update_port_info(struct sa_db_port *port)
-{
- struct ib_port_attr port_attr;
- int ret;
-
- ret = ib_query_port(port->dev->device, port->port_num, &port_attr);
- if (ret)
- return ret;
-
- if (port_attr.state != IB_PORT_ACTIVE)
- return -ENODATA;
-
- port->sm_lid = port_attr.sm_lid;
- port->sm_sl = port_attr.sm_sl;
- return 0;
-}
-
-static void process_updates(struct sa_db_port *port)
-{
- struct update_info *update;
- struct ib_sa_attr_list *attr_list;
- int ret;
-
- if (!paths_per_dest || update_port_info(port)) {
- cleanup_port(port);
- goto out;
- }
-
- /* Event registration is an optimization, so ignore failures. */
- if (subscribe_inform_info) {
- if (!port->out_info) {
- ret = reg_out_info(port);
- if (!ret)
- return;
- }
-
- if (!port->in_info) {
- ret = reg_in_info(port);
- if (!ret)
- return;
- }
- } else
- unsubscribe_port(port);
-
- while (!list_empty(&port->update_list)) {
- update = list_entry(port->update_list.next,
- struct update_info, list);
-
- if (update->type == SA_UPDATE_REMOVE) {
- write_lock_irq(&rwlock);
- attr_list = find_attr_list(&port->paths,
- update->gid.raw);
- if (attr_list)
- remove_attr(&port->paths, attr_list);
- write_unlock_irq(&rwlock);
- } else {
- ret = send_query(port, update);
- if (!ret)
- return;
-
- }
- list_del(&update->list);
- kfree(update);
- }
-out:
- port->state = SA_DB_IDLE;
-}
-
-static void refresh_port_db(struct sa_db_port *port)
-{
- if (port->state == SA_DB_DESTROY)
- return;
-
- if (port->state == SA_DB_REFRESH) {
- clean_update_list(port);
- ib_cancel_mad(port->agent, port->msg);
- }
-
- add_update(port, NULL, SA_UPDATE_FULL);
-}
-
-static void refresh_dev_db(struct sa_db_device *dev)
-{
- int i;
-
- for (i = 0; i < dev->port_count; i++)
- refresh_port_db(&dev->port[i]);
-}
-
-static void refresh_db(void)
-{
- struct sa_db_device *dev;
-
- list_for_each_entry(dev, &dev_list, list)
- refresh_dev_db(dev);
-}
-
-static int do_refresh(const char *val, struct kernel_param *kp)
-{
- mutex_lock(&lock);
- refresh_db();
- mutex_unlock(&lock);
- return 0;
-}
-
-static int get_lookup_method(char *buf, struct kernel_param *kp)
-{
- return sprintf(buf,
- "%c %d round robin\n"
- "%c %d random",
- (lookup_method == SA_DB_LOOKUP_LEAST_USED) ? '*' : ' ',
- SA_DB_LOOKUP_LEAST_USED,
- (lookup_method == SA_DB_LOOKUP_RANDOM) ? '*' : ' ',
- SA_DB_LOOKUP_RANDOM);
-}
-
-static int set_lookup_method(const char *val, struct kernel_param *kp)
-{
- unsigned long method;
- int ret = 0;
-
- method = simple_strtoul(val, NULL, 0);
-
- switch (method) {
- case SA_DB_LOOKUP_LEAST_USED:
- case SA_DB_LOOKUP_RANDOM:
- lookup_method = method;
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-static int set_paths_per_dest(const char *val, struct kernel_param *kp)
-{
- int ret;
-
- mutex_lock(&lock);
- ret = param_set_ulong(val, kp);
- if (ret)
- goto out;
-
- if (paths_per_dest > SA_DB_MAX_PATHS_PER_DEST)
- paths_per_dest = SA_DB_MAX_PATHS_PER_DEST;
- refresh_db();
-out:
- mutex_unlock(&lock);
- return ret;
-}
-
-static int set_subscribe_inform_info(const char *val, struct kernel_param *kp)
-{
- int ret;
-
- ret = param_set_bool(val, kp);
- if (ret)
- return ret;
-
- return do_refresh(val, kp);
-}
-
-static void port_work_handler(struct work_struct *work)
-{
- struct sa_db_port *port;
-
- port = container_of(work, typeof(*port), work);
- mutex_lock(&lock);
- refresh_port_db(port);
- mutex_unlock(&lock);
-}
-
-static void handle_event(struct ib_event_handler *event_handler,
- struct ib_event *event)
-{
- struct sa_db_device *dev;
- struct sa_db_port *port;
-
- dev = container_of(event_handler, typeof(*dev), event_handler);
- port = &dev->port[event->element.port_num - dev->start_port];
-
- switch (event->event) {
- case IB_EVENT_PORT_ERR:
- case IB_EVENT_LID_CHANGE:
- case IB_EVENT_SM_CHANGE:
- case IB_EVENT_CLIENT_REREGISTER:
- case IB_EVENT_PKEY_CHANGE:
- case IB_EVENT_PORT_ACTIVE:
- queue_work(sa_wq, &port->work);
- break;
- default:
- break;
- }
-}
-
-static void ib_free_path_iter(struct ib_sa_attr_iter *iter)
-{
- read_unlock_irqrestore(&rwlock, iter->flags);
-}
-
-static int ib_create_path_iter(struct ib_device *device, u8 port_num,
- union ib_gid *dgid, struct ib_sa_attr_iter *iter)
-{
- struct sa_db_device *dev;
- struct sa_db_port *port;
- struct ib_sa_attr_list *list;
-
- dev = ib_get_client_data(device, &sa_db_client);
- if (!dev)
- return -ENODEV;
-
- port = &dev->port[port_num - dev->start_port];
-
- read_lock_irqsave(&rwlock, iter->flags);
- list = find_attr_list(&port->paths, dgid->raw);
- if (!list) {
- ib_free_path_iter(iter);
- return -ENODATA;
- }
-
- iter->iter = &list->iter;
- return 0;
-}
-
-static struct ib_sa_path_rec *ib_get_next_path(struct ib_sa_attr_iter *iter)
-{
- struct ib_path_rec_info *next_path;
-
- iter->iter = iter->iter->next;
- if (iter->iter) {
- next_path = container_of(iter->iter, struct ib_path_rec_info, iter);
- return &next_path->rec;
- } else
- return NULL;
-}
-
-static int cmp_rec(struct ib_sa_path_rec *src,
- struct ib_sa_path_rec *dst, ib_sa_comp_mask comp_mask)
-{
- /* DGID check already done */
- if (comp_mask & IB_SA_PATH_REC_SGID &&
- memcmp(&src->sgid, &dst->sgid, sizeof src->sgid))
- return -EINVAL;
- if (comp_mask & IB_SA_PATH_REC_DLID && src->dlid != dst->dlid)
- return -EINVAL;
- if (comp_mask & IB_SA_PATH_REC_SLID && src->slid != dst->slid)
- return -EINVAL;
- if (comp_mask & IB_SA_PATH_REC_RAW_TRAFFIC &&
- src->raw_traffic != dst->raw_traffic)
- return -EINVAL;
-
- if (comp_mask & IB_SA_PATH_REC_FLOW_LABEL &&
- src->flow_label != dst->flow_label)
- return -EINVAL;
- if (comp_mask & IB_SA_PATH_REC_HOP_LIMIT &&
- src->hop_limit != dst->hop_limit)
- return -EINVAL;
- if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS &&
- src->traffic_class != dst->traffic_class)
- return -EINVAL;
- if (comp_mask & IB_SA_PATH_REC_REVERSIBLE &&
- dst->reversible && !src->reversible)
- return -EINVAL;
- /* Numb path check already done */
- if (comp_mask & IB_SA_PATH_REC_PKEY && src->pkey != dst->pkey)
- return -EINVAL;
-
- if (comp_mask & IB_SA_PATH_REC_SL && src->sl != dst->sl)
- return -EINVAL;
-
- if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_MTU_SELECTOR,
- IB_SA_PATH_REC_MTU, dst->mtu_selector,
- src->mtu, dst->mtu))
- return -EINVAL;
- if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_RATE_SELECTOR,
- IB_SA_PATH_REC_RATE, dst->rate_selector,
- src->rate, dst->rate))
- return -EINVAL;
- if (ib_sa_check_selector(comp_mask,
- IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR,
- IB_SA_PATH_REC_PACKET_LIFE_TIME,
- dst->packet_life_time_selector,
- src->packet_life_time, dst->packet_life_time))
- return -EINVAL;
-
- return 0;
-}
-
-static struct ib_sa_path_rec *get_random_path(struct ib_sa_attr_iter *iter,
- struct ib_sa_path_rec *req_path,
- ib_sa_comp_mask comp_mask)
-{
- struct ib_sa_path_rec *path, *rand_path = NULL;
- int num, count = 0;
-
- for (path = ib_get_next_path(iter); path;
- path = ib_get_next_path(iter)) {
- if (!cmp_rec(path, req_path, comp_mask)) {
- get_random_bytes(&num, sizeof num);
- if ((num % ++count) == 0)
- rand_path = path;
- }
- }
-
- return rand_path;
-}
-
-static struct ib_sa_path_rec *get_next_path(struct ib_sa_attr_iter *iter,
- struct ib_sa_path_rec *req_path,
- ib_sa_comp_mask comp_mask)
-{
- struct ib_path_rec_info *cur_path, *next_path = NULL;
- struct ib_sa_path_rec *path;
- unsigned long lookups = ~0;
-
- for (path = ib_get_next_path(iter); path;
- path = ib_get_next_path(iter)) {
- if (!cmp_rec(path, req_path, comp_mask)) {
-
- cur_path = container_of(iter->iter, struct ib_path_rec_info,
- iter);
- if (cur_path->lookups < lookups) {
- lookups = cur_path->lookups;
- next_path = cur_path;
- }
- }
- }
-
- if (next_path) {
- next_path->lookups++;
- return &next_path->rec;
- } else
- return NULL;
-}
-
-static void report_path(struct work_struct *work)
-{
- struct sa_path_request *req;
-
- req = container_of(work, struct sa_path_request, work);
- req->callback(0, &req->path_rec, req->context);
- ib_sa_client_put(req->client);
- kfree(req);
-}
-
-/**
- * ib_sa_path_rec_get - Start a Path get query
- * @client:SA client
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:Path Record to send in query
- * @comp_mask:component mask to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when query completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
- *
- * Send a Path Record Get query to the SA to look up a path. The
- * callback function will be called when the query completes (or
- * fails); status is 0 for a successful response, -EINTR if the query
- * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
- * occurred sending the query. The resp parameter of the callback is
- * only valid if status is 0.
- *
- * If the return value of ib_sa_path_rec_get() is negative, it is an
- * error code. Otherwise it is a query ID that can be used to cancel
- * the query.
- */
-int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_path_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
-{
- struct sa_path_request *req;
- struct ib_sa_attr_iter iter;
- struct ib_sa_path_rec *path_rec;
- int ret;
-
- if (!paths_per_dest)
- goto query_sa;
-
- if (!(comp_mask & IB_SA_PATH_REC_DGID) ||
- !(comp_mask & IB_SA_PATH_REC_NUMB_PATH) || rec->numb_path != 1)
- goto query_sa;
-
- req = kmalloc(sizeof *req, gfp_mask);
- if (!req)
- goto query_sa;
-
- ret = ib_create_path_iter(device, port_num, &rec->dgid, &iter);
- if (ret)
- goto free_req;
-
- if (lookup_method == SA_DB_LOOKUP_RANDOM)
- path_rec = get_random_path(&iter, rec, comp_mask);
- else
- path_rec = get_next_path(&iter, rec, comp_mask);
-
- if (!path_rec)
- goto free_iter;
-
- memcpy(&req->path_rec, path_rec, sizeof *path_rec);
- ib_free_path_iter(&iter);
-
- INIT_WORK(&req->work, report_path);
- req->client = client;
- req->callback = callback;
- req->context = context;
-
- ib_sa_client_get(client);
- queue_work(sa_wq, &req->work);
- *sa_query = ERR_PTR(-EEXIST);
- return 0;
-
-free_iter:
- ib_free_path_iter(&iter);
-free_req:
- kfree(req);
-query_sa:
- return ib_sa_path_rec_query(client, device, port_num, rec, comp_mask,
- timeout_ms, gfp_mask, callback, context,
- sa_query);
-}
-EXPORT_SYMBOL(ib_sa_path_rec_get);
-
-static void recv_handler(struct ib_mad_agent *mad_agent,
- struct ib_mad_recv_wc *mad_recv_wc)
-{
- struct sa_db_port *port;
- struct update_info *update;
- struct ib_mad_send_buf *msg;
- enum sa_update_type type;
-
- msg = (struct ib_mad_send_buf *) (unsigned long) mad_recv_wc->wc->wr_id;
- port = msg->context[0];
- update = msg->context[1];
-
- mutex_lock(&lock);
- if (port->state == SA_DB_DESTROY ||
- update != list_entry(port->update_list.next,
- struct update_info, list)) {
- mutex_unlock(&lock);
- } else {
- type = update->type;
- mutex_unlock(&lock);
- update_path_db(mad_agent->context, mad_recv_wc, type);
- }
-
- ib_free_recv_mad(mad_recv_wc);
-}
-
-static void send_handler(struct ib_mad_agent *agent,
- struct ib_mad_send_wc *mad_send_wc)
-{
- struct ib_mad_send_buf *msg;
- struct sa_db_port *port;
- struct update_info *update;
- int ret;
-
- msg = mad_send_wc->send_buf;
- port = msg->context[0];
- update = msg->context[1];
-
- mutex_lock(&lock);
- if (port->state == SA_DB_DESTROY)
- goto unlock;
-
- if (update == list_entry(port->update_list.next,
- struct update_info, list)) {
-
- if (mad_send_wc->status == IB_WC_RESP_TIMEOUT_ERR &&
- msg->timeout_ms < SA_DB_MAX_RETRY_TIMER) {
-
- msg->timeout_ms <<= 1;
- ret = ib_post_send_mad(msg, NULL);
- if (!ret) {
- mutex_unlock(&lock);
- return;
- }
- }
- list_del(&update->list);
- kfree(update);
- }
- process_updates(port);
-unlock:
- mutex_unlock(&lock);
-
- ib_destroy_ah(msg->ah);
- ib_free_send_mad(msg);
-}
-
-static int init_port(struct sa_db_device *dev, int port_num)
-{
- struct sa_db_port *port;
- int ret;
-
- port = &dev->port[port_num - dev->start_port];
- port->dev = dev;
- port->port_num = port_num;
- INIT_WORK(&port->work, port_work_handler);
- port->paths = RB_ROOT;
- INIT_LIST_HEAD(&port->update_list);
-
- ret = ib_get_cached_gid(dev->device, port_num, 0, &port->gid);
- if (ret)
- return ret;
-
- port->agent = ib_register_mad_agent(dev->device, port_num, IB_QPT_GSI,
- NULL, IB_MGMT_RMPP_VERSION,
- send_handler, recv_handler, port);
- if (IS_ERR(port->agent))
- ret = PTR_ERR(port->agent);
-
- return ret;
-}
-
-static void destroy_port(struct sa_db_port *port)
-{
- mutex_lock(&lock);
- port->state = SA_DB_DESTROY;
- mutex_unlock(&lock);
-
- ib_unregister_mad_agent(port->agent);
- cleanup_port(port);
- flush_workqueue(sa_wq);
-}
-
-static void sa_db_add_dev(struct ib_device *device)
-{
- struct sa_db_device *dev;
- struct sa_db_port *port;
- int s, e, i, ret;
-
- if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
- return;
-
- if (device->node_type == RDMA_NODE_IB_SWITCH) {
- s = e = 0;
- } else {
- s = 1;
- e = device->phys_port_cnt;
- }
-
- dev = kzalloc(sizeof *dev + (e - s + 1) * sizeof *port, GFP_KERNEL);
- if (!dev)
- return;
-
- dev->start_port = s;
- dev->port_count = e - s + 1;
- dev->device = device;
- for (i = 0; i < dev->port_count; i++) {
- ret = init_port(dev, s + i);
- if (ret)
- goto err;
- }
-
- ib_set_client_data(device, &sa_db_client, dev);
-
- INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event);
-
- mutex_lock(&lock);
- list_add_tail(&dev->list, &dev_list);
- refresh_dev_db(dev);
- mutex_unlock(&lock);
-
- ib_register_event_handler(&dev->event_handler);
- return;
-err:
- while (i--)
- destroy_port(&dev->port[i]);
- kfree(dev);
-}
-
-static void sa_db_remove_dev(struct ib_device *device)
-{
- struct sa_db_device *dev;
- int i;
-
- dev = ib_get_client_data(device, &sa_db_client);
- if (!dev)
- return;
-
- ib_unregister_event_handler(&dev->event_handler);
- flush_workqueue(sa_wq);
-
- for (i = 0; i < dev->port_count; i++)
- destroy_port(&dev->port[i]);
-
- mutex_lock(&lock);
- list_del(&dev->list);
- mutex_unlock(&lock);
-
- kfree(dev);
-}
-
-int sa_db_init(void)
-{
- int ret;
-
- rwlock_init(&rwlock);
- sa_wq = create_singlethread_workqueue("local_sa");
- if (!sa_wq)
- return -ENOMEM;
-
- ib_sa_register_client(&sa_client);
- ret = ib_register_client(&sa_db_client);
- if (ret)
- goto err;
-
- return 0;
-
-err:
- ib_sa_unregister_client(&sa_client);
- destroy_workqueue(sa_wq);
- return ret;
-}
-
-void sa_db_cleanup(void)
-{
- ib_unregister_client(&sa_db_client);
- ib_sa_unregister_client(&sa_client);
- destroy_workqueue(sa_wq);
-}
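
One detail from the removed local_sa.c worth noting: get_random_path() chose a uniformly random matching path in a single pass using size-one reservoir sampling, replacing the candidate with probability 1/count at the count-th match. A standalone sketch of that technique, with a plain array standing in for the path list:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Pick one element uniformly at random in a single pass (reservoir, k = 1). */
static int reservoir_pick(const int *items, int n)
{
    int i, count = 0, pick = -1;

    for (i = 0; i < n; i++) {
        /* keep the i-th candidate with probability 1/(count + 1) */
        if (rand() % ++count == 0)
            pick = items[i];
    }
    return pick;
}

int main(void)
{
    int items[] = { 10, 20, 30, 40 };

    srand((unsigned)time(NULL));
    printf("%d\n", reservoir_pick(items, 4));
    return 0;
}
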
diff --git a/sys/ofed/drivers/infiniband/core/mad.c b/sys/ofed/drivers/infiniband/core/mad.c
index 64e660c..11b3ba3 100644
--- a/sys/ofed/drivers/infiniband/core/mad.c
+++ b/sys/ofed/drivers/infiniband/core/mad.c
@@ -34,6 +34,9 @@
*
*/
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
@@ -46,8 +49,8 @@ MODULE_DESCRIPTION("kernel IB MAD API");
MODULE_AUTHOR("Hal Rosenstock");
MODULE_AUTHOR("Sean Hefty");
-int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
-int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
+static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
module_param_named(send_queue_size, mad_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
@@ -59,9 +62,26 @@ static struct kmem_cache *ib_mad_cache;
static struct list_head ib_mad_port_list;
static u32 ib_mad_client_id = 0;
-/* Port list lock */
-static spinlock_t ib_mad_port_list_lock;
+/*
+ * Timeout FIFO (tf) param
+ */
+enum {
+ /* min time between 2 consecutive activations of tf workqueue */
+ MIN_BETWEEN_ACTIVATIONS_MS = 5
+};
+
+/*
+ * SA congestion control params
+ */
+enum {
+ MAX_OUTSTANDING_SA_MADS = 10,
+ MIN_TIME_FOR_SA_MAD_SEND_MS = 20,
+ MAX_SA_MADS = 10000
+};
+
+/* Port list lock */
+static DEFINE_SPINLOCK(ib_mad_port_list_lock);
/* Forward declarations */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
@@ -80,6 +100,509 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
struct ib_mad_agent_private *agent_priv);
+static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr,
+ u32 timeout_ms, u32 retries_left);
+
+
+/*
+ * Timeout FIFO functions - implement a FIFO with a per-entry timeout mechanism
+ */
+
+static void activate_timeout_handler_task(unsigned long data)
+{
+ struct to_fifo *tf;
+
+ tf = (struct to_fifo *)data;
+ del_timer(&tf->timer);
+ queue_work(tf->workq, &tf->work);
+}
+
+static unsigned long adjusted_time(unsigned long last, unsigned long next)
+{
+ unsigned long min_next;
+
+ min_next = last + msecs_to_jiffies(MIN_BETWEEN_ACTIVATIONS_MS);
+ if (time_after(min_next, next))
+ return min_next;
+
+ return next;
+}
+
+static void notify_failure(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_wc_status status)
+{
+ struct ib_mad_send_wc mad_send_wc;
+ struct ib_mad_agent_private *mad_agent_priv;
+
+ mad_send_wc.status = status;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc);
+}
+
+static inline struct sa_cc_data *
+get_cc_obj(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ return &mad_send_wr->mad_agent_priv->qp_info->port_priv->sa_cc;
+}
+
+static inline struct ib_mad_send_wr_private *tfe_to_mad(struct tf_entry *tfe)
+{
+ return container_of(tfe, struct ib_mad_send_wr_private, tf_list);
+}
+
+static void timeout_handler_task(struct work_struct *work)
+{
+ struct tf_entry *tmp1, *tmp2;
+ struct list_head *list_item, exp_lst;
+ unsigned long flags, curr_time;
+ int lst_empty;
+ struct to_fifo *tf;
+
+ tf = container_of(work, struct to_fifo, work);
+ do {
+ INIT_LIST_HEAD(&exp_lst);
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ curr_time = jiffies;
+ list_for_each(list_item, &tf->to_head) {
+ tmp1 = list_entry(list_item, struct tf_entry, to_list);
+ if (time_before(curr_time, tmp1->exp_time))
+ break;
+ list_del(&tmp1->fifo_list);
+ tf->num_items--;
+ }
+
+ /* cut list up to and including list_item->prev */
+ list_cut_position(&exp_lst, &tf->to_head, list_item->prev);
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ lst_empty = list_empty(&exp_lst);
+ list_for_each_entry_safe(tmp1, tmp2, &exp_lst, to_list) {
+ list_del(&tmp1->to_list);
+ if (tmp1->canceled) {
+ tmp1->canceled = 0;
+ notify_failure(tfe_to_mad(tmp1), IB_WC_WR_FLUSH_ERR);
+ } else {
+ notify_failure(tfe_to_mad(tmp1), IB_WC_RESP_TIMEOUT_ERR);
+ }
+ }
+ } while (!lst_empty);
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ if (!list_empty(&tf->to_head)) {
+ tmp1 = list_entry(tf->to_head.next, struct tf_entry, to_list);
+ mod_timer(&tf->timer, adjusted_time(curr_time, tmp1->exp_time));
+ }
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+}
+
+/**
+ * tf_create - creates new timeout-fifo object
+ * @fifo_size: Maximum fifo size
+ *
+ * Allocate and initialize a new timeout-fifo object.
+ */
+static struct to_fifo *tf_create(u32 fifo_size)
+{
+ struct to_fifo *tf;
+
+ tf = kzalloc(sizeof(*tf), GFP_KERNEL);
+ if (tf) {
+ tf->workq = create_singlethread_workqueue("to_fifo");
+ if (!tf->workq) {
+ kfree(tf);
+ return NULL;
+ }
+ spin_lock_init(&tf->lists_lock);
+ INIT_LIST_HEAD(&tf->to_head);
+ INIT_LIST_HEAD(&tf->fifo_head);
+ init_timer(&tf->timer);
+ INIT_WORK(&tf->work, timeout_handler_task);
+ tf->timer.data = (unsigned long) tf;
+ tf->timer.function = activate_timeout_handler_task;
+ tf->timer.expires = jiffies;
+ tf->fifo_size = fifo_size;
+ tf->stop_enqueue = 0;
+ tf->num_items = 0;
+ }
+
+ return tf;
+}
+
+/**
+ * tf_enqueue - enqueue item to timeout-fifo object
+ * @tf:timeout-fifo object
+ * @item: item to enqueue.
+ * @timeout_ms: item expiration time in ms.
+ *
+ * Enqueue item to fifo and modify expiration timer when required.
+ *
+ * Returns 0 on success and negative on failure.
+ */
+static int tf_enqueue(struct to_fifo *tf, struct tf_entry *item, u32 timeout_ms)
+{
+ struct tf_entry *tmp;
+ struct list_head *list_item;
+ unsigned long flags;
+
+ item->exp_time = jiffies + msecs_to_jiffies(timeout_ms);
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ if (tf->num_items >= tf->fifo_size || tf->stop_enqueue) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return -EBUSY;
+ }
+
+ /* Insert item to timeout list */
+ list_for_each_prev(list_item, &tf->to_head) {
+ tmp = list_entry(list_item, struct tf_entry, to_list);
+ if (time_after(item->exp_time, tmp->exp_time))
+ break;
+ }
+
+ list_add(&item->to_list, list_item);
+
+ /* Insert item to fifo list */
+ list_add_tail(&item->fifo_list, &tf->fifo_head);
+
+ tf->num_items++;
+
+ /* modify expiration timer if required */
+ if (list_item == &tf->to_head)
+ mod_timer(&tf->timer, item->exp_time);
+
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ return 0;
+}
+
+/**
+ * tf_dequeue - dequeue item from timeout-fifo object
+ * @tf:timeout-fifo object
+ * @time_left_ms: returns the time left for expiration in ms.
+ *
+ * Dequeue item from fifo and modify expiration timer when required.
+ *
+ * Returns pointer to tf_entry on success and NULL on failure.
+ */
+static struct tf_entry *tf_dequeue(struct to_fifo *tf, u32 *time_left_ms)
+{
+ unsigned long flags;
+ unsigned long time_left;
+ struct tf_entry *tmp, *tmp1;
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ if (list_empty(&tf->fifo_head)) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return NULL;
+ }
+
+ list_for_each_entry(tmp, &tf->fifo_head, fifo_list) {
+ if (!tmp->canceled)
+ break;
+ }
+
+ if (tmp->canceled) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return NULL;
+ }
+
+ /* modify timer in case the dequeued item was the next to expire */
+ if (tf->to_head.next == &tmp->to_list) {
+ if (list_is_last(&tmp->to_list, &tf->to_head)) {
+ del_timer(&tf->timer);
+ } else {
+ tmp1 = list_entry(tmp->to_list.next, struct tf_entry, to_list);
+ mod_timer(&tf->timer, tmp1->exp_time);
+ }
+ }
+ list_del(&tmp->fifo_list);
+ list_del(&tmp->to_list);
+ tf->num_items--;
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ time_left = tmp->exp_time - jiffies;
+ if ((long) time_left <= 0)
+ time_left = 0;
+ *time_left_ms = jiffies_to_msecs(time_left);
+
+ return tmp;
+}
+
+static void tf_stop_enqueue(struct to_fifo *tf)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ tf->stop_enqueue = 1;
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+}
+
+/**
+ * tf_free - free empty timeout-fifo object
+ * @tf:timeout-fifo object
+ *
+ */
+static void tf_free(struct to_fifo *tf)
+{
+ del_timer_sync(&tf->timer);
+ flush_workqueue(tf->workq);
+ destroy_workqueue(tf->workq);
+
+ kfree(tf);
+}
+
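
struct tf_entry above is embedded in ib_mad_send_wr_private, and the enclosing record is recovered via container_of() (see tfe_to_mad()). A minimal standalone illustration of that embedding pattern; the struct request type is hypothetical:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct tf_entry { struct tf_entry *next; };     /* list linkage only */

struct request {
    int id;
    struct tf_entry tf_list;    /* embedded, as in ib_mad_send_wr_private */
};

int main(void)
{
    struct request req = { .id = 42 };
    struct tf_entry *tfe = &req.tf_list;

    /* recover the enclosing request from its embedded list entry */
    printf("%d\n", container_of(tfe, struct request, tf_list)->id);
    return 0;
}
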
+/**
+ * tf_free_agent - free MADs related to specific MAD agent from timeout-fifo
+ * @tf:timeout-fifo object
+ * @mad_agent_priv: MAD agent.
+ *
+ */
+static void tf_free_agent(struct to_fifo *tf, struct ib_mad_agent_private *mad_agent_priv)
+{
+ unsigned long flags;
+ struct tf_entry *tmp, *tmp1;
+ struct list_head tmp_head;
+
+ INIT_LIST_HEAD(&tmp_head);
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ list_for_each_entry_safe(tmp, tmp1, &tf->fifo_head, fifo_list) {
+ if (tfe_to_mad(tmp)->mad_agent_priv == mad_agent_priv) {
+ list_del(&tmp->to_list);
+ list_move(&tmp->fifo_list, &tmp_head);
+ tf->num_items--;
+ }
+ }
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ list_for_each_entry_safe(tmp, tmp1, &tmp_head, fifo_list) {
+ list_del(&tmp->fifo_list);
+ notify_failure(tfe_to_mad(tmp), IB_WC_WR_FLUSH_ERR);
+ }
+}
+
+/**
+ * tf_modify_item - to modify expiration time for specific item
+ * @tf:timeout-fifo object
+ * @mad_agent_priv: MAD agent.
+ * @send_buf: the MAD to modify in queue
+ * @timeout_ms: new timeout to set.
+ *
+ * Returns 0 if item found on list and -ENXIO if not.
+ *
+ * Note: The send_buf may point to a MAD that has already been released.
+ * Therefore we must not dereference it before finding it in the list.
+ */
+static int tf_modify_item(struct to_fifo *tf,
+ struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+{
+ struct tf_entry *tmp, *item;
+ struct list_head *list_item;
+ unsigned long flags;
+ int found = 0;
+
+ spin_lock_irqsave(&tf->lists_lock, flags);
+ list_for_each_entry(item, &tf->fifo_head, fifo_list) {
+ if (tfe_to_mad(item)->mad_agent_priv == mad_agent_priv &&
+ &tfe_to_mad(item)->send_buf == send_buf) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+ return -ENXIO;
+ }
+
+ item->exp_time = jiffies + msecs_to_jiffies(timeout_ms);
+
+ if (timeout_ms) {
+ list_del(&item->to_list);
+ list_for_each_prev(list_item, &tf->to_head) {
+ tmp = list_entry(list_item, struct tf_entry, to_list);
+ if (time_after(item->exp_time, tmp->exp_time))
+ break;
+ }
+ list_add(&item->to_list, list_item);
+
+ /* modify expiration timer if required */
+ if (list_item == &tf->to_head)
+ mod_timer(&tf->timer, item->exp_time);
+ } else {
+ /*
+ * when the item is canceled (timeout_ms == 0), move it to the
+ * head of the timeout list and to the tail of the fifo list
+ */
+ item->canceled = 1;
+ list_move(&item->to_list, &tf->to_head);
+ list_move_tail(&item->fifo_list, &tf->fifo_head);
+ mod_timer(&tf->timer, item->exp_time);
+ }
+ spin_unlock_irqrestore(&tf->lists_lock, flags);
+
+ return 0;
+}
+
+/*
+ * SA congestion control functions
+ */
+
+/*
+ * Determines whether a MAD is subject to SA congestion control.
+ */
+static int is_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_hdr *mad;
+
+ mad = (struct ib_mad_hdr *)mad_send_wr->send_buf.mad;
+
+ return ((mad_send_wr->send_buf.timeout_ms) &&
+ (mad->mgmt_class == IB_MGMT_CLASS_SUBN_ADM) &&
+ ((mad->method == IB_MGMT_METHOD_GET) ||
+ (mad->method == IB_MGMT_METHOD_SET)));
+}
+
+/*
+ * Notify that an SA congestion-controlled MAD has completed,
+ * allowing the next SA MAD to be dequeued from the congestion control queue.
+ */
+static void sa_cc_mad_done(struct sa_cc_data *cc_obj)
+{
+ unsigned long flags;
+ struct tf_entry *tfe;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ u32 time_left_ms, timeout_ms, retries;
+ int ret;
+
+ do {
+ spin_lock_irqsave(&cc_obj->lock, flags);
+ tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+ if (!tfe) {
+ if (cc_obj->outstanding > 0)
+ cc_obj->outstanding--;
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ break;
+ }
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ mad_send_wr = tfe_to_mad(tfe);
+ time_left_ms += MIN_TIME_FOR_SA_MAD_SEND_MS;
+ if (time_left_ms > mad_send_wr->send_buf.timeout_ms) {
+ retries = time_left_ms / mad_send_wr->send_buf.timeout_ms - 1;
+ timeout_ms = mad_send_wr->send_buf.timeout_ms;
+ } else {
+ retries = 0;
+ timeout_ms = time_left_ms;
+ }
+ ret = send_sa_cc_mad(mad_send_wr, timeout_ms, retries);
+ if (ret) {
+ if (ret == -ENOMEM)
+ notify_failure(mad_send_wr, IB_WC_GENERAL_ERR);
+ else
+ notify_failure(mad_send_wr, IB_WC_LOC_QP_OP_ERR);
+ }
+ } while (ret);
+}
+
+/*
+ * Send SA MAD under congestion control.
+ */
+static int sa_cc_mad_send(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ unsigned long flags;
+ int ret;
+ struct sa_cc_data *cc_obj;
+
+ cc_obj = get_cc_obj(mad_send_wr);
+ spin_lock_irqsave(&cc_obj->lock, flags);
+ if (cc_obj->outstanding < MAX_OUTSTANDING_SA_MADS) {
+ cc_obj->outstanding++;
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ ret = send_sa_cc_mad(mad_send_wr, mad_send_wr->send_buf.timeout_ms,
+ mad_send_wr->retries_left);
+ if (ret)
+ sa_cc_mad_done(cc_obj);
+
+ } else {
+ int qtime = (mad_send_wr->send_buf.timeout_ms *
+ (mad_send_wr->retries_left + 1))
+ - MIN_TIME_FOR_SA_MAD_SEND_MS;
+
+ if (qtime < 0)
+ qtime = 0;
+ ret = tf_enqueue(cc_obj->tf, &mad_send_wr->tf_list, (u32)qtime);
+
+ spin_unlock_irqrestore(&cc_obj->lock, flags);
+ }
+
+ return ret;
+}
+
+/*
+ * Initialize SA congestion control.
+ */
+static int sa_cc_init(struct sa_cc_data *cc_obj)
+{
+ spin_lock_init(&cc_obj->lock);
+ cc_obj->outstanding = 0;
+ cc_obj->tf = tf_create(MAX_SA_MADS);
+ if (!cc_obj->tf)
+ return -ENOMEM;
+ return 0;
+}
+
+/*
+ * Cancel SA MADs from congestion control queue.
+ */
+static void cancel_sa_cc_mads(struct ib_mad_agent_private *mad_agent_priv)
+{
+ tf_free_agent(mad_agent_priv->qp_info->port_priv->sa_cc.tf,
+ mad_agent_priv);
+}
+
+/*
+ * Modify timeout of SA MAD on congestion control queue.
+ */
+static int modify_sa_cc_mad(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+{
+ int ret;
+ int qtime = 0;
+
+ if (timeout_ms > MIN_TIME_FOR_SA_MAD_SEND_MS)
+ qtime = timeout_ms - MIN_TIME_FOR_SA_MAD_SEND_MS;
+
+ ret = tf_modify_item(mad_agent_priv->qp_info->port_priv->sa_cc.tf,
+ mad_agent_priv, send_buf, (u32)qtime);
+ return ret;
+}
+
+static void sa_cc_destroy(struct sa_cc_data *cc_obj)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct tf_entry *tfe;
+ struct ib_mad_send_wc mad_send_wc;
+ struct ib_mad_agent_private *mad_agent_priv;
+ u32 time_left_ms;
+
+ mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
+ mad_send_wc.vendor_err = 0;
+
+ tf_stop_enqueue(cc_obj->tf);
+ tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+ while (tfe) {
+ mad_send_wr = tfe_to_mad(tfe);
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+ tfe = tf_dequeue(cc_obj->tf, &time_left_ms);
+ }
+ tf_free(cc_obj->tf);
+}
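
The admission policy above caps in-flight SA GET/SET MADs at MAX_OUTSTANDING_SA_MADS; a deferred MAD may wait at most its whole send budget minus one send slot, i.e. timeout_ms * (retries_left + 1) - MIN_TIME_FOR_SA_MAD_SEND_MS, clamped to zero. A small sketch of that budget arithmetic (function name hypothetical):

#include <stdio.h>

enum { MIN_TIME_FOR_SA_MAD_SEND_MS = 20 };

/* Queue-time budget for a deferred SA MAD, as computed in sa_cc_mad_send(). */
static unsigned int queue_budget_ms(unsigned int timeout_ms,
                                    unsigned int retries_left)
{
    long qtime = (long)timeout_ms * (retries_left + 1)
                 - MIN_TIME_FOR_SA_MAD_SEND_MS;

    return qtime < 0 ? 0 : (unsigned int)qtime;
}

int main(void)
{
    /* 100 ms per try, 2 retries left -> 300 ms total, minus one send slot */
    printf("%u\n", queue_budget_ms(100, 2));    /* prints 280 */
    return 0;
}
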
/*
* Returns a ib_mad_port_private structure or NULL for a device/port
@@ -184,15 +707,6 @@ int ib_response_mad(struct ib_mad *mad)
}
EXPORT_SYMBOL(ib_response_mad);
-static void timeout_callback(unsigned long data)
-{
- struct ib_mad_agent_private *mad_agent_priv =
- (struct ib_mad_agent_private *) data;
-
- queue_work(mad_agent_priv->qp_info->port_priv->wq,
- &mad_agent_priv->timeout_work);
-}
-
/*
* ib_register_mad_agent - Register to send/receive MADs
*/
@@ -285,6 +799,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
goto error1;
}
+ /* Verify that the requested QP is supported. For example, Ethernet
+ * devices will not have QP0. */
+ if (!port_priv->qp_info[qpn].qp) {
+ ret = ERR_PTR(-EPROTONOSUPPORT);
+ goto error1;
+ }
+
/* Allocate structures */
mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
if (!mad_agent_priv) {
@@ -300,13 +821,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
}
if (mad_reg_req) {
- reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
+ reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
if (!reg_req) {
ret = ERR_PTR(-ENOMEM);
goto error3;
}
- /* Make a copy of the MAD registration request */
- memcpy(reg_req, mad_reg_req, sizeof *reg_req);
}
/* Now, fill in the various structures */
@@ -324,9 +843,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
INIT_LIST_HEAD(&mad_agent_priv->done_list);
INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
- INIT_WORK(&mad_agent_priv->timeout_work, timeout_sends);
- setup_timer(&mad_agent_priv->timeout_timer, timeout_callback,
- (unsigned long) mad_agent_priv);
+ INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
atomic_set(&mad_agent_priv->refcount, 1);
@@ -533,8 +1050,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
*/
cancel_mads(mad_agent_priv);
port_priv = mad_agent_priv->qp_info->port_priv;
- del_timer_sync(&mad_agent_priv->timeout_timer);
- cancel_work_sync(&mad_agent_priv->timeout_work);
+ cancel_delayed_work(&mad_agent_priv->timed_work);
spin_lock_irqsave(&port_priv->reg_lock, flags);
remove_mad_reg_req(mad_agent_priv);
@@ -577,6 +1093,7 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_snoop_private *mad_snoop_priv;
+ if (!IS_ERR(mad_agent)) {
/* If the TID is zero, the agent can only snoop. */
if (mad_agent->hi_tid) {
mad_agent_priv = container_of(mad_agent,
@@ -589,6 +1106,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
agent);
unregister_mad_snoop(mad_snoop_priv);
}
+ }
+
return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -695,7 +1214,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_wc mad_wc;
struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
- if (device->node_type == RDMA_NODE_IB_SWITCH)
+ if (device->node_type == RDMA_NODE_IB_SWITCH &&
+ smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
port_num = send_wr->wr.ud.port_num;
else
port_num = mad_agent_priv->agent.port_num;
@@ -1028,12 +1548,20 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_buf.mad,
sge[0].length,
DMA_TO_DEVICE);
- mad_send_wr->header_mapping = sge[0].addr;
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
+ return -ENOMEM;
sge[1].addr = ib_dma_map_single(mad_agent->device,
ib_get_payload(mad_send_wr),
sge[1].length,
DMA_TO_DEVICE);
+
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
+ ret = -ENOMEM;
+ goto dma1_err;
+ }
+
+ mad_send_wr->header_mapping = sge[0].addr;
mad_send_wr->payload_mapping = sge[1].addr;
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -1051,14 +1579,51 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
- if (ret) {
+
+ if (!ret)
+ return 0;
+
ib_dma_unmap_single(mad_agent->device,
mad_send_wr->header_mapping,
- sge[0].length, DMA_TO_DEVICE);
+ sge[1].length, DMA_TO_DEVICE);
+dma1_err:
ib_dma_unmap_single(mad_agent->device,
mad_send_wr->payload_mapping,
- sge[1].length, DMA_TO_DEVICE);
+ sge[0].length, DMA_TO_DEVICE);
+ return ret;
+}
+
+/*
+ * Send SA MAD that passed congestion control
+ */
+static int send_sa_cc_mad(struct ib_mad_send_wr_private *mad_send_wr,
+ u32 timeout_ms, u32 retries_left)
+{
+ int ret;
+ unsigned long flags;
+ struct ib_mad_agent_private *mad_agent_priv;
+
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+ mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
+ mad_send_wr->retries_left = retries_left;
+ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
+
+ /* Reference MAD agent until send completes */
+ atomic_inc(&mad_agent_priv->refcount);
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+
+ ret = ib_send_mad(mad_send_wr);
+ if (ret < 0) {
+ /* Fail send request */
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ list_del(&mad_send_wr->agent_list);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ atomic_dec(&mad_agent_priv->refcount);
}
+
return ret;
}
@@ -1125,6 +1690,12 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
mad_send_wr->status = IB_WC_SUCCESS;
+ if (is_sa_cc_mad(mad_send_wr)) {
+ mad_send_wr->is_sa_cc_mad = 1;
+ ret = sa_cc_mad_send(mad_send_wr);
+ if (ret < 0)
+ goto error;
+ } else {
/* Reference MAD agent until send completes */
atomic_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -1147,6 +1718,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
goto error;
}
}
+ }
return 0;
error:
if (bad_send_buf)
@@ -1206,10 +1778,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
{
int i;
- for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
if ((*method)->agent[i]) {
printk(KERN_ERR PFX "Method %d already in use\n", i);
return -EINVAL;
@@ -1343,13 +1912,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
goto error3;
/* Finally, add in methods being registered */
- for (i = find_first_bit(mad_reg_req->method_mask,
- IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
(*method)->agent[i] = agent_priv;
- }
+
return 0;
error3:
@@ -1442,13 +2007,9 @@ check_in_use:
goto error4;
/* Finally, add in methods being registered */
- for (i = find_first_bit(mad_reg_req->method_mask,
- IB_MGMT_MAX_METHODS);
- i < IB_MGMT_MAX_METHODS;
- i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
- 1+i)) {
+ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
(*method)->agent[i] = agent_priv;
- }
+
return 0;
error4:
@@ -1614,6 +2175,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
mad->mad_hdr.class_version].class;
if (!class)
goto out;
+ if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
+ IB_MGMT_MAX_METHODS)
+ goto out;
method = class->method_table[convert_mgmt_class(
mad->mad_hdr.mgmt_class)];
if (method)
@@ -1856,6 +2420,26 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
}
}
+static bool generate_unmatched_resp(struct ib_mad_private *recv,
+ struct ib_mad_private *response)
+{
+ if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET ||
+ recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) {
+ memcpy(response, recv, sizeof *response);
+ response->header.recv_wc.wc = &response->header.wc;
+ response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+ response->header.recv_wc.recv_buf.grh = &response->grh;
+ response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ response->mad.mad.mad_hdr.status =
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
+ if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION;
+
+ return true;
+ } else {
+ return false;
+ }
+}
static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
struct ib_wc *wc)
{
@@ -1865,6 +2449,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
struct ib_mad_list_head *mad_list;
struct ib_mad_agent_private *mad_agent;
int port_num;
+ int ret = IB_MAD_RESULT_SUCCESS;
mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
qp_info = mad_list->mad_queue->qp_info;
@@ -1948,8 +2533,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
local:
/* Give driver "right of first refusal" on incoming MAD */
if (port_priv->device->process_mad) {
- int ret;
-
ret = port_priv->device->process_mad(port_priv->device, 0,
port_priv->port_num,
wc, &recv->grh,
@@ -1977,6 +2560,10 @@ local:
* or via recv_handler in ib_mad_complete_recv()
*/
recv = NULL;
+ } else if ((ret & IB_MAD_RESULT_SUCCESS) &&
+ generate_unmatched_resp(recv, response)) {
+ agent_send_response(&response->mad.mad, &recv->grh, wc,
+ port_priv->device, port_num, qp_info->qp->qp_num);
}
out:
@@ -1992,9 +2579,10 @@ out:
static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long delay;
if (list_empty(&mad_agent_priv->wait_list)) {
- del_timer(&mad_agent_priv->timeout_timer);
+ cancel_delayed_work(&mad_agent_priv->timed_work);
} else {
mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
struct ib_mad_send_wr_private,
@@ -2003,8 +2591,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
if (time_after(mad_agent_priv->timeout,
mad_send_wr->timeout)) {
mad_agent_priv->timeout = mad_send_wr->timeout;
- mod_timer(&mad_agent_priv->timeout_timer,
- mad_send_wr->timeout);
+ delay = mad_send_wr->timeout - jiffies;
+ if ((long)delay <= 0)
+ delay = 1;
+ mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
}
}
}
@@ -2031,14 +2622,15 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
temp_mad_send_wr->timeout))
break;
}
} else
list_item = &mad_agent_priv->wait_list;
list_add(&mad_send_wr->agent_list, list_item);
/* Reschedule a work item if we have a shorter timeout */
if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
- mod_timer(&mad_agent_priv->timeout_timer,
- mad_send_wr->timeout);
+ mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
}
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
@@ -2090,9 +2682,12 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
mad_send_wc->status = mad_send_wr->status;
if (ret == IB_RMPP_RESULT_INTERNAL)
ib_rmpp_send_handler(mad_send_wc);
- else
+ else {
+ if (mad_send_wr->is_sa_cc_mad)
+ sa_cc_mad_done(get_cc_obj(mad_send_wr));
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
mad_send_wc);
+ }
/* Release reference on agent taken when sending */
deref_mad_agent(mad_agent_priv);
@@ -2272,6 +2867,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
INIT_LIST_HEAD(&cancel_list);
+ cancel_sa_cc_mads(mad_agent_priv);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&mad_agent_priv->send_list, agent_list) {
@@ -2293,6 +2889,8 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
&cancel_list, agent_list) {
mad_send_wc.send_buf = &mad_send_wr->send_buf;
list_del(&mad_send_wr->agent_list);
+ if (mad_send_wr->is_sa_cc_mad)
+ sa_cc_mad_done(get_cc_obj(mad_send_wr));
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
atomic_dec(&mad_agent_priv->refcount);
@@ -2332,7 +2930,13 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
- if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
+ if (!mad_send_wr) {
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ if (modify_sa_cc_mad(mad_agent_priv, send_buf, timeout_ms))
+ return -EINVAL;
+ return 0;
+ }
+ if (mad_send_wr->status != IB_WC_SUCCESS) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return -EINVAL;
}
@@ -2482,10 +3086,10 @@ static void timeout_sends(struct work_struct *work)
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
- unsigned long flags;
+ unsigned long flags, delay;
mad_agent_priv = container_of(work, struct ib_mad_agent_private,
- timeout_work);
+ timed_work.work);
mad_send_wc.vendor_err = 0;
spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -2495,8 +3099,12 @@ static void timeout_sends(struct work_struct *work)
agent_list);
if (time_after(mad_send_wr->timeout, jiffies)) {
- mod_timer(&mad_agent_priv->timeout_timer,
- mad_send_wr->timeout);
+ delay = mad_send_wr->timeout - jiffies;
+ if ((long)delay <= 0)
+ delay = 1;
+ queue_delayed_work(mad_agent_priv->qp_info->
+ port_priv->wq,
+ &mad_agent_priv->timed_work, delay);
break;
}
@@ -2512,6 +3120,8 @@ static void timeout_sends(struct work_struct *work)
else
mad_send_wc.status = mad_send_wr->status;
mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ if (mad_send_wr->is_sa_cc_mad)
+ sa_cc_mad_done(get_cc_obj(mad_send_wr));
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
@@ -2572,6 +3182,14 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
sizeof *mad_priv -
sizeof mad_priv->header,
DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
+ sg_list.addr))) {
+ ret = -ENOMEM;
+ kmem_cache_free(ib_mad_cache, mad_priv);
+ printk(KERN_ERR PFX "ib_dma_map_single failed\n");
+ break;
+ }
+
mad_priv->header.mapping = sg_list.addr;
recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
@@ -2645,6 +3263,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
int ret, i;
struct ib_qp_attr *attr;
struct ib_qp *qp;
+ u16 pkey_index = 0;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
@@ -2652,6 +3271,11 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
return -ENOMEM;
}
+ ret = ib_find_pkey(port_priv->device, port_priv->port_num,
+ 0xFFFF, &pkey_index);
+ if (ret)
+ pkey_index = 0;
+
for (i = 0; i < IB_MAD_QPS_CORE; i++) {
qp = port_priv->qp_info[i].qp;
if (!qp)
@@ -2662,7 +3286,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
* one is needed for the Reset to Init transition
*/
attr->qp_state = IB_QPS_INIT;
- attr->pkey_index = 0;
+ attr->pkey_index = pkey_index;
attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
ret = ib_modify_qp(qp, attr, IB_QP_STATE |
IB_QP_PKEY_INDEX | IB_QP_QKEY);
@@ -2858,6 +3482,10 @@ static int ib_mad_port_open(struct ib_device *device,
}
INIT_WORK(&port_priv->work, ib_mad_completion_handler);
+ if (sa_cc_init(&port_priv->sa_cc))
+ goto error9;
+
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
@@ -2865,17 +3493,19 @@ static int ib_mad_port_open(struct ib_device *device,
ret = ib_mad_port_start(port_priv);
if (ret) {
printk(KERN_ERR PFX "Couldn't start port\n");
- goto error9;
+ goto error10;
}
return 0;
-error9:
+error10:
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_del_init(&port_priv->port_list);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
destroy_workqueue(port_priv->wq);
+error9:
+ sa_cc_destroy(&port_priv->sa_cc);
error8:
destroy_mad_qp(&port_priv->qp_info[1]);
error7:
@@ -2915,6 +3545,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
destroy_workqueue(port_priv->wq);
+ sa_cc_destroy(&port_priv->sa_cc);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dereg_mr(port_priv->mr);
@@ -2983,6 +3614,9 @@ static void ib_mad_remove_device(struct ib_device *device)
{
int i, num_ports, cur_port;
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
if (device->node_type == RDMA_NODE_IB_SWITCH) {
num_ports = 1;
cur_port = 0;
@@ -3017,8 +3651,6 @@ static int __init ib_mad_init_module(void)
mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
- spin_lock_init(&ib_mad_port_list_lock);
-
ib_mad_cache = kmem_cache_create("ib_mad",
sizeof(struct ib_mad_private),
0,
@@ -3054,4 +3686,3 @@ static void __exit ib_mad_cleanup_module(void)
module_init(ib_mad_init_module);
module_exit(ib_mad_cleanup_module);
-
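
For readers tracing the new error handling in the ib_send_mad() hunks above, here is a minimal userspace sketch of the same two-stage map/unwind idiom. map_buf(), unmap_buf() and post_send() are hypothetical stand-ins for ib_dma_map_single(), ib_dma_unmap_single() and ib_post_send(); only the goto-based unwinding structure is taken from the patch, and post_send() is forced to fail to exercise it.

/*
 * Sketch: map two buffers, unwind exactly what was mapped on each
 * failure path. A failed second mapping releases only the first;
 * a failed post releases both by falling through the label.
 */
#include <stdlib.h>

static void *map_buf(size_t len)           { return malloc(len); }
static void unmap_buf(void *p, size_t len) { (void)len; free(p); }
static int post_send(void *h, void *p)     { (void)h; (void)p; return -1; } /* forced failure for the demo */

static int send_two_part(size_t hdr_len, size_t payload_len)
{
	void *hdr, *payload;
	int ret;

	hdr = map_buf(hdr_len);
	if (hdr == NULL)
		return -1;

	payload = map_buf(payload_len);
	if (payload == NULL) {
		ret = -1;
		goto hdr_err;			/* only the header is mapped here */
	}

	ret = post_send(hdr, payload);
	if (ret == 0)
		return 0;			/* completion path unmaps later */

	unmap_buf(payload, payload_len);	/* post failed: drop the payload... */
hdr_err:
	unmap_buf(hdr, hdr_len);		/* ...then fall into the header unmap */
	return ret;
}

int main(void)
{
	return send_two_part(24, 232) == 0 ? 0 : 1;
}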
diff --git a/sys/ofed/drivers/infiniband/core/mad_priv.h b/sys/ofed/drivers/infiniband/core/mad_priv.h
index 8b4df0a..e2cd0ac 100644
--- a/sys/ofed/drivers/infiniband/core/mad_priv.h
+++ b/sys/ofed/drivers/infiniband/core/mad_priv.h
@@ -102,8 +102,7 @@ struct ib_mad_agent_private {
struct list_head send_list;
struct list_head wait_list;
struct list_head done_list;
- struct work_struct timeout_work;
- struct timer_list timeout_timer;
+ struct delayed_work timed_work;
unsigned long timeout;
struct list_head local_list;
struct work_struct local_work;
@@ -122,6 +121,14 @@ struct ib_mad_snoop_private {
struct completion comp;
};
+/* Structure for timeout-fifo entry */
+struct tf_entry {
+ unsigned long exp_time; /* entry expiration time */
+ struct list_head fifo_list; /* to keep entries in fifo order */
+ struct list_head to_list; /* to keep entries in timeout order */
+ int canceled; /* indicates whether entry is canceled */
+};
+
struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
@@ -147,6 +154,10 @@ struct ib_mad_send_wr_private {
int seg_num;
int newwin;
int pad;
+
+ /* SA congestion controlled MAD */
+ int is_sa_cc_mad;
+ struct tf_entry tf_list;
};
struct ib_mad_local_private {
@@ -198,6 +209,25 @@ struct ib_mad_qp_info {
atomic_t snoop_count;
};
+struct to_fifo {
+ struct list_head to_head;
+ struct list_head fifo_head;
+ spinlock_t lists_lock;
+ struct timer_list timer;
+ struct work_struct work;
+ u32 fifo_size;
+ u32 num_items;
+ int stop_enqueue;
+ struct workqueue_struct *workq;
+};
+
+/* SA congestion control data */
+struct sa_cc_data {
+ spinlock_t lock;
+ unsigned long outstanding;
+ struct to_fifo *tf;
+};
+
struct ib_mad_port_private {
struct list_head port_list;
struct ib_device *device;
@@ -212,6 +242,7 @@ struct ib_mad_port_private {
struct workqueue_struct *wq;
struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
+ struct sa_cc_data sa_cc;
};
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
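
The tf_entry structure added above keeps each congestion-controlled MAD on two lists at once: one in arrival (FIFO) order and one sorted by expiration time, so the code can dequeue in submission order while still finding the next timeout cheaply. Below is a minimal userspace sketch of that dual-ordered insert, with hand-rolled lists standing in for the kernel list_head machinery; everything beyond tf_entry's three fields is illustrative.

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_INIT(n) { &(n), &(n) }

static void list_insert_before(struct list_head *pos, struct list_head *item)
{
	item->prev = pos->prev;
	item->next = pos;
	pos->prev->next = item;
	pos->prev = item;
}

struct tf_entry {
	unsigned long exp_time;      /* entry expiration time */
	struct list_head fifo_list;  /* kept in arrival order */
	struct list_head to_list;    /* kept in timeout order */
};

static struct list_head fifo_head = LIST_INIT(fifo_head);
static struct list_head to_head = LIST_INIT(to_head);

static void tf_enqueue(struct tf_entry *e)
{
	struct list_head *pos;

	/* FIFO order: always append at the tail. */
	list_insert_before(&fifo_head, &e->fifo_list);

	/* Timeout order: walk until the first later expiration. */
	for (pos = to_head.next; pos != &to_head; pos = pos->next) {
		struct tf_entry *cur = (struct tf_entry *)
		    ((char *)pos - offsetof(struct tf_entry, to_list));
		if (cur->exp_time > e->exp_time)
			break;
	}
	list_insert_before(pos, &e->to_list);
}

int main(void)
{
	struct tf_entry a = { 30 }, b = { 10 };

	tf_enqueue(&a);	/* arrives first, expires later */
	tf_enqueue(&b);	/* arrives second, expires first */

	printf("fifo head is a: %d\n", fifo_head.next == &a.fifo_list);
	printf("timeout head is b: %d\n", to_head.next == &b.to_list);
	return 0;
}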
diff --git a/sys/ofed/drivers/infiniband/core/mad_rmpp.c b/sys/ofed/drivers/infiniband/core/mad_rmpp.c
index 4e0f282..f37878c 100644
--- a/sys/ofed/drivers/infiniband/core/mad_rmpp.c
+++ b/sys/ofed/drivers/infiniband/core/mad_rmpp.c
@@ -31,6 +31,8 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+
#include "mad_priv.h"
#include "mad_rmpp.h"
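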
diff --git a/sys/ofed/drivers/infiniband/core/multicast.c b/sys/ofed/drivers/infiniband/core/multicast.c
index f8d7ef8..ef595b2 100644
--- a/sys/ofed/drivers/infiniband/core/multicast.c
+++ b/sys/ofed/drivers/infiniband/core/multicast.c
@@ -34,12 +34,27 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/random.h>
+#include <linux/moduleparam.h>
+#include <linux/rbtree.h>
#include <rdma/ib_cache.h>
#include "sa.h"
+static int mcast_leave_retries = 3;
+
+/*static const struct kernel_param_ops retry_ops = {
+ .set = param_set_int,
+ .get = param_get_int,
+};
+
+module_param_cb(mcast_leave_retries, &retry_ops, &mcast_leave_retries, 0644);
+MODULE_PARM_DESC(mcast_leave_retries, "Number of retries for multicast leave "
+ "requests before giving up (default: 3)");
+*/
static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);
@@ -250,6 +265,34 @@ static u8 get_leave_state(struct mcast_group *group)
return leave_state & group->rec.join_state;
}
+static int check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 selector, u8 src_value, u8 dst_value)
+{
+ int err;
+
+ if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+ return 0;
+
+ switch (selector) {
+ case IB_SA_GT:
+ err = (src_value <= dst_value);
+ break;
+ case IB_SA_LT:
+ err = (src_value >= dst_value);
+ break;
+ case IB_SA_EQ:
+ err = (src_value != dst_value);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
static int cmp_rec(struct ib_sa_mcmember_rec *src,
struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
{
@@ -262,7 +305,7 @@ static int cmp_rec(struct ib_sa_mcmember_rec *src,
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
return -EINVAL;
- if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
src->mtu, dst->mtu))
return -EINVAL;
@@ -271,11 +314,11 @@ static int cmp_rec(struct ib_sa_mcmember_rec *src,
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
return -EINVAL;
- if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
src->rate, dst->rate))
return -EINVAL;
- if (ib_sa_check_selector(comp_mask,
+ if (check_selector(comp_mask,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
dst->packet_life_time_selector,
@@ -517,11 +560,15 @@ static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
{
struct mcast_group *group = context;
- if (status && (group->retries > 0) &&
+ if (status && group->retries > 0 &&
!send_leave(group, group->leave_state))
group->retries--;
- else
+ else {
+ if (status && group->retries <= 0)
+ printk(KERN_WARNING "reached max retry count. "
+ "status=%d. Giving up\n", status);
mcast_work_handler(&group->work);
+ }
}
static struct mcast_group *acquire_group(struct mcast_port *port,
@@ -544,7 +591,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
if (!group)
return NULL;
- group->retries = 3;
+ group->retries = mcast_leave_retries;
group->port = port;
group->rec.mgid = *mgid;
group->pkey_index = MCAST_INVALID_PKEY_INDEX;
@@ -754,7 +801,6 @@ static void mcast_event_handler(struct ib_event_handler *handler,
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
- case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
break;
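
The check_selector() helper copied into multicast.c above encodes the SA component-mask convention: a selector comparison is enforced only when both the selector bit and the value bit are present in the mask, and a nonzero return means the source record violates the requested relation. A standalone sketch of the same rule follows; the mask bits and SEL_* values are demo constants, not the real IB_SA_* definitions.

#include <stdio.h>

typedef unsigned long long comp_mask_t;

enum { SEL_GT, SEL_LT, SEL_EQ };

#define MASK_MTU_SELECTOR (1ULL << 0)	/* demo bit assignments */
#define MASK_MTU          (1ULL << 1)

static int check_selector(comp_mask_t comp_mask, comp_mask_t selector_mask,
			  comp_mask_t value_mask, unsigned char selector,
			  unsigned char src, unsigned char dst)
{
	if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
		return 0;		/* caller did not ask for this check */

	switch (selector) {
	case SEL_GT: return src <= dst;	/* violated unless src > dst */
	case SEL_LT: return src >= dst;	/* violated unless src < dst */
	case SEL_EQ: return src != dst;	/* violated unless src == dst */
	default:     return 0;		/* unknown selector: accept */
	}
}

int main(void)
{
	comp_mask_t mask = MASK_MTU_SELECTOR | MASK_MTU;

	/* source MTU 4 vs "greater than 2": no violation -> 0 */
	printf("%d\n", check_selector(mask, MASK_MTU_SELECTOR, MASK_MTU,
	    SEL_GT, 4, 2));
	/* source MTU 2 vs "greater than 4": violation -> 1 */
	printf("%d\n", check_selector(mask, MASK_MTU_SELECTOR, MASK_MTU,
	    SEL_GT, 2, 4));
	return 0;
}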
diff --git a/sys/ofed/drivers/infiniband/core/notice.c b/sys/ofed/drivers/infiniband/core/notice.c
deleted file mode 100644
index ca91d96d..0000000
--- a/sys/ofed/drivers/infiniband/core/notice.c
+++ /dev/null
@@ -1,749 +0,0 @@
-/*
- * Copyright (c) 2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/completion.h>
-#include <linux/dma-mapping.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/bitops.h>
-#include <linux/random.h>
-
-#include "sa.h"
-
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
-MODULE_LICENSE("Dual BSD/GPL");
-
-static void inform_add_one(struct ib_device *device);
-static void inform_remove_one(struct ib_device *device);
-
-static struct ib_client inform_client = {
- .name = "ib_notice",
- .add = inform_add_one,
- .remove = inform_remove_one
-};
-
-static struct ib_sa_client sa_client;
-static struct workqueue_struct *inform_wq;
-
-struct inform_device;
-
-struct inform_port {
- struct inform_device *dev;
- spinlock_t lock;
- struct rb_root table;
- atomic_t refcount;
- struct completion comp;
- u8 port_num;
-};
-
-struct inform_device {
- struct ib_device *device;
- struct ib_event_handler event_handler;
- int start_port;
- int end_port;
- struct inform_port port[0];
-};
-
-enum inform_state {
- INFORM_IDLE,
- INFORM_REGISTERING,
- INFORM_MEMBER,
- INFORM_BUSY,
- INFORM_ERROR
-};
-
-struct inform_member;
-
-struct inform_group {
- u16 trap_number;
- struct rb_node node;
- struct inform_port *port;
- spinlock_t lock;
- struct work_struct work;
- struct list_head pending_list;
- struct list_head active_list;
- struct list_head notice_list;
- struct inform_member *last_join;
- int members;
- enum inform_state join_state; /* State relative to SA */
- atomic_t refcount;
- enum inform_state state;
- struct ib_sa_query *query;
- int query_id;
-};
-
-struct inform_member {
- struct ib_inform_info info;
- struct ib_sa_client *client;
- struct inform_group *group;
- struct list_head list;
- enum inform_state state;
- atomic_t refcount;
- struct completion comp;
-};
-
-struct inform_notice {
- struct list_head list;
- struct ib_sa_notice notice;
-};
-
-static void reg_handler(int status, struct ib_sa_inform *inform,
- void *context);
-static void unreg_handler(int status, struct ib_sa_inform *inform,
- void *context);
-
-static struct inform_group *inform_find(struct inform_port *port,
- u16 trap_number)
-{
- struct rb_node *node = port->table.rb_node;
- struct inform_group *group;
-
- while (node) {
- group = rb_entry(node, struct inform_group, node);
- if (trap_number < group->trap_number)
- node = node->rb_left;
- else if (trap_number > group->trap_number)
- node = node->rb_right;
- else
- return group;
- }
- return NULL;
-}
-
-static struct inform_group *inform_insert(struct inform_port *port,
- struct inform_group *group)
-{
- struct rb_node **link = &port->table.rb_node;
- struct rb_node *parent = NULL;
- struct inform_group *cur_group;
-
- while (*link) {
- parent = *link;
- cur_group = rb_entry(parent, struct inform_group, node);
- if (group->trap_number < cur_group->trap_number)
- link = &(*link)->rb_left;
- else if (group->trap_number > cur_group->trap_number)
- link = &(*link)->rb_right;
- else
- return cur_group;
- }
- rb_link_node(&group->node, parent, link);
- rb_insert_color(&group->node, &port->table);
- return NULL;
-}
-
-static void deref_port(struct inform_port *port)
-{
- if (atomic_dec_and_test(&port->refcount))
- complete(&port->comp);
-}
-
-static void release_group(struct inform_group *group)
-{
- struct inform_port *port = group->port;
- unsigned long flags;
-
- spin_lock_irqsave(&port->lock, flags);
- if (atomic_dec_and_test(&group->refcount)) {
- rb_erase(&group->node, &port->table);
- spin_unlock_irqrestore(&port->lock, flags);
- kfree(group);
- deref_port(port);
- } else
- spin_unlock_irqrestore(&port->lock, flags);
-}
-
-static void deref_member(struct inform_member *member)
-{
- if (atomic_dec_and_test(&member->refcount))
- complete(&member->comp);
-}
-
-static void queue_reg(struct inform_member *member)
-{
- struct inform_group *group = member->group;
- unsigned long flags;
-
- spin_lock_irqsave(&group->lock, flags);
- list_add(&member->list, &group->pending_list);
- if (group->state == INFORM_IDLE) {
- group->state = INFORM_BUSY;
- atomic_inc(&group->refcount);
- queue_work(inform_wq, &group->work);
- }
- spin_unlock_irqrestore(&group->lock, flags);
-}
-
-static int send_reg(struct inform_group *group, struct inform_member *member)
-{
- struct inform_port *port = group->port;
- struct ib_sa_inform inform;
- int ret;
-
- memset(&inform, 0, sizeof inform);
- inform.lid_range_begin = cpu_to_be16(0xFFFF);
- inform.is_generic = 1;
- inform.subscribe = 1;
- inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
- inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
- inform.trap.generic.resp_time = 19;
- inform.trap.generic.producer_type =
- cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
-
- group->last_join = member;
- ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
- port->port_num, &inform, 3000, GFP_KERNEL,
- reg_handler, group,&group->query);
- if (ret >= 0) {
- group->query_id = ret;
- ret = 0;
- }
- return ret;
-}
-
-static int send_unreg(struct inform_group *group)
-{
- struct inform_port *port = group->port;
- struct ib_sa_inform inform;
- int ret;
-
- memset(&inform, 0, sizeof inform);
- inform.lid_range_begin = cpu_to_be16(0xFFFF);
- inform.is_generic = 1;
- inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
- inform.trap.generic.trap_num = cpu_to_be16(group->trap_number);
- inform.trap.generic.qpn = IB_QP1;
- inform.trap.generic.resp_time = 19;
- inform.trap.generic.producer_type =
- cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
-
- ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
- port->port_num, &inform, 3000, GFP_KERNEL,
- unreg_handler, group, &group->query);
- if (ret >= 0) {
- group->query_id = ret;
- ret = 0;
- }
- return ret;
-}
-
-static void join_group(struct inform_group *group, struct inform_member *member)
-{
- member->state = INFORM_MEMBER;
- group->members++;
- list_move(&member->list, &group->active_list);
-}
-
-static int fail_join(struct inform_group *group, struct inform_member *member,
- int status)
-{
- spin_lock_irq(&group->lock);
- list_del_init(&member->list);
- spin_unlock_irq(&group->lock);
- return member->info.callback(status, &member->info, NULL);
-}
-
-static void process_group_error(struct inform_group *group)
-{
- struct inform_member *member;
- int ret;
-
- spin_lock_irq(&group->lock);
- while (!list_empty(&group->active_list)) {
- member = list_entry(group->active_list.next,
- struct inform_member, list);
- atomic_inc(&member->refcount);
- list_del_init(&member->list);
- group->members--;
- member->state = INFORM_ERROR;
- spin_unlock_irq(&group->lock);
-
- ret = member->info.callback(-ENETRESET, &member->info, NULL);
- deref_member(member);
- if (ret)
- ib_sa_unregister_inform_info(&member->info);
- spin_lock_irq(&group->lock);
- }
-
- group->join_state = INFORM_IDLE;
- group->state = INFORM_BUSY;
- spin_unlock_irq(&group->lock);
-}
-
-/*
- * Report a notice to all active subscribers. We use a temporary list to
- * handle unsubscription requests while the notice is being reported, which
- * avoids holding the group lock while in the user's callback.
- */
-static void process_notice(struct inform_group *group,
- struct inform_notice *info_notice)
-{
- struct inform_member *member;
- struct list_head list;
- int ret;
-
- INIT_LIST_HEAD(&list);
-
- spin_lock_irq(&group->lock);
- list_splice_init(&group->active_list, &list);
- while (!list_empty(&list)) {
-
- member = list_entry(list.next, struct inform_member, list);
- atomic_inc(&member->refcount);
- list_move(&member->list, &group->active_list);
- spin_unlock_irq(&group->lock);
-
- ret = member->info.callback(0, &member->info,
- &info_notice->notice);
- deref_member(member);
- if (ret)
- ib_sa_unregister_inform_info(&member->info);
- spin_lock_irq(&group->lock);
- }
- spin_unlock_irq(&group->lock);
-}
-
-static void inform_work_handler(struct work_struct *work)
-{
- struct inform_group *group;
- struct inform_member *member;
- struct ib_inform_info *info;
- struct inform_notice *info_notice;
- int status, ret;
-
- group = container_of(work, typeof(*group), work);
-retest:
- spin_lock_irq(&group->lock);
- while (!list_empty(&group->pending_list) ||
- !list_empty(&group->notice_list) ||
- (group->state == INFORM_ERROR)) {
-
- if (group->state == INFORM_ERROR) {
- spin_unlock_irq(&group->lock);
- process_group_error(group);
- goto retest;
- }
-
- if (!list_empty(&group->notice_list)) {
- info_notice = list_entry(group->notice_list.next,
- struct inform_notice, list);
- list_del(&info_notice->list);
- spin_unlock_irq(&group->lock);
- process_notice(group, info_notice);
- kfree(info_notice);
- goto retest;
- }
-
- member = list_entry(group->pending_list.next,
- struct inform_member, list);
- info = &member->info;
- atomic_inc(&member->refcount);
-
- if (group->join_state == INFORM_MEMBER) {
- join_group(group, member);
- spin_unlock_irq(&group->lock);
- ret = info->callback(0, info, NULL);
- } else {
- spin_unlock_irq(&group->lock);
- status = send_reg(group, member);
- if (!status) {
- deref_member(member);
- return;
- }
- ret = fail_join(group, member, status);
- }
-
- deref_member(member);
- if (ret)
- ib_sa_unregister_inform_info(&member->info);
- spin_lock_irq(&group->lock);
- }
-
- if (!group->members && (group->join_state == INFORM_MEMBER)) {
- group->join_state = INFORM_IDLE;
- spin_unlock_irq(&group->lock);
- if (send_unreg(group))
- goto retest;
- } else {
- group->state = INFORM_IDLE;
- spin_unlock_irq(&group->lock);
- release_group(group);
- }
-}
-
-/*
- * Fail a join request if it is still active - at the head of the pending queue.
- */
-static void process_join_error(struct inform_group *group, int status)
-{
- struct inform_member *member;
- int ret;
-
- spin_lock_irq(&group->lock);
- member = list_entry(group->pending_list.next,
- struct inform_member, list);
- if (group->last_join == member) {
- atomic_inc(&member->refcount);
- list_del_init(&member->list);
- spin_unlock_irq(&group->lock);
- ret = member->info.callback(status, &member->info, NULL);
- deref_member(member);
- if (ret)
- ib_sa_unregister_inform_info(&member->info);
- } else
- spin_unlock_irq(&group->lock);
-}
-
-static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
-{
- struct inform_group *group = context;
-
- if (status)
- process_join_error(group, status);
- else
- group->join_state = INFORM_MEMBER;
-
- inform_work_handler(&group->work);
-}
-
-static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
-{
- struct inform_group *group = context;
-
- inform_work_handler(&group->work);
-}
-
-int notice_dispatch(struct ib_device *device, u8 port_num,
- struct ib_sa_notice *notice)
-{
- struct inform_device *dev;
- struct inform_port *port;
- struct inform_group *group;
- struct inform_notice *info_notice;
-
- dev = ib_get_client_data(device, &inform_client);
- if (!dev)
- return 0; /* No one to give notice to. */
-
- port = &dev->port[port_num - dev->start_port];
- spin_lock_irq(&port->lock);
- group = inform_find(port, __be16_to_cpu(notice->trap.
- generic.trap_num));
- if (!group) {
- spin_unlock_irq(&port->lock);
- return 0;
- }
-
- atomic_inc(&group->refcount);
- spin_unlock_irq(&port->lock);
-
- info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL);
- if (!info_notice) {
- release_group(group);
- return -ENOMEM;
- }
-
- info_notice->notice = *notice;
-
- spin_lock_irq(&group->lock);
- list_add(&info_notice->list, &group->notice_list);
- if (group->state == INFORM_IDLE) {
- group->state = INFORM_BUSY;
- spin_unlock_irq(&group->lock);
- inform_work_handler(&group->work);
- } else {
- spin_unlock_irq(&group->lock);
- release_group(group);
- }
-
- return 0;
-}
-
-static struct inform_group *acquire_group(struct inform_port *port,
- u16 trap_number, gfp_t gfp_mask)
-{
- struct inform_group *group, *cur_group;
- unsigned long flags;
-
- spin_lock_irqsave(&port->lock, flags);
- group = inform_find(port, trap_number);
- if (group)
- goto found;
- spin_unlock_irqrestore(&port->lock, flags);
-
- group = kzalloc(sizeof *group, gfp_mask);
- if (!group)
- return NULL;
-
- group->port = port;
- group->trap_number = trap_number;
- INIT_LIST_HEAD(&group->pending_list);
- INIT_LIST_HEAD(&group->active_list);
- INIT_LIST_HEAD(&group->notice_list);
- INIT_WORK(&group->work, inform_work_handler);
- spin_lock_init(&group->lock);
-
- spin_lock_irqsave(&port->lock, flags);
- cur_group = inform_insert(port, group);
- if (cur_group) {
- kfree(group);
- group = cur_group;
- } else
- atomic_inc(&port->refcount);
-found:
- atomic_inc(&group->refcount);
- spin_unlock_irqrestore(&port->lock, flags);
- return group;
-}
-
-/*
- * We serialize all join requests to a single group to make our lives much
- * easier. Otherwise, two users could try to join the same group
- * simultaneously, with different configurations, one could leave while the
- * join is in progress, etc., which makes locking around error recovery
- * difficult.
- */
-struct ib_inform_info *
-ib_sa_register_inform_info(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- u16 trap_number, gfp_t gfp_mask,
- int (*callback)(int status,
- struct ib_inform_info *info,
- struct ib_sa_notice *notice),
- void *context)
-{
- struct inform_device *dev;
- struct inform_member *member;
- struct ib_inform_info *info;
- int ret;
-
- dev = ib_get_client_data(device, &inform_client);
- if (!dev)
- return ERR_PTR(-ENODEV);
-
- member = kzalloc(sizeof *member, gfp_mask);
- if (!member)
- return ERR_PTR(-ENOMEM);
-
- ib_sa_client_get(client);
- member->client = client;
- member->info.trap_number = trap_number;
- member->info.callback = callback;
- member->info.context = context;
- init_completion(&member->comp);
- atomic_set(&member->refcount, 1);
- member->state = INFORM_REGISTERING;
-
- member->group = acquire_group(&dev->port[port_num - dev->start_port],
- trap_number, gfp_mask);
- if (!member->group) {
- ret = -ENOMEM;
- goto err;
- }
-
- /*
- * The user will get the info structure in their callback. They
- * could then free the info structure before we can return from
- * this routine. So we save the pointer to return before queuing
- * any callback.
- */
- info = &member->info;
- queue_reg(member);
- return info;
-
-err:
- ib_sa_client_put(member->client);
- kfree(member);
- return ERR_PTR(ret);
-}
-EXPORT_SYMBOL(ib_sa_register_inform_info);
-
-void ib_sa_unregister_inform_info(struct ib_inform_info *info)
-{
- struct inform_member *member;
- struct inform_group *group;
-
- member = container_of(info, struct inform_member, info);
- group = member->group;
-
- spin_lock_irq(&group->lock);
- if (member->state == INFORM_MEMBER)
- group->members--;
-
- list_del_init(&member->list);
-
- if (group->state == INFORM_IDLE) {
- group->state = INFORM_BUSY;
- spin_unlock_irq(&group->lock);
- /* Continue to hold reference on group until callback */
- queue_work(inform_wq, &group->work);
- } else {
- spin_unlock_irq(&group->lock);
- release_group(group);
- }
-
- deref_member(member);
- wait_for_completion(&member->comp);
- ib_sa_client_put(member->client);
- kfree(member);
-}
-EXPORT_SYMBOL(ib_sa_unregister_inform_info);
-
-static void inform_groups_lost(struct inform_port *port)
-{
- struct inform_group *group;
- struct rb_node *node;
- unsigned long flags;
-
- spin_lock_irqsave(&port->lock, flags);
- for (node = rb_first(&port->table); node; node = rb_next(node)) {
- group = rb_entry(node, struct inform_group, node);
- spin_lock(&group->lock);
- if (group->state == INFORM_IDLE) {
- atomic_inc(&group->refcount);
- queue_work(inform_wq, &group->work);
- }
- group->state = INFORM_ERROR;
- spin_unlock(&group->lock);
- }
- spin_unlock_irqrestore(&port->lock, flags);
-}
-
-static void inform_event_handler(struct ib_event_handler *handler,
- struct ib_event *event)
-{
- struct inform_device *dev;
-
- dev = container_of(handler, struct inform_device, event_handler);
-
- switch (event->event) {
- case IB_EVENT_PORT_ERR:
- case IB_EVENT_LID_CHANGE:
- case IB_EVENT_SM_CHANGE:
- case IB_EVENT_CLIENT_REREGISTER:
- inform_groups_lost(&dev->port[event->element.port_num -
- dev->start_port]);
- break;
- default:
- break;
- }
-}
-
-static void inform_add_one(struct ib_device *device)
-{
- struct inform_device *dev;
- struct inform_port *port;
- int i;
-
- if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
- return;
-
- dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
- GFP_KERNEL);
- if (!dev)
- return;
-
- if (device->node_type == RDMA_NODE_IB_SWITCH)
- dev->start_port = dev->end_port = 0;
- else {
- dev->start_port = 1;
- dev->end_port = device->phys_port_cnt;
- }
-
- for (i = 0; i <= dev->end_port - dev->start_port; i++) {
- port = &dev->port[i];
- port->dev = dev;
- port->port_num = dev->start_port + i;
- spin_lock_init(&port->lock);
- port->table = RB_ROOT;
- init_completion(&port->comp);
- atomic_set(&port->refcount, 1);
- }
-
- dev->device = device;
- ib_set_client_data(device, &inform_client, dev);
-
- INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler);
- ib_register_event_handler(&dev->event_handler);
-}
-
-static void inform_remove_one(struct ib_device *device)
-{
- struct inform_device *dev;
- struct inform_port *port;
- int i;
-
- dev = ib_get_client_data(device, &inform_client);
- if (!dev)
- return;
-
- ib_unregister_event_handler(&dev->event_handler);
- flush_workqueue(inform_wq);
-
- for (i = 0; i <= dev->end_port - dev->start_port; i++) {
- port = &dev->port[i];
- deref_port(port);
- wait_for_completion(&port->comp);
- }
-
- kfree(dev);
-}
-
-int notice_init(void)
-{
- int ret;
-
- inform_wq = create_singlethread_workqueue("ib_inform");
- if (!inform_wq)
- return -ENOMEM;
-
- ib_sa_register_client(&sa_client);
-
- ret = ib_register_client(&inform_client);
- if (ret)
- goto err;
- return 0;
-
-err:
- ib_sa_unregister_client(&sa_client);
- destroy_workqueue(inform_wq);
- return ret;
-}
-
-void notice_cleanup(void)
-{
- ib_unregister_client(&inform_client);
- ib_sa_unregister_client(&sa_client);
- destroy_workqueue(inform_wq);
-}
diff --git a/sys/ofed/drivers/infiniband/core/packer.c b/sys/ofed/drivers/infiniband/core/packer.c
index 019bd4b..9f42595 100644
--- a/sys/ofed/drivers/infiniband/core/packer.c
+++ b/sys/ofed/drivers/infiniband/core/packer.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/module.h>
#include <linux/string.h>
#include <rdma/ib_pack.h>
diff --git a/sys/ofed/drivers/infiniband/core/peer_mem.c b/sys/ofed/drivers/infiniband/core/peer_mem.c
new file mode 100644
index 0000000..cd716a4
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/core/peer_mem.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_peer_mem.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
+static DEFINE_MUTEX(peer_memory_mutex);
+static LIST_HEAD(peer_memory_list);
+
+static int num_registered_peers;
+
+/* This code uses sysfs, which is not supported on FreeBSD.
+ * It will be moved to sysctl in the future. */
+
+#if 0
+static struct kobject *peers_kobj;
+static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj);
+static ssize_t version_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj);
+
+ if (ib_peer_client) {
+ sprintf(buf, "%s\n", ib_peer_client->peer_mem->version);
+ return strlen(buf);
+ }
+ /* not found - nothing is returned */
+ return 0;
+}
+
+static ssize_t num_alloc_mrs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj);
+
+ if (ib_peer_client) {
+ sprintf(buf, "%lu\n", ib_peer_client->stats.num_alloc_mrs);
+ return strlen(buf);
+ }
+ /* not found - nothing is returned */
+ return 0;
+}
+
+static ssize_t num_reg_pages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj);
+
+ if (ib_peer_client) {
+ sprintf(buf, "%lu\n", ib_peer_client->stats.num_reg_pages);
+ return strlen(buf);
+ }
+ /* not found - nothing is returned */
+ return 0;
+}
+
+static ssize_t num_dereg_pages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj);
+
+ if (ib_peer_client) {
+ sprintf(buf, "%lu\n", ib_peer_client->stats.num_dereg_pages);
+ return strlen(buf);
+ }
+ /* not found - nothing is returned */
+ return 0;
+}
+
+static ssize_t num_free_callbacks_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj);
+
+ if (ib_peer_client) {
+ sprintf(buf, "%lu\n", ib_peer_client->stats.num_free_callbacks);
+ return strlen(buf);
+ }
+ /* not found - nothing is returned */
+ return 0;
+}
+
+static struct kobj_attribute version_attr = __ATTR_RO(version);
+static struct kobj_attribute num_alloc_mrs = __ATTR_RO(num_alloc_mrs);
+static struct kobj_attribute num_reg_pages = __ATTR_RO(num_reg_pages);
+static struct kobj_attribute num_dereg_pages = __ATTR_RO(num_dereg_pages);
+static struct kobj_attribute num_free_callbacks = __ATTR_RO(num_free_callbacks);
+
+static struct attribute *peer_mem_attrs[] = {
+ &version_attr.attr,
+ &num_alloc_mrs.attr,
+ &num_reg_pages.attr,
+ &num_dereg_pages.attr,
+ &num_free_callbacks.attr,
+ NULL,
+};
+#endif
+
+#if 0
+static void destroy_peer_sysfs(struct ib_peer_memory_client *ib_peer_client)
+{
+ kobject_put(ib_peer_client->kobj);
+ if (!num_registered_peers)
+ kobject_put(peers_kobj);
+
+ return;
+}
+
+/* This code uses sysfs, which is not supported on FreeBSD.
+ * It will be moved to sysctl in the future. */
+
+static int create_peer_sysfs(struct ib_peer_memory_client *ib_peer_client)
+{
+ int ret;
+
+ if (!num_registered_peers) {
+ /* creating under /sys/kernel/mm */
+ peers_kobj = kobject_create_and_add("memory_peers", mm_kobj);
+ if (!peers_kobj)
+ return -ENOMEM;
+ }
+
+ ib_peer_client->peer_mem_attr_group.attrs = peer_mem_attrs;
+ /* The directory was already created explicitly to get its kernel object for further use */
+ ib_peer_client->peer_mem_attr_group.name = NULL;
+ ib_peer_client->kobj = kobject_create_and_add(ib_peer_client->peer_mem->name,
+ peers_kobj);
+
+ if (!ib_peer_client->kobj) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ /* Create the files associated with this kobject */
+ ret = sysfs_create_group(ib_peer_client->kobj,
+ &ib_peer_client->peer_mem_attr_group);
+ if (ret)
+ goto peer_free;
+
+ return 0;
+
+peer_free:
+ kobject_put(ib_peer_client->kobj);
+
+free:
+ if (!num_registered_peers)
+ kobject_put(peers_kobj);
+
+ return ret;
+}
+#endif
+
+static int ib_invalidate_peer_memory(void *reg_handle,
+ void *core_context)
+{
+ struct ib_peer_memory_client *ib_peer_client =
+ (struct ib_peer_memory_client *)reg_handle;
+ struct invalidation_ctx *invalidation_ctx;
+ struct core_ticket *core_ticket;
+ int need_unlock = 1;
+
+ mutex_lock(&ib_peer_client->lock);
+ ib_peer_client->stats.num_free_callbacks += 1;
+ core_ticket = ib_peer_search_context(ib_peer_client,
+ (unsigned long)core_context);
+ if (!core_ticket)
+ goto out;
+
+ invalidation_ctx = (struct invalidation_ctx *)core_ticket->context;
+ /* If the context is not ready yet, mark it to be invalidated */
+ if (!invalidation_ctx->func) {
+ invalidation_ctx->peer_invalidated = 1;
+ goto out;
+ }
+
+ invalidation_ctx->func(invalidation_ctx->cookie,
+ invalidation_ctx->umem, 0, 0);
+ if (invalidation_ctx->inflight_invalidation) {
+
+ /* init the completion to wait on before letting the other thread run */
+ init_completion(&invalidation_ctx->comp);
+ mutex_unlock(&ib_peer_client->lock);
+ need_unlock = 0;
+ wait_for_completion(&invalidation_ctx->comp);
+ }
+
+ kfree(invalidation_ctx);
+
+out:
+ if (need_unlock)
+ mutex_unlock(&ib_peer_client->lock);
+
+ return 0;
+}
+
+/* access to this peer client is serialized by its lock - no extra locking is needed */
+unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client,
+ void *context)
+{
+ struct core_ticket *core_ticket = kzalloc(sizeof(*core_ticket), GFP_KERNEL);
+
+ ib_peer_client->last_ticket++;
+ core_ticket->context = context;
+ core_ticket->key = ib_peer_client->last_ticket;
+
+ list_add_tail(&core_ticket->ticket_list,
+ &ib_peer_client->core_ticket_list);
+
+ return core_ticket->key;
+}
+
+int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client,
+ unsigned long key)
+{
+ struct core_ticket *core_ticket, *tmp;
+
+ list_for_each_entry_safe(core_ticket, tmp, &ib_peer_client->core_ticket_list,
+ ticket_list) {
+ if (core_ticket->key == key) {
+ list_del(&core_ticket->ticket_list);
+ kfree(core_ticket);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
+ unsigned long key)
+{
+ struct core_ticket *core_ticket, *tmp;
+ list_for_each_entry_safe(core_ticket, tmp, &ib_peer_client->core_ticket_list,
+ ticket_list) {
+ if (core_ticket->key == key)
+ return core_ticket;
+ }
+
+ return NULL;
+}
+
+
+static int ib_memory_peer_check_mandatory(struct peer_memory_client
+ *peer_client)
+{
+#define PEER_MEM_MANDATORY_FUNC(x) {\
+ offsetof(struct peer_memory_client, x), #x }
+
+ static const struct {
+ size_t offset;
+ char *name;
+ } mandatory_table[] = {
+ PEER_MEM_MANDATORY_FUNC(acquire),
+ PEER_MEM_MANDATORY_FUNC(get_pages),
+ PEER_MEM_MANDATORY_FUNC(put_pages),
+ PEER_MEM_MANDATORY_FUNC(get_page_size),
+ PEER_MEM_MANDATORY_FUNC(dma_map),
+ PEER_MEM_MANDATORY_FUNC(dma_unmap)
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
+ if (!*(void **) ((void *) peer_client + mandatory_table[i].offset)) {
+ printk(KERN_WARNING "Peer memory %s is missing mandatory function %s\n",
+ peer_client->name, mandatory_table[i].name);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+
+
+void *ib_register_peer_memory_client(struct peer_memory_client *peer_client,
+ invalidate_peer_memory *invalidate_callback)
+{
+ int ret = 0;
+ struct ib_peer_memory_client *ib_peer_client = NULL;
+
+ mutex_lock(&peer_memory_mutex);
+ if (ib_memory_peer_check_mandatory(peer_client)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ib_peer_client = kzalloc(sizeof(*ib_peer_client), GFP_KERNEL);
+ if (!ib_peer_client)
+ goto out;
+ ib_peer_client->peer_mem = peer_client;
+
+ INIT_LIST_HEAD(&ib_peer_client->core_ticket_list);
+ mutex_init(&ib_peer_client->lock);
+#ifdef __FreeBSD__
+ ib_peer_client->holdcount = 0;
+ ib_peer_client->needwakeup = 0;
+ cv_init(&ib_peer_client->peer_cv, "ibprcl");
+#else
+ ret = init_srcu_struct(&ib_peer_client->peer_srcu);
+ if (ret)
+ goto free;
+#endif
+#if 0
+ if (create_peer_sysfs(ib_peer_client))
+ goto free;
+#endif
+ *invalidate_callback = ib_invalidate_peer_memory;
+ list_add_tail(&ib_peer_client->core_peer_list, &peer_memory_list);
+ num_registered_peers++;
+ goto out;
+#if 0
+free:
+ kfree(ib_peer_client);
+ ib_peer_client = NULL;
+#endif
+out:
+ mutex_unlock(&peer_memory_mutex);
+ return ib_peer_client;
+}
+EXPORT_SYMBOL(ib_register_peer_memory_client);
+
+void ib_unregister_peer_memory_client(void *reg_handle)
+{
+ struct ib_peer_memory_client *ib_peer_client =
+ (struct ib_peer_memory_client *)reg_handle;
+
+ mutex_lock(&peer_memory_mutex);
+ /* remove from the list to prevent further use by core clients while it goes down */
+ list_del(&ib_peer_client->core_peer_list);
+#ifdef __FreeBSD__
+ while (ib_peer_client->holdcount != 0) {
+ ib_peer_client->needwakeup = 1;
+ cv_wait(&ib_peer_client->peer_cv, &peer_memory_mutex.sx);
+ }
+ cv_destroy(&ib_peer_client->peer_cv);
+#else
+ mutex_unlock(&peer_memory_mutex);
+ /* peer memory can't go down while there are active clients */
+ synchronize_srcu(&ib_peer_client->peer_srcu);
+ cleanup_srcu_struct(&ib_peer_client->peer_srcu);
+ mutex_lock(&peer_memory_mutex);
+#endif
+ num_registered_peers--;
+/* This code uses sysfs, which is not supported on FreeBSD.
+ * It will be moved to sysctl in the future. */
+#if 0
+ destroy_peer_sysfs(ib_peer_client);
+#endif
+ mutex_unlock(&peer_memory_mutex);
+
+ kfree(ib_peer_client);
+}
+EXPORT_SYMBOL(ib_unregister_peer_memory_client);
+
+/* This code uses sysfs, which is not supported on FreeBSD.
+ * It will be moved to sysctl in the future. */
+
+#if 0
+static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj)
+{
+ struct ib_peer_memory_client *ib_peer_client;
+
+ mutex_lock(&peer_memory_mutex);
+ list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
+ if (ib_peer_client->kobj == kobj)
+ goto found;
+ }
+
+ ib_peer_client = NULL;
+
+found:
+
+ mutex_unlock(&peer_memory_mutex);
+ return ib_peer_client;
+}
+#endif
+
+struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
+ size_t size, void **peer_client_context,
+ int *srcu_key)
+{
+ struct ib_peer_memory_client *ib_peer_client;
+ int ret;
+
+ mutex_lock(&peer_memory_mutex);
+ list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
+ ret = ib_peer_client->peer_mem->acquire(addr, size,
+ context->peer_mem_private_data,
+ context->peer_mem_name,
+ peer_client_context);
+ if (ret == 1)
+ goto found;
+ }
+
+ ib_peer_client = NULL;
+
+found:
+ if (ib_peer_client) {
+#ifdef __FreeBSD__
+ ib_peer_client->holdcount++;
+#else
+ *srcu_key = srcu_read_lock(&ib_peer_client->peer_srcu);
+#endif
+ }
+
+ mutex_unlock(&peer_memory_mutex);
+ return ib_peer_client;
+
+}
+EXPORT_SYMBOL(ib_get_peer_client);
+
+void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
+ void *peer_client_context,
+ int srcu_key)
+{
+
+ if (ib_peer_client->peer_mem->release)
+ ib_peer_client->peer_mem->release(peer_client_context);
+
+#ifdef __FreeBSD__
+ ib_peer_client->holdcount--;
+ if (ib_peer_client->holdcount == 0 && ib_peer_client->needwakeup) {
+ cv_signal(&ib_peer_client->peer_cv);
+ }
+#else
+ srcu_read_unlock(&ib_peer_client->peer_srcu, srcu_key);
+#endif
+ return;
+}
+EXPORT_SYMBOL(ib_put_peer_client);
+
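
The new peer_mem.c above hands out monotonically increasing tickets (ib_peer_insert_context) and later resolves or removes them by key (ib_peer_search_context / ib_peer_remove_context). The following userspace sketch models that registry with a plain singly linked list; the kernel version additionally relies on the peer client's mutex for serialization, which is omitted here, and all names are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct core_ticket {
	unsigned long key;
	void *context;
	struct core_ticket *next;
};

static struct core_ticket *ticket_list;
static unsigned long last_ticket;

static unsigned long ticket_insert(void *context)
{
	struct core_ticket *t = calloc(1, sizeof(*t));

	if (t == NULL)
		return 0;		/* 0 is never handed out as a key */
	t->key = ++last_ticket;
	t->context = context;
	t->next = ticket_list;
	ticket_list = t;
	return t->key;
}

static void *ticket_search(unsigned long key)
{
	struct core_ticket *t;

	for (t = ticket_list; t != NULL; t = t->next)
		if (t->key == key)
			return t->context;
	return NULL;
}

static int ticket_remove(unsigned long key)
{
	struct core_ticket **p;

	for (p = &ticket_list; *p != NULL; p = &(*p)->next) {
		if ((*p)->key == key) {
			struct core_ticket *victim = *p;

			*p = victim->next;
			free(victim);
			return 0;
		}
	}
	return 1;	/* not found, mirroring ib_peer_remove_context() */
}

int main(void)
{
	int dummy;
	unsigned long key = ticket_insert(&dummy);

	printf("found: %d\n", ticket_search(key) == &dummy);
	printf("removed: %d\n", ticket_remove(key) == 0);
	return 0;
}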
diff --git a/sys/ofed/drivers/infiniband/core/sa.h b/sys/ofed/drivers/infiniband/core/sa.h
index b8abdd7..b1d4bbf 100644
--- a/sys/ofed/drivers/infiniband/core/sa.h
+++ b/sys/ofed/drivers/infiniband/core/sa.h
@@ -48,29 +48,6 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
complete(&client->comp);
}
-int ib_sa_check_selector(ib_sa_comp_mask comp_mask,
- ib_sa_comp_mask selector_mask,
- ib_sa_comp_mask value_mask,
- u8 selector, u8 src_value, u8 dst_value);
-
-int ib_sa_pack_attr(void *dst, void *src, int attr_id);
-
-int ib_sa_unpack_attr(void *dst, void *src, int attr_id);
-
-int ib_sa_path_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_path_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query);
-
-int sa_db_init(void);
-void sa_db_cleanup(void);
-
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
u8 method,
@@ -86,20 +63,4 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
int mcast_init(void);
void mcast_cleanup(void);
-int ib_sa_informinfo_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_inform *rec,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_inform *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query);
-
-int notice_dispatch(struct ib_device *device, u8 port_num,
- struct ib_sa_notice *notice);
-
-int notice_init(void);
-void notice_cleanup(void);
-
#endif /* SA_H */
diff --git a/sys/ofed/drivers/infiniband/core/sa_query.c b/sys/ofed/drivers/infiniband/core/sa_query.c
index 9c6b4f7..a0c04f5 100644
--- a/sys/ofed/drivers/infiniband/core/sa_query.c
+++ b/sys/ofed/drivers/infiniband/core/sa_query.c
@@ -59,12 +59,10 @@ struct ib_sa_sm_ah {
struct ib_sa_port {
struct ib_mad_agent *agent;
- struct ib_mad_agent *notice_agent;
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
spinlock_t ah_lock;
u8 port_num;
- struct ib_device *device;
};
struct ib_sa_device {
@@ -95,14 +93,14 @@ struct ib_sa_path_query {
struct ib_sa_query sa_query;
};
-struct ib_sa_mcmember_query {
- void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
+struct ib_sa_guidinfo_query {
+ void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
-struct ib_sa_inform_query {
- void (*callback)(int, struct ib_sa_inform *, void *);
+struct ib_sa_mcmember_query {
+ void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
@@ -116,10 +114,10 @@ static struct ib_client sa_client = {
.remove = ib_sa_remove_one
};
-static spinlock_t idr_lock;
+static DEFINE_SPINLOCK(idr_lock);
static DEFINE_IDR(query_idr);
-static spinlock_t tid_lock;
+static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
@@ -354,162 +352,34 @@ static const struct ib_field service_rec_table[] = {
.size_bits = 2*64 },
};
-#define INFORM_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_inform, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_inform *) 0)->field, \
- .field_name = "sa_inform:" #field
+#define GUIDINFO_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
+ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
+ .field_name = "sa_guidinfo_rec:" #field
-static const struct ib_field inform_table[] = {
- { INFORM_FIELD(gid),
+static const struct ib_field guidinfo_rec_table[] = {
+ { GUIDINFO_REC_FIELD(lid),
.offset_words = 0,
.offset_bits = 0,
- .size_bits = 128 },
- { INFORM_FIELD(lid_range_begin),
- .offset_words = 4,
- .offset_bits = 0,
- .size_bits = 16 },
- { INFORM_FIELD(lid_range_end),
- .offset_words = 4,
- .offset_bits = 16,
- .size_bits = 16 },
- { RESERVED,
- .offset_words = 5,
- .offset_bits = 0,
.size_bits = 16 },
- { INFORM_FIELD(is_generic),
- .offset_words = 5,
+ { GUIDINFO_REC_FIELD(block_num),
+ .offset_words = 0,
.offset_bits = 16,
.size_bits = 8 },
- { INFORM_FIELD(subscribe),
- .offset_words = 5,
- .offset_bits = 24,
- .size_bits = 8 },
- { INFORM_FIELD(type),
- .offset_words = 6,
- .offset_bits = 0,
- .size_bits = 16 },
- { INFORM_FIELD(trap.generic.trap_num),
- .offset_words = 6,
- .offset_bits = 16,
- .size_bits = 16 },
- { INFORM_FIELD(trap.generic.qpn),
- .offset_words = 7,
- .offset_bits = 0,
- .size_bits = 24 },
- { RESERVED,
- .offset_words = 7,
+ { GUIDINFO_REC_FIELD(res1),
+ .offset_words = 0,
.offset_bits = 24,
- .size_bits = 3 },
- { INFORM_FIELD(trap.generic.resp_time),
- .offset_words = 7,
- .offset_bits = 27,
- .size_bits = 5 },
- { RESERVED,
- .offset_words = 8,
- .offset_bits = 0,
.size_bits = 8 },
- { INFORM_FIELD(trap.generic.producer_type),
- .offset_words = 8,
- .offset_bits = 8,
- .size_bits = 24 },
-};
-
-#define NOTICE_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_notice, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_notice *) 0)->field, \
- .field_name = "sa_notice:" #field
-
-static const struct ib_field notice_table[] = {
- { NOTICE_FIELD(is_generic),
- .offset_words = 0,
- .offset_bits = 0,
- .size_bits = 1 },
- { NOTICE_FIELD(type),
- .offset_words = 0,
- .offset_bits = 1,
- .size_bits = 7 },
- { NOTICE_FIELD(trap.generic.producer_type),
- .offset_words = 0,
- .offset_bits = 8,
- .size_bits = 24 },
- { NOTICE_FIELD(trap.generic.trap_num),
+ { GUIDINFO_REC_FIELD(res2),
.offset_words = 1,
.offset_bits = 0,
- .size_bits = 16 },
- { NOTICE_FIELD(issuer_lid),
- .offset_words = 1,
- .offset_bits = 16,
- .size_bits = 16 },
- { NOTICE_FIELD(notice_toggle),
- .offset_words = 2,
- .offset_bits = 0,
- .size_bits = 1 },
- { NOTICE_FIELD(notice_count),
- .offset_words = 2,
- .offset_bits = 1,
- .size_bits = 15 },
- { NOTICE_FIELD(data_details),
+ .size_bits = 32 },
+ { GUIDINFO_REC_FIELD(guid_info_list),
.offset_words = 2,
- .offset_bits = 16,
- .size_bits = 432 },
- { NOTICE_FIELD(issuer_gid),
- .offset_words = 16,
.offset_bits = 0,
- .size_bits = 128 },
+ .size_bits = 512 },
};
-int ib_sa_check_selector(ib_sa_comp_mask comp_mask,
- ib_sa_comp_mask selector_mask,
- ib_sa_comp_mask value_mask,
- u8 selector, u8 src_value, u8 dst_value)
-{
- int err;
-
- if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
- return 0;
-
- switch (selector) {
- case IB_SA_GT:
- err = (src_value <= dst_value);
- break;
- case IB_SA_LT:
- err = (src_value >= dst_value);
- break;
- case IB_SA_EQ:
- err = (src_value != dst_value);
- break;
- default:
- err = 0;
- break;
- }
-
- return err;
-}
-
-int ib_sa_pack_attr(void *dst, void *src, int attr_id)
-{
- switch (attr_id) {
- case IB_SA_ATTR_PATH_REC:
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
- break;
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-int ib_sa_unpack_attr(void *dst, void *src, int attr_id)
-{
- switch (attr_id) {
- case IB_SA_ATTR_PATH_REC:
- ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
- break;
- default:
- return -EINVAL;
- }
- return 0;
-}
-
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -588,7 +458,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
port->sm_ah = NULL;
spin_unlock_irqrestore(&port->ah_lock, flags);
- schedule_work(&sa_dev->port[event->element.port_num -
+ queue_work(ib_wq, &sa_dev->port[event->element.port_num -
sa_dev->start_port].update_task);
}
}
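
The handler now queues the port update on the IB-private workqueue instead of the system one; the matching change in ib_sa_remove_one() below replaces flush_scheduled_work() with flush_workqueue(ib_wq), so the flush drains the queue the work was actually put on. A minimal sketch of the pairing, assuming the update_task work item embedded in struct ib_sa_port as above:

    #include <linux/workqueue.h>
    #include <rdma/ib_verbs.h>	/* declares the shared ib_wq workqueue */

    static void demo_kick_update(struct ib_sa_port *port)
    {
    	/* queue on ib_wq, not on the system workqueue */
    	queue_work(ib_wq, &port->update_task);
    }

    static void demo_drain_updates(void)
    {
    	/* flush_scheduled_work() would miss items queued on ib_wq */
    	flush_workqueue(ib_wq);
    }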
@@ -685,6 +555,14 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
}
+ if (force_grh) {
+ memcpy(ah_attr->dmac, rec->dmac, 6);
+ ah_attr->vlan_id = rec->vlan_id;
+ } else {
+ memset(ah_attr->dmac, 0, 6);
+ ah_attr->vlan_id = 0xffff;
+ }
+
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -791,6 +669,10 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
+ rec.vlan_id = 0xffff;
+ memset(rec.dmac, 0, ETH_ALEN);
+ memset(rec.smac, 0, ETH_ALEN);
+
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
@@ -801,7 +683,33 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}
-int ib_sa_path_rec_query(struct ib_sa_client *client,
+
+/**
+ * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
+ * @device:device to send query on
+ * @port_num: port number to send query on
+ * @rec:Path Record to send in query
+ * @comp_mask:component mask to send in query
+ * @timeout_ms:time to wait for response
+ * @gfp_mask:GFP mask to use for internal allocations
+ * @callback:function called when query completes, times out or is
+ * canceled
+ * @context:opaque user context passed to callback
+ * @sa_query:query context, used to cancel query
+ *
+ * Send a Path Record Get query to the SA to look up a path. The
+ * callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query. The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_path_rec_get() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
@@ -867,6 +775,7 @@ err1:
kfree(query);
return ret;
}
+EXPORT_SYMBOL(ib_sa_path_rec_get);
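
For reference, a minimal caller sketch of the newly exported ib_sa_path_rec_get(), following the kernel-doc above. The names my_path_done/my_query_path, the completion-based wait, and the comp_mask and timeout values are illustrative assumptions; the trailing parameters follow the conventional upstream parameter list.

    #include <linux/completion.h>
    #include <linux/string.h>
    #include <rdma/ib_sa.h>

    static void my_path_done(int status, struct ib_sa_path_rec *resp, void *context)
    {
    	/* resp is valid only inside the callback and only when status == 0 */
    	if (status)
    		printk(KERN_ERR "path query failed: %d\n", status);
    	complete(context);
    }

    static int my_query_path(struct ib_sa_client *client, struct ib_device *device,
    			 u8 port_num, union ib_gid *sgid, union ib_gid *dgid,
    			 struct completion *done, struct ib_sa_query **query)
    {
    	struct ib_sa_path_rec rec;
    	int id;

    	memset(&rec, 0, sizeof rec);
    	rec.sgid = *sgid;
    	rec.dgid = *dgid;
    	rec.numb_path = 1;

    	id = ib_sa_path_rec_get(client, device, port_num, &rec,
    				IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_DGID |
    				IB_SA_PATH_REC_NUMB_PATH,
    				1000, GFP_KERNEL, my_path_done, done, query);
    	return id < 0 ? id : 0;	/* a non-negative id could cancel the query */
    }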
static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
int status,
@@ -1082,26 +991,27 @@ err1:
return ret;
}
-static void ib_sa_inform_callback(struct ib_sa_query *sa_query,
+/* Support GuidInfoRecord */
+static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
- struct ib_sa_inform_query *query =
- container_of(sa_query, struct ib_sa_inform_query, sa_query);
+ struct ib_sa_guidinfo_query *query =
+ container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
if (mad) {
- struct ib_sa_inform rec;
+ struct ib_sa_guidinfo_rec rec;
- ib_unpack(inform_table, ARRAY_SIZE(inform_table),
+ ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
mad->data, &rec);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
}
-static void ib_sa_inform_release(struct ib_sa_query *sa_query)
+static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
{
- kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query));
+ kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
}
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
@@ -1115,52 +1025,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context,
struct ib_sa_query **sa_query)
{
- // stub function -
- // called originally from mad.c under mlx4_ib_init_sriov()
- // which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c
- // which goes down to this function
-
- printk("ERROR: function should be called only in SRIOV flow!!!");
-
- return 0;
-}
-
-/**
- * ib_sa_informinfo_query - Start an InformInfo registration.
- * @client:SA client
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:Inform record to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when notice handler registration completes,
- * times out or is canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
- *
- * This function sends inform info to register with SA to receive
- * in-service notice.
- * The callback function will be called when the query completes (or
- * fails); status is 0 for a successful response, -EINTR if the query
- * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
- * occurred sending the query. The resp parameter of the callback is
- * only valid if status is 0.
- *
- * If the return value of ib_sa_inform_query() is negative, it is an
- * error code. Otherwise it is a query ID that can be used to cancel
- * the query.
- */
-int ib_sa_informinfo_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_inform *rec,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_inform *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
-{
- struct ib_sa_inform_query *query;
+ struct ib_sa_guidinfo_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
@@ -1170,6 +1035,12 @@ int ib_sa_informinfo_query(struct ib_sa_client *client,
if (!sa_dev)
return -ENODEV;
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET &&
+ method != IB_SA_METHOD_DELETE) {
+ return -EINVAL;
+ }
+
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
@@ -1190,15 +1061,18 @@ int ib_sa_informinfo_query(struct ib_sa_client *client,
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
- query->sa_query.callback = callback ? ib_sa_inform_callback : NULL;
- query->sa_query.release = ib_sa_inform_release;
- query->sa_query.port = port;
- mad->mad_hdr.method = IB_MGMT_METHOD_SET;
- mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_INFORM_INFO);
+ query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
+ query->sa_query.release = ib_sa_guidinfo_rec_release;
- ib_pack(inform_table, ARRAY_SIZE(inform_table), rec, mad->data);
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
+
+ ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
+ mad->data);
*sa_query = &query->sa_query;
+
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
@@ -1209,49 +1083,12 @@ err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
+
err1:
kfree(query);
return ret;
}
-
-static void ib_sa_notice_resp(struct ib_sa_port *port,
- struct ib_mad_recv_wc *mad_recv_wc)
-{
- struct ib_mad_send_buf *mad_buf;
- struct ib_sa_mad *mad;
- int ret;
- unsigned long flags;
-
- mad_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0,
- IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
- GFP_KERNEL);
- if (IS_ERR(mad_buf))
- return;
-
- mad = mad_buf->mad;
- memcpy(mad, mad_recv_wc->recv_buf.mad, sizeof *mad);
- mad->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP;
-
- spin_lock_irqsave(&port->ah_lock, flags);
- if (!port->sm_ah) {
- spin_unlock_irqrestore(&port->ah_lock, flags);
- ib_free_send_mad(mad_buf);
- return;
- }
- kref_get(&port->sm_ah->ref);
- mad_buf->context[0] = &port->sm_ah->ref;
- mad_buf->ah = port->sm_ah->ah;
- spin_unlock_irqrestore(&port->ah_lock, flags);
-
- ret = ib_post_send_mad(mad_buf, NULL);
- if (ret)
- goto err;
-
- return;
-err:
- kref_put(mad_buf->context[0], free_sm_ah);
- ib_free_send_mad(mad_buf);
-}
+EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1306,36 +1143,9 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
-static void notice_resp_handler(struct ib_mad_agent *agent,
- struct ib_mad_send_wc *mad_send_wc)
-{
- kref_put(mad_send_wc->send_buf->context[0], free_sm_ah);
- ib_free_send_mad(mad_send_wc->send_buf);
-}
-
-static void notice_handler(struct ib_mad_agent *mad_agent,
- struct ib_mad_recv_wc *mad_recv_wc)
-{
- struct ib_sa_port *port;
- struct ib_sa_mad *mad;
- struct ib_sa_notice notice;
-
- port = mad_agent->context;
- mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
- ib_unpack(notice_table, ARRAY_SIZE(notice_table), mad->data, &notice);
-
- if (!notice_dispatch(port->device, port->port_num, &notice))
- ib_sa_notice_resp(port, mad_recv_wc);
- ib_free_recv_mad(mad_recv_wc);
-}
-
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
- struct ib_mad_reg_req reg_req = {
- .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
- .mgmt_class_version = 2
- };
int s, e, i;
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
@@ -1372,16 +1182,6 @@ static void ib_sa_add_one(struct ib_device *device)
if (IS_ERR(sa_dev->port[i].agent))
goto err;
- sa_dev->port[i].device = device;
- set_bit(IB_MGMT_METHOD_REPORT, reg_req.method_mask);
- sa_dev->port[i].notice_agent =
- ib_register_mad_agent(device, i + s, IB_QPT_GSI,
- &reg_req, 0, notice_resp_handler,
- notice_handler, &sa_dev->port[i]);
-
- if (IS_ERR(sa_dev->port[i].notice_agent))
- goto err;
-
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
}
@@ -1396,7 +1196,7 @@ static void ib_sa_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
if (ib_register_event_handler(&sa_dev->event_handler))
- goto err;
+ goto reg_err;
for (i = 0; i <= e - s; ++i)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
@@ -1404,14 +1204,14 @@ static void ib_sa_add_one(struct ib_device *device)
return;
+reg_err:
+ ib_set_client_data(device, &sa_client, NULL);
+ i = e - s;
err:
- while (--i >= 0)
- if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
- if (!IS_ERR(sa_dev->port[i].notice_agent))
- ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
- if (!IS_ERR(sa_dev->port[i].agent))
+ for (; i >= 0; --i)
+ if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND &&
+ !IS_ERR(sa_dev->port[i].agent))
ib_unregister_mad_agent(sa_dev->port[i].agent);
- }
kfree(sa_dev);
@@ -1428,11 +1228,10 @@ static void ib_sa_remove_one(struct ib_device *device)
ib_unregister_event_handler(&sa_dev->event_handler);
- flush_scheduled_work();
+ flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
- ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
@@ -1447,9 +1246,6 @@ static int __init ib_sa_init(void)
{
int ret;
- spin_lock_init(&idr_lock);
- spin_lock_init(&tid_lock);
-
get_random_bytes(&tid, sizeof tid);
ret = ib_register_client(&sa_client);
@@ -1464,23 +1260,7 @@ static int __init ib_sa_init(void)
goto err2;
}
- ret = notice_init();
- if (ret) {
- printk(KERN_ERR "Couldn't initialize notice handling\n");
- goto err3;
- }
-
- ret = sa_db_init();
- if (ret) {
- printk(KERN_ERR "Couldn't initialize local SA\n");
- goto err4;
- }
-
return 0;
-err4:
- notice_cleanup();
-err3:
- mcast_cleanup();
err2:
ib_unregister_client(&sa_client);
err1:
@@ -1489,9 +1269,7 @@ err1:
static void __exit ib_sa_cleanup(void)
{
- sa_db_cleanup();
mcast_cleanup();
- notice_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}
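
The reworked ib_sa_guid_info_rec_query() above validates the MAD method before building the query, so callers choose between GET, SET and DELETE explicitly. A hedged caller sketch: the comp_mask names and the exact parameter order are taken from the usual upstream prototype, and the demo_* names are illustrative.

    #include <rdma/ib_mad.h>
    #include <rdma/ib_sa.h>

    static void guid_done(int status, struct ib_sa_guidinfo_rec *resp, void *context)
    {
    	printk(KERN_INFO "guidinfo query completed: %d\n", status);
    }

    static int demo_set_guid_block(struct ib_sa_client *client,
    			       struct ib_device *device, u8 port_num,
    			       struct ib_sa_guidinfo_rec *rec,
    			       struct ib_sa_query **query)
    {
    	/* any method other than GET/SET/DELETE now fails with -EINVAL */
    	return ib_sa_guid_info_rec_query(client, device, port_num, rec,
    					 IB_SA_GUIDINFO_REC_LID |
    					 IB_SA_GUIDINFO_REC_BLOCK_NUM,
    					 IB_MGMT_METHOD_SET, 1000, GFP_KERNEL,
    					 guid_done, NULL, query);
    }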
diff --git a/sys/ofed/drivers/infiniband/core/smi.c b/sys/ofed/drivers/infiniband/core/smi.c
index 8723675..5855e44 100644
--- a/sys/ofed/drivers/infiniband/core/smi.c
+++ b/sys/ofed/drivers/infiniband/core/smi.c
@@ -52,6 +52,10 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
+ /* C14-6 -- valid hop_cnt values are from 0 to 63 */
+ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+ return IB_SMI_DISCARD;
+
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 */
if (hop_cnt && hop_ptr == 0) {
@@ -133,6 +137,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
+ /* C14-6 -- valid hop_cnt values are from 0 to 63 */
+ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+ return IB_SMI_DISCARD;
+
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 -- sender should have incremented hop_ptr */
if (hop_cnt && hop_ptr == 0)
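
Both the send and receive paths now discard a directed-route SMP whose hop count is out of range before any hop_ptr state is examined. Restated as a standalone predicate (IB_SMP_MAX_PATH_HOPS is 64, matching C14-6's valid range of 0 to 63):

    #include <rdma/ib_smi.h>

    /* C14-6: valid hop_cnt values are 0..63 */
    static inline int dr_smp_hop_cnt_valid(const struct ib_smp *smp)
    {
    	return smp->hop_cnt < IB_SMP_MAX_PATH_HOPS;
    }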
diff --git a/sys/ofed/drivers/infiniband/core/sysfs.c b/sys/ofed/drivers/infiniband/core/sysfs.c
index 4cd5560..6bcbfb9 100644
--- a/sys/ofed/drivers/infiniband/core/sysfs.c
+++ b/sys/ofed/drivers/infiniband/core/sysfs.c
@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/fs.h>
+#include <linux/printk.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
@@ -105,7 +106,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
- attr.state < ARRAY_SIZE(state_name) ?
+ attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
@@ -180,19 +181,18 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
{
struct ib_port_attr attr;
char *speed = "";
- int rate;
+ int rate; /* in deci-Gb/sec */
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
- switch (attr.active_speed) {
- case 2: speed = " DDR"; break;
- case 4: speed = " QDR"; break;
- }
+ ib_active_speed_enum_to_rate(attr.active_speed,
+ &rate,
+ &speed);
- rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
+ rate *= ib_width_enum_to_int(attr.active_width);
if (rate < 0)
return -EINVAL;
@@ -229,9 +229,11 @@ static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
{
switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
- return sprintf(buf, "%s\n", "IB");
+ return sprintf(buf, "%s\n", "InfiniBand");
case IB_LINK_LAYER_ETHERNET:
return sprintf(buf, "%s\n", "Ethernet");
+ case IB_LINK_LAYER_SCIF:
+ return sprintf(buf, "%s\n", "SCIF");
default:
return sprintf(buf, "%s\n", "Unknown");
}
@@ -267,16 +269,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
container_of(attr, struct port_table_attribute, attr);
union ib_gid gid;
ssize_t ret;
- u16 *raw;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
if (ret)
return ret;
- raw = (u16 *)gid.raw;
- return sprintf(buf, "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x\n",
- htons(raw[0]), htons(raw[1]), htons(raw[2]), htons(raw[3]),
- htons(raw[4]), htons(raw[5]), htons(raw[6]), htons(raw[7]));
+ return sprintf(buf, GID_PRINT_FMT "\n", GID_PRINT_ARGS(gid.raw));
}
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
@@ -351,8 +349,8 @@ static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr,
be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
break;
case 64:
- ret = sprintf(buf, "%llu\n", (unsigned long long)
- be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
+ ret = sprintf(buf, "%llu\n",
+ (unsigned long long)be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
break;
default:
ret = 0;
@@ -536,6 +534,7 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
element->attr.attr.mode = S_IRUGO;
element->attr.show = show;
element->index = i;
+ sysfs_attr_init(&element->attr.attr);
tab_attr[i] = &element->attr.attr;
}
@@ -570,7 +569,7 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
- kobject_get(device->ports_parent),
+ device->ports_parent,
"%d", port_num);
if (ret)
goto err_put;
@@ -609,7 +608,6 @@ static int add_port(struct ib_device *device, int port_num,
}
list_add_tail(&p->kobj.entry, &device->port_list);
-
#ifdef __linux__
kobject_uevent(&p->kobj, KOBJ_ADD);
#endif
@@ -655,6 +653,7 @@ static ssize_t show_node_type(struct device *device,
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+ case RDMA_NODE_MIC: return sprintf(buf, "%d: MIC\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
@@ -716,16 +715,75 @@ static ssize_t set_node_desc(struct device *device,
return count;
}
+static ssize_t show_cmd_perf(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ return sprintf(buf, "%d\n", dev->cmd_perf);
+}
+
+static ssize_t set_cmd_perf(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+ u32 val;
+
+ if (sscanf(buf, "0x%x", &val) != 1)
+ return -EINVAL;
+
+ dev->cmd_perf = val;
+
+ return count;
+}
+
+static ssize_t show_cmd_avg(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ return sprintf(buf, "%llu\n", (unsigned long long)dev->cmd_avg);
+}
+
+static ssize_t set_cmd_avg(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ spin_lock(&dev->cmd_perf_lock);
+ dev->cmd_avg = 0;
+ dev->cmd_n = 0;
+ spin_unlock(&dev->cmd_perf_lock);
+
+ return count;
+}
+
+static ssize_t show_cmd_n(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_device *dev = container_of(device, struct ib_device, dev);
+
+ return sprintf(buf, "%d\n", dev->cmd_n);
+}
+
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
+static DEVICE_ATTR(cmd_perf, S_IRUGO | S_IWUSR, show_cmd_perf, set_cmd_perf);
+static DEVICE_ATTR(cmd_avg, S_IRUGO | S_IWUSR, show_cmd_avg, set_cmd_avg);
+static DEVICE_ATTR(cmd_n, S_IRUGO, show_cmd_n, NULL);
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
- &dev_attr_node_desc
+ &dev_attr_node_desc,
+ &dev_attr_cmd_perf,
+ &dev_attr_cmd_avg,
+ &dev_attr_cmd_n,
};
static struct class ib_class = {
@@ -851,7 +909,8 @@ static struct attribute_group iw_stats_group = {
};
int ib_device_register_sysfs(struct ib_device *device,
- int (*port_callback)(struct ib_device *, u8, struct kobject *))
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
{
struct device *class_dev = &device->dev;
int ret;
@@ -874,8 +933,7 @@ int ib_device_register_sysfs(struct ib_device *device,
goto err_unregister;
}
- device->ports_parent = kobject_create_and_add("ports",
- kobject_get(&class_dev->kobj));
+ device->ports_parent = kobject_create_and_add("ports", &class_dev->kobj);
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
@@ -919,6 +977,11 @@ err_put:
kobject_put(&class_dev->kobj);
err_unregister:
+
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
+ device_remove_file(class_dev, ib_class_attributes[i]);
+ }
+
device_unregister(class_dev);
err:
@@ -927,15 +990,16 @@ err:
void ib_device_unregister_sysfs(struct ib_device *device)
{
+ int i;
struct kobject *p, *t;
struct ib_port *port;
- int i;
+ struct device *class_dev = &device->dev;
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
- device_remove_file(&device->dev, ib_class_attributes[i]);
+ device_remove_file(class_dev, ib_class_attributes[i]);
}
list_for_each_entry_safe(p, t, &device->port_list, entry) {
@@ -960,22 +1024,3 @@ void ib_sysfs_cleanup(void)
{
class_unregister(&ib_class);
}
-
-/*int ib_sysfs_create_port_files(struct ib_device *device,
- int (*create)(struct ib_device *dev, u8 port_num,
- struct kobject *kobj))
-{
- struct kobject *p;
- struct ib_port *port;
- int ret = 0;
-
- list_for_each_entry(p, &device->port_list, entry) {
- port = container_of(p, struct ib_port, kobj);
- ret = create(device, port->port_num, &port->kobj);
- if (ret)
- break;
- }
-
- return ret;
-}
-EXPORT_SYMBOL(ib_sysfs_create_port_files);*/
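
The new cmd_perf/cmd_avg/cmd_n attributes all follow the same DEVICE_ATTR show/store pattern: recover the containing device with container_of(), format or parse through the caller-supplied page buffer, and return the byte count. A generic sketch of that pattern, with demo_dev and its field as illustrative assumptions:

    #include <linux/device.h>
    #include <linux/stat.h>

    struct demo_dev {
    	struct device dev;
    	u32 tunable;
    };

    static ssize_t show_tunable(struct device *device,
    			    struct device_attribute *attr, char *buf)
    {
    	struct demo_dev *d = container_of(device, struct demo_dev, dev);

    	return sprintf(buf, "%u\n", d->tunable);
    }

    static ssize_t set_tunable(struct device *device,
    			   struct device_attribute *attr,
    			   const char *buf, size_t count)
    {
    	struct demo_dev *d = container_of(device, struct demo_dev, dev);
    	u32 val;

    	/* reject malformed input, as set_cmd_perf() above does */
    	if (sscanf(buf, "%u", &val) != 1)
    		return -EINVAL;
    	d->tunable = val;
    	return count;
    }

    static DEVICE_ATTR(tunable, S_IRUGO | S_IWUSR, show_tunable, set_tunable);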
diff --git a/sys/ofed/drivers/infiniband/core/ucm.c b/sys/ofed/drivers/infiniband/core/ucm.c
index b912ebe..8f20e89 100644
--- a/sys/ofed/drivers/infiniband/core/ucm.c
+++ b/sys/ofed/drivers/infiniband/core/ucm.c
@@ -37,10 +37,12 @@
#include <linux/device.h>
#include <linux/err.h>
#include <linux/poll.h>
+#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
#include <linux/string.h>
#include <asm/uaccess.h>
@@ -396,7 +398,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
struct ib_ucm_event_get cmd;
struct ib_ucm_event *uevent;
int result = 0;
- DEFINE_WAIT(wait);
if (out_len < sizeof(struct ib_ucm_event_resp))
return -ENOSPC;
@@ -1123,7 +1124,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
+ if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
return -EINVAL;
if (hdr.in + sizeof(hdr) > len)
@@ -1163,7 +1164,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
{
struct ib_ucm_file *file;
- file = kzalloc(sizeof(*file), GFP_KERNEL);
+ file = kmalloc(sizeof(*file), GFP_KERNEL);
if (!file)
return -ENOMEM;
@@ -1177,7 +1178,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
file->filp = filp;
file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev);
- return 0;
+ return nonseekable_open(inode, filp);
}
static int ib_ucm_close(struct inode *inode, struct file *filp)
@@ -1212,7 +1213,10 @@ static void ib_ucm_release_dev(struct device *dev)
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
cdev_del(&ucm_dev->cdev);
+ if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
clear_bit(ucm_dev->devnum, dev_map);
+ else
+ clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
kfree(ucm_dev);
}
@@ -1222,6 +1226,7 @@ static const struct file_operations ucm_fops = {
.release = ib_ucm_close,
.write = ib_ucm_write,
.poll = ib_ucm_poll,
+ .llseek = no_llseek,
};
static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
@@ -1234,8 +1239,32 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
+static int find_overflow_devnum(void)
+{
+ int ret;
+
+ if (!overflow_maj) {
+ ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
+ "infiniband_cm");
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
+ return ret;
+ }
+ }
+
+ ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
+ if (ret >= IB_UCM_MAX_DEVICES)
+ return -1;
+
+ return ret;
+}
+
static void ib_ucm_add_one(struct ib_device *device)
{
+ int devnum;
+ dev_t base;
struct ib_ucm_device *ucm_dev;
if (!device->alloc_ucontext ||
@@ -1248,16 +1277,25 @@ static void ib_ucm_add_one(struct ib_device *device)
ucm_dev->ib_dev = device;
- ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
+ devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+ if (devnum >= IB_UCM_MAX_DEVICES) {
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
goto err;
- set_bit(ucm_dev->devnum, dev_map);
+ ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ ucm_dev->devnum = devnum;
+ base = devnum + IB_UCM_BASE_DEV;
+ set_bit(devnum, dev_map);
+ }
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+ if (cdev_add(&ucm_dev->cdev, base, 1))
goto err;
ucm_dev->dev.class = &cm_class;
@@ -1278,7 +1316,10 @@ err_dev:
device_unregister(&ucm_dev->dev);
err_cdev:
cdev_del(&ucm_dev->cdev);
- clear_bit(ucm_dev->devnum, dev_map);
+ if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
err:
kfree(ucm_dev);
return;
@@ -1298,6 +1339,7 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att
{
return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
}
+
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static int __init ib_ucm_init(void)
@@ -1337,6 +1379,8 @@ static void __exit ib_ucm_cleanup(void)
ib_unregister_client(&ucm_client);
class_remove_file(&cm_class, &class_attr_abi_version);
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
idr_destroy(&ctx_id_table);
}
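
The device-number handling above doubles the usable space: the first IB_UCM_MAX_DEVICES minors come from the statically reserved region and are tracked in dev_map, while any further devices spill into a chrdev region allocated on demand and tracked in overflow_map. The allocate/release pairing, condensed into a sketch against the declarations above:

    static int demo_alloc_devnum(void)
    {
    	int n = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);

    	if (n < IB_UCM_MAX_DEVICES) {
    		set_bit(n, dev_map);
    		return n;
    	}
    	n = find_overflow_devnum();	/* registers overflow_maj on first use */
    	if (n < 0)
    		return -1;
    	set_bit(n, overflow_map);
    	return n + IB_UCM_MAX_DEVICES;	/* offset marks it as an overflow minor */
    }

    static void demo_free_devnum(int devnum)
    {
    	if (devnum < IB_UCM_MAX_DEVICES)
    		clear_bit(devnum, dev_map);
    	else
    		clear_bit(devnum - IB_UCM_MAX_DEVICES, overflow_map);
    }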
diff --git a/sys/ofed/drivers/infiniband/core/ucma.c b/sys/ofed/drivers/infiniband/core/ucma.c
index 23cbf7b..5f73b40 100644
--- a/sys/ofed/drivers/infiniband/core/ucma.c
+++ b/sys/ofed/drivers/infiniband/core/ucma.c
@@ -34,10 +34,13 @@
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
+#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
@@ -48,9 +51,7 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");
-enum {
- UCMA_MAX_BACKLOG = 1024
-};
+static unsigned int max_backlog = 1024;
struct ucma_file {
struct mutex mut;
@@ -253,17 +254,17 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
if (!uevent)
return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
+ mutex_lock(&ctx->file->mut);
uevent->cm_id = cm_id;
ucma_set_event_context(ctx, event, uevent);
uevent->resp.event = event->event;
uevent->resp.status = event->status;
- if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
+ if (cm_id->qp_type == IB_QPT_UD)
ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
else
ucma_copy_conn_event(&uevent->resp.param.conn,
&event->param.conn);
- mutex_lock(&ctx->file->mut);
if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
if (!ctx->backlog) {
ret = -ENOMEM;
@@ -298,7 +299,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
int ret = 0;
- DEFINE_WAIT(wait);
if (out_len < sizeof uevent->resp)
return -ENOSPC;
@@ -332,6 +332,7 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
ctx->cm_id = uevent->cm_id;
ctx->cm_id->context = ctx;
uevent->resp.id = ctx->id;
+ ctx->cm_id->ucontext = ctx;
}
if (copy_to_user((void __user *)(unsigned long)cmd.response,
@@ -350,13 +351,31 @@ done:
return ret;
}
-static ssize_t ucma_create_id(struct ucma_file *file,
- const char __user *inbuf,
+static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
+{
+ switch (cmd->ps) {
+ case RDMA_PS_TCP:
+ *qp_type = IB_QPT_RC;
+ return 0;
+ case RDMA_PS_UDP:
+ case RDMA_PS_IPOIB:
+ *qp_type = IB_QPT_UD;
+ return 0;
+ case RDMA_PS_IB:
+ *qp_type = cmd->qp_type;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_create_id cmd;
struct rdma_ucm_create_id_resp resp;
struct ucma_context *ctx;
+ enum ib_qp_type qp_type;
int ret;
if (out_len < sizeof(resp))
@@ -365,6 +384,10 @@ static ssize_t ucma_create_id(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ ret = ucma_get_qp_type(&cmd, &qp_type);
+ if (ret)
+ return ret;
+
mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
mutex_unlock(&file->mut);
@@ -372,11 +395,12 @@ static ssize_t ucma_create_id(struct ucma_file *file,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps);
+ ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(ctx->cm_id)) {
ret = PTR_ERR(ctx->cm_id);
goto err1;
}
+ ctx->cm_id->ucontext = ctx;
resp.id = ctx->id;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
@@ -409,24 +433,6 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
mutex_unlock(&mut);
}
-static void ucma_cleanup_events(struct ucma_context *ctx)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx != ctx)
- continue;
-
- list_del(&uevent->list);
-
- /* clear incoming connections. */
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
-
- kfree(uevent);
- }
-}
-
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
@@ -440,9 +446,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
}
}
+/*
+ * We cannot hold file->mut when calling rdma_destroy_id() or we can
+ * deadlock. We also acquire file->mut in ucma_event_handler(), and
+ * rdma_destroy_id() will wait until all callbacks have completed.
+ */
static int ucma_free_ctx(struct ucma_context *ctx)
{
int events_reported;
+ struct ucma_event *uevent, *tmp;
+ LIST_HEAD(list);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
@@ -451,10 +464,20 @@ static int ucma_free_ctx(struct ucma_context *ctx)
/* Cleanup events not yet reported to the user. */
mutex_lock(&ctx->file->mut);
- ucma_cleanup_events(ctx);
+ list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &list);
+ }
list_del(&ctx->list);
mutex_unlock(&ctx->file->mut);
+ list_for_each_entry_safe(uevent, tmp, &list, list) {
+ list_del(&uevent->list);
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ rdma_destroy_id(uevent->cm_id);
+ kfree(uevent);
+ }
+
events_reported = ctx->events_reported;
kfree(ctx);
return events_reported;
@@ -586,24 +609,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
- struct rdma_dev_addr *dev_addr;
- struct net_device *dev;
- u16 vid = 0;
resp->num_paths = route->num_paths;
switch (route->num_paths) {
case 0:
- dev_addr = &route->addr.dev_addr;
- dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (dev) {
- vid = rdma_vlan_dev_vlan_id(dev);
- dev_put(dev);
- }
-
- iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
- dev_addr->dst_dev_addr, vid);
- iboe_addr_get_sgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].sgid);
+ rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
+ (union ib_gid *)&resp->ib_route[0].dgid);
+ rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
+ (union ib_gid *)&resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(0xffff);
break;
case 2:
@@ -619,6 +632,16 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
}
}
+static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
+ struct rdma_route *route)
+{
+ struct rdma_dev_addr *dev_addr;
+
+ dev_addr = &route->addr.dev_addr;
+ rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
+ rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
+}
+
static ssize_t ucma_query_route(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -653,8 +676,10 @@ static ssize_t ucma_query_route(struct ucma_file *file,
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
resp.port_num = ctx->cm_id->port_num;
- if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) {
- switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ switch (rdma_port_get_link_layer(ctx->cm_id->device,
+ ctx->cm_id->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
ucma_copy_ib_route(&resp, &ctx->cm_id->route);
break;
@@ -664,6 +689,12 @@ static ssize_t ucma_query_route(struct ucma_file *file,
default:
break;
}
+ break;
+ case RDMA_TRANSPORT_IWARP:
+ ucma_copy_iw_route(&resp, &ctx->cm_id->route);
+ break;
+ default:
+ break;
}
out:
@@ -727,8 +758,8 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ?
- cmd.backlog : UCMA_MAX_BACKLOG;
+ ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
+ cmd.backlog : max_backlog;
ret = rdma_listen(ctx->cm_id, ctx->backlog);
ucma_put_ctx(ctx);
return ret;
@@ -750,9 +781,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
return PTR_ERR(ctx);
if (cmd.conn_param.valid) {
- ctx->uid = cmd.uid;
ucma_copy_conn_param(&conn_param, &cmd.conn_param);
+ mutex_lock(&file->mut);
ret = rdma_accept(ctx->cm_id, &conn_param);
+ if (!ret)
+ ctx->uid = cmd.uid;
+ mutex_unlock(&file->mut);
} else
ret = rdma_accept(ctx->cm_id, NULL);
@@ -848,6 +882,20 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
}
rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
break;
+ case RDMA_OPTION_ID_REUSEADDR:
+ if (optlen != sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
+ break;
+ case RDMA_OPTION_ID_AFONLY:
+ if (optlen != sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
+ break;
default:
ret = -ENOSYS;
}
@@ -887,12 +935,22 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
void *optval, size_t optlen)
{
- int ret;
+ int ret = 0;
switch (optname) {
case RDMA_OPTION_IB_PATH:
ret = ucma_set_ib_path(ctx, optval, optlen);
break;
+
+ case RDMA_OPTION_IB_APM:
+ if (optlen != sizeof(u8)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (*(u8 *)optval)
+ ret = rdma_enable_apm(ctx->cm_id, RDMA_ALT_PATH_BEST);
+ break;
+
default:
ret = -ENOSYS;
}
@@ -937,20 +995,21 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
optval = kmalloc(cmd.optlen, GFP_KERNEL);
if (!optval) {
ret = -ENOMEM;
- goto out1;
+ goto err_ucma_put_ctx;
}
- if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
+ if (copy_from_user(optval, (void __user *)(unsigned long)cmd.optval,
cmd.optlen)) {
ret = -EFAULT;
- goto out2;
+ goto err_kfree;
}
ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
cmd.optlen);
-out2:
+
+err_kfree:
kfree(optval);
-out1:
+err_ucma_put_ctx:
ucma_put_ctx(ctx);
return ret;
}
@@ -1121,7 +1180,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
struct ucma_context *ctx;
- struct file *filp;
+ struct fd f;
struct ucma_file *cur_file;
int ret = 0;
@@ -1129,12 +1188,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
return -EFAULT;
/* Get current fd to protect against it being closed */
- filp = fget(cmd.fd);
- if (!filp)
+ f = fdget(cmd.fd);
+ if (!f.file)
return -ENOENT;
/* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(filp->private_data, cmd.id);
+ ctx = ucma_get_ctx(f.file->private_data, cmd.id);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto file_put;
@@ -1168,7 +1227,7 @@ response:
ucma_put_ctx(ctx);
file_put:
- fput(filp);
+ fdput(f);
return ret;
}
@@ -1209,7 +1268,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
+ if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
return -EINVAL;
if (hdr.in + sizeof(hdr) > len)
@@ -1261,7 +1320,8 @@ static int ucma_open(struct inode *inode, struct file *filp)
filp->private_data = file;
file->filp = filp;
- return 0;
+
+ return nonseekable_open(inode, filp);
}
static int ucma_close(struct inode *inode, struct file *filp)
@@ -1291,11 +1351,14 @@ static const struct file_operations ucma_fops = {
.release = ucma_close,
.write = ucma_write,
.poll = ucma_poll,
+ .llseek = no_llseek,
};
static struct miscdevice ucma_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "rdma_cm",
+ .nodename = "infiniband/rdma_cm",
+ .mode = 0666,
.fops = &ucma_fops,
};
@@ -1318,10 +1381,11 @@ static int __init ucma_init(void)
ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
if (ret) {
printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
- goto err;
+ goto err1;
}
+
return 0;
-err:
+err1:
misc_deregister(&ucma_misc);
return ret;
}
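
The ucma_free_ctx() rework above is an instance of a common lock-ordering pattern: entries are detached onto a private list while the mutex is held, and the destructor, which may itself sleep or take the mutex through a callback, runs only after the lock is dropped. A generic sketch of that pattern, with all demo_* names illustrative:

    #include <linux/list.h>
    #include <linux/mutex.h>

    struct demo_event {
    	struct list_head list;
    	void *owner;
    };

    static void demo_flush_owner(struct mutex *mut, struct list_head *event_list,
    			     void *owner, void (*destroy)(struct demo_event *))
    {
    	struct demo_event *ev, *tmp;
    	LIST_HEAD(local);

    	mutex_lock(mut);
    	list_for_each_entry_safe(ev, tmp, event_list, list)
    		if (ev->owner == owner)
    			list_move_tail(&ev->list, &local);
    	mutex_unlock(mut);

    	/* no locks held here, so destroy() may sleep or re-enter the mutex */
    	list_for_each_entry_safe(ev, tmp, &local, list) {
    		list_del(&ev->list);
    		destroy(ev);
    	}
    }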
diff --git a/sys/ofed/drivers/infiniband/core/ud_header.c b/sys/ofed/drivers/infiniband/core/ud_header.c
index 09fc1ff..051d3bd 100644
--- a/sys/ofed/drivers/infiniband/core/ud_header.c
+++ b/sys/ofed/drivers/infiniband/core/ud_header.c
@@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/string.h>
+#include <linux/module.h>
#include <linux/if_ether.h>
#include <rdma/ib_pack.h>
@@ -230,32 +231,28 @@ void ib_ud_header_init(int payload_bytes,
int immediate_present,
struct ib_ud_header *header)
{
- u16 packet_length = 0;
-
memset(header, 0, sizeof *header);
if (lrh_present) {
+ u16 packet_length = 0;
+
header->lrh.link_version = 0;
header->lrh.link_next_header =
grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
- packet_length = IB_LRH_BYTES;
+ packet_length = (IB_LRH_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ (grh_present ? IB_GRH_BYTES : 0) +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) / 4; /* round up */
+ header->lrh.packet_length = cpu_to_be16(packet_length);
}
- if (eth_present) {
- if (vlan_present) {
+ if (vlan_present)
header->eth.type = cpu_to_be16(ETH_P_8021Q);
- packet_length += IB_VLAN_BYTES;
- }
- packet_length += IB_ETH_BYTES;
- }
-
- packet_length += IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes +
- 4 + /* ICRC */
- 3; /* round up */
- packet_length /= 4;
if (grh_present) {
- packet_length += IB_GRH_BYTES / 4;
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
@@ -266,9 +263,6 @@ void ib_ud_header_init(int payload_bytes,
header->grh.next_header = 0x1b;
}
- if (lrh_present)
- header->lrh.packet_length = cpu_to_be16(packet_length);
-
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
else
@@ -285,36 +279,6 @@ void ib_ud_header_init(int payload_bytes,
EXPORT_SYMBOL(ib_ud_header_init);
/**
- * ib_lrh_header_pack - Pack LRH header struct into wire format
- * @lrh:unpacked LRH header struct
- * @buf:Buffer to pack into
- *
- * ib_lrh_header_pack() packs the LRH header structure @lrh into
- * wire format in the buffer @buf.
- */
-int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf)
-{
- ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf);
- return 0;
-}
-EXPORT_SYMBOL(ib_lrh_header_pack);
-
-/**
- * ib_lrh_header_unpack - Unpack LRH structure from wire format
- * @lrh:unpacked LRH header struct
- * @buf:Buffer to pack into
- *
- * ib_lrh_header_unpack() unpacks the LRH header structure from
- * wire format (in buf) into @lrh.
- */
-int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh)
-{
- ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh);
- return 0;
-}
-EXPORT_SYMBOL(ib_lrh_header_unpack);
-
-/**
* ib_ud_header_pack - Pack UD header struct into wire format
* @header:UD header struct
* @buf:Buffer to pack into
@@ -337,14 +301,11 @@ int ib_ud_header_pack(struct ib_ud_header *header,
&header->eth, buf + len);
len += IB_ETH_BYTES;
}
-
-
if (header->vlan_present) {
ib_pack(vlan_table, ARRAY_SIZE(vlan_table),
&header->vlan, buf + len);
len += IB_VLAN_BYTES;
}
-
if (header->grh_present) {
ib_pack(grh_table, ARRAY_SIZE(grh_table),
&header->grh, buf + len);
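
The ib_ud_header_init() change above folds the whole LRH packet-length computation into the lrh_present branch. Isolated as a helper, with the byte counts taken from <rdma/ib_pack.h> (IB_LRH_BYTES = 8, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8, IB_GRH_BYTES = 40):

    #include <rdma/ib_pack.h>

    static u16 demo_lrh_packet_length(int payload_bytes, int grh_present)
    {
    	/* every 4-byte word from the start of the LRH through the ICRC;
    	 * the variant CRC is excluded */
    	return (IB_LRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES +
    		(grh_present ? IB_GRH_BYTES : 0) +
    		payload_bytes + 4 /* ICRC */ + 3) / 4;	/* round up */
    }

For a zero-length payload without a GRH this yields (8 + 12 + 8 + 4 + 3) / 4 = 8 words, i.e. a 32-byte packet.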
diff --git a/sys/ofed/drivers/infiniband/core/umem.c b/sys/ofed/drivers/infiniband/core/umem.c
index 7695a21..cdd2e67 100644
--- a/sys/ofed/drivers/infiniband/core/umem.c
+++ b/sys/ofed/drivers/infiniband/core/umem.c
@@ -35,109 +35,168 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
-#ifdef __linux__
-#include <linux/hugetlb.h>
-#endif
#include <linux/dma-attrs.h>
-
+#include <linux/slab.h>
+#include <linux/module.h>
#include <sys/priv.h>
-#include <sys/resource.h>
#include <sys/resourcevar.h>
-
-#include <vm/vm.h>
-#include <vm/vm_map.h>
-#include <vm/vm_object.h>
#include <vm/vm_pageout.h>
-
+#include <vm/vm_map.h>
#include "uverbs.h"
-static int allow_weak_ordering;
-module_param(allow_weak_ordering, bool, 0444);
-MODULE_PARM_DESC(allow_weak_ordering, "Allow weak ordering for data registered memory");
+#define IB_UMEM_MAX_PAGE_CHUNK (PAGE_SIZE / sizeof (struct page *))
-#define IB_UMEM_MAX_PAGE_CHUNK \
- ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
- ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
- (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
-
-#ifdef __ia64__
-extern int dma_map_sg_hp_wa;
+static int allow_weak_ordering;
+module_param_named(weak_ordering, allow_weak_ordering, int, 0444);
+MODULE_PARM_DESC(weak_ordering, "Allow weak ordering for data registered memory");
-static int dma_map_sg_ia64(struct ib_device *ibdev,
- struct scatterlist *sg,
- int nents,
- enum dma_data_direction dir)
+static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
+ struct ib_umem *umem, unsigned long addr,
+ int dmasync, int invalidation_supported)
{
- int i, rc, j, lents = 0;
- struct device *dev;
-
- if (!dma_map_sg_hp_wa)
- return ib_dma_map_sg(ibdev, sg, nents, dir);
+ int ret;
+ const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
+ struct invalidation_ctx *invalidation_ctx = NULL;
- dev = ibdev->dma_device;
- for (i = 0; i < nents; ++i) {
- rc = dma_map_sg(dev, sg + i, 1, dir);
- if (rc <= 0) {
- for (j = 0; j < i; ++j)
- dma_unmap_sg(dev, sg + j, 1, dir);
+ umem->ib_peer_mem = ib_peer_mem;
+ if (invalidation_supported) {
+ invalidation_ctx = kzalloc(sizeof(*invalidation_ctx), GFP_KERNEL);
+ if (!invalidation_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ umem->invalidation_ctx = invalidation_ctx;
+ invalidation_ctx->umem = umem;
+ mutex_lock(&ib_peer_mem->lock);
+ invalidation_ctx->context_ticket =
+ ib_peer_insert_context(ib_peer_mem, invalidation_ctx);
+ /* unlock before calling get_pages() to prevent a deadlock from the callback */
+ mutex_unlock(&ib_peer_mem->lock);
+ }
- return 0;
+ ret = peer_mem->get_pages(addr, umem->length, umem->writable, 1,
+ &umem->sg_head,
+ umem->peer_mem_client_context,
+ invalidation_ctx ?
+ (void *)invalidation_ctx->context_ticket : NULL);
+
+ if (invalidation_ctx) {
+ /* retake the lock and check that the pages were not invalidated meanwhile */
+ mutex_lock(&ib_peer_mem->lock);
+ if (invalidation_ctx->peer_invalidated) {
+ printk(KERN_ERR "peer_umem_get: pages were invalidated by peer\n");
+ ret = -EINVAL;
}
- lents += rc;
}
- return lents;
+ if (ret)
+ goto out;
+
+ umem->page_size = peer_mem->get_page_size(umem->peer_mem_client_context);
+ if (umem->page_size <= 0) {
+ ret = -EINVAL;
+ goto put_pages;
+ }
+
+ umem->offset = addr & ((unsigned long)umem->page_size - 1);
+ ret = peer_mem->dma_map(&umem->sg_head,
+ umem->peer_mem_client_context,
+ umem->context->device->dma_device,
+ dmasync,
+ &umem->nmap);
+ if (ret)
+ goto put_pages;
+
+ ib_peer_mem->stats.num_reg_pages +=
+ umem->nmap * (umem->page_size >> PAGE_SHIFT);
+ ib_peer_mem->stats.num_alloc_mrs += 1;
+ return umem;
+
+put_pages:
+
+ peer_mem->put_pages(&umem->sg_head,
+ umem->peer_mem_client_context);
+out:
+ if (invalidation_ctx) {
+ ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
+ mutex_unlock(&umem->ib_peer_mem->lock);
+ kfree(invalidation_ctx);
+ }
+
+ ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context,
+ umem->peer_mem_srcu_key);
+ kfree(umem);
+ return ERR_PTR(ret);
}
-static void dma_unmap_sg_ia64(struct ib_device *ibdev,
- struct scatterlist *sg,
- int nents,
- enum dma_data_direction dir)
+static void peer_umem_release(struct ib_umem *umem)
{
- int i;
- struct device *dev;
-
- if (!dma_map_sg_hp_wa)
- return ib_dma_unmap_sg(ibdev, sg, nents, dir);
+ struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
+ const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
+ struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
+
+ if (invalidation_ctx) {
+
+ int peer_callback;
+ int inflight_invalidation;
+ /* If we are not under a peer callback, take the lock before removing
+ * the core ticket from the tree and releasing its umem; this lets any
+ * inflight callbacks finish safely.
+ * If we are under a peer callback, or in the reg_mr error flow where
+ * the context was never activated, the lock is already held.
+ */
+ if (invalidation_ctx->func && !invalidation_ctx->peer_callback)
+ mutex_lock(&ib_peer_mem->lock);
+ ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
+ /* Check the inflight flag only after taking the lock and removing the
+ * ticket from the tree. From this point on, use local copies of
+ * peer_callback and inflight_invalidation: once complete() is called,
+ * invalidation_ctx may be freed by the callback and must not be
+ * accessed again.
+ */
+ peer_callback = invalidation_ctx->peer_callback;
+ inflight_invalidation = invalidation_ctx->inflight_invalidation;
+ if (inflight_invalidation)
+ complete(&invalidation_ctx->comp);
+ /* Under a peer callback the lock is handled externally */
+ if (!peer_callback)
+ /* unlocking before put_pages */
+ mutex_unlock(&ib_peer_mem->lock);
+ /* if we are in a callback context, or a callback is pending, let it free the invalidation context */
+ if (!peer_callback && !inflight_invalidation)
+ kfree(invalidation_ctx);
+ }
- dev = ibdev->dma_device;
- for (i = 0; i < nents; ++i)
- dma_unmap_sg(dev, sg + i, 1, dir);
-}
+ peer_mem->dma_unmap(&umem->sg_head,
+ umem->peer_mem_client_context,
+ umem->context->device->dma_device);
+ peer_mem->put_pages(&umem->sg_head,
+ umem->peer_mem_client_context);
+
+ ib_peer_mem->stats.num_dereg_pages +=
+ umem->nmap * (umem->page_size >> PAGE_SHIFT);
+ ib_peer_mem->stats.num_dealloc_mrs += 1;
+ ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context,
+ umem->peer_mem_srcu_key);
+ kfree(umem);
-#define ib_dma_map_sg(dev, sg, nents, dir) dma_map_sg_ia64(dev, sg, nents, dir)
-#define ib_dma_unmap_sg(dev, sg, nents, dir) dma_unmap_sg_ia64(dev, sg, nents, dir)
+ return;
-#endif
+}
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
-#ifdef __linux__
- struct ib_umem_chunk *chunk, *tmp;
- int i;
- list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
- ib_dma_unmap_sg_attrs(dev, chunk->page_list,
- chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
- for (i = 0; i < chunk->nents; ++i) {
- struct page *page = sg_page(&chunk->page_list[i]);
- if (umem->writable && dirty)
- set_page_dirty_lock(page);
- put_page(page);
- }
- kfree(chunk);
- }
-#else
- struct ib_umem_chunk *chunk, *tmp;
vm_object_t object;
+ struct scatterlist *sg;
+ struct page *page;
int i;
object = NULL;
- list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
- ib_dma_unmap_sg_attrs(dev, chunk->page_list,
- chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
- for (i = 0; i < chunk->nents; ++i) {
- struct page *page = sg_page(&chunk->page_list[i]);
+ if (umem->nmap > 0)
+ ib_dma_unmap_sg(dev, umem->sg_head.sgl,
+ umem->nmap,
+ DMA_BIDIRECTIONAL);
+ for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
+ page = sg_page(sg);
if (umem->writable && dirty) {
if (object && object != page->object)
VM_OBJECT_WUNLOCK(object);
@@ -148,14 +207,26 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
vm_page_dirty(page);
}
}
- kfree(chunk);
- }
+ sg_free_table(&umem->sg_head);
if (object)
VM_OBJECT_WUNLOCK(object);
-#endif
}
+void ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
+ umem_invalidate_func_t func,
+ void *cookie)
+{
+ struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
+
+ invalidation_ctx->func = func;
+ invalidation_ctx->cookie = cookie;
+
+ /* from that point any pending invalidations can be called */
+ mutex_unlock(&umem->ib_peer_mem->lock);
+ return;
+}
+EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
/**
* ib_umem_get - Pin and DMA map userspace memory.
* @context: userspace context to pin memory for
@@ -164,163 +235,23 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
* @access: IB_ACCESS_xxx flags for memory being pinned
* @dmasync: flush in-flight DMA when the memory region is written
*/
-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
- size_t size, int access, int dmasync)
+struct ib_umem *ib_umem_get_ex(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access, int dmasync,
+ int invalidation_supported)
{
-#ifdef __linux__
- struct ib_umem *umem;
- struct page **page_list;
- struct vm_area_struct **vma_list;
- struct ib_umem_chunk *chunk;
- unsigned long locked;
- unsigned long lock_limit;
- unsigned long cur_base;
- unsigned long npages;
- int ret;
- int off;
- int i;
- DEFINE_DMA_ATTRS(attrs);
-
- if (dmasync)
- dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
- else if (allow_weak_ordering)
- dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
-
- if (!can_do_mlock())
- return ERR_PTR(-EPERM);
- umem = kmalloc(sizeof *umem, GFP_KERNEL);
- if (!umem)
- return ERR_PTR(-ENOMEM);
-
- umem->context = context;
- umem->length = size;
- umem->offset = addr & ~PAGE_MASK;
- umem->page_size = PAGE_SIZE;
- /*
- * We ask for writable memory if any access flags other than
- * "remote read" are set. "Local write" and "remote write"
- * obviously require write access. "Remote atomic" can do
- * things like fetch and add, which will modify memory, and
- * "MW bind" can change permissions by binding a window.
- */
- umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
-
- /* We assume the memory is from hugetlb until proved otherwise */
- umem->hugetlb = 1;
-
- INIT_LIST_HEAD(&umem->chunk_list);
-
- page_list = (struct page **) __get_free_page(GFP_KERNEL);
- if (!page_list) {
- kfree(umem);
- return ERR_PTR(-ENOMEM);
- }
-
- /*
- * if we can't alloc the vma_list, it's not so bad;
- * just assume the memory is not hugetlb memory
- */
- vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
- if (!vma_list)
- umem->hugetlb = 0;
-
- npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
-
- down_write(&current->mm->mmap_sem);
-
- locked = npages + current->mm->locked_vm;
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-
- if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
- ret = -ENOMEM;
- goto out;
- }
-
- cur_base = addr & PAGE_MASK;
-
- ret = 0;
-
- while (npages) {
- ret = get_user_pages(current, current->mm, cur_base,
- min_t(unsigned long, npages,
- PAGE_SIZE / sizeof (struct page *)),
- 1, !umem->writable, page_list, vma_list);
-
- if (ret < 0)
- goto out;
-
- cur_base += ret * PAGE_SIZE;
- npages -= ret;
-
- off = 0;
-
- while (ret) {
- chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
- min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
- GFP_KERNEL);
- if (!chunk) {
- ret = -ENOMEM;
- goto out;
- }
-
- chunk->attrs = attrs;
- chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
- sg_init_table(chunk->page_list, chunk->nents);
- for (i = 0; i < chunk->nents; ++i) {
- if (vma_list &&
- !is_vm_hugetlb_page(vma_list[i + off]))
- umem->hugetlb = 0;
- sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
- }
-
- chunk->nmap = ib_dma_map_sg_attrs(context->device,
- &chunk->page_list[0],
- chunk->nents,
- DMA_BIDIRECTIONAL,
- &attrs);
- if (chunk->nmap <= 0) {
- for (i = 0; i < chunk->nents; ++i)
- put_page(sg_page(&chunk->page_list[i]));
- kfree(chunk);
-
- ret = -ENOMEM;
- goto out;
- }
-
- ret -= chunk->nents;
- off += chunk->nents;
- list_add_tail(&chunk->list, &umem->chunk_list);
- }
-
- ret = 0;
- }
-
-out:
- if (ret < 0) {
- __ib_umem_release(context->device, umem, 0);
- kfree(umem);
- } else
- current->mm->locked_vm = locked;
-
- up_write(&current->mm->mmap_sem);
- if (vma_list)
- free_page((unsigned long) vma_list);
- free_page((unsigned long) page_list);
-
- return ret < 0 ? ERR_PTR(ret) : umem;
-#else
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
struct proc *proc;
pmap_t pmap;
vm_offset_t end, last, start;
vm_size_t npages;
int error;
- int ents;
int ret;
+ int ents;
int i;
DEFINE_DMA_ATTRS(attrs);
+ struct scatterlist *sg, *sg_list_start;
+ int need_release = 0;
error = priv_check(curthread, PRIV_VM_MLOCK);
if (error)
@@ -372,76 +303,86 @@ out:
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
+
+ if (invalidation_supported || context->peer_mem_private_data) {
+
+ struct ib_peer_memory_client *peer_mem_client;
+
+ peer_mem_client = ib_get_peer_client(context, addr, size,
+ &umem->peer_mem_client_context,
+ &umem->peer_mem_srcu_key);
+ if (peer_mem_client)
+ return peer_umem_get(peer_mem_client, umem, addr,
+ dmasync, invalidation_supported);
+ }
+
umem->hugetlb = 0;
- INIT_LIST_HEAD(&umem->chunk_list);
pmap = vm_map_pmap(&proc->p_vmspace->vm_map);
- ret = 0;
- while (npages) {
- ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK);
- chunk = kmalloc(sizeof(*chunk) +
- (sizeof(struct scatterlist) * ents),
- GFP_KERNEL);
- if (!chunk) {
- ret = -ENOMEM;
+
+ if (npages == 0) {
+ ret = -EINVAL;
goto out;
}
- chunk->attrs = attrs;
- chunk->nents = ents;
- sg_init_table(&chunk->page_list[0], ents);
- for (i = 0; i < chunk->nents; ++i) {
+ ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ need_release = 1;
+ sg_list_start = umem->sg_head.sgl;
+
+ while (npages) {
+
+ ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK);
+ umem->npages += ents;
+
+ for_each_sg(sg_list_start, sg, ents, i) {
vm_paddr_t pa;
pa = pmap_extract(pmap, start);
if (pa == 0) {
ret = -ENOMEM;
- kfree(chunk);
goto out;
}
- sg_set_page(&chunk->page_list[i], PHYS_TO_VM_PAGE(pa),
+ sg_set_page(sg, PHYS_TO_VM_PAGE(pa),
PAGE_SIZE, 0);
npages--;
start += PAGE_SIZE;
}
- chunk->nmap = ib_dma_map_sg_attrs(context->device,
- &chunk->page_list[0],
- chunk->nents,
+ /* prepare for the next loop iteration */
+ sg_list_start = sg;
+ }
+
+ umem->nmap = ib_dma_map_sg_attrs(context->device,
+ umem->sg_head.sgl,
+ umem->npages,
DMA_BIDIRECTIONAL,
&attrs);
- if (chunk->nmap != chunk->nents) {
- kfree(chunk);
+ if (umem->nmap != umem->npages) {
ret = -ENOMEM;
goto out;
}
- list_add_tail(&chunk->list, &umem->chunk_list);
- }
-
out:
if (ret < 0) {
+ if (need_release)
__ib_umem_release(context->device, umem, 0);
kfree(umem);
}
return ret < 0 ? ERR_PTR(ret) : umem;
-#endif
}
-EXPORT_SYMBOL(ib_umem_get);
+EXPORT_SYMBOL(ib_umem_get_ex);
-#ifdef __linux__
-static void ib_umem_account(struct work_struct *work)
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access, int dmasync)
{
- struct ib_umem *umem = container_of(work, struct ib_umem, work);
-
- down_write(&umem->mm->mmap_sem);
- umem->mm->locked_vm -= umem->diff;
- up_write(&umem->mm->mmap_sem);
- mmput(umem->mm);
- kfree(umem);
+ return ib_umem_get_ex(context, addr,
+ size, access, dmasync, 0);
}
-#endif
+EXPORT_SYMBOL(ib_umem_get);
/**
* ib_umem_release - release memory pinned with ib_umem_get
@@ -449,57 +390,28 @@ static void ib_umem_account(struct work_struct *work)
*/
void ib_umem_release(struct ib_umem *umem)
{
-#ifdef __linux__
- struct ib_ucontext *context = umem->context;
- struct mm_struct *mm;
- unsigned long diff;
-
- __ib_umem_release(umem->context->device, umem, 1);
- mm = get_task_mm(current);
- if (!mm) {
- kfree(umem);
- return;
- }
-
- diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
-
- /*
- * We may be called with the mm's mmap_sem already held. This
- * can happen when a userspace munmap() is the call that drops
- * the last reference to our file and calls our release
- * method. If there are memory regions to destroy, we'll end
- * up here and not be able to take the mmap_sem. In that case
- * we defer the vm_locked accounting to the system workqueue.
- */
- if (context->closing) {
- if (!down_write_trylock(&mm->mmap_sem)) {
- INIT_WORK(&umem->work, ib_umem_account);
- umem->mm = mm;
- umem->diff = diff;
-
- schedule_work(&umem->work);
- return;
- }
- } else
- down_write(&mm->mmap_sem);
-
- current->mm->locked_vm -= diff;
- up_write(&mm->mmap_sem);
- mmput(mm);
-#else
vm_offset_t addr, end, last, start;
vm_size_t size;
int error;
+ if (umem->ib_peer_mem) {
+ peer_umem_release(umem);
+ return;
+ }
+
__ib_umem_release(umem->context->device, umem, 1);
+
if (umem->context->closing) {
kfree(umem);
return;
}
+
error = priv_check(curthread, PRIV_VM_MUNLOCK);
+
if (error)
return;
+
addr = umem->start;
size = umem->length;
last = addr + size;
@@ -507,69 +419,24 @@ void ib_umem_release(struct ib_umem *umem)
end = roundup2(last, PAGE_SIZE); /* Use PAGE_MASK safe operation. */
vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, end,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
-
-#endif
kfree(umem);
+
}
EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
+ struct scatterlist *sg;
shift = ilog2(umem->page_size);
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (i = 0; i < chunk->nmap; ++i)
- n += sg_dma_len(&chunk->page_list[i]) >> shift;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
+ n += sg_dma_len(sg) >> shift;
return n;
}
EXPORT_SYMBOL(ib_umem_page_count);
-
-/**********************************************/
-/*
- * Stub functions for contiguous pages -
- * We currently do not support this feature
- */
-/**********************************************/
-
-/**
- * ib_cmem_release_contiguous_pages - release memory allocated by
- * ib_cmem_alloc_contiguous_pages.
- * @cmem: cmem struct to release
- */
-void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem)
-{
-}
-EXPORT_SYMBOL(ib_cmem_release_contiguous_pages);
-
-/**
- * * ib_cmem_alloc_contiguous_pages - allocate contiguous pages
- * * @context: userspace context to allocate memory for
- * * @total_size: total required size for that allocation.
- * * @page_size_order: order of one contiguous page.
- * */
-struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
- unsigned long total_size,
- unsigned long page_size_order)
-{
- return NULL;
-}
-EXPORT_SYMBOL(ib_cmem_alloc_contiguous_pages);
-
-/**
- * * ib_cmem_map_contiguous_pages_to_vma - map contiguous pages into VMA
- * * @ib_cmem: cmem structure returned by ib_cmem_alloc_contiguous_pages
- * * @vma: VMA to inject pages into.
- * */
-int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
- struct vm_area_struct *vma)
-{
- return 0;
-}
-EXPORT_SYMBOL(ib_cmem_map_contiguous_pages_to_vma);
diff --git a/sys/ofed/drivers/infiniband/core/user_mad.c b/sys/ofed/drivers/infiniband/core/user_mad.c
index 161c65f..cc4a659 100644
--- a/sys/ofed/drivers/infiniband/core/user_mad.c
+++ b/sys/ofed/drivers/infiniband/core/user_mad.c
@@ -43,7 +43,9 @@
#include <linux/mutex.h>
#include <linux/kref.h>
#include <linux/compat.h>
+#include <linux/sched.h>
#include <linux/semaphore.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
@@ -63,12 +65,9 @@ enum {
};
/*
- * Our lifetime rules for these structs are the following: each time a
- * device special file is opened, we look up the corresponding struct
- * ib_umad_port by minor in the umad_port[] table while holding the
- * port_lock. If this lookup succeeds, we take a reference on the
- * ib_umad_port's struct ib_umad_device while still holding the
- * port_lock; if the lookup fails, we fail the open(). We drop these
+ * Our lifetime rules for these structs are the following: each time a
+ * device special file is opened, we take a reference on the
+ * ib_umad_port's struct ib_umad_device. We drop these
* references in the corresponding close().
*
* In addition to references coming from open character devices, there
@@ -76,12 +75,7 @@ enum {
* module's reference taken when allocating the ib_umad_device in
* ib_umad_add_one().
*
- * When destroying an ib_umad_device, we clear all of its
- * ib_umad_ports from umad_port[] while holding port_lock before
- * dropping the module's reference to the ib_umad_device. This is
- * always safe because any open() calls will either succeed and obtain
- * a reference before we clear the umad_port[] entries, or fail after
- * we clear the umad_port[] entries.
+ * When destroying an ib_umad_device, we drop the module's reference.
*/
struct ib_umad_port {
@@ -99,6 +93,7 @@ struct ib_umad_port {
struct ib_umad_device *umad_dev;
int dev_num;
u8 port_num;
+ struct list_head port_lst;
};
struct ib_umad_device {
@@ -135,18 +130,85 @@ static struct class *umad_class;
static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
static DEFINE_SPINLOCK(port_lock);
-static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
+static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
-static void ib_umad_release_dev(struct kref *ref)
+static DEFINE_SPINLOCK(ports_list_lock);
+static struct list_head ports_list;
+
+
+static void remove_ports(struct kref *ref)
+{
+ int i;
+ struct ib_umad_port *p, *p1;
+ struct ib_umad_device *dev =
+ container_of(ref, struct ib_umad_device, ref);
+
+ for (i = 0; i <= dev->end_port - dev->start_port; ++i) {
+ struct ib_umad_port *port = &dev->port[i];
+
+ list_for_each_entry_safe(p, p1, &ports_list, port_lst)
+ if (p == port) {
+ list_del(&p->port_lst);
+ break;
+ }
+ }
+}
+
+static void put_umad_dev(struct kref *ref)
{
+ int ret, i;
struct ib_umad_device *dev =
container_of(ref, struct ib_umad_device, ref);
+ spin_lock(&ports_list_lock);
+ ret = kref_put(ref, remove_ports);
+ spin_unlock(&ports_list_lock);
+ if (ret) {
+ for (i = 0; i <= dev->end_port - dev->start_port; ++i) {
+ if (dev->port[i].dev_num < IB_UMAD_MAX_PORTS)
+ clear_bit(dev->port[i].dev_num, dev_map);
+ else
+ clear_bit(dev->port[i].dev_num - IB_UMAD_MAX_PORTS, overflow_map);
+ cdev_del(dev->port[i].cdev);
+ cdev_del(dev->port[i].sm_cdev);
+ }
kfree(dev);
+ }
+}
+
+static void release_port(struct ib_umad_port *port)
+{
+ put_umad_dev(&port->umad_dev->ref);
+}
+
+
+static struct ib_umad_port *get_port(struct cdev *cdev)
+{
+ struct ib_umad_port *port;
+
+ spin_lock(&ports_list_lock);
+ list_for_each_entry(port, &ports_list, port_lst) {
+ if (port->cdev == cdev || port->sm_cdev == cdev) {
+ kref_get(&port->umad_dev->ref);
+ spin_unlock(&ports_list_lock);
+
+ return port;
+ }
+ }
+ spin_unlock(&ports_list_lock);
+
+ return NULL;
+}
+
+static void insert_port(struct ib_umad_port *port)
+{
+ spin_lock(&ports_list_lock);
+ list_add(&port->port_lst, &ports_list);
+ spin_unlock(&ports_list_lock);
}
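
The list-based lookup above replaces the old umad_port[] minor table. The key property is that get_port() takes the owning device's reference while ports_list_lock is still held, so a port found on the list cannot be torn down beneath the caller. A hypothetical user-space sketch of the same lookup-then-pin pattern (all names invented for illustration):

	#include <pthread.h>
	#include <stdio.h>

	struct dev { int refs; };
	struct port { struct port *next; struct dev *owner; int id; };

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct port *ports;          /* global list head */

	/* Look a port up by id and pin its owner before dropping the lock. */
	static struct port *get_port(int id)
	{
		struct port *p;

		pthread_mutex_lock(&list_lock);
		for (p = ports; p; p = p->next) {
			if (p->id == id) {
				p->owner->refs++;   /* kref_get() under the lock */
				pthread_mutex_unlock(&list_lock);
				return p;
			}
		}
		pthread_mutex_unlock(&list_lock);
		return NULL;                /* not found: open() fails with ENXIO */
	}

	int main(void)
	{
		struct dev d = { .refs = 1 };
		struct port p = { .next = NULL, .owner = &d, .id = 0 };

		ports = &p;
		printf("found=%d refs=%d\n", get_port(0) != NULL, d.refs);
		return 0;
	}
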
static int hdr_size(struct ib_umad_file *file)
@@ -466,8 +528,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
- if (packet->mad.hdr.id < 0 ||
- packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
+ if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
ret = -EINVAL;
goto err;
}
@@ -679,7 +740,7 @@ found:
file->already_used = 1;
if (!file->use_pkey_index) {
printk(KERN_WARNING "user_mad: process %s did not enable "
- "P_Key index support.\n", curproc->p_comm);
+ "P_Key index support.\n", curthread->td_proc->p_comm);
printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
"has info on the new ABI.\n");
}
@@ -711,7 +772,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
- if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+ if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
@@ -779,41 +840,33 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
/*
* ib_umad_open() does not need the BKL:
*
- * - umad_port[] accesses are protected by port_lock, the
- * ib_umad_port structures are properly reference counted, and
+ * - the ib_umad_port structures are properly reference counted, and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - the ioctl method does not affect any global state outside of the
* file structure being operated on;
- * - the port is added to umad_port[] as the last part of module
- * initialization so the open method will either immediately run
- * -ENXIO, or all required initialization will be done.
*/
static int ib_umad_open(struct inode *inode, struct file *filp)
{
struct ib_umad_port *port;
struct ib_umad_file *file;
- int ret = 0;
-
- spin_lock(&port_lock);
- port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
- if (port)
- kref_get(&port->umad_dev->ref);
- spin_unlock(&port_lock);
+ int ret;
+ port = get_port(inode->i_cdev->si_drv1);
if (!port)
return -ENXIO;
mutex_lock(&port->file_mutex);
if (!port->ib_dev) {
+ release_port(port);
ret = -ENXIO;
goto out;
}
file = kzalloc(sizeof *file, GFP_KERNEL);
if (!file) {
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ release_port(port);
ret = -ENOMEM;
goto out;
}
@@ -830,6 +883,8 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
+ ret = nonseekable_open(inode, filp);
+
out:
mutex_unlock(&port->file_mutex);
return ret;
@@ -838,7 +893,7 @@ out:
static int ib_umad_close(struct inode *inode, struct file *filp)
{
struct ib_umad_file *file = filp->private_data;
- struct ib_umad_device *dev = file->port->umad_dev;
+ struct ib_umad_port *port = file->port;
struct ib_umad_packet *packet, *tmp;
int already_dead;
int i;
@@ -867,7 +922,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
- kref_put(&dev->ref, ib_umad_release_dev);
+ release_port(port);
return 0;
}
@@ -882,7 +937,8 @@ static const struct file_operations umad_fops = {
.compat_ioctl = ib_umad_compat_ioctl,
#endif
.open = ib_umad_open,
- .release = ib_umad_close
+ .release = ib_umad_close,
+ .llseek = no_llseek,
};
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -893,12 +949,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
};
int ret;
- spin_lock(&port_lock);
- port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
- if (port)
- kref_get(&port->umad_dev->ref);
- spin_unlock(&port_lock);
-
+ port = get_port(inode->i_cdev->si_drv1);
if (!port)
return -ENXIO;
@@ -922,10 +973,10 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
filp->private_data = port;
- return 0;
+ return nonseekable_open(inode, filp);
fail:
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ release_port(port);
return ret;
}
@@ -944,7 +995,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ release_port(port);
return ret;
}
@@ -952,7 +1003,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
static const struct file_operations umad_sm_fops = {
.owner = THIS_MODULE,
.open = ib_umad_sm_open,
- .release = ib_umad_sm_close
+ .release = ib_umad_sm_close,
+ .llseek = no_llseek,
};
static struct ib_client umad_client = {
@@ -991,31 +1043,66 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+static dev_t overflow_maj;
+static int find_overflow_devnum(void)
+{
+ int ret;
+
+ if (!overflow_maj) {
+ ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
+ "infiniband_mad");
+ if (ret) {
+ printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+ return ret;
+ }
+ }
+
+ ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
+ if (ret >= IB_UMAD_MAX_PORTS)
+ return -1;
+
+ return ret;
+}
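
find_overflow_devnum() above gives umad a second, lazily registered device-number range once the static minors run out, with overflow_map tracking which overflow minors are busy. The bitmap bookkeeping amounts to find-first-zero, set on allocate, clear on release; a small stand-alone sketch (sizes and names are illustrative):

	#include <stdio.h>

	#define MAX_PORTS 32
	static unsigned long dev_map;   /* one bit per minor, 0 = free */

	static int alloc_minor(void)
	{
		int i;

		for (i = 0; i < MAX_PORTS; i++) {
			if (!(dev_map & (1UL << i))) {  /* find_first_zero_bit() */
				dev_map |= 1UL << i;    /* set_bit() */
				return i;
			}
		}
		return -1;                              /* range exhausted */
	}

	static void free_minor(int i)
	{
		dev_map &= ~(1UL << i);                 /* clear_bit() */
	}

	int main(void)
	{
		int a = alloc_minor(), b = alloc_minor();

		free_minor(a);
		printf("a=%d b=%d next=%d\n", a, b, alloc_minor()); /* next == a */
		return 0;
	}
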
+
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_port *port)
{
+ int devnum;
+ dev_t base;
+
spin_lock(&port_lock);
- port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+ devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
return -1;
+
+ spin_lock(&port_lock);
+ port->dev_num = devnum + IB_UMAD_MAX_PORTS;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ port->dev_num = devnum;
+ base = devnum + base_dev;
+ set_bit(devnum, dev_map);
}
- set_bit(port->dev_num, dev_map);
spin_unlock(&port_lock);
port->ib_dev = device;
port->port_num = port_num;
- init_MUTEX(&port->sm_sem);
+ sema_init(&port->sm_sem, 1);
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
port->cdev = cdev_alloc();
if (!port->cdev)
- return -1;
- port->cdev->owner = THIS_MODULE;
+ goto err_cdev_c;
+
port->cdev->ops = &umad_fops;
+ port->cdev->owner = THIS_MODULE;
kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
- if (cdev_add(port->cdev, base_dev + port->dev_num, 1))
+ if (cdev_add(port->cdev, base, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dma_device,
@@ -1029,13 +1116,15 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->dev, &dev_attr_port))
goto err_dev;
+ base += IB_UMAD_MAX_PORTS;
port->sm_cdev = cdev_alloc();
if (!port->sm_cdev)
goto err_dev;
- port->sm_cdev->owner = THIS_MODULE;
+
port->sm_cdev->ops = &umad_sm_fops;
+ port->sm_cdev->owner = THIS_MODULE;
kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
- if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
+ if (cdev_add(port->sm_cdev, base, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dma_device,
@@ -1049,10 +1138,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->sm_dev, &dev_attr_port))
goto err_sm_dev;
- spin_lock(&port_lock);
- umad_port[port->dev_num] = port;
- spin_unlock(&port_lock);
-
return 0;
err_sm_dev:
@@ -1066,7 +1151,11 @@ err_dev:
err_cdev:
cdev_del(port->cdev);
- clear_bit(port->dev_num, dev_map);
+err_cdev_c:
+ if (port->dev_num < IB_UMAD_MAX_PORTS)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
return -1;
}
@@ -1074,7 +1163,6 @@ err_cdev:
static void ib_umad_kill_port(struct ib_umad_port *port)
{
struct ib_umad_file *file;
- int already_dead;
int id;
dev_set_drvdata(port->dev, NULL);
@@ -1083,20 +1171,12 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
device_destroy(umad_class, port->cdev->dev);
device_destroy(umad_class, port->sm_cdev->dev);
- cdev_del(port->cdev);
- cdev_del(port->sm_cdev);
-
- spin_lock(&port_lock);
- umad_port[port->dev_num] = NULL;
- spin_unlock(&port_lock);
-
mutex_lock(&port->file_mutex);
port->ib_dev = NULL;
list_for_each_entry(file, &port->file_list, port_list) {
mutex_lock(&file->mutex);
- already_dead = file->agents_dead;
file->agents_dead = 1;
mutex_unlock(&file->mutex);
@@ -1106,8 +1186,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
-
- clear_bit(port->dev_num, dev_map);
}
static void ib_umad_add_one(struct ib_device *device)
@@ -1136,10 +1214,12 @@ static void ib_umad_add_one(struct ib_device *device)
umad_dev->start_port = s;
umad_dev->end_port = e;
+ for (i = 0; i <= e - s; ++i)
+ insert_port(&umad_dev->port[i]);
+
for (i = s; i <= e; ++i) {
umad_dev->port[i - s].umad_dev = umad_dev;
- if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND)
if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
goto err;
}
@@ -1150,10 +1230,9 @@ static void ib_umad_add_one(struct ib_device *device)
err:
while (--i >= s)
- if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND)
ib_umad_kill_port(&umad_dev->port[i - s]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ put_umad_dev(&umad_dev->ref);
}
static void ib_umad_remove_one(struct ib_device *device)
@@ -1165,16 +1244,22 @@ static void ib_umad_remove_one(struct ib_device *device)
return;
for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
- if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
ib_umad_kill_port(&umad_dev->port[i]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ put_umad_dev(&umad_dev->ref);
+}
+
+static char *umad_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
static int __init ib_umad_init(void)
{
int ret;
+ INIT_LIST_HEAD(&ports_list);
+
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
@@ -1189,6 +1274,8 @@ static int __init ib_umad_init(void)
goto out_chrdev;
}
+ umad_class->devnode = umad_devnode;
+
ret = class_create_file(umad_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
@@ -1218,6 +1305,8 @@ static void __exit ib_umad_cleanup(void)
ib_unregister_client(&umad_client);
class_destroy(umad_class);
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
}
module_init(ib_umad_init);
diff --git a/sys/ofed/drivers/infiniband/core/uverbs.h b/sys/ofed/drivers/infiniband/core/uverbs.h
index fa64da5..8ca6498 100644
--- a/sys/ofed/drivers/infiniband/core/uverbs.h
+++ b/sys/ofed/drivers/infiniband/core/uverbs.h
@@ -41,10 +41,14 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/completion.h>
+#include <linux/cdev.h>
+#include <linux/rbtree.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_verbs_exp.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_user_verbs_exp.h>
/*
* Our lifetime rules for these structs are the following:
@@ -69,24 +73,26 @@
struct ib_uverbs_device {
struct kref ref;
+ int num_comp_vectors;
struct completion comp;
- int devnum;
- struct cdev *cdev;
struct device *dev;
struct ib_device *ib_dev;
- int num_comp_vectors;
+ int devnum;
+ struct cdev cdev;
+ struct rb_root xrcd_tree;
+ struct mutex xrcd_tree_mutex;
};
struct ib_uverbs_event_file {
struct kref ref;
struct file *filp;
+ int is_async;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
+ int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
- int is_async;
- int is_closed;
};
struct ib_uverbs_file {
@@ -120,9 +126,20 @@ struct ib_uevent_object {
u32 events_reported;
};
+struct ib_uxrcd_object {
+ struct ib_uobject uobject;
+ atomic_t refcnt;
+};
+
+struct ib_usrq_object {
+ struct ib_uevent_object uevent;
+ struct ib_uxrcd_object *uxrcd;
+};
+
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
+ struct ib_uxrcd_object *uxrcd;
};
struct ib_ucq_object {
@@ -134,9 +151,8 @@ struct ib_ucq_object {
u32 async_events_reported;
};
-struct ib_uxrcd_object {
+struct ib_udct_object {
struct ib_uobject uobject;
- struct list_head xrc_reg_qp_list;
};
extern spinlock_t ib_uverbs_idr_lock;
@@ -147,12 +163,14 @@ extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrc_domain_idr;
+extern struct idr ib_uverbs_xrcd_idr;
+extern struct idr ib_uverbs_rule_idr;
+extern struct idr ib_uverbs_dct_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- int is_async, int *fd);
+ int is_async);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
@@ -167,12 +185,24 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
- void *context_ptr);
-void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
- struct ib_xrcd *xrcd);
-int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file,
- struct ib_xrcd *xrcd, u32 qp_num);
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+
+struct ib_uverbs_flow_spec {
+ union {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_spec_eth eth;
+ struct ib_uverbs_flow_spec_ib ib;
+ struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+ };
+};
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
@@ -186,6 +216,8 @@ IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(alloc_mw);
+IB_UVERBS_DECLARE_CMD(dealloc_mw);
IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
IB_UVERBS_DECLARE_CMD(resize_cq);
@@ -193,6 +225,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq);
IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(open_qp);
IB_UVERBS_DECLARE_CMD(query_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
@@ -207,14 +240,30 @@ IB_UVERBS_DECLARE_CMD(create_srq);
IB_UVERBS_DECLARE_CMD(modify_srq);
IB_UVERBS_DECLARE_CMD(query_srq);
IB_UVERBS_DECLARE_CMD(destroy_srq);
-IB_UVERBS_DECLARE_CMD(create_xrc_srq);
-IB_UVERBS_DECLARE_CMD(open_xrc_domain);
-IB_UVERBS_DECLARE_CMD(close_xrc_domain);
-IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp);
-IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp);
-IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp);
-IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp);
-IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(create_xsrq);
+IB_UVERBS_DECLARE_CMD(open_xrcd);
+IB_UVERBS_DECLARE_CMD(close_xrcd);
+
+#define IB_UVERBS_DECLARE_EX_CMD(name) \
+ int ib_uverbs_ex_##name(struct ib_uverbs_file *file,\
+ struct ib_udata *ucore, \
+ struct ib_udata *uhw)
+
+#define IB_UVERBS_DECLARE_EXP_CMD(name) \
+ ssize_t ib_uverbs_exp_##name(struct ib_uverbs_file *file, \
+ struct ib_udata *ucore, \
+ struct ib_udata *uhw)
+
+IB_UVERBS_DECLARE_EX_CMD(create_flow);
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
+IB_UVERBS_DECLARE_EXP_CMD(create_qp);
+IB_UVERBS_DECLARE_EXP_CMD(modify_cq);
+IB_UVERBS_DECLARE_EXP_CMD(modify_qp);
+IB_UVERBS_DECLARE_EXP_CMD(create_cq);
+IB_UVERBS_DECLARE_EXP_CMD(query_device);
+IB_UVERBS_DECLARE_EXP_CMD(create_dct);
+IB_UVERBS_DECLARE_EXP_CMD(destroy_dct);
+IB_UVERBS_DECLARE_EXP_CMD(query_dct);
#endif /* UVERBS_H */
diff --git a/sys/ofed/drivers/infiniband/core/uverbs_cmd.c b/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
index a34b344..5eef3f7 100644
--- a/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
+++ b/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
@@ -35,28 +35,68 @@
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/moduleparam.h>
+#include <linux/rbtree.h>
#include <linux/lockdep.h>
+#include <rdma/ib_addr.h>
#include <asm/uaccess.h>
#include <asm/fcntl.h>
+#include <sys/priv.h>
#include "uverbs.h"
-static struct lock_class_key pd_lock_key;
-static struct lock_class_key mr_lock_key;
-static struct lock_class_key cq_lock_key;
-static struct lock_class_key qp_lock_key;
-static struct lock_class_key ah_lock_key;
-static struct lock_class_key srq_lock_key;
+static int disable_raw_qp_enforcement;
+module_param_named(disable_raw_qp_enforcement, disable_raw_qp_enforcement, int,
+ 0444);
+MODULE_PARM_DESC(disable_raw_qp_enforcement, "Disable the check that RAW QPs "
+ "may only be created by root (default: 0)");
+
+struct uverbs_lock_class {
+ struct lock_class_key key;
+ char name[16];
+};
+
+static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
+static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
+static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
+static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
+static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
+static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
+static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
+static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
+static struct uverbs_lock_class dct_lock_class = { .name = "DCT-uobj" };
+
+static int uverbs_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
+{
+ return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+}
+
+static int uverbs_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
+{
+ return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+}
+
+static struct ib_udata_ops uverbs_copy = {
+ .copy_from = uverbs_copy_from_udata,
+ .copy_to = uverbs_copy_to_udata
+};
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
+ (udata)->ops = &uverbs_copy; \
(udata)->inbuf = (void __user *) (ibuf); \
(udata)->outbuf = (void __user *) (obuf); \
(udata)->inlen = (ilen); \
(udata)->outlen = (olen); \
} while (0)
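
With this change INIT_UDATA also wires a small ops table (uverbs_copy above) into every udata, so the copy-in/copy-out backend is chosen once by the producer and every consumer calls through the indirection. A minimal user-space sketch of that pattern, with invented structure names:

	#include <stdio.h>
	#include <string.h>

	struct udata;
	struct udata_ops {
		int (*copy_from)(void *dst, struct udata *u, size_t len);
	};

	struct udata {
		const struct udata_ops *ops;
		const void *inbuf;
	};

	static int plain_copy_from(void *dst, struct udata *u, size_t len)
	{
		memcpy(dst, u->inbuf, len);   /* copy_from_user() stand-in */
		return 0;
	}

	static const struct udata_ops plain_ops = { .copy_from = plain_copy_from };

	int main(void)
	{
		char out[6];
		struct udata u = { .ops = &plain_ops, .inbuf = "hello" };

		u.ops->copy_from(out, &u, 6); /* consumers never name the backend */
		printf("%s\n", out);
		return 0;
	}
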
+enum uverbs_cmd_type {
+ IB_USER_VERBS_CMD_BASIC,
+ IB_USER_VERBS_CMD_EXTENDED
+};
+
/*
* The ib_uobject locking scheme is as follows:
*
@@ -83,13 +123,13 @@ static struct lock_class_key srq_lock_key;
*/
static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct lock_class_key *key)
+ struct ib_ucontext *context, struct uverbs_lock_class *c)
{
uobj->user_handle = user_handle;
uobj->context = context;
kref_init(&uobj->ref);
init_rwsem(&uobj->mutex);
- lockdep_set_class(&uobj->mutex, key);
+ lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
uobj->live = 0;
}
@@ -241,11 +281,34 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
}
+static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj;
+
+ uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
+ return uobj ? uobj->object : NULL;
+}
+
static void put_qp_read(struct ib_qp *qp)
{
put_uobj_read(qp->uobject);
}
+static void put_qp_write(struct ib_qp *qp)
+{
+ put_uobj_write(qp->uobject);
+}
+
+static struct ib_dct *idr_read_dct(int dct_handle, struct ib_ucontext *context)
+{
+ return idr_read_obj(&ib_uverbs_dct_idr, dct_handle, context, 0);
+}
+
+static void put_dct_read(struct ib_dct *dct)
+{
+ put_uobj_read(dct->uobject);
+}
+
static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
{
return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
@@ -256,12 +319,10 @@ static void put_srq_read(struct ib_srq *srq)
put_uobj_read(srq->uobject);
}
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
- struct ib_ucontext *context,
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
struct ib_uobject **uobj)
{
- *uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle,
- context, 0);
+ *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
return *uobj ? (*uobj)->object : NULL;
}
@@ -301,7 +362,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
- ret = PTR_ERR(file->ucontext);
+ ret = PTR_ERR(ucontext);
goto err;
}
@@ -314,20 +375,23 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
INIT_LIST_HEAD(&ucontext->xrcd_list);
+ INIT_LIST_HEAD(&ucontext->rule_list);
+ INIT_LIST_HEAD(&ucontext->dct_list);
ucontext->closing = 0;
+ ucontext->peer_mem_private_data = NULL;
+ ucontext->peer_mem_name = NULL;
resp.num_comp_vectors = file->device->num_comp_vectors;
- filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
+ ret = get_unused_fd();
+ if (ret < 0)
goto err_free;
- }
+ resp.async_fd = ret;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_file;
+ filp = ib_uverbs_alloc_event_file(file, 1);
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto err_fd;
}
file->async_file = filp->private_data;
@@ -338,6 +402,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
if (ret)
goto err_file;
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_file;
+ }
kref_get(&file->async_file->ref);
kref_get(&file->ref);
file->ucontext = ucontext;
@@ -349,9 +418,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
return in_len;
err_file:
- put_unused_fd(resp.async_fd);
fput(filp);
+err_fd:
+ put_unused_fd(resp.async_fd);
+
err_free:
ibdev->dealloc_ucontext(ucontext);
@@ -360,6 +431,55 @@ err:
return ret;
}
+static void ib_uverbs_query_device_assign(
+ struct ib_uverbs_query_device_resp *resp,
+ struct ib_device_attr *attr,
+ struct ib_uverbs_file *file)
+{
+ memset(resp, 0, sizeof(*resp));
+
+ resp->fw_ver = attr->fw_ver;
+ resp->node_guid = file->device->ib_dev->node_guid;
+ resp->sys_image_guid = attr->sys_image_guid;
+ resp->max_mr_size = attr->max_mr_size;
+ resp->page_size_cap = attr->page_size_cap;
+ resp->vendor_id = attr->vendor_id;
+ resp->vendor_part_id = attr->vendor_part_id;
+ resp->hw_ver = attr->hw_ver;
+ resp->max_qp = attr->max_qp;
+ resp->max_qp_wr = attr->max_qp_wr;
+ resp->device_cap_flags = attr->device_cap_flags;
+ resp->max_sge = attr->max_sge;
+ resp->max_sge_rd = attr->max_sge_rd;
+ resp->max_cq = attr->max_cq;
+ resp->max_cqe = attr->max_cqe;
+ resp->max_mr = attr->max_mr;
+ resp->max_pd = attr->max_pd;
+ resp->max_qp_rd_atom = attr->max_qp_rd_atom;
+ resp->max_ee_rd_atom = attr->max_ee_rd_atom;
+ resp->max_res_rd_atom = attr->max_res_rd_atom;
+ resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom;
+ resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom;
+ resp->atomic_cap = attr->atomic_cap;
+ resp->max_ee = attr->max_ee;
+ resp->max_rdd = attr->max_rdd;
+ resp->max_mw = attr->max_mw;
+ resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp;
+ resp->max_raw_ethy_qp = attr->max_raw_ethy_qp;
+ resp->max_mcast_grp = attr->max_mcast_grp;
+ resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
+ resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
+ resp->max_ah = attr->max_ah;
+ resp->max_fmr = attr->max_fmr;
+ resp->max_map_per_fmr = attr->max_map_per_fmr;
+ resp->max_srq = attr->max_srq;
+ resp->max_srq_wr = attr->max_srq_wr;
+ resp->max_srq_sge = attr->max_srq_sge;
+ resp->max_pkeys = attr->max_pkeys;
+ resp->local_ca_ack_delay = attr->local_ca_ack_delay;
+ resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt;
+}
+
ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
@@ -379,51 +499,10 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
if (ret)
return ret;
- memset(&resp, 0, sizeof resp);
-
- resp.fw_ver = attr.fw_ver;
- resp.node_guid = file->device->ib_dev->node_guid;
- resp.sys_image_guid = attr.sys_image_guid;
- resp.max_mr_size = attr.max_mr_size;
- resp.page_size_cap = attr.page_size_cap;
- resp.vendor_id = attr.vendor_id;
- resp.vendor_part_id = attr.vendor_part_id;
- resp.hw_ver = attr.hw_ver;
- resp.max_qp = attr.max_qp;
- resp.max_qp_wr = attr.max_qp_wr;
- resp.device_cap_flags = attr.device_cap_flags;
- resp.max_sge = attr.max_sge;
- resp.max_sge_rd = attr.max_sge_rd;
- resp.max_cq = attr.max_cq;
- resp.max_cqe = attr.max_cqe;
- resp.max_mr = attr.max_mr;
- resp.max_pd = attr.max_pd;
- resp.max_qp_rd_atom = attr.max_qp_rd_atom;
- resp.max_ee_rd_atom = attr.max_ee_rd_atom;
- resp.max_res_rd_atom = attr.max_res_rd_atom;
- resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
- resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
- resp.atomic_cap = attr.atomic_cap;
- resp.max_ee = attr.max_ee;
- resp.max_rdd = attr.max_rdd;
- resp.max_mw = attr.max_mw;
- resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
- resp.max_raw_ethy_qp = attr.max_raw_ethy_qp;
- resp.max_mcast_grp = attr.max_mcast_grp;
- resp.max_mcast_qp_attach = attr.max_mcast_qp_attach;
- resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
- resp.max_ah = attr.max_ah;
- resp.max_fmr = attr.max_fmr;
- resp.max_map_per_fmr = attr.max_map_per_fmr;
- resp.max_srq = attr.max_srq;
- resp.max_srq_wr = attr.max_srq_wr;
- resp.max_srq_sge = attr.max_srq_sge;
- resp.max_pkeys = attr.max_pkeys;
- resp.local_ca_ack_delay = attr.local_ca_ack_delay;
- resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt;
+ ib_uverbs_query_device_assign(&resp, &attr, file);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
+ if (copy_to_user((void __user *)(unsigned long) cmd.response,
+ &resp, sizeof(resp)))
return -EFAULT;
return in_len;
@@ -469,7 +548,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.active_width = attr.active_width;
resp.active_speed = attr.active_speed;
resp.phys_state = attr.phys_state;
- resp.link_layer = attr.link_layer;
+ resp.link_layer = rdma_port_get_link_layer(file->device->ib_dev,
+ cmd.port_num);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -503,7 +583,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
+ init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
down_write(&uobj->mutex);
pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
@@ -587,17 +667,316 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
return in_len;
}
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
+struct xrcd_table_entry {
+ struct rb_node node;
+ struct ib_xrcd *xrcd;
+ struct inode *inode;
+};
+
+static int xrcd_table_insert(struct ib_uverbs_device *dev,
+ struct inode *inode,
+ struct ib_xrcd *xrcd)
+{
+ struct xrcd_table_entry *entry, *scan;
+ struct rb_node **p = &dev->xrcd_tree.rb_node;
+ struct rb_node *parent = NULL;
+
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->xrcd = xrcd;
+ entry->inode = inode;
+
+ while (*p) {
+ parent = *p;
+ scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+ if (inode < scan->inode) {
+ p = &(*p)->rb_left;
+ } else if (inode > scan->inode) {
+ p = &(*p)->rb_right;
+ } else {
+ kfree(entry);
+ return -EEXIST;
+ }
+ }
+
+ rb_link_node(&entry->node, parent, p);
+ rb_insert_color(&entry->node, &dev->xrcd_tree);
+ igrab(inode);
+ return 0;
+}
+
+static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
+ struct inode *inode)
+{
+ struct xrcd_table_entry *entry;
+ struct rb_node *p = dev->xrcd_tree.rb_node;
+
+ while (p) {
+ entry = rb_entry(p, struct xrcd_table_entry, node);
+
+ if (inode < entry->inode)
+ p = p->rb_left;
+ else if (inode > entry->inode)
+ p = p->rb_right;
+ else
+ return entry;
+ }
+
+ return NULL;
+}
+
+static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
+{
+ struct xrcd_table_entry *entry;
+
+ entry = xrcd_table_search(dev, inode);
+ if (!entry)
+ return NULL;
+
+ return entry->xrcd;
+}
+
+static void xrcd_table_delete(struct ib_uverbs_device *dev,
+ struct inode *inode)
+{
+ struct xrcd_table_entry *entry;
+
+ entry = xrcd_table_search(dev, inode);
+ if (entry) {
+ iput(inode);
+ rb_erase(&entry->node, &dev->xrcd_tree);
+ kfree(entry);
+ }
+}
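
The xrcd_table_* helpers above are what let every process opening the same file share one XRCD: the backing inode is the key, and the rb-tree gives ordered lookup under xrcd_tree_mutex. The flow is search first, insert on miss; a compact stand-alone sketch using a plain binary search tree in place of the kernel rb-tree (all names illustrative):

	#include <stdio.h>
	#include <stdlib.h>

	struct node { long key; struct node *l, *r; };

	static struct node *search(struct node *root, long key)
	{
		while (root) {
			if (key < root->key)
				root = root->l;
			else if (key > root->key)
				root = root->r;
			else
				return root;    /* existing entry: share it */
		}
		return NULL;
	}

	static struct node *insert(struct node **root, long key)
	{
		while (*root) {
			if (key < (*root)->key)
				root = &(*root)->l;
			else if (key > (*root)->key)
				root = &(*root)->r;
			else
				return *root;   /* -EEXIST in the kernel version */
		}
		*root = calloc(1, sizeof(**root));
		if (*root)
			(*root)->key = key;
		return *root;
	}

	int main(void)
	{
		struct node *tree = NULL;
		long inode = 0x1234;        /* stands in for the inode pointer */

		if (!search(tree, inode))   /* miss: create and insert */
			insert(&tree, inode);
		printf("shared=%d\n", search(tree, inode) != NULL);
		return 0;
	}
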
+
+ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
+ struct ib_uverbs_open_xrcd cmd;
+ struct ib_uverbs_open_xrcd_resp resp;
+ struct ib_udata udata;
+ struct ib_uxrcd_object *obj;
+ struct ib_xrcd *xrcd = NULL;
+ struct fd f = {NULL};
+ struct inode *inode = NULL;
+ int ret = 0;
+ int new_xrcd = 0;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ mutex_lock(&file->device->xrcd_tree_mutex);
+
+ if (cmd.fd != -1) {
+ /* resolve the user-supplied file descriptor */
+ f = fdget(cmd.fd);
+ if (!f.file) {
+ ret = -EBADF;
+ goto err_tree_mutex_unlock;
+ }
+
+ inode = f.file->f_dentry->d_inode;
+ xrcd = find_xrcd(file->device, inode);
+ if (!xrcd && !(cmd.oflags & O_CREAT)) {
+ /* no existing XRCD for this inode; O_CREAT is required */
+ ret = -EAGAIN;
+ goto err_tree_mutex_unlock;
+ }
+
+ if (xrcd && cmd.oflags & O_EXCL) {
+ ret = -EINVAL;
+ goto err_tree_mutex_unlock;
+ }
+ }
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj) {
+ ret = -ENOMEM;
+ goto err_tree_mutex_unlock;
+ }
+
+ init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
+
+ down_write(&obj->uobject.mutex);
+
+ if (!xrcd) {
+ xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+ file->ucontext, &udata);
+ if (IS_ERR(xrcd)) {
+ ret = PTR_ERR(xrcd);
+ goto err;
+ }
+
+ xrcd->inode = inode;
+ xrcd->device = file->device->ib_dev;
+ atomic_set(&xrcd->usecnt, 0);
+ mutex_init(&xrcd->tgt_qp_mutex);
+ INIT_LIST_HEAD(&xrcd->tgt_qp_list);
+ new_xrcd = 1;
+ }
+
+ atomic_set(&obj->refcnt, 0);
+ obj->uobject.object = xrcd;
+ ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+ if (ret)
+ goto err_idr;
+
+ memset(&resp, 0, sizeof resp);
+ resp.xrcd_handle = obj->uobject.id;
+
+ if (inode) {
+ if (new_xrcd) {
+ /* create new inode/xrcd table entry */
+ ret = xrcd_table_insert(file->device, inode, xrcd);
+ if (ret)
+ goto err_insert_xrcd;
+ }
+ atomic_inc(&xrcd->usecnt);
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ if (f.file)
+ fdput(f);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uobject.live = 1;
+ up_write(&obj->uobject.mutex);
+
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return in_len;
+
+err_copy:
+ if (inode) {
+ if (new_xrcd)
+ xrcd_table_delete(file->device, inode);
+ atomic_dec(&xrcd->usecnt);
+ }
+
+err_insert_xrcd:
+ idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+
+err_idr:
+ ib_dealloc_xrcd(xrcd);
+
+err:
+ put_uobj_write(&obj->uobject);
+
+err_tree_mutex_unlock:
+ if (f.file)
+ fdput(f);
+
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_close_xrcd cmd;
+ struct ib_uobject *uobj;
+ struct ib_xrcd *xrcd = NULL;
+ struct inode *inode = NULL;
+ struct ib_uxrcd_object *obj;
+ int live;
+ int ret = 0;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ mutex_lock(&file->device->xrcd_tree_mutex);
+ uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
+ if (!uobj) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ xrcd = uobj->object;
+ inode = xrcd->inode;
+ obj = container_of(uobj, struct ib_uxrcd_object, uobject);
+ if (atomic_read(&obj->refcnt)) {
+ put_uobj_write(uobj);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
+ ret = ib_dealloc_xrcd(uobj->object);
+ if (!ret)
+ uobj->live = 0;
+ }
+
+ live = uobj->live;
+ if (inode && ret)
+ atomic_inc(&xrcd->usecnt);
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ goto out;
+
+ if (inode && !live)
+ xrcd_table_delete(file->device, inode);
+
+ idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+ ret = in_len;
+
+out:
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return ret;
+}
+
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+ struct ib_xrcd *xrcd)
+{
+ struct inode *inode;
+
+ inode = xrcd->inode;
+ if (inode && !atomic_dec_and_test(&xrcd->usecnt))
+ return;
+
+ ib_dealloc_xrcd(xrcd);
+
+ if (inode)
+ xrcd_table_delete(dev, inode);
+}
+
+ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
+ struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
- int ret;
+ int ret;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -612,32 +991,34 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
- /*
- * Local write permission is required if remote write or
- * remote atomic permission is also requested.
- */
- if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
- !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
- return -EINVAL;
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ return ret;
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
- ret = -EINVAL;
+ ret = -EINVAL;
goto err_free;
}
+ /* Obtain a new uobject id first; it is passed to
+ reg_user_mr() below for further use as the mr_id.
+ */
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
+ if (ret)
+ goto err_put;
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata, 0);
+ cmd.access_flags, &udata, uobj->id);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
- goto err_put;
+ goto err_remove_uobj;
}
mr->device = pd->device;
@@ -647,9 +1028,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
atomic_set(&mr->usecnt, 0);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
@@ -675,11 +1053,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
ib_dereg_mr(mr);
+err_remove_uobj:
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+
err_put:
put_pd_read(pd);
@@ -689,13 +1067,13 @@ err_free:
}
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_dereg_mr cmd;
struct ib_mr *mr;
struct ib_uobject *uobj;
- int ret = -EINVAL;
+ int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -726,13 +1104,134 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
return in_len;
}
+ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_alloc_mw cmd;
+ struct ib_uverbs_alloc_mw_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_mw *mw;
+ int ret;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ mw = pd->device->alloc_mw(pd, cmd.mw_type);
+ if (IS_ERR(mw)) {
+ ret = PTR_ERR(mw);
+ goto err_put;
+ }
+
+ mw->device = pd->device;
+ mw->pd = pd;
+ mw->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+
+ uobj->object = mw;
+ ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
+ if (ret)
+ goto err_unalloc;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.rkey = mw->rkey;
+ resp.mw_handle = uobj->id;
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp))) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->mw_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
+
+err_unalloc:
+ ib_dealloc_mw(mw);
+
+err_put:
+ put_pd_read(pd);
+
+err_free:
+ put_uobj_write(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dealloc_mw cmd;
+ struct ib_mw *mw;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ mw = uobj->object;
+
+ ret = ib_dealloc_mw(mw);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return in_len;
+}
+
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
struct file *filp;
+ int ret;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -740,9 +1239,16 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
- if (IS_ERR(filp))
+ ret = get_unused_fd();
+ if (ret < 0)
+ return ret;
+ resp.fd = ret;
+
+ filp = ib_uverbs_alloc_event_file(file, 0);
+ if (IS_ERR(filp)) {
+ put_unused_fd(resp.fd);
return PTR_ERR(filp);
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
@@ -755,40 +1261,44 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
return in_len;
}
-ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static ssize_t create_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len, void *vcmd, int ex,
+ void __user *response)
{
- struct ib_uverbs_create_cq cmd;
+ struct ib_uverbs_create_cq *cmd;
+ struct ib_uverbs_create_cq_ex *cmd_e;
struct ib_uverbs_create_cq_resp resp;
struct ib_udata udata;
struct ib_ucq_object *obj;
struct ib_uverbs_event_file *ev_file = NULL;
struct ib_cq *cq;
+ struct ib_cq_init_attr attr;
+ int cmd_sz;
int ret;
if (out_len < sizeof resp)
return -ENOSPC;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ cmd = vcmd;
+ cmd_e = vcmd;
+ cmd_sz = ex ? sizeof(*cmd_e) : sizeof(*cmd);
+ INIT_UDATA(&udata, buf + cmd_sz, response + sizeof(resp),
+ in_len - cmd_sz, out_len - sizeof(resp));
- if (cmd.comp_vector >= file->device->num_comp_vectors)
+ if (cmd->comp_vector >= file->device->num_comp_vectors)
return -EINVAL;
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key);
+ init_uobj(&obj->uobject, cmd->user_handle, file->ucontext,
+ &cq_lock_class);
down_write(&obj->uobject.mutex);
- if (cmd.comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+ if (cmd->comp_channel >= 0) {
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
if (!ev_file) {
ret = -EINVAL;
goto err;
@@ -801,8 +1311,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&obj->comp_list);
INIT_LIST_HEAD(&obj->async_list);
- cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
- cmd.comp_vector,
+ memset(&attr, 0, sizeof(attr));
+ attr.cqe = cmd->cqe;
+ attr.comp_vector = cmd->comp_vector;
+ if (ex && (cmd_e->comp_mask & IB_UVERBS_CREATE_CQ_EX_CAP_FLAGS))
+ attr.flags = cmd_e->create_flags;
+ cq = file->device->ib_dev->create_cq(file->device->ib_dev, &attr,
file->ucontext, &udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
@@ -825,8 +1339,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
resp.cq_handle = obj->uobject.id;
resp.cqe = cq->cqe;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user(response, &resp, sizeof(resp))) {
ret = -EFAULT;
goto err_copy;
}
@@ -856,6 +1369,19 @@ err:
return ret;
}
+ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_cq cmd;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ return create_cq(file, buf, in_len, out_len, &cmd,
+ IB_USER_VERBS_CMD_BASIC, (void __user *)cmd.response);
+}
+
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -893,68 +1419,81 @@ out:
return ret ? ret : in_len;
}
+static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
+{
+ struct ib_uverbs_wc tmp;
+
+ tmp.wr_id = wc->wr_id;
+ tmp.status = wc->status;
+ tmp.opcode = wc->opcode;
+ tmp.vendor_err = wc->vendor_err;
+ tmp.byte_len = wc->byte_len;
+ tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+ tmp.qp_num = wc->qp->qp_num;
+ tmp.src_qp = wc->src_qp;
+ tmp.wc_flags = wc->wc_flags;
+ tmp.pkey_index = wc->pkey_index;
+ tmp.slid = wc->slid;
+ tmp.sl = wc->sl;
+ tmp.dlid_path_bits = wc->dlid_path_bits;
+ tmp.port_num = wc->port_num;
+ tmp.reserved = 0;
+
+ if (copy_to_user(dest, &tmp, sizeof tmp))
+ return -EFAULT;
+
+ return 0;
+}
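
copy_wc_to_user() supports the poll_cq rewrite below it: rather than allocating cmd.ne work completions in the kernel, the new code polls one completion at a time into a stack ib_wc and streams each straight to user memory just past the response header, so no allocation scales with the user-supplied count. A user-space sketch of that streaming layout (producer and buffer are toy stand-ins):

	#include <stdio.h>
	#include <string.h>

	struct resp { unsigned count; };
	struct wc   { int id; };

	/* Toy producer: yields up to three completions, one per call. */
	static int poll_one(struct wc *wc)
	{
		static int left = 3;

		if (!left)
			return 0;
		wc->id = left--;
		return 1;
	}

	int main(void)
	{
		unsigned char buf[sizeof(struct resp) + 8 * sizeof(struct wc)];
		unsigned char *data = buf + sizeof(struct resp); /* header first */
		struct resp resp = { .count = 0 };
		struct wc wc;
		unsigned ne = 8;             /* caller-requested maximum */

		while (resp.count < ne && poll_one(&wc)) {
			memcpy(data, &wc, sizeof(wc));   /* copy_wc_to_user() */
			data += sizeof(wc);
			resp.count++;
		}
		memcpy(buf, &resp, sizeof(resp));        /* header written last */
		printf("streamed %u completions\n", resp.count);
		return 0;
	}
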
+
ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_poll_cq cmd;
- struct ib_uverbs_poll_cq_resp *resp;
+ struct ib_uverbs_poll_cq_resp resp;
+ u8 __user *header_ptr;
+ u8 __user *data_ptr;
struct ib_cq *cq;
- struct ib_wc *wc;
- int ret = 0;
- int i;
- int rsize;
+ struct ib_wc wc;
+ int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
- if (!wc)
- return -ENOMEM;
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq)
+ return -EINVAL;
- rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
- resp = kmalloc(rsize, GFP_KERNEL);
- if (!resp) {
- ret = -ENOMEM;
- goto out_wc;
- }
+ /* the resp header is followed by the WC array in user memory */
+ header_ptr = (void __user *)(unsigned long) cmd.response;
+ data_ptr = header_ptr + sizeof resp;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
- if (!cq) {
- ret = -EINVAL;
- goto out;
- }
+ memset(&resp, 0, sizeof resp);
+ while (resp.count < cmd.ne) {
+ ret = ib_poll_cq(cq, 1, &wc);
+ if (ret < 0)
+ goto out_put;
+ if (!ret)
+ break;
- resp->count = ib_poll_cq(cq, cmd.ne, wc);
+ ret = copy_wc_to_user(data_ptr, &wc);
+ if (ret)
+ goto out_put;
- put_cq_read(cq);
+ data_ptr += sizeof(struct ib_uverbs_wc);
+ ++resp.count;
+ }
- for (i = 0; i < resp->count; i++) {
- resp->wc[i].wr_id = wc[i].wr_id;
- resp->wc[i].status = wc[i].status;
- resp->wc[i].opcode = wc[i].opcode;
- resp->wc[i].vendor_err = wc[i].vendor_err;
- resp->wc[i].byte_len = wc[i].byte_len;
- resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data;
- resp->wc[i].qp_num = wc[i].qp->qp_num;
- resp->wc[i].src_qp = wc[i].src_qp;
- resp->wc[i].wc_flags = wc[i].wc_flags;
- resp->wc[i].pkey_index = wc[i].pkey_index;
- resp->wc[i].slid = wc[i].slid;
- resp->wc[i].sl = wc[i].sl;
- resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
- resp->wc[i].port_num = wc[i].port_num;
- }
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+ if (copy_to_user(header_ptr, &resp, sizeof resp)) {
ret = -EFAULT;
+ goto out_put;
+ }
-out:
- kfree(resp);
+ ret = in_len;
-out_wc:
- kfree(wc);
- return ret ? ret : in_len;
+out_put:
+ put_cq_read(cq);
+ return ret;
}
ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
@@ -1035,124 +1574,181 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
- struct ib_uverbs_create_qp cmd;
- struct ib_uverbs_create_qp_resp resp;
+ void __user *response;
struct ib_udata udata;
struct ib_uqp_object *obj;
- struct ib_pd *pd;
- struct ib_cq *scq, *rcq;
- struct ib_srq *srq;
+ struct ib_device *device;
+ struct ib_pd *pd = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_cq *scq = NULL, *rcq = NULL;
+ struct ib_srq *srq = NULL;
struct ib_qp *qp;
struct ib_qp_init_attr attr;
- struct ib_xrcd *xrcd;
- struct ib_uobject *xrcd_uobj;
int ret;
-
- if (out_len < sizeof resp)
+ union {
+ struct ib_uverbs_create_qp basic;
+ } cmd_obj;
+ struct ib_uverbs_create_qp *cmd;
+ size_t cmd_size = 0;
+ union {
+ struct ib_uverbs_create_qp_resp basic;
+ } resp_obj;
+ struct ib_uverbs_create_qp_resp *resp;
+ size_t resp_size = 0;
+
+ cmd_size = sizeof(cmd_obj.basic);
+ cmd = &cmd_obj.basic;
+
+ resp_size = sizeof(resp_obj.basic);
+ resp = &resp_obj.basic;
+
+ if (out_len < resp_size)
return -ENOSPC;
- if (copy_from_user(&cmd, buf, sizeof cmd))
+ if (copy_from_user(&cmd_obj, buf, cmd_size))
return -EFAULT;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ response = (void __user *)cmd->response;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!disable_raw_qp_enforcement &&
+ cmd->qp_type == IB_QPT_RAW_PACKET && !priv_check(curthread, PRIV_NET_RAW))
+ return -EPERM;
+
+ INIT_UDATA(&udata, buf + cmd_size, response + resp_size,
+ in_len - cmd_size, out_len - resp_size);
+
+ obj = kzalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
+ init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
- srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
- idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
- xrcd = cmd.qp_type == IB_QPT_XRC ?
- idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL;
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
- rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
- scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
+ if (cmd->qp_type == IB_QPT_XRC_TGT) {
+ xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ device = xrcd->device;
+ } else {
+ if (cmd->qp_type == IB_QPT_XRC_INI) {
+ cmd->max_recv_wr = 0;
+ cmd->max_recv_sge = 0;
+ } else {
+ if (cmd->is_srq) {
+ srq = idr_read_srq(cmd->srq_handle, file->ucontext);
+ if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ }
- if (!pd || !scq || !rcq || (cmd.is_srq && !srq) ||
- (cmd.qp_type == IB_QPT_XRC && !xrcd)) {
- ret = -EINVAL;
- goto err_put;
+ if (cmd->recv_cq_handle != cmd->send_cq_handle) {
+ rcq = idr_read_cq(cmd->recv_cq_handle, file->ucontext, 0);
+ if (!rcq) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ }
+ }
+
+ scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ rcq = rcq ?: scq;
+ pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ if (!pd || !scq) {
+ ret = -EINVAL;
+ goto err_put;
}
- attr.create_flags = 0;
+ device = pd->device;
+ }
+
+ memset(&attr, 0, sizeof attr);
attr.event_handler = ib_uverbs_qp_event_handler;
attr.qp_context = file;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
- attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- attr.qp_type = cmd.qp_type;
- attr.xrcd = xrcd;
+ attr.xrcd = xrcd;
+ attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ attr.qp_type = cmd->qp_type;
attr.create_flags = 0;
- attr.cap.max_send_wr = cmd.max_send_wr;
- attr.cap.max_recv_wr = cmd.max_recv_wr;
- attr.cap.max_send_sge = cmd.max_send_sge;
- attr.cap.max_recv_sge = cmd.max_recv_sge;
- attr.cap.max_inline_data = cmd.max_inline_data;
+ attr.cap.max_send_wr = cmd->max_send_wr;
+ attr.cap.max_recv_wr = cmd->max_recv_wr;
+ attr.cap.max_send_sge = cmd->max_send_sge;
+ attr.cap.max_recv_sge = cmd->max_recv_sge;
+ attr.cap.max_inline_data = cmd->max_inline_data;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- qp = pd->device->create_qp(pd, &attr, &udata);
+ if (cmd->qp_type == IB_QPT_XRC_TGT)
+ qp = ib_create_qp(pd, &attr);
+ else
+ qp = device->create_qp(pd, &attr, &udata);
+
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
- qp->device = pd->device;
- qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->uobject = &obj->uevent.uobject;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
- qp->xrcd = attr.xrcd;
- atomic_inc(&pd->usecnt);
- atomic_inc(&attr.send_cq->usecnt);
- atomic_inc(&attr.recv_cq->usecnt);
- if (attr.srq)
- atomic_inc(&attr.srq->usecnt);
- else if (attr.xrcd)
- atomic_inc(&attr.xrcd->usecnt);
+ if (cmd->qp_type != IB_QPT_XRC_TGT) {
+ qp->real_qp = qp;
+ qp->device = device;
+ qp->pd = pd;
+ qp->send_cq = attr.send_cq;
+ qp->recv_cq = attr.recv_cq;
+ qp->srq = attr.srq;
+ qp->event_handler = attr.event_handler;
+ qp->qp_context = attr.qp_context;
+ qp->qp_type = attr.qp_type;
+ atomic_set(&qp->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&attr.send_cq->usecnt);
+ if (attr.recv_cq)
+ atomic_inc(&attr.recv_cq->usecnt);
+ if (attr.srq)
+ atomic_inc(&attr.srq->usecnt);
+ }
+ qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
if (ret)
goto err_destroy;
- memset(&resp, 0, sizeof resp);
- resp.qpn = qp->qp_num;
- resp.qp_handle = obj->uevent.uobject.id;
- resp.max_recv_sge = attr.cap.max_recv_sge;
- resp.max_send_sge = attr.cap.max_send_sge;
- resp.max_recv_wr = attr.cap.max_recv_wr;
- resp.max_send_wr = attr.cap.max_send_wr;
- resp.max_inline_data = attr.cap.max_inline_data;
+ memset(&resp_obj, 0, sizeof(resp_obj));
+ resp->qpn = qp->qp_num;
+ resp->qp_handle = obj->uevent.uobject.id;
+ resp->max_recv_sge = attr.cap.max_recv_sge;
+ resp->max_send_sge = attr.cap.max_send_sge;
+ resp->max_recv_wr = attr.cap.max_recv_wr;
+ resp->max_send_wr = attr.cap.max_send_wr;
+ resp->max_inline_data = attr.cap.max_inline_data;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ if (copy_to_user(response, &resp_obj, resp_size)) {
+ ret = -EFAULT;
goto err_copy;
- }
+ }
- put_pd_read(pd);
- put_cq_read(scq);
- if (rcq != scq)
+ if (xrcd) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ put_xrcd_read(xrcd_uobj);
+ }
+
+ if (pd)
+ put_pd_read(pd);
+ if (scq)
+ put_cq_read(scq);
+ if (rcq && rcq != scq)
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
mutex_lock(&file->mutex);
list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1171,6 +1767,8 @@ err_destroy:
ib_destroy_qp(qp);
err_put:
+ if (xrcd)
+ put_xrcd_read(xrcd_uobj);
if (pd)
put_pd_read(pd);
if (scq)
@@ -1179,16 +1777,107 @@ err_put:
put_cq_read(rcq);
if (srq)
put_srq_read(srq);
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
put_uobj_write(&obj->uevent.uobject);
return ret;
}
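+
+/*
+ * Open an existing shareable XRC TGT QP by number instead of creating a
+ * new one: only the XRC domain (passed in cmd.pd_handle) is looked up,
+ * so no PD/CQ/SRQ references are taken. Illustrative userspace fill-in,
+ * with hypothetical handle values:
+ *
+ *	cmd.pd_handle = xrcd_handle;   (the XRC domain, not a PD)
+ *	cmd.qpn       = shared_qpn;    (QPN obtained from the creating process)
+ *	cmd.qp_type   = IB_QPT_XRC_TGT;
+ */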
+ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len, int out_len)
+{
+ struct ib_uverbs_open_qp cmd;
+ struct ib_uverbs_create_qp_resp resp;
+ struct ib_udata udata;
+ struct ib_uqp_object *obj;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_qp *qp;
+ struct ib_qp_open_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
+ down_write(&obj->uevent.uobject.mutex);
+
+ xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.qp_context = file;
+ attr.qp_num = cmd.qpn;
+ attr.qp_type = cmd.qp_type;
+
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
+
+ qp = ib_open_qp(xrcd, &attr);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
+
+ qp->uobject = &obj->uevent.uobject;
+
+ obj->uevent.uobject.object = qp;
+ ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+ if (ret)
+ goto err_destroy;
+
+ memset(&resp, 0, sizeof resp);
+ resp.qpn = qp->qp_num;
+ resp.qp_handle = obj->uevent.uobject.id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_remove;
+ }
+
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ put_xrcd_read(xrcd_uobj);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
+ mutex_unlock(&file->mutex);
+
+ obj->uevent.uobject.live = 1;
+ up_write(&obj->uevent.uobject.mutex);
+
+ return in_len;
+
+err_remove:
+ idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+
+err_destroy:
+ ib_destroy_qp(qp);
+
+err_put:
+ put_xrcd_read(xrcd_uobj);
+ put_uobj_write(&obj->uevent.uobject);
+ return ret;
+}
+
ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
@@ -1286,30 +1975,59 @@ out:
return ret ? ret : in_len;
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+/* Remove attribute-mask fields that are ignored for the given QP type */
+static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
{
- struct ib_uverbs_modify_qp cmd;
- struct ib_udata udata;
- struct ib_qp *qp;
- struct ib_qp_attr *attr;
- int ret;
+ switch (qp_type) {
+ case IB_QPT_XRC_INI:
+ return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
+ case IB_QPT_XRC_TGT:
+ return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY);
+ default:
+ return mask;
+ }
+}
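+
+/*
+ * Worked example (illustrative): an XRC INI QP has no receive side, so
+ *
+ *	modify_qp_mask(IB_QPT_XRC_INI, IB_QP_STATE | IB_QP_MIN_RNR_TIMER)
+ *
+ * yields IB_QP_STATE; inapplicable bits are dropped silently instead of
+ * failing the modify call.
+ */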
- if (copy_from_user(&cmd, buf, sizeof cmd))
+static ssize_t __uverbs_modify_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len,
+ enum uverbs_cmd_type cmd_type)
+{
+ struct ib_uverbs_modify_qp_ex cmd;
+ struct ib_udata udata;
+ struct ib_qp *qp;
+ struct ib_qp_attr *attr;
+ struct ib_qp_attr_ex *attrx;
+ int ret;
+ void *p;
+ union ib_gid sgid;
+ union ib_gid *dgid;
+ u8 port_num;
+
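+ /*
+ * The legacy command layout lacks the leading comp_mask field of
+ * ib_uverbs_modify_qp_ex (assumed here to be its first member), so
+ * land the user copy just past comp_mask and let the shared code
+ * below treat both layouts as the extended struct.
+ */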
+ if (cmd_type == IB_USER_VERBS_CMD_BASIC) {
+ p = &cmd;
+ p += sizeof(cmd.comp_mask);
+ if (copy_from_user(p, buf,
+ sizeof(struct ib_uverbs_modify_qp)))
return -EFAULT;
+ } else {
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+ }
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
- if (!attr)
+ attrx = kzalloc(sizeof(*attrx), GFP_KERNEL);
+ if (!attrx)
return -ENOMEM;
+ attr = (struct ib_qp_attr *)attrx;
qp = idr_read_qp(cmd.qp_handle, file->ucontext);
if (!qp) {
- ret = -EINVAL;
- goto out;
+ kfree(attrx);
+ return -EINVAL;
}
attr->qp_state = cmd.qp_state;
@@ -1357,10 +2075,49 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+ port_num = (cmd.attr_mask & IB_QP_PORT) ? cmd.port_num : qp->port_num;
+ if ((cmd.attr_mask & IB_QP_AV) && port_num &&
+ (rdma_port_get_link_layer(qp->device, port_num) ==
+ IB_LINK_LAYER_ETHERNET)) {
+ ret = ib_query_gid(qp->device, port_num,
+ attr->ah_attr.grh.sgid_index, &sgid);
+ if (ret)
+ goto out;
+ dgid = &attr->ah_attr.grh.dgid;
+ if (rdma_link_local_addr((struct in6_addr *)dgid->raw)) {
+ rdma_get_ll_mac((struct in6_addr *)dgid->raw,
+ attr->ah_attr.dmac);
+ rdma_get_ll_mac((struct in6_addr *)sgid.raw,
+ attr->smac);
+ attr->vlan_id = rdma_get_vlan_id(&sgid);
+ } else {
+ ret = rdma_addr_find_dmac_by_grh(&sgid, dgid,
+ attr->ah_attr.dmac,
+ &attr->vlan_id);
+ if (ret)
+ goto out;
+ ret = rdma_addr_find_smac_by_sgid(&sgid, attr->smac,
+ NULL);
+ if (ret)
+ goto out;
+ }
+ cmd.attr_mask |= IB_QP_SMAC;
+ if (attr->vlan_id < 0xFFFF)
+ cmd.attr_mask |= IB_QP_VID;
+ }
+ if (cmd_type == IB_USER_VERBS_CMD_EXTENDED) {
+ if (cmd.comp_mask & IB_UVERBS_QP_ATTR_DCT_KEY)
+ attrx->dct_key = cmd.dct_key;
+ }
- ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
-
- put_qp_read(qp);
+ if (qp->real_qp == qp) {
+ ret = qp->device->modify_qp(qp, attr,
+ modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+ if (!ret && (cmd.attr_mask & IB_QP_PORT))
+ qp->port_num = attr->port_num;
+ } else {
+ ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+ }
if (ret)
goto out;
@@ -1368,18 +2125,27 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
ret = in_len;
out:
- kfree(attr);
+ put_qp_read(qp);
+ kfree(attrx);
return ret;
}
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ return __uverbs_modify_qp(file, buf, in_len, out_len,
+ IB_USER_VERBS_CMD_BASIC);
+}
+
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj;
struct ib_qp *qp;
struct ib_uqp_object *obj;
int ret = -EINVAL;
@@ -1409,6 +2175,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
if (ret)
return ret;
+ if (obj->uxrcd)
+ atomic_dec(&obj->uxrcd->refcnt);
+
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
mutex_lock(&file->mutex);
@@ -1429,14 +2198,14 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
struct ib_uverbs_send_wr *user_wr;
struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
- struct ib_qp *qp;
+ struct ib_qp *qp;
int i, sg_ind;
int is_ud;
ssize_t ret = -EINVAL;
@@ -1479,13 +2248,13 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
if (!next) {
- ret = -ENOMEM;
- goto out_put;
- }
+ ret = -ENOMEM;
+ goto out_put;
+ }
if (!last)
wr = next;
- else
+ else
last->next = next;
last = next;
@@ -1500,7 +2269,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
file->ucontext);
if (!next->wr.ud.ah) {
ret = -EINVAL;
- goto out_put;
+ goto out_put;
}
next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
@@ -1555,12 +2324,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
resp.bad_wr = 0;
- ret = qp->device->post_send(qp, wr, &bad_wr);
+ ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
- break;
+ break;
}
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -1594,7 +2363,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
- int ret;
+ int ret;
if (in_len < wqe_size * wr_count +
sge_count * sizeof (struct ib_uverbs_sge))
@@ -1617,9 +2386,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
}
if (user_wr->num_sge + sg_ind > sge_count) {
- ret = -EINVAL;
- goto err;
- }
+ ret = -EINVAL;
+ goto err;
+ }
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
@@ -1627,7 +2396,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
if (!next) {
ret = -ENOMEM;
goto err;
- }
+ }
if (!last)
wr = next;
@@ -1693,7 +2462,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
goto out;
resp.bad_wr = 0;
- ret = qp->device->post_recv(qp, wr, &bad_wr);
+ ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
put_qp_read(qp);
@@ -1768,8 +2537,8 @@ out:
}
ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
@@ -1789,10 +2558,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (!uobj)
return -ENOMEM;
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key);
+ init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
down_write(&uobj->mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -1863,7 +2632,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_ah cmd;
struct ib_ah *ah;
struct ib_uobject *uobj;
- int ret;
+ int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -1906,7 +2675,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = idr_write_qp(cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -1935,25 +2704,25 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_read(qp);
+ put_qp_write(qp);
return ret ? ret : in_len;
}
ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
- int ret = -EINVAL;
+ int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = idr_write_qp(cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -1972,102 +2741,122 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
}
out_put:
- put_qp_read(qp);
+ put_qp_write(qp);
return ret ? ret : in_len;
}
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+ struct ib_uverbs_create_xsrq *cmd,
+ struct ib_udata *udata)
{
- struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
- struct ib_uevent_object *obj;
+ struct ib_usrq_object *obj;
struct ib_pd *pd;
struct ib_srq *srq;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_srq_init_attr attr;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
+ obj = kmalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
+ down_write(&obj->uevent.uobject.mutex);
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err;
+ }
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
- init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
- down_write(&obj->uobject.mutex);
+ attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+ if (!attr.ext.xrc.cq) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+ }
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = idr_read_pd(cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
- goto err;
- }
+ goto err_put_cq;
+ }
attr.event_handler = ib_uverbs_srq_event_handler;
attr.srq_context = file;
- attr.attr.max_wr = cmd.max_wr;
- attr.attr.max_sge = cmd.max_sge;
- attr.attr.srq_limit = cmd.srq_limit;
+ attr.srq_type = cmd->srq_type;
+ attr.attr.max_wr = cmd->max_wr;
+ attr.attr.max_sge = cmd->max_sge;
+ attr.attr.srq_limit = cmd->srq_limit;
- obj->events_reported = 0;
- INIT_LIST_HEAD(&obj->event_list);
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
- srq = pd->device->create_srq(pd, &attr, &udata);
+ srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
goto err_put;
}
- srq->device = pd->device;
- srq->pd = pd;
- srq->uobject = &obj->uobject;
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->srq_type = cmd->srq_type;
+ srq->uobject = &obj->uevent.uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
- srq->ext.xrc.cq = NULL;
- srq->ext.xrc.xrcd = NULL;
+
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ srq->ext.xrc.cq = attr.ext.xrc.cq;
+ srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
+ atomic_inc(&attr.ext.xrc.cq->usecnt);
+ atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+ }
+
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
- obj->uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ obj->uevent.uobject.object = srq;
+ ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
if (ret)
goto err_destroy;
memset(&resp, 0, sizeof resp);
- resp.srq_handle = obj->uobject.id;
+ resp.srq_handle = obj->uevent.uobject.id;
resp.max_wr = attr.attr.max_wr;
resp.max_sge = attr.attr.max_sge;
+ if (cmd->srq_type == IB_SRQT_XRC)
+ resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ if (copy_to_user((void __user *) (unsigned long) cmd->response,
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
}
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ put_uobj_read(xrcd_uobj);
+ put_cq_read(attr.ext.xrc.cq);
+ }
put_pd_read(pd);
mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
mutex_unlock(&file->mutex);
- obj->uobject.live = 1;
+ obj->uevent.uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ up_write(&obj->uevent.uobject.mutex);
- return in_len;
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
err_destroy:
ib_destroy_srq(srq);
@@ -2075,25 +2864,29 @@ err_destroy:
err_put:
put_pd_read(pd);
+err_put_cq:
+ if (cmd->srq_type == IB_SRQT_XRC)
+ put_cq_read(attr.ext.xrc.cq);
+
+err_put_xrcd:
+ if (cmd->srq_type == IB_SRQT_XRC) {
+ atomic_dec(&obj->uxrcd->refcnt);
+ put_uobj_read(xrcd_uobj);
+ }
+
err:
- put_uobj_write(&obj->uobject);
+ put_uobj_write(&obj->uevent.uobject);
return ret;
}
-ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
{
- struct ib_uverbs_create_xsrq cmd;
+ struct ib_uverbs_create_srq cmd;
+ struct ib_uverbs_create_xsrq xcmd;
struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
- struct ib_uevent_object *obj;
- struct ib_pd *pd;
- struct ib_srq *srq;
- struct ib_cq *xrc_cq;
- struct ib_xrcd *xrcd;
- struct ib_srq_init_attr attr;
- struct ib_uobject *xrcd_uobj;
+ struct ib_udata udata;
int ret;
if (out_len < sizeof resp)
@@ -2102,113 +2895,48 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
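+
+ /*
+ * Recast the legacy create_srq request as an extended xsrq command:
+ * a plain SRQ is IB_SRQT_BASIC with no XRC domain or CQ attached,
+ * which lets both entry points share __uverbs_create_xsrq().
+ */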
+ xcmd.response = cmd.response;
+ xcmd.user_handle = cmd.user_handle;
+ xcmd.srq_type = IB_SRQT_BASIC;
+ xcmd.pd_handle = cmd.pd_handle;
+ xcmd.max_wr = cmd.max_wr;
+ xcmd.max_sge = cmd.max_sge;
+ xcmd.srq_limit = cmd.srq_limit;
+
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uobject, cmd.user_handle, file->ucontext,
- &srq_lock_key);
- down_write(&obj->uobject.mutex);
-
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- if (!pd) {
- ret = -EINVAL;
- goto err;
- }
-
- xrc_cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
- if (!xrc_cq) {
- ret = -EINVAL;
- goto err_put_pd;
- }
-
- xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!xrcd) {
- ret = -EINVAL;
- goto err_put_cq;
- }
-
-
- attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = file;
- attr.attr.max_wr = cmd.max_wr;
- attr.attr.max_sge = cmd.max_sge;
- attr.attr.srq_limit = cmd.srq_limit;
-
- obj->events_reported = 0;
- INIT_LIST_HEAD(&obj->event_list);
-
- srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata);
- if (IS_ERR(srq)) {
- ret = PTR_ERR(srq);
- goto err_put;
- }
-
- srq->device = pd->device;
- srq->pd = pd;
- srq->uobject = &obj->uobject;
- srq->event_handler = attr.event_handler;
- srq->srq_context = attr.srq_context;
- srq->ext.xrc.cq = xrc_cq;
- srq->ext.xrc.xrcd = xrcd;
- atomic_inc(&pd->usecnt);
- atomic_inc(&xrc_cq->usecnt);
- atomic_inc(&xrcd->usecnt);
-
- atomic_set(&srq->usecnt, 0);
-
- obj->uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ ret = __uverbs_create_xsrq(file, &xcmd, &udata);
if (ret)
- goto err_destroy;
-
- memset(&resp, 0, sizeof resp);
- resp.srq_handle = obj->uobject.id;
- resp.max_wr = attr.attr.max_wr;
- resp.max_sge = attr.attr.max_sge;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
-
- put_xrcd_read(xrcd_uobj);
- put_cq_read(xrc_cq);
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
+ return ret;
return in_len;
+}
-err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len, int out_len)
+{
+ struct ib_uverbs_create_xsrq cmd;
+ struct ib_uverbs_create_srq_resp resp;
+ struct ib_udata udata;
+ int ret;
-err_destroy:
- ib_destroy_srq(srq);
+ if (out_len < sizeof resp)
+ return -ENOSPC;
-err_put:
- put_xrcd_read(xrcd_uobj);
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
-err_put_cq:
- put_cq_read(xrc_cq);
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
-err_put_pd:
- put_pd_read(pd);
+ ret = __uverbs_create_xsrq(file, &cmd, &udata);
+ if (ret)
+ return ret;
-err:
- put_uobj_write(&obj->uobject);
- return ret;
+ return in_len;
}
ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
@@ -2266,7 +2994,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
put_srq_read(srq);
if (ret)
- return ret;
+ return ret;
memset(&resp, 0, sizeof resp);
@@ -2282,8 +3010,8 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ const char __user *buf, int in_len,
+ int out_len)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
@@ -2291,6 +3019,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
+ struct ib_usrq_object *us;
+ enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2300,6 +3030,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
return -EINVAL;
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
+ srq_type = srq->srq_type;
ret = ib_destroy_srq(srq);
if (!ret)
@@ -2310,6 +3041,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ if (srq_type == IB_SRQT_XRC) {
+ us = container_of(obj, struct ib_usrq_object, uevent);
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
mutex_lock(&file->mutex);
@@ -2330,313 +3066,467 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
return ret ? ret : in_len;
}
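+
+/*
+ * DCT (the experimental Mellanox "dynamically connected target"): a
+ * receive-side endpoint bound to a PD, CQ and SRQ and keyed by dc_key.
+ * The exp_ handlers below create, destroy and query it.
+ */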
-static struct inode *xrc_file2inode(struct file *f)
+ssize_t ib_uverbs_exp_create_dct(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
- return f->f_dentry->d_inode;
-}
+ int in_len = ucore->inlen + uhw->inlen;
+ int out_len = ucore->outlen + uhw->outlen;
+ struct ib_uverbs_create_dct cmd;
+ struct ib_uverbs_create_dct_resp resp;
+ struct ib_udct_object *obj;
+ struct ib_dct *dct;
+ int ret;
+ struct ib_dct_init_attr attr;
+ struct ib_pd *pd = NULL;
+ struct ib_cq *cq = NULL;
+ struct ib_srq *srq = NULL;
-struct xrcd_table_entry {
- struct rb_node node;
- struct inode *inode;
- struct ib_xrcd *xrcd;
-};
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
-static int xrcd_table_insert(struct ib_device *dev,
- struct inode *i_n,
- struct ib_xrcd *xrcd)
-{
- struct xrcd_table_entry *entry, *scan;
- struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node;
- struct rb_node *parent = NULL;
+ ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
- entry = kmalloc(sizeof(struct xrcd_table_entry), GFP_KERNEL);
- if (!entry)
+ obj = kmalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
return -ENOMEM;
- entry->inode = i_n;
- entry->xrcd = xrcd;
+ init_uobj(&obj->uobject, cmd.user_handle, file->ucontext,
+ &dct_lock_class);
+ down_write(&obj->uobject.mutex);
- while (*p) {
- parent = *p;
- scan = rb_entry(parent, struct xrcd_table_entry, node);
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err_pd;
+ }
- if (i_n < scan->inode)
- p = &(*p)->rb_left;
- else if (i_n > scan->inode)
- p = &(*p)->rb_right;
- else {
- kfree(entry);
- return -EEXIST;
- }
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq) {
+ ret = -EINVAL;
+ goto err_put;
}
- rb_link_node(&entry->node, parent, p);
- rb_insert_color(&entry->node, &dev->ib_uverbs_xrcd_table);
- igrab(i_n);
- return 0;
-}
+ srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (!srq) {
+ ret = -EINVAL;
+ goto err_put;
+ }
-static struct xrcd_table_entry *xrcd_table_search(struct ib_device *dev,
- struct inode *i_n)
-{
- struct xrcd_table_entry *scan;
- struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node;
- struct rb_node *parent = NULL;
+ attr.cq = cq;
+ attr.access_flags = cmd.access_flags;
+ attr.min_rnr_timer = cmd.min_rnr_timer;
+ attr.srq = srq;
+ attr.tclass = cmd.tclass;
+ attr.flow_label = cmd.flow_label;
+ attr.dc_key = cmd.dc_key;
+ attr.mtu = cmd.mtu;
+ attr.port = cmd.port;
+ attr.pkey_index = cmd.pkey_index;
+ attr.gid_index = cmd.gid_index;
+ attr.hop_limit = cmd.hop_limit;
+ attr.create_flags = cmd.create_flags;
+
+ dct = ib_create_dct(pd, &attr, uhw);
+ if (IS_ERR(dct)) {
+ ret = PTR_ERR(dct);
+ goto err_put;
+ }
- while (*p) {
- parent = *p;
- scan = rb_entry(parent, struct xrcd_table_entry, node);
+ dct->device = file->device->ib_dev;
+ dct->uobject = &obj->uobject;
- if (i_n < scan->inode)
- p = &(*p)->rb_left;
- else if (i_n > scan->inode)
- p = &(*p)->rb_right;
- else
- return scan;
- }
- return NULL;
-}
+ obj->uobject.object = dct;
+ ret = idr_add_uobj(&ib_uverbs_dct_idr, &obj->uobject);
+ if (ret)
+ goto err_dct;
-static int find_xrcd(struct ib_device *dev, struct inode *i_n,
- struct ib_xrcd **xrcd)
-{
- struct xrcd_table_entry *entry;
+ memset(&resp, 0, sizeof(resp));
+ resp.dct_handle = obj->uobject.id;
+ resp.dctn = dct->dct_num;
- entry = xrcd_table_search(dev, i_n);
- if (!entry)
- return -EINVAL;
+ ret = ucore->ops->copy_to(ucore, &resp, sizeof(resp));
+ if (ret)
+ goto err_copy;
- *xrcd = entry->xrcd;
- return 0;
-}
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->dct_list);
+ mutex_unlock(&file->mutex);
+ obj->uobject.live = 1;
-static void xrcd_table_delete(struct ib_device *dev,
- struct inode *i_n)
-{
- struct xrcd_table_entry *entry = xrcd_table_search(dev, i_n);
+ put_srq_read(srq);
+ put_cq_read(cq);
+ put_pd_read(pd);
- if (entry) {
- iput(i_n);
- rb_erase(&entry->node, &dev->ib_uverbs_xrcd_table);
- kfree(entry);
- }
+ up_write(&obj->uobject.mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_dct_idr, &obj->uobject);
+
+err_dct:
+ ib_destroy_dct(dct);
+
+err_put:
+ if (srq)
+ put_srq_read(srq);
+
+ if (cq)
+ put_cq_read(cq);
+
+ put_pd_read(pd);
+
+err_pd:
+ put_uobj_write(&obj->uobject);
+ return ret;
}
-ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ssize_t ib_uverbs_exp_destroy_dct(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
- struct ib_uverbs_open_xrc_domain cmd;
- struct ib_uverbs_open_xrc_domain_resp resp;
- struct ib_udata udata;
- struct ib_uobject *uobj;
- struct ib_uxrcd_object *xrcd_uobj;
- struct ib_xrcd *xrcd = NULL;
- struct file *f = NULL;
- struct inode *inode = NULL;
- int ret = 0;
- int new_xrcd = 0;
+ int in_len = ucore->inlen + uhw->inlen;
+ int out_len = ucore->outlen + uhw->outlen;
+ struct ib_uverbs_destroy_dct cmd;
+ struct ib_uverbs_destroy_dct_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_dct *dct;
+ struct ib_udct_object *obj;
+ int ret;
- if (out_len < sizeof resp)
+ if (out_len < sizeof(resp))
return -ENOSPC;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ uobj = idr_write_uobj(&ib_uverbs_dct_idr, cmd.user_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
- mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
- if (cmd.fd != (u32) (-1)) {
- /* search for file descriptor */
- f = fget(cmd.fd);
- if (!f) {
- ret = -EBADF;
- goto err_table_mutex_unlock;
- }
+ dct = uobj->object;
+ obj = container_of(dct->uobject, struct ib_udct_object, uobject);
- inode = xrc_file2inode(f);
- if (!inode) {
- ret = -EBADF;
- goto err_table_mutex_unlock;
- }
+ ret = ib_destroy_dct(dct);
+ if (!ret)
+ uobj->live = 0;
- ret = find_xrcd(file->device->ib_dev, inode, &xrcd);
- if (ret && !(cmd.oflags & O_CREAT)) {
- /* no file descriptor. Need CREATE flag */
- ret = -EAGAIN;
- goto err_table_mutex_unlock;
- }
+ put_uobj_write(uobj);
- if (xrcd && cmd.oflags & O_EXCL) {
- ret = -EINVAL;
- goto err_table_mutex_unlock;
- }
- }
+ if (ret)
+ return ret;
- xrcd_uobj = kmalloc(sizeof *xrcd_uobj, GFP_KERNEL);
- if (!xrcd_uobj) {
- ret = -ENOMEM;
- goto err_table_mutex_unlock;
- }
+ idr_remove_uobj(&ib_uverbs_dct_idr, uobj);
- uobj = &xrcd_uobj->uobject;
- init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
- down_write(&uobj->mutex);
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
- if (!xrcd) {
- xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
- file->ucontext, &udata);
- if (IS_ERR(xrcd)) {
- ret = PTR_ERR(xrcd);
- goto err;
- }
- xrcd->uobject = (cmd.fd == -1) ? uobj : NULL;
- xrcd->inode = inode;
- xrcd->device = file->device->ib_dev;
- atomic_set(&xrcd->usecnt, 0);
- new_xrcd = 1;
- }
+ memset(&resp, 0, sizeof(resp));
- uobj->object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj);
+ put_uobj(uobj);
+
+ ret = ucore->ops->copy_to(ucore, &resp, sizeof(resp));
if (ret)
- goto err_idr;
+ return ret;
- memset(&resp, 0, sizeof resp);
- resp.xrcd_handle = uobj->id;
+ return in_len;
+}
- if (inode) {
- if (new_xrcd) {
- /* create new inode/xrcd table entry */
- ret = xrcd_table_insert(file->device->ib_dev, inode, xrcd);
- if (ret)
- goto err_insert_xrcd;
- }
- atomic_inc(&xrcd->usecnt);
+ssize_t ib_uverbs_exp_query_dct(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ int in_len = ucore->inlen + uhw->inlen;
+ int out_len = ucore->outlen + uhw->outlen;
+ struct ib_uverbs_query_dct cmd;
+ struct ib_uverbs_query_dct_resp resp;
+ struct ib_dct *dct;
+ struct ib_dct_attr *attr;
+ int err;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ err = ucore->ops->copy_from(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr) {
+ err = -ENOMEM;
+ goto out;
}
- if (f)
- fput(f);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
+ dct = idr_read_dct(cmd.dct_handle, file->ucontext);
+ if (!dct) {
+ err = -EINVAL;
+ goto out;
}
- INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list);
+ err = ib_query_dct(dct, attr);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
+ put_dct_read(dct);
- uobj->live = 1;
+ if (err)
+ goto out;
- up_write(&uobj->mutex);
+ memset(&resp, 0, sizeof(resp));
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
- return in_len;
+ resp.dc_key = attr->dc_key;
+ resp.access_flags = attr->access_flags;
+ resp.flow_label = attr->flow_label;
+ resp.key_violations = attr->key_violations;
+ resp.port = attr->port;
+ resp.min_rnr_timer = attr->min_rnr_timer;
+ resp.tclass = attr->tclass;
+ resp.mtu = attr->mtu;
+ resp.pkey_index = attr->pkey_index;
+ resp.gid_index = attr->gid_index;
+ resp.hop_limit = attr->hop_limit;
+ resp.state = attr->state;
-err_copy:
+ err = ucore->ops->copy_to(ucore, &resp, sizeof(resp));
- if (inode) {
- if (new_xrcd)
- xrcd_table_delete(file->device->ib_dev, inode);
- atomic_dec(&xrcd->usecnt);
+out:
+ kfree(attr);
+
+ return err ? err : in_len;
+}
+
+/*
+ * Experimental functions
+ */
+
+static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
+
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+ memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
+ sizeof(struct ib_flow_eth_filter));
+ memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
+ sizeof(struct ib_flow_eth_filter));
+ break;
+ case IB_FLOW_SPEC_IB:
+ ib_spec->ib.size = sizeof(struct ib_flow_spec_ib);
+ memcpy(&ib_spec->ib.val, &kern_spec->ib.val,
+ sizeof(struct ib_flow_ib_filter));
+ memcpy(&ib_spec->ib.mask, &kern_spec->ib.mask,
+ sizeof(struct ib_flow_ib_filter));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+ memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
+ sizeof(struct ib_flow_ipv4_filter));
+ memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
+ sizeof(struct ib_flow_ipv4_filter));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+ memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ break;
+ default:
+ return -EINVAL;
}
+ return 0;
+}
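+
+/*
+ * Each supported spec type above only needs its kernel-side size fixed
+ * up and its val/mask filters copied verbatim; supporting a new header
+ * type means adding one case with its filter struct.
+ */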
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_create_flow cmd;
+ struct ib_uverbs_create_flow_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_flow *flow_id;
+ struct ib_uverbs_flow_attr *kern_flow_attr;
+ struct ib_flow_attr *flow_attr;
+ struct ib_qp *qp;
+ int err = 0;
+ void *kern_spec;
+ void *ib_spec;
+ int i;
+
+ if (ucore->outlen < sizeof(resp))
+ return -ENOSPC;
-err_idr:
- ib_dealloc_xrcd(xrcd);
+ err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
-err:
- put_uobj_write(uobj);
+ ucore->inbuf += sizeof(cmd);
+ ucore->inlen -= sizeof(cmd);
-err_table_mutex_unlock:
+ if (cmd.comp_mask)
+ return -EINVAL;
- if (f)
- fput(f);
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
- return ret;
-}
+ if (priv_check(curthread, PRIV_NET_RAW) && !disable_raw_qp_enforcement)
+ return -EPERM;
-ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
-{
- struct ib_uverbs_close_xrc_domain cmd;
- struct ib_uobject *uobj, *t_uobj;
- struct ib_uxrcd_object *xrcd_uobj;
- struct ib_xrcd *xrcd = NULL;
- struct inode *inode = NULL;
- int ret = 0;
+ if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ return -EINVAL;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ if (cmd.flow_attr.size > ucore->inlen ||
+ cmd.flow_attr.size >
+ (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
+ return -EINVAL;
- mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle,
- file->ucontext);
+ if (cmd.flow_attr.num_of_specs) {
+ kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) +
+ cmd.flow_attr.size, GFP_KERNEL);
+ if (!kern_flow_attr)
+ return -ENOMEM;
+
+ memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
+ err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+ cmd.flow_attr.size);
+ if (err)
+ goto err_free_attr;
+ } else {
+ kern_flow_attr = &cmd.flow_attr;
+ }
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
if (!uobj) {
- ret = -EINVAL;
- goto err_unlock_mutex;
+ err = -ENOMEM;
+ goto err_free_attr;
}
+ init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
+ down_write(&uobj->mutex);
- mutex_lock(&file->mutex);
- if (!ret) {
- list_for_each_entry(t_uobj, &file->ucontext->qp_list, list) {
- struct ib_qp *qp = t_uobj->object;
- if (qp->xrcd && qp->xrcd == uobj->object) {
- ret = -EBUSY;
- break;
- }
- }
+ qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ if (!qp) {
+ err = -EINVAL;
+ goto err_uobj;
}
- if (!ret) {
- list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) {
- struct ib_srq *srq = t_uobj->object;
- if (srq->ext.xrc.xrcd && srq->ext.xrc.xrcd == uobj->object) {
- ret = -EBUSY;
- break;
- }
+
+ flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size,
+ GFP_KERNEL);
+ if (!flow_attr) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ flow_attr->type = kern_flow_attr->type;
+ flow_attr->priority = kern_flow_attr->priority;
+ flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
+ flow_attr->port = kern_flow_attr->port;
+ flow_attr->flags = kern_flow_attr->flags;
+ flow_attr->size = sizeof(*flow_attr);
+
+ kern_spec = kern_flow_attr + 1;
+ ib_spec = flow_attr + 1;
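+ /*
+ * Walk the variable-length spec array: each pass consumes one user
+ * spec (cmd.flow_attr.size counts down) and emits one kernel spec
+ * (flow_attr->size counts up); both totals must come out exactly
+ * even or the request is rejected below.
+ */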
+ for (i = 0; i < flow_attr->num_of_specs &&
+ cmd.flow_attr.size >
+ offsetof(struct ib_uverbs_flow_spec, reserved) &&
+ cmd.flow_attr.size >=
+ ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
+ err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ if (err)
+ goto err_free;
+ flow_attr->size +=
+ ((union ib_flow_spec *)ib_spec)->size;
+ cmd.flow_attr.size -=
+ ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+ kern_spec += ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+ ib_spec += ((union ib_flow_spec *)ib_spec)->size;
+ }
+ if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+ pr_warn("create flow failed, flow %d: %d bytes left from uverbs cmd\n",
+ i, cmd.flow_attr.size);
+ err = -EINVAL;
+ goto err_free;
}
+ flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+ if (IS_ERR(flow_id)) {
+ err = PTR_ERR(flow_id);
+ goto err_free;
}
+ flow_id->qp = qp;
+ flow_id->uobject = uobj;
+ uobj->object = flow_id;
+
+ err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
+ if (err)
+ goto destroy_flow;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.flow_handle = uobj->id;
+
+ err = ib_copy_to_udata(ucore,
+ &resp, sizeof(resp));
+ if (err)
+ goto err_copy;
+
+ put_qp_read(qp);
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->rule_list);
mutex_unlock(&file->mutex);
- if (ret) {
- put_uobj_write(uobj);
- goto err_unlock_mutex;
- }
- xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (!list_empty(&xrcd_uobj->xrc_reg_qp_list)) {
- ret = -EBUSY;
- put_uobj_write(uobj);
- goto err_unlock_mutex;
- }
+ uobj->live = 1;
- xrcd = (struct ib_xrcd *) (uobj->object);
- inode = xrcd->inode;
+ up_write(&uobj->mutex);
+ kfree(flow_attr);
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return 0;
+err_copy:
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+destroy_flow:
+ ib_destroy_flow(flow_id);
+err_free:
+ kfree(flow_attr);
+err_put:
+ put_qp_read(qp);
+err_uobj:
+ put_uobj_write(uobj);
+err_free_attr:
+ if (cmd.flow_attr.num_of_specs)
+ kfree(kern_flow_attr);
+ return err;
+}
- if (inode)
- atomic_dec(&xrcd->usecnt);
+int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_destroy_flow cmd;
+ struct ib_flow *flow_id;
+ struct ib_uobject *uobj;
+ int ret;
- ret = ib_dealloc_xrcd(uobj->object);
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+ flow_id = uobj->object;
+
+ ret = ib_destroy_flow(flow_id);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
- if (ret && !inode)
- goto err_unlock_mutex;
-
- if (!ret && inode)
- xrcd_table_delete(file->device->ib_dev, inode);
-
- idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
@@ -2644,380 +3534,378 @@ ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file,
put_uobj(uobj);
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
- return in_len;
-
-err_unlock_mutex:
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
return ret;
}
-void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
- struct ib_xrcd *xrcd)
+ssize_t ib_uverbs_exp_modify_qp(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
- struct inode *inode = NULL;
- int ret = 0;
+ const char __user *buf = ucore->inbuf;
+ int in_len = ucore->inlen + uhw->inlen;
+ int out_len = ucore->outlen + uhw->outlen;
- inode = xrcd->inode;
- if (inode)
- atomic_dec(&xrcd->usecnt);
+ return __uverbs_modify_qp(file, buf, in_len, out_len,
+ IB_USER_VERBS_CMD_EXTENDED);
+}
- ret = ib_dealloc_xrcd(xrcd);
- if (!ret && inode)
- xrcd_table_delete(ib_dev, inode);
+
+ssize_t ib_uverbs_exp_create_cq(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
+{
+ const char __user *buf = ucore->inbuf;
+ int in_len = ucore->inlen + uhw->inlen;
+ int out_len = ucore->outlen + uhw->outlen;
+ struct ib_uverbs_create_cq_ex cmd;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ return create_cq(file, buf, in_len, out_len, &cmd,
+ IB_USER_VERBS_CMD_EXTENDED, ucore->outbuf);
}
-ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ssize_t ib_uverbs_exp_modify_cq(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
- struct ib_uverbs_create_xrc_rcv_qp cmd;
- struct ib_uverbs_create_xrc_rcv_qp_resp resp;
- struct ib_uxrc_rcv_object *obj;
- struct ib_qp_init_attr init_attr;
- struct ib_xrcd *xrcd;
- struct ib_uobject *uobj;
- struct ib_uxrcd_object *xrcd_uobj;
- u32 qp_num;
- int err;
+ const char __user *buf = ucore->inbuf;
+ int in_len = ucore->inlen + uhw->inlen;
+ struct ib_uverbs_modify_cq_ex cmd;
+ struct ib_cq *cq;
+ struct ib_cq_attr attr;
+ int ret;
- if (out_len < sizeof resp)
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ if (!cq)
+ return -EINVAL;
+
+ attr.moderation.cq_count = cmd.cq_count;
+ attr.moderation.cq_period = cmd.cq_period;
+ attr.cq_cap_flags = cmd.cq_cap_flags;
+
+ ret = ib_modify_cq(cq, &attr, cmd.attr_mask);
+
+ put_cq_read(cq);
+
+ return ret ? ret : in_len;
+}
+
+
+ssize_t ib_uverbs_exp_query_device(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
+{
+ struct ib_uverbs_exp_query_device_resp resp;
+ struct ib_exp_device_attr exp_attr;
+ int ret;
+
+ if (ucore->outlen + uhw->outlen < sizeof(resp))
return -ENOSPC;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ memset(&resp, 0, sizeof(resp));
+ memset(&exp_attr, 0, sizeof(exp_attr));
+ ret = ib_exp_query_device(file->device->ib_dev, &exp_attr);
+ if (ret)
+ return ret;
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ ib_uverbs_query_device_assign(&resp.base, &exp_attr.base, file);
- xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
- if (!xrcd) {
- err = -EINVAL;
- goto err_out;
+ resp.comp_mask = 0;
+ resp.device_cap_flags2 = 0;
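+ /*
+ * Optional response fields below are valid only when their bit is
+ * set in resp.comp_mask; userspace must test each bit before
+ * reading the corresponding value.
+ */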
+
+ /*
+ * Handle regular attr fields
+ */
+ if (exp_attr.base.comp_mask & IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK) {
+ resp.timestamp_mask = exp_attr.base.timestamp_mask;
+ resp.comp_mask |= IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK;
}
- init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler;
- init_attr.qp_context = file;
- init_attr.srq = NULL;
- init_attr.sq_sig_type =
- cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- init_attr.qp_type = IB_QPT_XRC;
- init_attr.xrcd = xrcd;
+ if (exp_attr.base.comp_mask & IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK) {
+ resp.hca_core_clock = exp_attr.base.hca_core_clock;
+ resp.comp_mask |= IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK;
+ }
- init_attr.cap.max_send_wr = 1;
- init_attr.cap.max_recv_wr = 0;
- init_attr.cap.max_send_sge = 1;
- init_attr.cap.max_recv_sge = 0;
- init_attr.cap.max_inline_data = 0;
+ /*
+ * Handle experimental attr fields
+ */
+ if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_CAP_FLAGS2) {
+ resp.device_cap_flags2 = exp_attr.device_cap_flags2;
+ resp.comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2;
+ }
- err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num);
- if (err)
- goto err_put;
+ if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_REQ_RD) {
+ resp.dc_rd_req = exp_attr.dc_rd_req;
+ resp.comp_mask |= IB_EXP_DEVICE_ATTR_DC_REQ_RD;
+ }
- memset(&resp, 0, sizeof resp);
- resp.qpn = qp_num;
+ if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_DC_RES_RD) {
+ resp.dc_rd_res = exp_attr.dc_rd_res;
+ resp.comp_mask |= IB_EXP_DEVICE_ATTR_DC_RES_RD;
+ }
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- err = -EFAULT;
- goto err_destroy;
+ if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ) {
+ resp.inline_recv_sz = exp_attr.inline_recv_sz;
+ resp.comp_mask |= IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
}
- atomic_inc(&xrcd->usecnt);
- put_xrcd_read(uobj);
- obj->qp_num = qp_num;
- obj->domain_handle = cmd.xrc_domain_handle;
- xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
- mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
- list_add_tail(&obj->list, &xrcd_uobj->xrc_reg_qp_list);
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ if (exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_RSS_TBL_SZ) {
+ resp.max_rss_tbl_sz = exp_attr.max_rss_tbl_sz;
+ resp.comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
+ }
- return in_len;
+ if (copy_to_user(ucore->outbuf, &resp, sizeof(resp)))
+ return -EFAULT;
-err_destroy:
- xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num);
-err_put:
- put_xrcd_read(uobj);
-err_out:
- kfree(obj);
- return err;
+ return ucore->inlen + uhw->inlen;
}
-ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ssize_t ib_uverbs_exp_create_qp(struct ib_uverbs_file *file,
+ struct ib_udata *ucore, struct ib_udata *uhw)
{
- struct ib_uverbs_modify_xrc_rcv_qp cmd;
- struct ib_qp_attr *attr;
- struct ib_xrcd *xrcd;
- struct ib_uobject *uobj;
- int err;
+ struct ib_uqp_object *obj;
+ struct ib_device *device;
+ struct ib_pd *pd = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_cq *scq = NULL, *rcq = NULL;
+ struct ib_srq *srq = NULL;
+ struct ib_qp *qp;
+ struct ib_exp_qp_init_attr attr;
+ int ret;
+ struct ib_uverbs_exp_create_qp cmd_exp;
+ struct ib_uverbs_exp_create_qp_resp resp_exp;
+ struct ib_qp *parentqp = NULL;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ memset(&cmd_exp, 0, sizeof(cmd_exp));
- attr = kzalloc(sizeof *attr, GFP_KERNEL);
- if (!attr)
- return -ENOMEM;
+ ret = ucore->ops->copy_from(&cmd_exp, ucore, sizeof(cmd_exp));
+ if (ret)
+ return ret;
- xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
- if (!xrcd) {
- kfree(attr);
- return -EINVAL;
- }
+ if (!disable_raw_qp_enforcement &&
+ cmd_exp.qp_type == IB_QPT_RAW_PACKET && priv_check(curthread,
+ PRIV_NET_RAW))
+ return -EPERM;
- attr->qp_state = cmd.qp_state;
- attr->cur_qp_state = cmd.cur_qp_state;
- attr->qp_access_flags = cmd.qp_access_flags;
- attr->pkey_index = cmd.pkey_index;
- attr->port_num = cmd.port_num;
- attr->path_mtu = cmd.path_mtu;
- attr->path_mig_state = cmd.path_mig_state;
- attr->qkey = cmd.qkey;
- attr->rq_psn = cmd.rq_psn;
- attr->sq_psn = cmd.sq_psn;
- attr->dest_qp_num = cmd.dest_qp_num;
- attr->alt_pkey_index = cmd.alt_pkey_index;
- attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
- attr->max_rd_atomic = cmd.max_rd_atomic;
- attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
- attr->min_rnr_timer = cmd.min_rnr_timer;
- attr->port_num = cmd.port_num;
- attr->timeout = cmd.timeout;
- attr->retry_cnt = cmd.retry_cnt;
- attr->rnr_retry = cmd.rnr_retry;
- attr->alt_port_num = cmd.alt_port_num;
- attr->alt_timeout = cmd.alt_timeout;
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
- memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
- attr->ah_attr.dlid = cmd.dest.dlid;
- attr->ah_attr.sl = cmd.dest.sl;
- attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd.dest.static_rate;
- attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd.dest.port_num;
+ init_uobj(&obj->uevent.uobject, cmd_exp.user_handle, file->ucontext,
+ &qp_lock_class);
+ down_write(&obj->uevent.uobject.mutex);
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
-
- err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask);
- put_xrcd_read(uobj);
- kfree(attr);
- return err ? err : in_len;
-}
+ if (cmd_exp.qp_type == IB_QPT_XRC_TGT) {
+ xrcd = idr_read_xrcd(cmd_exp.pd_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ device = xrcd->device;
+ } else {
+ if (cmd_exp.qp_type == IB_QPT_XRC_INI) {
+ cmd_exp.max_recv_wr = 0;
+ cmd_exp.max_recv_sge = 0;
+ } else {
+ if (cmd_exp.is_srq) {
+ srq = idr_read_srq(cmd_exp.srq_handle, file->ucontext);
+ if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ }
-ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
-{
- struct ib_uverbs_query_xrc_rcv_qp cmd;
- struct ib_uverbs_query_qp_resp resp;
- struct ib_qp_attr *attr;
- struct ib_qp_init_attr *init_attr;
- struct ib_xrcd *xrcd;
- struct ib_uobject *uobj;
- int ret;
+ if (cmd_exp.recv_cq_handle != cmd_exp.send_cq_handle) {
+ rcq = idr_read_cq(cmd_exp.recv_cq_handle, file->ucontext, 0);
+ if (!rcq) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ }
+ }
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ scq = idr_read_cq(cmd_exp.send_cq_handle, file->ucontext, !!rcq);
+ rcq = rcq ?: scq;
+ pd = idr_read_pd(cmd_exp.pd_handle, file->ucontext);
+ if (!pd || !scq) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
- init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto out;
+ device = pd->device;
}
- xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
- if (!xrcd) {
- ret = -EINVAL;
- goto out;
+ memset(&attr, 0, sizeof(attr));
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.qp_context = file;
+ attr.send_cq = scq;
+ attr.recv_cq = rcq;
+ attr.srq = srq;
+ attr.xrcd = xrcd;
+ attr.sq_sig_type = cmd_exp.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ attr.qp_type = cmd_exp.qp_type;
+ attr.create_flags = 0;
+
+ attr.cap.max_send_wr = cmd_exp.max_send_wr;
+ attr.cap.max_recv_wr = cmd_exp.max_recv_wr;
+ attr.cap.max_send_sge = cmd_exp.max_send_sge;
+ attr.cap.max_recv_sge = cmd_exp.max_recv_sge;
+ attr.cap.max_inline_data = cmd_exp.max_inline_data;
+
+ if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS)
+ attr.create_flags |= cmd_exp.qp_cap_flags &
+ (IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV);
+
+ if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_QPG) {
+ struct ib_uverbs_qpg *qpg;
+ if (cmd_exp.qp_type != IB_QPT_RAW_PACKET &&
+ cmd_exp.qp_type != IB_QPT_UD) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ qpg = &cmd_exp.qpg;
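+ /*
+ * QP groups: an IB_QPG_PARENT carries the RSS/TSS child counts,
+ * while CHILD_RX/CHILD_TX QPs attach to an existing parent looked
+ * up by its handle.
+ */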
+ switch (qpg->qpg_type) {
+ case IB_QPG_PARENT:
+ attr.parent_attrib.rss_child_count =
+ qpg->parent_attrib.rss_child_count;
+ attr.parent_attrib.tss_child_count =
+ qpg->parent_attrib.tss_child_count;
+ break;
+ case IB_QPG_CHILD_RX:
+ case IB_QPG_CHILD_TX:
+ parentqp = idr_read_qp(qpg->parent_handle,
+ file->ucontext);
+ if (!parentqp) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ attr.qpg_parent = parentqp;
+ break;
+ default:
+ ret = -EINVAL;
+ goto err_put;
+ }
+ attr.qpg_type = qpg->qpg_type;
}
- ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr,
- cmd.attr_mask, init_attr);
+ if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV)
+ attr.max_inl_recv = cmd_exp.max_inl_recv;
- put_xrcd_read(uobj);
+ obj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
- if (ret)
- goto out;
+ if (cmd_exp.qp_type == IB_QPT_XRC_TGT)
+ qp = ib_create_qp(pd, (struct ib_qp_init_attr *)&attr);
+ else
+ qp = device->exp_create_qp(pd, &attr, uhw);
- memset(&resp, 0, sizeof resp);
- resp.qp_state = attr->qp_state;
- resp.cur_qp_state = attr->cur_qp_state;
- resp.path_mtu = attr->path_mtu;
- resp.path_mig_state = attr->path_mig_state;
- resp.qkey = attr->qkey;
- resp.rq_psn = attr->rq_psn;
- resp.sq_psn = attr->sq_psn;
- resp.dest_qp_num = attr->dest_qp_num;
- resp.qp_access_flags = attr->qp_access_flags;
- resp.pkey_index = attr->pkey_index;
- resp.alt_pkey_index = attr->alt_pkey_index;
- resp.sq_draining = attr->sq_draining;
- resp.max_rd_atomic = attr->max_rd_atomic;
- resp.max_dest_rd_atomic = attr->max_dest_rd_atomic;
- resp.min_rnr_timer = attr->min_rnr_timer;
- resp.port_num = attr->port_num;
- resp.timeout = attr->timeout;
- resp.retry_cnt = attr->retry_cnt;
- resp.rnr_retry = attr->rnr_retry;
- resp.alt_port_num = attr->alt_port_num;
- resp.alt_timeout = attr->alt_timeout;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
- memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
- resp.dest.flow_label = attr->ah_attr.grh.flow_label;
- resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
- resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
- resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
- resp.dest.dlid = attr->ah_attr.dlid;
- resp.dest.sl = attr->ah_attr.sl;
- resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
- resp.dest.static_rate = attr->ah_attr.static_rate;
- resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
- resp.dest.port_num = attr->ah_attr.port_num;
+ if (cmd_exp.qp_type != IB_QPT_XRC_TGT) {
+ qp->real_qp = qp;
+ qp->device = device;
+ qp->pd = pd;
+ qp->send_cq = attr.send_cq;
+ qp->recv_cq = attr.recv_cq;
+ qp->srq = attr.srq;
+ qp->event_handler = attr.event_handler;
+ qp->qp_context = attr.qp_context;
+ qp->qp_type = attr.qp_type;
+ atomic_set(&qp->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&attr.send_cq->usecnt);
+ if (attr.recv_cq)
+ atomic_inc(&attr.recv_cq->usecnt);
+ if (attr.srq)
+ atomic_inc(&attr.srq->usecnt);
+ }
+ qp->uobject = &obj->uevent.uobject;
- memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
- resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
- resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
- resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
- resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
- resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
- resp.alt_dest.sl = attr->alt_ah_attr.sl;
- resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
- resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
- resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
- resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+ obj->uevent.uobject.object = qp;
+ ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+ if (ret)
+ goto err_destroy;
- resp.max_send_wr = init_attr->cap.max_send_wr;
- resp.max_recv_wr = init_attr->cap.max_recv_wr;
- resp.max_send_sge = init_attr->cap.max_send_sge;
- resp.max_recv_sge = init_attr->cap.max_recv_sge;
- resp.max_inline_data = init_attr->cap.max_inline_data;
- resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+ memset(&resp_exp, 0, sizeof(resp_exp));
+ resp_exp.qpn = qp->qp_num;
+ resp_exp.qp_handle = obj->uevent.uobject.id;
+ resp_exp.max_recv_sge = attr.cap.max_recv_sge;
+ resp_exp.max_send_sge = attr.cap.max_send_sge;
+ resp_exp.max_recv_wr = attr.cap.max_recv_wr;
+ resp_exp.max_send_wr = attr.cap.max_send_wr;
+ resp_exp.max_inline_data = attr.cap.max_inline_data;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ if (cmd_exp.comp_mask & IB_UVERBS_EXP_CREATE_QP_INL_RECV) {
+ resp_exp.comp_mask |= IB_UVERBS_EXP_CREATE_QP_RESP_INL_RECV;
+ resp_exp.max_inl_recv = attr.max_inl_recv;
+ }
-out:
- kfree(attr);
- kfree(init_attr);
+ ret = ucore->ops->copy_to(ucore, &resp_exp, sizeof(resp_exp));
+ if (ret)
+ goto err_copy;
- return ret ? ret : in_len;
-}
+ if (xrcd) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ put_xrcd_read(xrcd_uobj);
+ }
-ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
-{
- struct ib_uverbs_reg_xrc_rcv_qp cmd;
- struct ib_uxrc_rcv_object *qp_obj, *tmp;
- struct ib_xrcd *xrcd;
- struct ib_uobject *uobj;
- struct ib_uxrcd_object *xrcd_uobj;
- int ret;
+ if (pd)
+ put_pd_read(pd);
+ if (scq)
+ put_cq_read(scq);
+ if (rcq && rcq != scq)
+ put_cq_read(rcq);
+ if (srq)
+ put_srq_read(srq);
+ if (parentqp)
+ put_qp_read(parentqp);
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ mutex_lock(&file->mutex);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
+ mutex_unlock(&file->mutex);
- qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL);
- if (!qp_obj)
- return -ENOMEM;
+ obj->uevent.uobject.live = 1;
- xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
- if (!xrcd) {
- ret = -EINVAL;
- goto err_out;
- }
+ up_write(&obj->uevent.uobject.mutex);
- ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num);
- if (ret)
- goto err_put;
+ return ucore->inlen + uhw->inlen;
- xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
- mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
- list_for_each_entry(tmp, &xrcd_uobj->xrc_reg_qp_list, list)
- if (cmd.qp_num == tmp->qp_num) {
- kfree(qp_obj);
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
- put_xrcd_read(uobj);
- return in_len;
- }
- qp_obj->qp_num = cmd.qp_num;
- qp_obj->domain_handle = cmd.xrc_domain_handle;
- list_add_tail(&qp_obj->list, &xrcd_uobj->xrc_reg_qp_list);
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
- atomic_inc(&xrcd->usecnt);
- put_xrcd_read(uobj);
- return in_len;
+err_copy:
+ idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+
+err_destroy:
+ ib_destroy_qp(qp);
err_put:
- put_xrcd_read(uobj);
-err_out:
+ if (xrcd)
+ put_xrcd_read(xrcd_uobj);
+ if (pd)
+ put_pd_read(pd);
+ if (scq)
+ put_cq_read(scq);
+ if (rcq && rcq != scq)
+ put_cq_read(rcq);
+ if (srq)
+ put_srq_read(srq);
+ if (parentqp)
+ put_qp_read(parentqp);
- kfree(qp_obj);
+ put_uobj_write(&obj->uevent.uobject);
return ret;
}
-int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file,
- struct ib_xrcd *xrcd, u32 qp_num)
-{
- int err;
- err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num);
- if (!err)
- atomic_dec(&xrcd->usecnt);
- return err;
-}
-
-ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+int ib_exp_query_device(struct ib_device *device,
+ struct ib_exp_device_attr *device_attr)
{
- struct ib_uverbs_unreg_xrc_rcv_qp cmd;
- struct ib_uxrc_rcv_object *qp_obj, *tmp;
- struct ib_xrcd *xrcd;
- struct ib_uobject *uobj;
- struct ib_uxrcd_object *xrcd_uobj;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj);
- if (!xrcd)
- return -EINVAL;
-
- ret = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, cmd.qp_num);
- if (ret) {
- put_xrcd_read(uobj);
- return -EINVAL;
- }
- atomic_dec(&xrcd->usecnt);
-
- xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject);
- mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
- list_for_each_entry_safe(qp_obj, tmp, &xrcd_uobj->xrc_reg_qp_list, list)
- if (cmd.qp_num == qp_obj->qp_num) {
- list_del(&qp_obj->list);
- kfree(qp_obj);
- break;
- }
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
- put_xrcd_read(uobj);
- return in_len;
+ return device->exp_query_device(device, device_attr);
}
+EXPORT_SYMBOL(ib_exp_query_device);
diff --git a/sys/ofed/drivers/infiniband/core/uverbs_main.c b/sys/ofed/drivers/infiniband/core/uverbs_main.c
index 30b9259..12bc0d3 100644
--- a/sys/ofed/drivers/infiniband/core/uverbs_main.c
+++ b/sys/ofed/drivers/infiniband/core/uverbs_main.c
@@ -39,8 +39,13 @@
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
+#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
+#include <linux/slab.h>
+#include <linux/ktime.h>
+#include <linux/rbtree.h>
+#include <linux/math64.h>
#include <asm/uaccess.h>
@@ -50,8 +55,6 @@ MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");
-#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
-
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
@@ -60,6 +63,31 @@ enum {
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
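+/*
+ * Copy helpers for extended commands: clamp each transfer to the
+ * smaller of the request and the user buffer, so a short user buffer
+ * is never overrun.
+ */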
+static int uverbs_copy_from_udata_ex(void *dest, struct ib_udata *udata, size_t len)
+{
+ return copy_from_user(dest, udata->inbuf, min(udata->inlen, len)) ? -EFAULT : 0;
+}
+
+static int uverbs_copy_to_udata_ex(struct ib_udata *udata, void *src, size_t len)
+{
+ return copy_to_user(udata->outbuf, src, min(udata->outlen, len)) ? -EFAULT : 0;
+}
+
+static struct ib_udata_ops uverbs_copy_ex = {
+ .copy_from = uverbs_copy_from_udata_ex,
+ .copy_to = uverbs_copy_to_udata_ex
+};
+
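+/*
+ * Initialize an ib_udata so that extended commands route through the
+ * clamped copy helpers above for both the core and provider buffers.
+ */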
+#define INIT_UDATA_EX(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->ops = &uverbs_copy_ex; \
+ (udata)->inbuf = (void __user *)(ibuf); \
+ (udata)->outbuf = (void __user *)(obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
+
static struct class *uverbs_class;
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
@@ -70,10 +98,11 @@ DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrc_domain_idr);
+DEFINE_IDR(ib_uverbs_xrcd_idr);
+DEFINE_IDR(ib_uverbs_rule_idr);
+DEFINE_IDR(ib_uverbs_dct_idr);
-static spinlock_t map_lock;
-static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
+static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
@@ -86,6 +115,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
+ [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
[IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
[IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
@@ -107,20 +138,31 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
- [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq,
- [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain,
- [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain,
- [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp,
- [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp,
- [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp,
- [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp,
- [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
+ [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
+ [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
+ [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
};
-#ifdef __linux__
-/* BSD Does not require a fake mountpoint for all files. */
-static struct vfsmount *uverbs_event_mnt;
-#endif
+static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw) = {
+ [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
+ [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
+};
+
+static ssize_t (*uverbs_exp_cmd_table[])(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw) = {
+ [IB_USER_VERBS_EXP_CMD_CREATE_QP] = ib_uverbs_exp_create_qp,
+ [IB_USER_VERBS_EXP_CMD_MODIFY_CQ] = ib_uverbs_exp_modify_cq,
+ [IB_USER_VERBS_EXP_CMD_MODIFY_QP] = ib_uverbs_exp_modify_qp,
+ [IB_USER_VERBS_EXP_CMD_CREATE_CQ] = ib_uverbs_exp_create_cq,
+ [IB_USER_VERBS_EXP_CMD_QUERY_DEVICE] = ib_uverbs_exp_query_device,
+ [IB_USER_VERBS_EXP_CMD_CREATE_DCT] = ib_uverbs_exp_create_dct,
+ [IB_USER_VERBS_EXP_CMD_DESTROY_DCT] = ib_uverbs_exp_destroy_dct,
+ [IB_USER_VERBS_EXP_CMD_QUERY_DCT] = ib_uverbs_exp_query_dct,
+};
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
@@ -195,6 +237,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
struct ib_ucontext *context)
{
struct ib_uobject *uobj, *tmp;
+ int err;
if (!context)
return 0;
@@ -209,18 +252,55 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uobj);
}
+ /* Remove MWs before QPs, in order to support type 2A MWs. */
+ list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
+ struct ib_mw *mw = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
+ err = ib_dealloc_mw(mw);
+ if (err) {
+ pr_info("user_verbs: couldn't deallocate MW during cleanup.\n");
+ pr_info("user_verbs: the system may have become unstable.\n");
+ }
+ kfree(uobj);
+ }
+ list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
+ struct ib_flow *flow_id = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
+ ib_destroy_flow(flow_id);
+ kfree(uobj);
+ }
+
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = uobj->object;
struct ib_uqp_object *uqp =
container_of(uobj, struct ib_uqp_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
+
ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
+ err = ib_destroy_qp(qp);
+ if (err)
+ pr_info("destroying uverbs qp failed: err %d\n", err);
+
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
+ list_for_each_entry_safe(uobj, tmp, &context->dct_list, list) {
+ struct ib_dct *dct = uobj->object;
+ struct ib_udct_object *udct =
+ container_of(uobj, struct ib_udct_object, uobject);
+
+ idr_remove_uobj(&ib_uverbs_dct_idr, uobj);
+
+ err = ib_destroy_dct(dct);
+ if (err)
+ pr_info("destroying uverbs dct failed: err %d\n", err);
+
+ kfree(udct);
+ }
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
@@ -228,7 +308,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
container_of(uobj, struct ib_uevent_object, uobject);
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
+ err = ib_destroy_srq(srq);
+ if (err)
+ pr_info("destroying uverbs srq failed: err %d\n", err);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
@@ -240,41 +322,37 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
container_of(uobj, struct ib_ucq_object, uobject);
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
+ err = ib_destroy_cq(cq);
+ if (err)
+ pr_info("destroying uverbs cq failed: err %d\n", err);
+
ib_uverbs_release_ucq(file, ev_file, ucq);
kfree(ucq);
}
- /* XXX Free MWs */
-
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
+ err = ib_dereg_mr(mr);
+ if (err) {
+ pr_info("user_verbs: couldn't deregister an MR during cleanup.\n");
+ pr_info("user_verbs: the system may have become unstable.\n");
+ }
kfree(uobj);
}
- mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+ mutex_lock(&file->device->xrcd_tree_mutex);
list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1;
- struct ib_uxrcd_object *xrcd_uobj =
+ struct ib_uxrcd_object *uxrcd =
container_of(uobj, struct ib_uxrcd_object, uobject);
- list_for_each_entry_safe(xrc_qp_obj, tmp1,
- &xrcd_uobj->xrc_reg_qp_list, list) {
- list_del(&xrc_qp_obj->list);
- ib_uverbs_cleanup_xrc_rcv_qp(file, xrcd,
- xrc_qp_obj->qp_num);
- kfree(xrc_qp_obj);
- }
-
- idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd);
- kfree(uobj);
+ idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+ ib_uverbs_dealloc_xrcd(file->device, xrcd);
+ kfree(uxrcd);
}
- mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+ mutex_unlock(&file->device->xrcd_tree_mutex);
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
struct ib_pd *pd = uobj->object;
@@ -405,7 +483,8 @@ static const struct file_operations uverbs_event_fops = {
.read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync
+ .fasync = ib_uverbs_event_fasync,
+ .llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
@@ -524,21 +603,13 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
NULL, NULL);
}
-void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
- void *context_ptr)
-{
- ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num,
- event->event, NULL, NULL);
-}
-
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- int is_async, int *fd)
+ int is_async)
{
struct ib_uverbs_event_file *ev_file;
struct file *filp;
- int ret;
- ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
+ ev_file = kzalloc(sizeof *ev_file, GFP_KERNEL);
if (!ev_file)
return ERR_PTR(-ENOMEM);
@@ -547,43 +618,22 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
INIT_LIST_HEAD(&ev_file->event_list);
init_waitqueue_head(&ev_file->poll_wait);
ev_file->uverbs_file = uverbs_file;
- ev_file->async_queue = NULL;
ev_file->is_async = is_async;
- ev_file->is_closed = 0;
- ev_file->filp = NULL;
-
- *fd = get_unused_fd();
- if (*fd < 0) {
- ret = *fd;
- goto err;
- }
/*
* fops_get() can't fail here, because we're coming from a
* system call on a uverbs file, which will already have a
* module reference.
*/
-#ifdef __linux__
- filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
- FMODE_READ, fops_get(&uverbs_event_fops));
-#else
filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops));
-#endif
- if (!filp) {
- ret = -ENFILE;
- goto err_fd;
- }
+ if (IS_ERR(filp)) {
+ kfree(ev_file);
+ } else {
filp->private_data = ev_file;
+ }
return filp;
-
-err_fd:
- put_unused_fd(*fd);
-
-err:
- kfree(ev_file);
- return ERR_PTR(ret);
}
/*
@@ -594,16 +644,15 @@ err:
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
struct ib_uverbs_event_file *ev_file = NULL;
- struct file *filp;
+ struct fd f = fdget(fd);
- filp = fget(fd);
- if (!filp)
+ if (!f.file)
return NULL;
- if (filp->f_op != &uverbs_event_fops)
+ if (f.file->f_op != &uverbs_event_fops)
goto out;
- ev_file = filp->private_data;
+ ev_file = f.file->private_data;
if (ev_file->is_async) {
ev_file = NULL;
goto out;
@@ -612,15 +661,225 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
kref_get(&ev_file->ref);
out:
- fput(filp);
+ fdput(f);
return ev_file;
}
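+/* Map a verbs command opcode to a printable name for perf logging. */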
+static const char *verbs_cmd_str(__u32 cmd)
+{
+ switch (cmd) {
+ case IB_USER_VERBS_CMD_GET_CONTEXT:
+ return "GET_CONTEXT";
+ case IB_USER_VERBS_CMD_QUERY_DEVICE:
+ return "QUERY_DEVICE";
+ case IB_USER_VERBS_CMD_QUERY_PORT:
+ return "QUERY_PORT";
+ case IB_USER_VERBS_CMD_ALLOC_PD:
+ return "ALLOC_PD";
+ case IB_USER_VERBS_CMD_DEALLOC_PD:
+ return "DEALLOC_PD";
+ case IB_USER_VERBS_CMD_REG_MR:
+ return "REG_MR";
+ case IB_USER_VERBS_CMD_DEREG_MR:
+ return "DEREG_MR";
+ case IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL:
+ return "CREATE_COMP_CHANNEL";
+ case IB_USER_VERBS_CMD_CREATE_CQ:
+ return "CREATE_CQ";
+ case IB_USER_VERBS_CMD_RESIZE_CQ:
+ return "RESIZE_CQ";
+ case IB_USER_VERBS_CMD_POLL_CQ:
+ return "POLL_CQ";
+ case IB_USER_VERBS_CMD_REQ_NOTIFY_CQ:
+ return "REQ_NOTIFY_CQ";
+ case IB_USER_VERBS_CMD_DESTROY_CQ:
+ return "DESTROY_CQ";
+ case IB_USER_VERBS_CMD_CREATE_QP:
+ return "CREATE_QP";
+ case IB_USER_VERBS_CMD_QUERY_QP:
+ return "QUERY_QP";
+ case IB_USER_VERBS_CMD_MODIFY_QP:
+ return "MODIFY_QP";
+ case IB_USER_VERBS_CMD_DESTROY_QP:
+ return "DESTROY_QP";
+ case IB_USER_VERBS_CMD_POST_SEND:
+ return "POST_SEND";
+ case IB_USER_VERBS_CMD_POST_RECV:
+ return "POST_RECV";
+ case IB_USER_VERBS_CMD_POST_SRQ_RECV:
+ return "POST_SRQ_RECV";
+ case IB_USER_VERBS_CMD_CREATE_AH:
+ return "CREATE_AH";
+ case IB_USER_VERBS_CMD_DESTROY_AH:
+ return "DESTROY_AH";
+ case IB_USER_VERBS_CMD_ATTACH_MCAST:
+ return "ATTACH_MCAST";
+ case IB_USER_VERBS_CMD_DETACH_MCAST:
+ return "DETACH_MCAST";
+ case IB_USER_VERBS_CMD_CREATE_SRQ:
+ return "CREATE_SRQ";
+ case IB_USER_VERBS_CMD_MODIFY_SRQ:
+ return "MODIFY_SRQ";
+ case IB_USER_VERBS_CMD_QUERY_SRQ:
+ return "QUERY_SRQ";
+ case IB_USER_VERBS_CMD_DESTROY_SRQ:
+ return "DESTROY_SRQ";
+ case IB_USER_VERBS_CMD_OPEN_XRCD:
+ return "OPEN_XRCD";
+ case IB_USER_VERBS_CMD_CLOSE_XRCD:
+ return "CLOSE_XRCD";
+ case IB_USER_VERBS_CMD_CREATE_XSRQ:
+ return "CREATE_XSRQ";
+ case IB_USER_VERBS_CMD_OPEN_QP:
+ return "OPEN_QP";
+ }
+
+ return "Unknown command";
+}
+
+enum {
+ COMMAND_INFO_MASK = 0x1000,
+};
+
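+/*
+ * Dispatch an experimental command.  Legacy callers still use the old
+ * extended header layout, so their buffers are first rewritten into
+ * the new ucore/uhw split before the handler table is invoked.
+ */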
+static ssize_t ib_uverbs_exp_handle_cmd(struct ib_uverbs_file *file,
+ const char __user *buf,
+ struct ib_device *dev,
+ struct ib_uverbs_cmd_hdr *hdr,
+ size_t count,
+ int legacy_ex_cmd)
+{
+ struct ib_udata ucore;
+ struct ib_udata uhw;
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ __u32 command = hdr->command - IB_USER_VERBS_EXP_CMD_FIRST;
+
+ if (hdr->command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ if (command >= ARRAY_SIZE(uverbs_exp_cmd_table) ||
+ !uverbs_exp_cmd_table[command])
+ return -EINVAL;
+
+ if (!file->ucontext)
+ return -EINVAL;
+
+ if (!(dev->uverbs_exp_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
+ if (legacy_ex_cmd) {
+ struct ib_uverbs_ex_cmd_hdr_legacy hxl;
+ struct ib_uverbs_ex_cmd_resp1_legacy resp1;
+ __u64 response;
+ ssize_t ret;
+
+ if (count < sizeof(hxl))
+ return -EINVAL;
+
+ if (copy_from_user(&hxl, buf, sizeof(hxl)))
+ return -EFAULT;
+
+ if (((hxl.in_words + hxl.provider_in_words) * 4) != count)
+ return -EINVAL;
+
+ count -= sizeof(hxl);
+ buf += sizeof(hxl);
+ if (hxl.out_words || hxl.provider_out_words) {
+ if (count < sizeof(resp1))
+ return -EINVAL;
+ if (copy_from_user(&resp1, buf, sizeof(resp1)))
+ return -EFAULT;
+ response = resp1.response;
+ if (!response)
+ return -EINVAL;
+
+ /*
+ * Change user buffer to comply with new extension format.
+ */
+ if (sizeof(resp1.comp_mask) != sizeof(resp1.response))
+ return -EFAULT;
+ buf += sizeof(resp1.comp_mask);
+ if (copy_to_user(__DECONST(void __user *, buf), &resp1.comp_mask,
+ sizeof(resp1.response)))
+ return -EFAULT;
+
+ } else {
+ response = 0;
+ }
+
+ INIT_UDATA_EX(&ucore,
+ (hxl.in_words) ? buf : 0,
+ response,
+ hxl.in_words * 4,
+ hxl.out_words * 4);
+
+ INIT_UDATA_EX(&uhw,
+ (hxl.provider_in_words) ? buf + ucore.inlen : 0,
+ (hxl.provider_out_words) ? response + ucore.outlen : 0,
+ hxl.provider_in_words * 4,
+ hxl.provider_out_words * 4);
+
+ ret = uverbs_exp_cmd_table[command](file, &ucore, &uhw);
+		/*
+		 * Restore the user buffer to its original layout.
+		 */
+ if (response && copy_to_user(__DECONST(void __user *, buf), &resp1.response, sizeof(resp1.response)))
+ return -EFAULT;
+
+ return ret;
+ } else {
+		if (count < (sizeof(*hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+
+		if (copy_from_user(&ex_hdr, buf + sizeof(*hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+
+		buf += sizeof(*hdr) + sizeof(ex_hdr);
+
+ if ((hdr->in_words + ex_hdr.provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr.response) {
+ if (!hdr->out_words && !ex_hdr.provider_out_words)
+ return -EINVAL;
+ } else {
+ if (hdr->out_words || ex_hdr.provider_out_words)
+ return -EINVAL;
+ }
+
+ INIT_UDATA_EX(&ucore,
+ (hdr->in_words) ? buf : 0,
+ (unsigned long)ex_hdr.response,
+ hdr->in_words * 8,
+ hdr->out_words * 8);
+
+ INIT_UDATA_EX(&uhw,
+ (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
+ (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
+
+ return uverbs_exp_cmd_table[command](file, &ucore, &uhw);
+ }
+}
+
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
+ struct ib_device *dev = file->device->ib_dev;
struct ib_uverbs_cmd_hdr hdr;
+ struct timespec ts1;
+ struct timespec ts2;
+ ktime_t t1, t2, delta;
+ s64 ds;
+ ssize_t ret;
+ u64 dividend;
+ u32 divisor;
+ __u32 flags;
+ __u32 command;
+ int legacy_ex_cmd = 0;
+ size_t written_count = count;
if (count < sizeof hdr)
return -EINVAL;
@@ -628,20 +887,126 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
+	/*
+	 * For backward compatibility, map old-style extension verbs
+	 * commands to their equivalent experimental commands.
+	 */
+ if ((hdr.command >= IB_USER_VERBS_LEGACY_CMD_FIRST) &&
+ (hdr.command <= IB_USER_VERBS_LEGACY_EX_CMD_LAST)) {
+ hdr.command += IB_USER_VERBS_EXP_CMD_FIRST -
+ IB_USER_VERBS_LEGACY_CMD_FIRST;
+ legacy_ex_cmd = 1;
+ }
+
+ flags = (hdr.command &
+ IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ ktime_get_ts(&ts1);
+ if (!flags && (command >= IB_USER_VERBS_EXP_CMD_FIRST)) {
+ ret = ib_uverbs_exp_handle_cmd(file, buf, dev, &hdr, count, legacy_ex_cmd);
+ } else if (!flags) {
+ if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ !uverbs_cmd_table[command])
+ return -EINVAL;
+
+ if (!file->ucontext &&
+ command != IB_USER_VERBS_CMD_GET_CONTEXT)
+ return -EINVAL;
+
+ if (!(dev->uverbs_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
if (hdr.in_words * 4 != count)
return -EINVAL;
- if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[hdr.command] ||
- !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+ ret = uverbs_cmd_table[command](file,
+ buf + sizeof(hdr),
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+ } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ struct ib_udata ucore;
+ struct ib_udata uhw;
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
+
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
return -EINVAL;
- if (!file->ucontext &&
- hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
+ if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
+ !uverbs_ex_cmd_table[command])
+ return -EINVAL;
+
+ if (!file->ucontext)
+ return -EINVAL;
+
+ if (!(dev->uverbs_ex_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+
+ count -= sizeof(hdr) + sizeof(ex_hdr);
+ buf += sizeof(hdr) + sizeof(ex_hdr);
+
+ if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr.response) {
+ if (!hdr.out_words && !ex_hdr.provider_out_words)
+ return -EINVAL;
+ } else {
+ if (hdr.out_words || ex_hdr.provider_out_words)
return -EINVAL;
+ }
+
+ INIT_UDATA_EX(&ucore,
+ (hdr.in_words) ? buf : 0,
+ (unsigned long)ex_hdr.response,
+ hdr.in_words * 8,
+ hdr.out_words * 8);
+
+ INIT_UDATA_EX(&uhw,
+ (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
+ (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
+
+ ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
+
+ if (ret)
+ return ret;
+
+ return written_count;
+
+ } else {
+ return -EFAULT;
+ }
- return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
- hdr.in_words * 4, hdr.out_words * 4);
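+	/*
+	 * Optional command profiling: when cmd_perf selects this command,
+	 * fold the elapsed time into a running average; the info bit also
+	 * logs the execution time of each invocation.
+	 */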
+ if ((dev->cmd_perf & (COMMAND_INFO_MASK - 1)) == hdr.command) {
+ ktime_get_ts(&ts2);
+ t1 = timespec_to_ktime(ts1);
+ t2 = timespec_to_ktime(ts2);
+ delta = ktime_sub(t2, t1);
+ ds = ktime_to_ns(delta);
+ spin_lock(&dev->cmd_perf_lock);
+ dividend = dev->cmd_avg * dev->cmd_n + ds;
+ ++dev->cmd_n;
+ divisor = dev->cmd_n;
+ do_div(dividend, divisor);
+ dev->cmd_avg = dividend;
+ spin_unlock(&dev->cmd_perf_lock);
+ if (dev->cmd_perf & COMMAND_INFO_MASK) {
+ pr_info("%s: %s execution time = %lld nsec\n",
+ file->device->ib_dev->name,
+ verbs_cmd_str(hdr.command),
+ (long long)ds);
+ }
+ }
+ return ret;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -653,18 +1018,51 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
else
return file->device->ib_dev->mmap(file->ucontext, vma);
}
+/* XXX Not supported in FreeBSD */
+#if 0
+static unsigned long ib_uverbs_get_unmapped_area(struct file *filp,
+ unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+
+ if (!file->ucontext)
+ return -ENODEV;
+ else {
+ if (!file->device->ib_dev->get_unmapped_area)
+ return current->mm->get_unmapped_area(filp, addr, len,
+ pgoff, flags);
+
+ return file->device->ib_dev->get_unmapped_area(filp, addr, len,
+ pgoff, flags);
+ }
+}
+#endif
+
+static long ib_uverbs_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+
+ if (!file->device->ib_dev->ioctl)
+ return -ENOTSUPP;
+
+ if (!file->ucontext)
+ return -ENODEV;
+ else
+		/* the provider must supply its own locking mechanism */
+ return file->device->ib_dev->ioctl(file->ucontext, cmd, arg);
+}
/*
* ib_uverbs_open() does not need the BKL:
*
- * - dev_table[] accesses are protected by map_lock, the
- * ib_uverbs_device structures are properly reference counted, and
+ * - the ib_uverbs_device structures are properly reference counted and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - there is no ioctl method to race against;
- * - the device is added to dev_table[] as the last part of module
- * initialization, the open method will either immediately run
- * -ENXIO, or all required initialization will be done.
+ * - the open method will either immediately return -ENXIO, or all
+ * required initialization will be done.
*/
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
@@ -672,13 +1070,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
struct ib_uverbs_file *file;
int ret;
- spin_lock(&map_lock);
- dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+ dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev);
if (dev)
kref_get(&dev->ref);
- spin_unlock(&map_lock);
-
- if (!dev)
+ else
return -ENXIO;
if (!try_module_get(dev->ib_dev->owner)) {
@@ -700,7 +1095,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
filp->private_data = file;
- return 0;
+ return nonseekable_open(inode, filp);
err_module:
module_put(dev->ib_dev->owner);
@@ -728,7 +1123,9 @@ static const struct file_operations uverbs_fops = {
.owner = THIS_MODULE,
.write = ib_uverbs_write,
.open = ib_uverbs_open,
- .release = ib_uverbs_close
+ .release = ib_uverbs_close,
+ .llseek = no_llseek,
+ .unlocked_ioctl = ib_uverbs_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -736,7 +1133,13 @@ static const struct file_operations uverbs_mmap_fops = {
.write = ib_uverbs_write,
.mmap = ib_uverbs_mmap,
.open = ib_uverbs_open,
- .release = ib_uverbs_close
+ .release = ib_uverbs_close,
+ .llseek = no_llseek,
+/* XXX Not supported in FreeBSD */
+#if 0
+ .get_unmapped_area = ib_uverbs_get_unmapped_area,
+#endif
+ .unlocked_ioctl = ib_uverbs_ioctl,
};
static struct ib_client uverbs_client = {
@@ -757,6 +1160,18 @@ static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static ssize_t show_dev_ref_cnt(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ib_uverbs_device *dev = dev_get_drvdata(device);
+
+ if (!dev)
+ return -ENODEV;
+
+ return sprintf(buf, "%d\n", dev->ref.count);
+}
+static DEVICE_ATTR(ref_cnt, S_IRUGO, show_dev_ref_cnt, NULL);
+
static ssize_t show_dev_abi_version(struct device *device,
struct device_attribute *attr, char *buf)
{
@@ -773,8 +1188,36 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att
{
return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
}
+
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
+
+/*
+ * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
+ * requesting a new major number and doubling the number of max devices we
+ * support. It's stupid, but simple.
+ */
+static int find_overflow_devnum(void)
+{
+ int ret;
+
+ if (!overflow_maj) {
+ ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
+ "infiniband_verbs");
+ if (ret) {
+ printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
+ return ret;
+ }
+ }
+
+ ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
+ if (ret >= IB_UVERBS_MAX_DEVICES)
+ return -1;
+
+ return ret;
+}
#include <linux/pci.h>
static ssize_t
@@ -801,6 +1244,7 @@ show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf)
return sprintf(buf, "0x%04x\n",
((struct pci_dev *)dev->ib_dev->dma_device)->vendor);
}
+
static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL);
struct attribute *device_attrs[] =
@@ -817,6 +1261,8 @@ static struct attribute_group device_group = {
static void ib_uverbs_add_one(struct ib_device *device)
{
+ int devnum;
+ dev_t base;
struct ib_uverbs_device *uverbs_dev;
if (!device->alloc_ucontext)
@@ -828,55 +1274,66 @@ static void ib_uverbs_add_one(struct ib_device *device)
kref_init(&uverbs_dev->ref);
init_completion(&uverbs_dev->comp);
+ uverbs_dev->xrcd_tree = RB_ROOT;
+ mutex_init(&uverbs_dev->xrcd_tree_mutex);
spin_lock(&map_lock);
- uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+ devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+ if (devnum >= IB_UVERBS_MAX_DEVICES) {
spin_unlock(&map_lock);
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
goto err;
+
+ spin_lock(&map_lock);
+ uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ uverbs_dev->devnum = devnum;
+ base = devnum + IB_UVERBS_BASE_DEV;
+ set_bit(devnum, dev_map);
}
- set_bit(uverbs_dev->devnum, dev_map);
spin_unlock(&map_lock);
uverbs_dev->ib_dev = device;
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
- uverbs_dev->cdev = cdev_alloc();
- if (!uverbs_dev->cdev)
- goto err;
- uverbs_dev->cdev->owner = THIS_MODULE;
- uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ cdev_init(&uverbs_dev->cdev, NULL);
+ uverbs_dev->cdev.owner = THIS_MODULE;
+ uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+ kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
- uverbs_dev->cdev->dev, uverbs_dev,
+ uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
goto err_cdev;
if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
goto err_class;
+ if (device_create_file(uverbs_dev->dev, &dev_attr_ref_cnt))
+ goto err_class;
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group))
goto err_class;
- spin_lock(&map_lock);
- dev_table[uverbs_dev->devnum] = uverbs_dev;
- spin_unlock(&map_lock);
-
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
err_class:
- device_destroy(uverbs_class, uverbs_dev->cdev->dev);
+ device_destroy(uverbs_class, uverbs_dev->cdev.dev);
err_cdev:
- cdev_del(uverbs_dev->cdev);
- clear_bit(uverbs_dev->devnum, dev_map);
+ cdev_del(&uverbs_dev->cdev);
+ if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
err:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
@@ -894,42 +1351,30 @@ static void ib_uverbs_remove_one(struct ib_device *device)
sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group);
dev_set_drvdata(uverbs_dev->dev, NULL);
- device_destroy(uverbs_class, uverbs_dev->cdev->dev);
- cdev_del(uverbs_dev->cdev);
-
- spin_lock(&map_lock);
- dev_table[uverbs_dev->devnum] = NULL;
- spin_unlock(&map_lock);
+ device_destroy(uverbs_class, uverbs_dev->cdev.dev);
+ cdev_del(&uverbs_dev->cdev);
+ if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
clear_bit(uverbs_dev->devnum, dev_map);
+ else
+ clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
}
-#ifdef __linux__
-static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
+
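+/*
+ * Create uverbs device nodes as infiniband/<name>, world read/writable
+ * (0666).
+ */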
+static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
- return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
- INFINIBANDEVENTFS_MAGIC, mnt);
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
-static struct file_system_type uverbs_event_fs = {
- /* No owner field so module can be unloaded */
- .name = "infinibandeventfs",
- .get_sb = uverbs_event_get_sb,
- .kill_sb = kill_litter_super
-};
-#endif
-
static int __init ib_uverbs_init(void)
{
int ret;
- spin_lock_init(&map_lock);
-
ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
"infiniband_verbs");
if (ret) {
@@ -944,43 +1389,22 @@ static int __init ib_uverbs_init(void)
goto out_chrdev;
}
+ uverbs_class->devnode = uverbs_devnode;
+
ret = class_create_file(uverbs_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
goto out_class;
}
-#ifdef __linux__
- ret = register_filesystem(&uverbs_event_fs);
- if (ret) {
- printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
- goto out_class;
- }
-
- uverbs_event_mnt = kern_mount(&uverbs_event_fs);
- if (IS_ERR(uverbs_event_mnt)) {
- ret = PTR_ERR(uverbs_event_mnt);
- printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
- goto out_fs;
- }
-#endif
-
ret = ib_register_client(&uverbs_client);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't register client\n");
- goto out_mnt;
+ goto out_class;
}
return 0;
-out_mnt:
-#ifdef __linux__
- mntput(uverbs_event_mnt);
-
-out_fs:
- unregister_filesystem(&uverbs_event_fs);
-#endif
-
out_class:
class_destroy(uverbs_class);
@@ -994,12 +1418,10 @@ out:
static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
-#ifdef __linux__
- mntput(uverbs_event_mnt);
- unregister_filesystem(&uverbs_event_fs);
-#endif
class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);
diff --git a/sys/ofed/drivers/infiniband/core/uverbs_marshall.c b/sys/ofed/drivers/infiniband/core/uverbs_marshall.c
index 5440da0..a541882 100644
--- a/sys/ofed/drivers/infiniband/core/uverbs_marshall.c
+++ b/sys/ofed/drivers/infiniband/core/uverbs_marshall.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include <linux/module.h>
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
@@ -40,18 +41,21 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
dst->grh.sgid_index = src->grh.sgid_index;
dst->grh.hop_limit = src->grh.hop_limit;
dst->grh.traffic_class = src->grh.traffic_class;
+ memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
dst->dlid = src->dlid;
dst->sl = src->sl;
dst->src_path_bits = src->src_path_bits;
dst->static_rate = src->static_rate;
dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
dst->port_num = src->port_num;
+ dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
+ dst->qp_state = src->qp_state;
dst->cur_qp_state = src->cur_qp_state;
dst->path_mtu = src->path_mtu;
dst->path_mig_state = src->path_mig_state;
@@ -83,6 +87,7 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
dst->rnr_retry = src->rnr_retry;
dst->alt_port_num = src->alt_port_num;
dst->alt_timeout = src->alt_timeout;
+ memset(dst->reserved, 0, sizeof(dst->reserved));
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
diff --git a/sys/ofed/drivers/infiniband/core/verbs.c b/sys/ofed/drivers/infiniband/core/verbs.c
index 023564f..51a0ed5 100644
--- a/sys/ofed/drivers/infiniband/core/verbs.c
+++ b/sys/ofed/drivers/infiniband/core/verbs.c
@@ -38,10 +38,13 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <linux/module.h>
#include <linux/string.h>
+#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
int ib_rate_to_mult(enum ib_rate rate)
{
@@ -77,6 +80,31 @@ enum ib_rate mult_to_ib_rate(int mult)
}
EXPORT_SYMBOL(mult_to_ib_rate);
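+
+/*
+ * Convert an IB static rate enumeration to Mb/s.  Extended rates map
+ * to their exact signalling speeds (e.g. IB_RATE_25_GBPS is 25781
+ * Mb/s); unknown rates return -1.
+ */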
+int ib_rate_to_mbps(enum ib_rate rate)
+{
+ switch (rate) {
+ case IB_RATE_2_5_GBPS: return 2500;
+ case IB_RATE_5_GBPS: return 5000;
+ case IB_RATE_10_GBPS: return 10000;
+ case IB_RATE_20_GBPS: return 20000;
+ case IB_RATE_30_GBPS: return 30000;
+ case IB_RATE_40_GBPS: return 40000;
+ case IB_RATE_60_GBPS: return 60000;
+ case IB_RATE_80_GBPS: return 80000;
+ case IB_RATE_120_GBPS: return 120000;
+ case IB_RATE_14_GBPS: return 14062;
+ case IB_RATE_56_GBPS: return 56250;
+ case IB_RATE_112_GBPS: return 112500;
+ case IB_RATE_168_GBPS: return 168750;
+ case IB_RATE_25_GBPS: return 25781;
+ case IB_RATE_100_GBPS: return 103125;
+ case IB_RATE_200_GBPS: return 206250;
+ case IB_RATE_300_GBPS: return 309375;
+ default: return -1;
+ }
+}
+EXPORT_SYMBOL(ib_rate_to_mbps);
+
enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
@@ -87,6 +115,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
+ case RDMA_NODE_MIC:
+ return RDMA_TRANSPORT_SCIF;
default:
BUG();
return 0;
@@ -104,6 +134,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
return IB_LINK_LAYER_INFINIBAND;
case RDMA_TRANSPORT_IWARP:
return IB_LINK_LAYER_ETHERNET;
+ case RDMA_TRANSPORT_SCIF:
+ return IB_LINK_LAYER_SCIF;
default:
return IB_LINK_LAYER_UNSPECIFIED;
}
@@ -162,8 +194,29 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
u32 flow_class;
u16 gid_index;
int ret;
+ int is_eth = (rdma_port_get_link_layer(device, port_num) ==
+ IB_LINK_LAYER_ETHERNET);
memset(ah_attr, 0, sizeof *ah_attr);
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH))
+ return -EPROTOTYPE;
+
+ if (wc->wc_flags & IB_WC_WITH_SMAC &&
+ wc->wc_flags & IB_WC_WITH_VLAN) {
+ memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+ ah_attr->vlan_id = wc->vlan_id;
+ } else {
+ ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
+ ah_attr->dmac, &ah_attr->vlan_id);
+ if (ret)
+ return ret;
+ }
+ } else {
+ ah_attr->vlan_id = 0xffff;
+ }
+
+
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
ah_attr->src_path_bits = wc->dlid_path_bits;
@@ -250,45 +303,20 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
- srq->ext.xrc.cq = NULL;
- srq->ext.xrc.xrcd = NULL;
- atomic_inc(&pd->usecnt);
- atomic_set(&srq->usecnt, 0);
+ srq->srq_type = srq_init_attr->srq_type;
+ if (srq->srq_type == IB_SRQT_XRC) {
+ srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
+ srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
+ atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+ atomic_inc(&srq->ext.xrc.cq->usecnt);
}
-
- return srq;
-}
-EXPORT_SYMBOL(ib_create_srq);
-
-struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
- struct ib_cq *xrc_cq,
- struct ib_xrcd *xrcd,
- struct ib_srq_init_attr *srq_init_attr)
-{
- struct ib_srq *srq;
-
- if (!pd->device->create_xrc_srq)
- return ERR_PTR(-ENOSYS);
-
- srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, srq_init_attr, NULL);
-
- if (!IS_ERR(srq)) {
- srq->device = pd->device;
- srq->pd = pd;
- srq->uobject = NULL;
- srq->event_handler = srq_init_attr->event_handler;
- srq->srq_context = srq_init_attr->srq_context;
- srq->ext.xrc.cq = xrc_cq;
- srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
- atomic_inc(&xrcd->usecnt);
- atomic_inc(&xrc_cq->usecnt);
atomic_set(&srq->usecnt, 0);
}
return srq;
}
-EXPORT_SYMBOL(ib_create_xrc_srq);
+EXPORT_SYMBOL(ib_create_srq);
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
@@ -308,27 +336,39 @@ int ib_query_srq(struct ib_srq *srq,
}
EXPORT_SYMBOL(ib_query_srq);
+int ib_query_values(struct ib_device *device,
+ int q_values, struct ib_device_values *values)
+{
+ return device->query_values ?
+ device->query_values(device, q_values, values) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_query_values);
+
int ib_destroy_srq(struct ib_srq *srq)
{
struct ib_pd *pd;
- struct ib_cq *xrc_cq;
- struct ib_xrcd *xrcd;
+ enum ib_srq_type srq_type;
+ struct ib_xrcd *uninitialized_var(xrcd);
+ struct ib_cq *uninitialized_var(cq);
int ret;
if (atomic_read(&srq->usecnt))
return -EBUSY;
pd = srq->pd;
- xrc_cq = srq->ext.xrc.cq;
+ srq_type = srq->srq_type;
+ if (srq_type == IB_SRQT_XRC) {
xrcd = srq->ext.xrc.xrcd;
+ cq = srq->ext.xrc.cq;
+ }
ret = srq->device->destroy_srq(srq);
if (!ret) {
atomic_dec(&pd->usecnt);
- if (xrc_cq)
- atomic_dec(&xrc_cq->usecnt);
- if (xrcd)
+ if (srq_type == IB_SRQT_XRC) {
atomic_dec(&xrcd->usecnt);
+ atomic_dec(&cq->usecnt);
+ }
}
return ret;
@@ -337,32 +377,130 @@ EXPORT_SYMBOL(ib_destroy_srq);
/* Queue pairs */
+static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
+{
+ struct ib_qp *qp = context;
+ unsigned long flags;
+
+	/* The code below must stay in sync with removals from this list
+	 * (ib_close_qp); otherwise a qp could be freed while the list is
+	 * being scanned, resulting in a kernel oops.
+	 */
+ spin_lock_irqsave(&qp->device->event_handler_lock, flags);
+ list_for_each_entry(event->element.qp, &qp->open_list, open_list)
+ if (event->element.qp->event_handler)
+ event->element.qp->event_handler(event, event->element.qp->qp_context);
+ spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
+}
+
+static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
+{
+ mutex_lock(&xrcd->tgt_qp_mutex);
+ list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
+ mutex_unlock(&xrcd->tgt_qp_mutex);
+}
+
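+/*
+ * Open a lightweight handle onto an existing XRC target QP: the clone
+ * carries its own event handler and context, shares everything else
+ * with real_qp, and bumps real_qp's use count.
+ */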
+static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
+ void (*event_handler)(struct ib_event *, void *),
+ void *qp_context)
+{
+ struct ib_qp *qp;
+ unsigned long flags;
+
+ qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->real_qp = real_qp;
+ atomic_inc(&real_qp->usecnt);
+ qp->device = real_qp->device;
+ qp->event_handler = event_handler;
+ qp->qp_context = qp_context;
+ qp->qp_num = real_qp->qp_num;
+ qp->qp_type = real_qp->qp_type;
+
+ spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ list_add(&qp->open_list, &real_qp->open_list);
+ spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+ return qp;
+}
+
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+ struct ib_qp_open_attr *qp_open_attr)
+{
+ struct ib_qp *qp, *real_qp;
+
+ if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
+ return ERR_PTR(-EINVAL);
+
+ qp = ERR_PTR(-EINVAL);
+ mutex_lock(&xrcd->tgt_qp_mutex);
+ list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
+ if (real_qp->qp_num == qp_open_attr->qp_num) {
+ qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+ qp_open_attr->qp_context);
+ break;
+ }
+ }
+ mutex_unlock(&xrcd->tgt_qp_mutex);
+ return qp;
+}
+EXPORT_SYMBOL(ib_open_qp);
+
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
- struct ib_qp *qp;
+ struct ib_qp *qp, *real_qp;
+ struct ib_device *device;
- qp = pd->device->create_qp(pd, qp_init_attr, NULL);
+ device = pd ? pd->device : qp_init_attr->xrcd->device;
+ qp = device->create_qp(pd, qp_init_attr, NULL);
if (!IS_ERR(qp)) {
- qp->device = pd->device;
- qp->pd = pd;
- qp->send_cq = qp_init_attr->send_cq;
- qp->recv_cq = qp_init_attr->recv_cq;
- qp->srq = qp_init_attr->srq;
+ qp->device = device;
+ qp->real_qp = qp;
qp->uobject = NULL;
+ qp->qp_type = qp_init_attr->qp_type;
+
+ atomic_set(&qp->usecnt, 0);
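+		/*
+		 * An XRC target QP is owned by its XRCD rather than a PD;
+		 * the caller receives a shared handle opened on the real QP.
+		 */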
+ if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
+ qp->event_handler = __ib_shared_qp_event_handler;
+ qp->qp_context = qp;
+ qp->pd = NULL;
+ qp->send_cq = qp->recv_cq = NULL;
+ qp->srq = NULL;
+ qp->xrcd = qp_init_attr->xrcd;
+ atomic_inc(&qp_init_attr->xrcd->usecnt);
+ INIT_LIST_HEAD(&qp->open_list);
+
+ real_qp = qp;
+ qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
+ qp_init_attr->qp_context);
+ if (!IS_ERR(qp))
+ __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+ else
+ real_qp->device->destroy_qp(real_qp);
+ } else {
qp->event_handler = qp_init_attr->event_handler;
qp->qp_context = qp_init_attr->qp_context;
- qp->qp_type = qp_init_attr->qp_type;
- qp->xrcd = qp->qp_type == IB_QPT_XRC ?
- qp_init_attr->xrcd : NULL;
- atomic_inc(&pd->usecnt);
- atomic_inc(&qp_init_attr->send_cq->usecnt);
+ if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
+ qp->recv_cq = NULL;
+ qp->srq = NULL;
+ } else {
+ qp->recv_cq = qp_init_attr->recv_cq;
atomic_inc(&qp_init_attr->recv_cq->usecnt);
- if (qp_init_attr->srq)
+ qp->srq = qp_init_attr->srq;
+ if (qp->srq)
atomic_inc(&qp_init_attr->srq->usecnt);
- if (qp->qp_type == IB_QPT_XRC)
- atomic_inc(&qp->xrcd->usecnt);
+ }
+
+ qp->pd = pd;
+ qp->send_cq = qp_init_attr->send_cq;
+ qp->xrcd = NULL;
+
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&qp_init_attr->send_cq->usecnt);
+ }
}
return qp;
@@ -371,8 +509,10 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
- enum ib_qp_attr_mask req_param[IB_QPT_RAW_PACKET + 1];
- enum ib_qp_attr_mask opt_param[IB_QPT_RAW_PACKET + 1];
+ enum ib_qp_attr_mask req_param[IB_QPT_MAX];
+ enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
+ enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
+ enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -389,13 +529,24 @@ static const struct {
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
- [IB_QPT_XRC] = (IB_QP_PKEY_INDEX |
+ [IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_DC_KEY),
+ [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
+ },
+ .opt_param = {
+ [IB_QPT_UD] = IB_QP_GROUP_RSS,
+ [IB_QPT_RAW_PACKET] = IB_QP_GROUP_RSS
}
},
},
@@ -414,7 +565,13 @@ static const struct {
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
- [IB_QPT_XRC] = (IB_QP_PKEY_INDEX |
+ [IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
@@ -436,13 +593,26 @@ static const struct {
IB_QP_RQ_PSN |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
- [IB_QPT_XRC] = (IB_QP_AV |
+ [IB_QPT_DC_INI] = (IB_QP_PATH_MTU |
+ IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_XRC_INI] = (IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN),
+ [IB_QPT_XRC_TGT] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
IB_QP_RQ_PSN |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
+ .req_param_add_eth = {
+ [IB_QPT_RC] = (IB_QP_SMAC),
+ [IB_QPT_UC] = (IB_QP_SMAC),
+ [IB_QPT_XRC_INI] = (IB_QP_SMAC),
+ [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
+ },
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
@@ -452,13 +622,34 @@ static const struct {
[IB_QPT_RC] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
- [IB_QPT_XRC] = (IB_QP_ALT_PATH |
+ [IB_QPT_DC_INI] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX),
+ [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_AV,
+ },
+ .opt_param_add_eth = {
+ [IB_QPT_RC] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_UC] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID)
}
}
},
@@ -475,11 +666,17 @@ static const struct {
IB_QP_RNR_RETRY |
IB_QP_SQ_PSN |
IB_QP_MAX_QP_RD_ATOMIC),
- [IB_QPT_XRC] = (IB_QP_TIMEOUT |
+ [IB_QPT_DC_INI] = (IB_QP_TIMEOUT |
+ IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC),
+ [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_SQ_PSN |
IB_QP_MAX_QP_RD_ATOMIC),
+ [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT |
+ IB_QP_SQ_PSN),
[IB_QPT_SMI] = IB_QP_SQ_PSN,
[IB_QPT_GSI] = IB_QP_SQ_PSN,
},
@@ -495,7 +692,16 @@ static const struct {
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
- [IB_QPT_XRC] = (IB_QP_CUR_STATE |
+ [IB_QPT_DC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
@@ -524,7 +730,16 @@ static const struct {
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
IB_QP_MIN_RNR_TIMER),
- [IB_QPT_XRC] = (IB_QP_CUR_STATE |
+ [IB_QPT_DC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE |
+ IB_QP_MIN_RNR_TIMER),
+ [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_ALT_PATH |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
@@ -541,7 +756,8 @@ static const struct {
[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
- [IB_QPT_XRC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
}
@@ -564,7 +780,11 @@ static const struct {
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
- [IB_QPT_XRC] = (IB_QP_CUR_STATE |
+ [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
@@ -597,12 +817,19 @@ static const struct {
IB_QP_PKEY_INDEX |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
- [IB_QPT_XRC] = (IB_QP_PORT |
+ [IB_QPT_XRC_INI] = (IB_QP_PORT |
IB_QP_AV |
IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_ALT_PATH |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PKEY_INDEX |
+ IB_QP_PATH_MIG_STATE),
+ [IB_QPT_XRC_TGT] = (IB_QP_PORT |
+ IB_QP_AV |
+ IB_QP_TIMEOUT |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
@@ -640,7 +867,8 @@ static const struct {
};
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask)
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
@@ -659,6 +887,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
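+	/* Ethernet (RoCE) links require additional L2 attributes. */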
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ req_param |= qp_state_table[cur_state][next_state].
+ req_param_add_eth[type];
+ opt_param |= qp_state_table[cur_state][next_state].
+ opt_param_add_eth[type];
+ }
+
if ((mask & req_param) != req_param)
return 0;
@@ -673,7 +908,13 @@ int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
+ int ret;
+
+ ret = qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
+ if (!ret && (qp_attr_mask & IB_QP_PORT))
+ qp->port_num = qp_attr->port_num;
+
+ return ret;
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -683,35 +924,87 @@ int ib_query_qp(struct ib_qp *qp,
struct ib_qp_init_attr *qp_init_attr)
{
return qp->device->query_qp ?
- qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
+ qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_query_qp);
+int ib_close_qp(struct ib_qp *qp)
+{
+ struct ib_qp *real_qp;
+ unsigned long flags;
+
+ real_qp = qp->real_qp;
+ if (real_qp == qp)
+ return -EINVAL;
+
+ spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ list_del(&qp->open_list);
+ spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+ atomic_dec(&real_qp->usecnt);
+ kfree(qp);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_close_qp);
+
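+/*
+ * Close one handle on a shared XRC target QP; the real QP is destroyed
+ * only once the last opener is gone, and is re-inserted into the XRCD
+ * list if the provider's destroy fails.
+ */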
+static int __ib_destroy_shared_qp(struct ib_qp *qp)
+{
+ struct ib_xrcd *xrcd;
+ struct ib_qp *real_qp;
+ int ret;
+
+ real_qp = qp->real_qp;
+ xrcd = real_qp->xrcd;
+
+ mutex_lock(&xrcd->tgt_qp_mutex);
+ ib_close_qp(qp);
+ if (atomic_read(&real_qp->usecnt) == 0)
+ list_del(&real_qp->xrcd_list);
+ else
+ real_qp = NULL;
+ mutex_unlock(&xrcd->tgt_qp_mutex);
+
+ if (real_qp) {
+ ret = ib_destroy_qp(real_qp);
+ if (!ret)
+ atomic_dec(&xrcd->usecnt);
+ else
+ __ib_insert_xrcd_qp(xrcd, real_qp);
+ }
+
+ return 0;
+}
+
int ib_destroy_qp(struct ib_qp *qp)
{
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
- struct ib_xrcd *xrcd;
- enum ib_qp_type qp_type = qp->qp_type;
int ret;
+ if (atomic_read(&qp->usecnt))
+ return -EBUSY;
+
+ if (qp->real_qp != qp)
+ return __ib_destroy_shared_qp(qp);
+
pd = qp->pd;
scq = qp->send_cq;
rcq = qp->recv_cq;
srq = qp->srq;
- xrcd = qp->xrcd;
ret = qp->device->destroy_qp(qp);
if (!ret) {
+ if (pd)
atomic_dec(&pd->usecnt);
+ if (scq)
atomic_dec(&scq->usecnt);
+ if (rcq)
atomic_dec(&rcq->usecnt);
if (srq)
atomic_dec(&srq->usecnt);
- if (qp_type == IB_QPT_XRC)
- atomic_dec(&xrcd->usecnt);
}
return ret;
@@ -726,8 +1019,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
void *cq_context, int cqe, int comp_vector)
{
struct ib_cq *cq;
+ struct ib_cq_init_attr attr = {
+ .cqe = cqe,
+ .comp_vector = comp_vector,
+ .flags = 0,
+ };
- cq = device->create_cq(device, cqe, comp_vector, NULL, NULL);
+ cq = device->create_cq(device, &attr, NULL, NULL);
if (!IS_ERR(cq)) {
cq->device = device;
@@ -742,10 +1040,12 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
}
EXPORT_SYMBOL(ib_create_cq);
-int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+int ib_modify_cq(struct ib_cq *cq,
+ struct ib_cq_attr *cq_attr,
+ int cq_attr_mask)
{
return cq->device->modify_cq ?
- cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+ cq->device->modify_cq(cq, cq_attr, cq_attr_mask) : -ENOSYS;
}
EXPORT_SYMBOL(ib_modify_cq);
@@ -770,6 +1070,11 @@ EXPORT_SYMBOL(ib_resize_cq);
struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
mr = pd->device->get_dma_mr(pd, mr_access_flags);
@@ -792,6 +1097,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
u64 *iova_start)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
if (!pd->device->reg_phys_mr)
return ERR_PTR(-ENOSYS);
@@ -822,6 +1132,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
struct ib_pd *old_pd;
int ret;
+ ret = ib_check_mr_access(mr_access_flags);
+ if (ret)
+ return ret;
+
if (!mr->device->rereg_phys_mr)
return -ENOSYS;
@@ -867,6 +1181,45 @@ int ib_dereg_mr(struct ib_mr *mr)
}
EXPORT_SYMBOL(ib_dereg_mr);
+struct ib_mr *ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr)
+{
+ struct ib_mr *mr;
+
+ if (!pd->device->create_mr)
+ return ERR_PTR(-ENOSYS);
+
+ mr = pd->device->create_mr(pd, mr_init_attr);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_create_mr);
+
+int ib_destroy_mr(struct ib_mr *mr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
+
+ pd = mr->pd;
+ ret = mr->device->destroy_mr(mr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_mr);
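
ib_create_mr()/ib_destroy_mr() were introduced for signature-enabled memory regions. A sketch under that assumption, with field names taken from the contemporaneous struct ib_mr_init_attr (the example_* helper is hypothetical):

static struct ib_mr *example_alloc_sig_mr(struct ib_pd *pd)
{
	struct ib_mr_init_attr mr_attr = {
		.max_reg_descriptors = 2,	/* data + protection */
		.flags               = IB_MR_SIGNATURE_EN,
	};

	/* pair with ib_destroy_mr(), which drops the pd usecnt again */
	return ib_create_mr(pd, &mr_attr);
}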
+
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct ib_mr *mr;
@@ -915,18 +1268,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list);
/* Memory windows */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct ib_mw *mw;
if (!pd->device->alloc_mw)
return ERR_PTR(-ENOSYS);
- mw = pd->device->alloc_mw(pd);
+ mw = pd->device->alloc_mw(pd, type);
if (!IS_ERR(mw)) {
mw->device = pd->device;
mw->pd = pd;
mw->uobject = NULL;
+ mw->type = type;
atomic_inc(&pd->usecnt);
}
@@ -1000,58 +1354,58 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
+ int ret;
+
if (!qp->device->attach_mcast)
return -ENOSYS;
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (qp->qp_type == IB_QPT_RAW_PACKET) {
- /* In raw Etherent mgids the 63 msb's should be 0 */
- if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
- return -EINVAL;
- } else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
+ qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
- return qp->device->attach_mcast(qp, gid, lid);
+
+ ret = qp->device->attach_mcast(qp, gid, lid);
+ if (!ret)
+ atomic_inc(&qp->usecnt);
+ return ret;
}
EXPORT_SYMBOL(ib_attach_mcast);
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
+ int ret;
+
if (!qp->device->detach_mcast)
return -ENOSYS;
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (qp->qp_type == IB_QPT_RAW_PACKET) {
- /* In raw Etherent mgids the 63 msb's should be 0 */
- if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
- return -EINVAL;
- } else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
+ qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_SCIF:
+
if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
- return qp->device->detach_mcast(qp, gid, lid);
-}
-EXPORT_SYMBOL(ib_detach_mcast);
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
-{
- if (atomic_read(&xrcd->usecnt))
- return -EBUSY;
-
- return xrcd->device->dealloc_xrcd(xrcd);
+ ret = qp->device->detach_mcast(qp, gid, lid);
+ if (!ret)
+ atomic_dec(&qp->usecnt);
+ return ret;
}
-EXPORT_SYMBOL(ib_dealloc_xrcd);
+EXPORT_SYMBOL(ib_detach_mcast);
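
Both multicast paths now keep qp->usecnt balanced, which is what lets ib_destroy_qp() return -EBUSY for a QP that still has attached groups. A minimal sketch of the pairing (hypothetical helper; the mgid/mlid are assumed to come from an SA multicast join):

static int example_mcast_cycle(struct ib_qp *qp, union ib_gid *mgid, u16 mlid)
{
	int ret;

	ret = ib_attach_mcast(qp, mgid, mlid);	/* bumps qp->usecnt */
	if (ret)
		return ret;
	/* ... receive multicast traffic on the UD QP ... */
	return ib_detach_mcast(qp, mgid, mlid);	/* drops qp->usecnt */
}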
struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
{
@@ -1064,10 +1418,119 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
if (!IS_ERR(xrcd)) {
xrcd->device = device;
xrcd->inode = NULL;
- xrcd->uobject = NULL;
atomic_set(&xrcd->usecnt, 0);
+ mutex_init(&xrcd->tgt_qp_mutex);
+ INIT_LIST_HEAD(&xrcd->tgt_qp_list);
}
+
return xrcd;
}
EXPORT_SYMBOL(ib_alloc_xrcd);
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+ struct ib_qp *qp;
+ int ret;
+
+ if (atomic_read(&xrcd->usecnt))
+ return -EBUSY;
+
+ while (!list_empty(&xrcd->tgt_qp_list)) {
+ qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
+ ret = ib_destroy_qp(qp);
+ if (ret)
+ return ret;
+ }
+
+ return xrcd->device->dealloc_xrcd(xrcd);
+}
+EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ struct ib_flow *flow_id;
+ if (!qp->device->create_flow)
+ return ERR_PTR(-ENOSYS);
+
+ flow_id = qp->device->create_flow(qp, flow_attr, domain);
+ if (!IS_ERR(flow_id))
+ atomic_inc(&qp->usecnt);
+ return flow_id;
+}
+EXPORT_SYMBOL(ib_create_flow);
+
+int ib_destroy_flow(struct ib_flow *flow_id)
+{
+ int err;
+ struct ib_qp *qp;
+
+ if (!flow_id)
+ return -EINVAL;
+ qp = flow_id->qp;
+ if (!qp->device->destroy_flow)
+ return -ENOSYS;
+ err = qp->device->destroy_flow(flow_id);
+ if (!err)
+ atomic_dec(&qp->usecnt);
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_flow);
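
The flow-steering pair also pins the QP through usecnt. A sketch of a trivial rule, assuming the upstream ib_flow_attr layout where type-specific spec structs follow the header (none are appended here; the example_* helper is hypothetical):

static void example_flow_cycle(struct ib_qp *qp)
{
	struct ib_flow_attr fattr = {
		.type         = IB_FLOW_ATTR_NORMAL,
		.size         = sizeof(fattr),	/* header only, no specs */
		.num_of_specs = 0,
		.port         = 1,
	};
	struct ib_flow *flow;

	flow = ib_create_flow(qp, &fattr, IB_FLOW_DOMAIN_USER);
	if (!IS_ERR(flow))
		ib_destroy_flow(flow);	/* releases the usecnt from create */
}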
+
+struct ib_dct *ib_create_dct(struct ib_pd *pd, struct ib_dct_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct ib_dct *dct;
+
+ if (!pd->device->exp_create_dct)
+ return ERR_PTR(-ENOSYS);
+
+ dct = pd->device->exp_create_dct(pd, attr, udata);
+ if (!IS_ERR(dct)) {
+ dct->pd = pd;
+ dct->srq = attr->srq;
+ dct->cq = attr->cq;
+ atomic_inc(&dct->srq->usecnt);
+ atomic_inc(&dct->cq->usecnt);
+ atomic_inc(&dct->pd->usecnt);
+ }
+
+ return dct;
+}
+EXPORT_SYMBOL(ib_create_dct);
+
+int ib_destroy_dct(struct ib_dct *dct)
+{
+ int err;
+
+ if (!dct->device->exp_destroy_dct)
+ return -ENOSYS;
+
+ err = dct->device->exp_destroy_dct(dct);
+ if (!err) {
+ atomic_dec(&dct->srq->usecnt);
+ atomic_dec(&dct->cq->usecnt);
+ atomic_dec(&dct->pd->usecnt);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(ib_destroy_dct);
+
+int ib_query_dct(struct ib_dct *dct, struct ib_dct_attr *attr)
+{
+ if (!dct->device->exp_query_dct)
+ return -ENOSYS;
+
+ return dct->device->exp_query_dct(dct, attr);
+}
+EXPORT_SYMBOL(ib_query_dct);
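
The DC-target helpers only forward to the experimental exp_* device methods, but they do take usecnt references on the SRQ, CQ, and PD named in the init attr. A sketch of the life cycle using only the fields this file actually touches (the rest of ib_dct_init_attr is Mellanox-experimental and omitted; the example_* helper is hypothetical):

static struct ib_dct *example_create_dct(struct ib_pd *pd, struct ib_cq *cq,
    struct ib_srq *srq)
{
	struct ib_dct_init_attr dattr = {
		.cq  = cq,	/* referenced by ib_create_dct() */
		.srq = srq,	/* referenced by ib_create_dct() */
		/* dc_key, access flags, etc. per the exp headers */
	};

	/* pair with ib_destroy_dct(), which drops the srq/cq/pd refs */
	return ib_create_dct(pd, &dattr, NULL);
}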
+
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status)
+{
+ return mr->device->check_mr_status ?
+ mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_check_mr_status);
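
ib_check_mr_status() is the polling side of a signature-enabled MR such as the one created by ib_create_mr() above. A sketch, assuming the upstream ib_mr_status/ib_sig_err layout (the example_* helper is hypothetical):

static void example_poll_sig_status(struct ib_mr *sig_mr)
{
	struct ib_mr_status st;

	if (!ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &st) &&
	    (st.fail_status & IB_MR_CHECK_SIG_STATUS))
		printk(KERN_ERR "signature error %d at offset %llu\n",
		    st.sig_err.err_type,
		    (unsigned long long)st.sig_err.sig_err_offset);
}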
diff --git a/sys/ofed/drivers/infiniband/debug/memtrack.c b/sys/ofed/drivers/infiniband/debug/memtrack.c
index 199b33b..7082856 100644
--- a/sys/ofed/drivers/infiniband/debug/memtrack.c
+++ b/sys/ofed/drivers/infiniband/debug/memtrack.c
@@ -24,12 +24,21 @@
#ifdef kmalloc
#undef kmalloc
#endif
+#ifdef kmemdup
+ #undef kmemdup
+#endif
#ifdef kfree
#undef kfree
#endif
#ifdef vmalloc
#undef vmalloc
#endif
+#ifdef vzalloc
+ #undef vzalloc
+#endif
+#ifdef vzalloc_node
+ #undef vzalloc_node
+#endif
#ifdef vfree
#undef vfree
#endif
@@ -39,16 +48,59 @@
#ifdef kmem_cache_free
#undef kmem_cache_free
#endif
+#ifdef ioremap
+ #undef ioremap
+#endif
+#ifdef io_mapping_create_wc
+ #undef io_mapping_create_wc
+#endif
+#ifdef io_mapping_free
+ #undef io_mapping_free
+#endif
+#ifdef ioremap_nocache
+ #undef ioremap_nocache
+#endif
+#ifdef iounmap
+ #undef iounmap
+#endif
+#ifdef alloc_pages
+ #undef alloc_pages
+#endif
+#ifdef free_pages
+ #undef free_pages
+#endif
+#ifdef get_page
+ #undef get_page
+#endif
+#ifdef put_page
+ #undef put_page
+#endif
+#ifdef create_workqueue
+ #undef create_workqueue
+#endif
+#ifdef create_rt_workqueue
+ #undef create_rt_workqueue
+#endif
+#ifdef create_freezeable_workqueue
+ #undef create_freezeable_workqueue
+#endif
+#ifdef create_singlethread_workqueue
+ #undef create_singlethread_workqueue
+#endif
+#ifdef destroy_workqueue
+ #undef destroy_workqueue
+#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/vmalloc.h>
-#include <linux/version.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include <linux/proc_fs.h>
-#include <memtrack.h>
+#include <linux/random.h>
+#include "memtrack.h"
#include <linux/moduleparam.h>
@@ -67,7 +119,7 @@ MODULE_LICENSE("GPL");
bit0 corresponds to MEMTRACK_KMALLOC, bit1 corresponds to MEMTRACK_VMALLOC etc. */
static unsigned long track_mask = -1; /* effectively everything */
module_param(track_mask, ulong, 0444);
-MODULE_PARM_DESC(track_mask, "bitmask definenig what is tracked");
+MODULE_PARM_DESC(track_mask, "bitmask defining what is tracked");
/* if a bit is set then the corresponding allocation is strictly tracked.
That is, before inserting the whole range is checked to not overlap any
@@ -76,59 +128,95 @@ static unsigned long strict_track_mask = 0; /* no strict tracking */
module_param(strict_track_mask, ulong, 0444);
MODULE_PARM_DESC(strict_track_mask, "bitmask which allocation requires strict tracking");
-typedef struct memtrack_meminfo_st {
+/* Sets the frequency of allocation failure injection;
+   if set to 0, all allocations succeed */
+static unsigned int inject_freq = 0;
+module_param(inject_freq, uint, 0644);
+MODULE_PARM_DESC(inject_freq, "Error injection frequency, default is 0 (disabled)");
+
+static unsigned int random_mem = 1;
+module_param(random_mem, uint, 0644);
+MODULE_PARM_DESC(random_mem, "When set, randomize allocated memory, default is 1 (enabled)");
+
+struct memtrack_meminfo_t {
unsigned long addr;
unsigned long size;
unsigned long line_num;
- struct memtrack_meminfo_st *next;
+ unsigned long dev;
+ unsigned long addr2;
+ int direction;
+ struct memtrack_meminfo_t *next;
struct list_head list; /* used to link all items from a certain type together */
char filename[MAX_FILENAME_LEN + 1]; /* putting the char array last is better for struct. packing */
-} memtrack_meminfo_t;
+ char ext_info[32];
+};
static struct kmem_cache *meminfo_cache;
-typedef struct {
- memtrack_meminfo_t *mem_hash[MEMTRACK_HASH_SZ];
+struct tracked_obj_desc_t {
+ struct memtrack_meminfo_t *mem_hash[MEMTRACK_HASH_SZ];
spinlock_t hash_lock;
unsigned long count; /* size of memory tracked (*malloc) or number of objects tracked */
struct list_head tracked_objs_head; /* head of list of all objects */
int strict_track; /* if 1 then for each object inserted check if it overlaps any of the objects already in the list */
-} tracked_obj_desc_t;
+};
-static tracked_obj_desc_t *tracked_objs_arr[MEMTRACK_NUM_OF_MEMTYPES];
+static struct tracked_obj_desc_t *tracked_objs_arr[MEMTRACK_NUM_OF_MEMTYPES];
static const char *rsc_names[MEMTRACK_NUM_OF_MEMTYPES] = {
"kmalloc",
"vmalloc",
- "kmem_cache_alloc"
+ "kmem_cache_alloc",
+ "io_remap",
+ "create_workqueue",
+ "alloc_pages",
+ "ib_dma_map_single",
+ "ib_dma_map_page",
+ "ib_dma_map_sg"
};
-
static const char *rsc_free_names[MEMTRACK_NUM_OF_MEMTYPES] = {
"kfree",
"vfree",
- "kmem_cache_free"
+ "kmem_cache_free",
+ "io_unmap",
+	"destroy_workqueue",
+ "free_pages",
+ "ib_dma_unmap_single",
+ "ib_dma_unmap_page",
+ "ib_dma_unmap_sg"
};
-
-static inline const char *memtype_alloc_str(memtrack_memtype_t memtype)
+static inline const char *memtype_alloc_str(enum memtrack_memtype_t memtype)
{
switch (memtype) {
- case MEMTRACK_KMALLOC:
- case MEMTRACK_VMALLOC:
- case MEMTRACK_KMEM_OBJ:
+ case MEMTRACK_KMALLOC:
+ case MEMTRACK_VMALLOC:
+ case MEMTRACK_KMEM_OBJ:
+ case MEMTRACK_IOREMAP:
+ case MEMTRACK_WORK_QUEUE:
+ case MEMTRACK_PAGE_ALLOC:
+ case MEMTRACK_DMA_MAP_SINGLE:
+ case MEMTRACK_DMA_MAP_PAGE:
+ case MEMTRACK_DMA_MAP_SG:
return rsc_names[memtype];
default:
return "(Unknown allocation type)";
}
}
-static inline const char *memtype_free_str(memtrack_memtype_t memtype)
+static inline const char *memtype_free_str(enum memtrack_memtype_t memtype)
{
switch (memtype) {
- case MEMTRACK_KMALLOC:
- case MEMTRACK_VMALLOC:
- case MEMTRACK_KMEM_OBJ:
+ case MEMTRACK_KMALLOC:
+ case MEMTRACK_VMALLOC:
+ case MEMTRACK_KMEM_OBJ:
+ case MEMTRACK_IOREMAP:
+ case MEMTRACK_WORK_QUEUE:
+ case MEMTRACK_PAGE_ALLOC:
+ case MEMTRACK_DMA_MAP_SINGLE:
+ case MEMTRACK_DMA_MAP_PAGE:
+ case MEMTRACK_DMA_MAP_SG:
return rsc_free_names[memtype];
default:
return "(Unknown allocation type)";
@@ -138,56 +226,56 @@ static inline const char *memtype_free_str(memtrack_memtype_t memtype)
/*
* overlap_a_b
*/
-static int overlap_a_b(unsigned long a_start, unsigned long a_end,
+static inline int overlap_a_b(unsigned long a_start, unsigned long a_end,
unsigned long b_start, unsigned long b_end)
{
- if ((b_start > a_end) || (a_start > b_end)) {
+ if ((b_start > a_end) || (a_start > b_end))
return 0;
- }
+
return 1;
}
/*
* check_overlap
*/
-static void check_overlap(memtrack_memtype_t memtype,
- memtrack_meminfo_t * mem_info_p,
- tracked_obj_desc_t * obj_desc_p)
+static void check_overlap(enum memtrack_memtype_t memtype,
+ struct memtrack_meminfo_t *mem_info_p,
+ struct tracked_obj_desc_t *obj_desc_p)
{
struct list_head *pos, *next;
- memtrack_meminfo_t *cur;
+ struct memtrack_meminfo_t *cur;
unsigned long start_a, end_a, start_b, end_b;
- list_for_each_safe(pos, next, &obj_desc_p->tracked_objs_head) {
- cur = list_entry(pos, memtrack_meminfo_t, list);
-
start_a = mem_info_p->addr;
end_a = mem_info_p->addr + mem_info_p->size - 1;
+
+ list_for_each_safe(pos, next, &obj_desc_p->tracked_objs_head) {
+ cur = list_entry(pos, struct memtrack_meminfo_t, list);
+
start_b = cur->addr;
end_b = cur->addr + cur->size - 1;
- if (overlap_a_b(start_a, end_a, start_b, end_b)) {
- printk
- ("%s overlaps! new_start=0x%lx, new_end=0x%lx, item_start=0x%lx, item_end=0x%lx\n",
+ if (overlap_a_b(start_a, end_a, start_b, end_b))
+ printk(KERN_ERR "%s overlaps! new_start=0x%lx, new_end=0x%lx, item_start=0x%lx, item_end=0x%lx\n",
memtype_alloc_str(memtype), mem_info_p->addr,
mem_info_p->addr + mem_info_p->size - 1, cur->addr,
cur->addr + cur->size - 1);
}
- }
}
/* Invoke on memory allocation */
-void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr,
- unsigned long size, const char *filename,
+void memtrack_alloc(enum memtrack_memtype_t memtype, unsigned long dev,
+ unsigned long addr, unsigned long size, unsigned long addr2,
+ int direction, const char *filename,
const unsigned long line_num, int alloc_flags)
{
unsigned long hash_val;
- memtrack_meminfo_t *cur_mem_info_p, *new_mem_info_p;
- tracked_obj_desc_t *obj_desc_p;
+ struct memtrack_meminfo_t *cur_mem_info_p, *new_mem_info_p;
+ struct tracked_obj_desc_t *obj_desc_p;
unsigned long flags;
if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) {
- printk("%s: Invalid memory type (%d)\n", __func__, memtype);
+ printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype);
return;
}
@@ -199,11 +287,9 @@ void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr,
hash_val = addr % MEMTRACK_HASH_SZ;
- new_mem_info_p = (memtrack_meminfo_t *)
- kmem_cache_alloc(meminfo_cache, alloc_flags);
+ new_mem_info_p = (struct memtrack_meminfo_t *)kmem_cache_alloc(meminfo_cache, alloc_flags);
if (new_mem_info_p == NULL) {
- printk
- ("%s: Failed allocating kmem_cache item for new mem_info. "
+ printk(KERN_ERR "%s: Failed allocating kmem_cache item for new mem_info. "
"Lost tracking on allocation at %s:%lu...\n", __func__,
filename, line_num);
return;
@@ -211,26 +297,34 @@ void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr,
/* save allocation properties */
new_mem_info_p->addr = addr;
new_mem_info_p->size = size;
+ new_mem_info_p->dev = dev;
+ new_mem_info_p->addr2 = addr2;
+ new_mem_info_p->direction = direction;
+
new_mem_info_p->line_num = line_num;
+ *new_mem_info_p->ext_info = '\0';
/* Make sure that we will print out the path tail if the given filename is longer
* than MAX_FILENAME_LEN. (otherwise, we will not see the name of the actual file
* in the printout -- only the path head!
*/
- if (strlen(filename) > MAX_FILENAME_LEN) {
+ if (strlen(filename) > MAX_FILENAME_LEN)
strncpy(new_mem_info_p->filename, filename + strlen(filename) - MAX_FILENAME_LEN, MAX_FILENAME_LEN);
- } else {
+ else
strncpy(new_mem_info_p->filename, filename, MAX_FILENAME_LEN);
- }
+
new_mem_info_p->filename[MAX_FILENAME_LEN] = 0; /* NULL terminate anyway */
memtrack_spin_lock(&obj_desc_p->hash_lock, flags);
/* make sure given memory location is not already allocated */
+ if ((memtype != MEMTRACK_DMA_MAP_SINGLE) && (memtype != MEMTRACK_DMA_MAP_PAGE) &&
+ (memtype != MEMTRACK_DMA_MAP_SG)) {
+
+ /* make sure given memory location is not already allocated */
cur_mem_info_p = obj_desc_p->mem_hash[hash_val];
while (cur_mem_info_p != NULL) {
- if (cur_mem_info_p->addr == addr) {
+ if ((cur_mem_info_p->addr == addr) && (cur_mem_info_p->dev == dev)) {
/* Found given address in the database */
- printk
- ("mtl rsc inconsistency: %s: %s::%lu: %s @ addr=0x%lX which is already known from %s:%lu\n",
+ printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s @ addr=0x%lX which is already known from %s:%lu\n",
__func__, filename, line_num,
memtype_alloc_str(memtype), addr,
cur_mem_info_p->filename,
@@ -241,31 +335,33 @@ void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr,
}
cur_mem_info_p = cur_mem_info_p->next;
}
+ }
/* not found - we can put in the hash bucket */
/* link as first */
new_mem_info_p->next = obj_desc_p->mem_hash[hash_val];
obj_desc_p->mem_hash[hash_val] = new_mem_info_p;
- if (obj_desc_p->strict_track) {
+ if (obj_desc_p->strict_track)
check_overlap(memtype, new_mem_info_p, obj_desc_p);
- }
obj_desc_p->count += size;
list_add(&new_mem_info_p->list, &obj_desc_p->tracked_objs_head);
memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
return;
}
+EXPORT_SYMBOL(memtrack_alloc);
/* Invoke on memory free */
-void memtrack_free(memtrack_memtype_t memtype, unsigned long addr,
+void memtrack_free(enum memtrack_memtype_t memtype, unsigned long dev,
+ unsigned long addr, unsigned long size, int direction,
const char *filename, const unsigned long line_num)
{
unsigned long hash_val;
- memtrack_meminfo_t *cur_mem_info_p, *prev_mem_info_p;
- tracked_obj_desc_t *obj_desc_p;
+ struct memtrack_meminfo_t *cur_mem_info_p, *prev_mem_info_p;
+ struct tracked_obj_desc_t *obj_desc_p;
unsigned long flags;
if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) {
- printk("%s: Invalid memory type (%d)\n", __func__, memtype);
+ printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype);
return;
}
@@ -282,13 +378,27 @@ void memtrack_free(memtrack_memtype_t memtype, unsigned long addr,
prev_mem_info_p = NULL;
cur_mem_info_p = obj_desc_p->mem_hash[hash_val];
while (cur_mem_info_p != NULL) {
- if (cur_mem_info_p->addr == addr) {
- /* Found given address in the database - remove from the bucket/list */
- if (prev_mem_info_p == NULL) {
+ if ((cur_mem_info_p->addr == addr) && (cur_mem_info_p->dev == dev)) {
+ /* Found given address in the database */
+ if ((memtype == MEMTRACK_DMA_MAP_SINGLE) || (memtype == MEMTRACK_DMA_MAP_PAGE) ||
+ (memtype == MEMTRACK_DMA_MAP_SG)) {
+ if (direction != cur_mem_info_p->direction)
+ printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s bad direction for addr 0x%lX: alloc:0x%x, free:0x%x (allocated in %s::%lu)\n",
+ __func__, filename, line_num, memtype_free_str(memtype), addr, cur_mem_info_p->direction, direction,
+ cur_mem_info_p->filename, cur_mem_info_p->line_num);
+
+ if (size != cur_mem_info_p->size)
+ printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s bad size for addr 0x%lX: size:%lu, free:%lu (allocated in %s::%lu)\n",
+ __func__, filename, line_num, memtype_free_str(memtype), addr, cur_mem_info_p->size, size,
+ cur_mem_info_p->filename, cur_mem_info_p->line_num);
+ }
+
+ /* Remove from the bucket/list */
+ if (prev_mem_info_p == NULL)
obj_desc_p->mem_hash[hash_val] = cur_mem_info_p->next; /* removing first */
- } else {
+ else
prev_mem_info_p->next = cur_mem_info_p->next; /* "crossover" */
- }
+
list_del(&cur_mem_info_p->list);
obj_desc_p->count -= cur_mem_info_p->size;
@@ -301,64 +411,317 @@ void memtrack_free(memtrack_memtype_t memtype, unsigned long addr,
}
/* not found */
- printk
- ("mtl rsc inconsistency: %s: %s::%lu: %s for unknown address=0x%lX\n",
- __func__, filename, line_num, memtype_free_str(memtype), addr);
+ printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s for unknown address=0x%lX, device=0x%lX\n",
+ __func__, filename, line_num, memtype_free_str(memtype), addr, dev);
memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
return;
}
+EXPORT_SYMBOL(memtrack_free);
+
+/*
+ * This function recognizes allocations which
+ * may be released by the kernel (e.g. skb) and
+ * are therefore not trackable by memtrack.
+ * The allocations are recognized by the name
+ * of their calling function.
+ */
+int is_non_trackable_alloc_func(const char *func_name)
+{
+ static const char * const str_str_arr[] = {
+ /* functions containing these strings consider non trackable */
+		/* functions containing these strings are considered non-trackable */
+ };
+ static const char * const str_str_excep_arr[] = {
+		/* functions which are exceptions to the str_str_arr table */
+ "ipoib_cm_skb_too_long"
+ };
+ static const char * const str_cmp_arr[] = {
+ /* functions that allocate SKBs */
+ "mlx4_en_alloc_frags",
+ "mlx4_en_alloc_frag",
+ "mlx4_en_init_allocator",
+ "mlx4_en_free_frag",
+ "mlx4_en_free_rx_desc",
+ "mlx4_en_destroy_allocator",
+ "mlx4_en_complete_rx_desc",
+ /* vnic skb functions */
+ "free_single_frag",
+ "vnic_alloc_rx_skb",
+ "vnic_rx_skb",
+ "vnic_alloc_frag",
+ "vnic_empty_rx_entry",
+ "vnic_init_allocator",
+ "vnic_destroy_allocator",
+ "sdp_post_recv",
+ "sdp_rx_ring_purge",
+ "sdp_post_srcavail",
+ "sk_stream_alloc_page",
+ "update_send_head",
+ "sdp_bcopy_get",
+ "sdp_destroy_resources",
+
+		/* function that allocates memory for the RDMA device context */
+ "ib_alloc_device"
+ };
+ size_t str_str_arr_size = sizeof(str_str_arr)/sizeof(char *);
+ size_t str_str_excep_size = sizeof(str_str_excep_arr)/sizeof(char *);
+ size_t str_cmp_arr_size = sizeof(str_cmp_arr)/sizeof(char *);
+
+ int i, j;
+
+ for (i = 0; i < str_str_arr_size; ++i)
+ if (strstr(func_name, str_str_arr[i])) {
+ for (j = 0; j < str_str_excep_size; ++j)
+ if (!strcmp(func_name, str_str_excep_arr[j]))
+ return 0;
+ return 1;
+ }
+ for (i = 0; i < str_cmp_arr_size; ++i)
+ if (!strcmp(func_name, str_cmp_arr[i]))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(is_non_trackable_alloc_func);
+
+/*
+ * In some cases we need to free memory
+ * we defined as "non trackable" (see
+ * is_non_trackable_alloc_func).
+ * This function recognizes such releases
+ * by the name of their calling function.
+ */
+int is_non_trackable_free_func(const char *func_name)
+{
+
+ static const char * const str_cmp_arr[] = {
+		/* function that deallocates memory for the RDMA device context */
+ "ib_dealloc_device"
+ };
+ size_t str_cmp_arr_size = sizeof(str_cmp_arr)/sizeof(char *);
+
+ int i;
+
+ for (i = 0; i < str_cmp_arr_size; ++i)
+ if (!strcmp(func_name, str_cmp_arr[i]))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(is_non_trackable_free_func);
+
+
+/* WA - this function checks whether
+   the calling function name is
+   '__ib_umem_release' or 'ib_umem_get'.
+   In that case we won't track the
+   memory, because the kernel
+   was the one who allocated it.
+   Return value:
+   1 - if the function name matches, else 0 */
+int is_umem_put_page(const char *func_name)
+{
+ const char func_str[18] = "__ib_umem_release";
+ /* In case of error flow put_page is called as part of ib_umem_get */
+ const char func_str1[12] = "ib_umem_get";
+
+ return ((strstr(func_name, func_str) != NULL) ||
+ (strstr(func_name, func_str1) != NULL)) ? 1 : 0;
+}
+EXPORT_SYMBOL(is_umem_put_page);
+
+/* Check page order/size:
+   when freeing a page allocation, verify that
+   we are trying to free the same size
+   we asked to allocate */
+int memtrack_check_size(enum memtrack_memtype_t memtype, unsigned long addr,
+ unsigned long size, const char *filename,
+ const unsigned long line_num)
+{
+ unsigned long hash_val;
+ struct memtrack_meminfo_t *cur_mem_info_p;
+ struct tracked_obj_desc_t *obj_desc_p;
+ unsigned long flags;
+ int ret = 0;
+
+ if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) {
+ printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype);
+ return 1;
+ }
+
+ if (!tracked_objs_arr[memtype]) {
+ /* object is not tracked */
+ return 1;
+ }
+ obj_desc_p = tracked_objs_arr[memtype];
+
+ hash_val = addr % MEMTRACK_HASH_SZ;
+
+ memtrack_spin_lock(&obj_desc_p->hash_lock, flags);
+ /* find mem_info of given memory location */
+ cur_mem_info_p = obj_desc_p->mem_hash[hash_val];
+ while (cur_mem_info_p != NULL) {
+ if (cur_mem_info_p->addr == addr) {
+ /* Found given address in the database - check size */
+ if (cur_mem_info_p->size != size) {
+				printk(KERN_ERR "mtl size inconsistency: %s: %s::%lu: try to %s at address=0x%lX with size %lu while it was created with size %lu\n",
+ __func__, filename, line_num, memtype_free_str(memtype),
+ addr, size, cur_mem_info_p->size);
+ snprintf(cur_mem_info_p->ext_info, sizeof(cur_mem_info_p->ext_info),
+ "invalid free size %lu\n", size);
+ ret = 1;
+ }
+ memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
+ return ret;
+ }
+ cur_mem_info_p = cur_mem_info_p->next;
+ }
+
+	/* not found - this function gives no indication here;
+	   it only checks the size/order.
+	   Address inconsistency is reported by the 'free' function */
+ memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
+ return 1;
+}
+EXPORT_SYMBOL(memtrack_check_size);
+
+/* Search the current database for a specific addr.
+   It will print an error msg if we get an unexpected result.
+   Return value: 0 - if addr exists, else 1 */
+int memtrack_is_new_addr(enum memtrack_memtype_t memtype, unsigned long addr, int expect_exist,
+ const char *filename, const unsigned long line_num)
+{
+ unsigned long hash_val;
+ struct memtrack_meminfo_t *cur_mem_info_p;
+ struct tracked_obj_desc_t *obj_desc_p;
+ unsigned long flags;
+
+ if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) {
+ printk(KERN_ERR "%s: Invalid memory type (%d)\n", __func__, memtype);
+ return 1;
+ }
+
+ if (!tracked_objs_arr[memtype]) {
+ /* object is not tracked */
+ return 0;
+ }
+ obj_desc_p = tracked_objs_arr[memtype];
+
+ hash_val = addr % MEMTRACK_HASH_SZ;
+
+ memtrack_spin_lock(&obj_desc_p->hash_lock, flags);
+ /* find mem_info of given memory location */
+ cur_mem_info_p = obj_desc_p->mem_hash[hash_val];
+ while (cur_mem_info_p != NULL) {
+ if (cur_mem_info_p->addr == addr) {
+ /* Found given address in the database - exiting */
+ memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
+ return 0;
+ }
+ cur_mem_info_p = cur_mem_info_p->next;
+ }
+
+ /* not found */
+ if (expect_exist)
+ printk(KERN_ERR "mtl rsc inconsistency: %s: %s::%lu: %s for unknown address=0x%lX\n",
+ __func__, filename, line_num, memtype_free_str(memtype), addr);
+
+ memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
+ return 1;
+}
+EXPORT_SYMBOL(memtrack_is_new_addr);
+
+/* Return current page reference counter */
+int memtrack_get_page_ref_count(unsigned long addr)
+{
+ unsigned long hash_val;
+ struct memtrack_meminfo_t *cur_mem_info_p;
+ struct tracked_obj_desc_t *obj_desc_p;
+ unsigned long flags;
+ /* This function is called only for page allocation */
+ enum memtrack_memtype_t memtype = MEMTRACK_PAGE_ALLOC;
+	int ref_count = 0;
+
+ if (!tracked_objs_arr[memtype]) {
+ /* object is not tracked */
+		return ref_count;
+ }
+ obj_desc_p = tracked_objs_arr[memtype];
+
+ hash_val = addr % MEMTRACK_HASH_SZ;
+
+ memtrack_spin_lock(&obj_desc_p->hash_lock, flags);
+ /* find mem_info of given memory location */
+ cur_mem_info_p = obj_desc_p->mem_hash[hash_val];
+ while (cur_mem_info_p != NULL) {
+ if (cur_mem_info_p->addr == addr) {
+ /* Found given address in the database - check ref-count */
+ struct page *page = (struct page *)(cur_mem_info_p->addr);
+			ref_count = atomic_read(&page->_count);
+			memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
+			return ref_count;
+ }
+ cur_mem_info_p = cur_mem_info_p->next;
+ }
+
+ /* not found */
+ memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
+	return ref_count;
+}
+EXPORT_SYMBOL(memtrack_get_page_ref_count);
/* Report current allocations status (for all memory types) */
static void memtrack_report(void)
{
- memtrack_memtype_t memtype;
+ enum memtrack_memtype_t memtype;
unsigned long cur_bucket;
- memtrack_meminfo_t *cur_mem_info_p;
+ struct memtrack_meminfo_t *cur_mem_info_p;
int serial = 1;
- tracked_obj_desc_t *obj_desc_p;
+ struct tracked_obj_desc_t *obj_desc_p;
unsigned long flags;
+ unsigned long detected_leaks = 0;
- printk("%s: Currently known allocations:\n", __func__);
+ printk(KERN_INFO "%s: Currently known allocations:\n", __func__);
for (memtype = 0; memtype < MEMTRACK_NUM_OF_MEMTYPES; memtype++) {
if (tracked_objs_arr[memtype]) {
- printk("%d) %s:\n", serial, memtype_alloc_str(memtype));
+ printk(KERN_INFO "%d) %s:\n", serial, memtype_alloc_str(memtype));
obj_desc_p = tracked_objs_arr[memtype];
/* Scan all buckets to find existing allocations */
/* TBD: this may be optimized by holding a linked list of all hash items */
- for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ;
- cur_bucket++) {
+ for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ; cur_bucket++) {
memtrack_spin_lock(&obj_desc_p->hash_lock, flags); /* protect per bucket/list */
- cur_mem_info_p =
- obj_desc_p->mem_hash[cur_bucket];
+ cur_mem_info_p = obj_desc_p->mem_hash[cur_bucket];
while (cur_mem_info_p != NULL) { /* scan bucket */
- printk("%s::%lu: %s(%lu)==%lX\n",
+ printk(KERN_INFO "%s::%lu: %s(%lu)==%lX dev=%lX %s\n",
cur_mem_info_p->filename,
cur_mem_info_p->line_num,
memtype_alloc_str(memtype),
cur_mem_info_p->size,
- cur_mem_info_p->addr);
+ cur_mem_info_p->addr,
+ cur_mem_info_p->dev,
+ cur_mem_info_p->ext_info);
cur_mem_info_p = cur_mem_info_p->next;
+					++detected_leaks;
} /* while cur_mem_info_p */
memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
} /* for cur_bucket */
serial++;
}
} /* for memtype */
+ printk(KERN_INFO "%s: Summary: %lu leak(s) detected\n", __func__, detected_leaks);
}
static struct proc_dir_entry *memtrack_tree;
-static memtrack_memtype_t get_rsc_by_name(const char *name)
+static enum memtrack_memtype_t get_rsc_by_name(const char *name)
{
- memtrack_memtype_t i;
+ enum memtrack_memtype_t i;
- for (i=0; i<MEMTRACK_NUM_OF_MEMTYPES; ++i) {
- if (strcmp(name, rsc_names[i]) == 0) {
+ for (i = 0; i < MEMTRACK_NUM_OF_MEMTYPES; ++i) {
+ if (strcmp(name, rsc_names[i]) == 0)
return i;
}
- }
return i;
}
@@ -375,44 +738,41 @@ static ssize_t memtrack_read(struct file *filp,
static int file_len;
int _read, to_ret, left;
const char *fname;
- memtrack_memtype_t memtype;
+ enum memtrack_memtype_t memtype;
if (pos < 0)
return -EINVAL;
- fname= filp->f_dentry->d_name.name;
+ fname = filp->f_dentry->d_name.name;
- memtype= get_rsc_by_name(fname);
+ memtype = get_rsc_by_name(fname);
if (memtype >= MEMTRACK_NUM_OF_MEMTYPES) {
- printk("invalid file name\n");
+ printk(KERN_ERR "invalid file name\n");
return -EINVAL;
}
- if ( pos == 0 ) {
+ if (pos == 0) {
memtrack_spin_lock(&tracked_objs_arr[memtype]->hash_lock, flags);
- cur= tracked_objs_arr[memtype]->count;
+ cur = tracked_objs_arr[memtype]->count;
memtrack_spin_unlock(&tracked_objs_arr[memtype]->hash_lock, flags);
_read = sprintf(kbuf, "%lu\n", cur);
- if ( _read < 0 ) {
+ if (_read < 0)
return _read;
- }
- else {
+ else
file_len = _read;
}
- }
left = file_len - pos;
to_ret = (left < size) ? left : size;
- if ( copy_to_user(buf, kbuf+pos, to_ret) ) {
+ if (copy_to_user(buf, kbuf+pos, to_ret))
return -EFAULT;
- }
else {
*offset = pos + to_ret;
return to_ret;
}
}
-static struct file_operations memtrack_proc_fops = {
+static const struct file_operations memtrack_proc_fops = {
.read = memtrack_read,
};
@@ -426,30 +786,28 @@ static int create_procfs_tree(void)
unsigned long bit_mask;
dir_ent = proc_mkdir(memtrack_proc_entry_name, NULL);
- if ( !dir_ent ) {
+ if (!dir_ent)
return -1;
- }
memtrack_tree = dir_ent;
- for (i=0, bit_mask=1; i<MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask<<=1) {
+ for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask <<= 1) {
if (bit_mask & track_mask) {
proc_ent = create_proc_entry(rsc_names[i], S_IRUGO, memtrack_tree);
- if ( !proc_ent )
+ if (!proc_ent)
goto undo_create_root;
- proc_ent->proc_fops = &memtrack_proc_fops;
+ proc_ent->proc_fops = &memtrack_proc_fops;
}
}
goto exit_ok;
undo_create_root:
- for (j=0, bit_mask=1; j<i; ++j, bit_mask<<=1) {
- if (bit_mask & track_mask) {
+ for (j = 0, bit_mask = 1; j < i; ++j, bit_mask <<= 1) {
+ if (bit_mask & track_mask)
remove_proc_entry(rsc_names[j], memtrack_tree);
}
- }
remove_proc_entry(memtrack_proc_entry_name, NULL);
return -1;
@@ -463,30 +821,48 @@ static void destroy_procfs_tree(void)
int i;
unsigned long bit_mask;
- for (i=0, bit_mask=1; i<MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask<<=1) {
- if (bit_mask & track_mask) {
+ for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask <<= 1) {
+ if (bit_mask & track_mask)
remove_proc_entry(rsc_names[i], memtrack_tree);
- }
+
}
remove_proc_entry(memtrack_proc_entry_name, NULL);
}
+int memtrack_inject_error(void)
+{
+ int val = 0;
+
+ if (inject_freq) {
+ if (!(random32() % inject_freq))
+ val = 1;
+ }
+
+ return val;
+}
+EXPORT_SYMBOL(memtrack_inject_error);
+
+int memtrack_randomize_mem(void)
+{
+ return random_mem;
+}
+EXPORT_SYMBOL(memtrack_randomize_mem);
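
These two predicates are the hooks the mtrack.h wrappers below are built around: memtrack_inject_error() fails roughly one call in inject_freq, and memtrack_randomize_mem() tells the wrapper to scribble random bytes over fresh allocations so use-before-init bugs surface. A condensed sketch of that wrapper pattern (the real macros appear in mtrack.h; this helper is hypothetical):

static void *tracked_kmalloc_sketch(size_t sz)
{
	void *p = NULL;

	if (memtrack_inject_error())	/* fires ~1/inject_freq of calls */
		printk(KERN_ERR "injected kmalloc failure\n");
	else
		p = kmalloc(sz, GFP_KERNEL);
	if (p && memtrack_randomize_mem())
		get_random_bytes(p, sz);	/* poison: catch use-before-init */
	return p;
}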
/* module entry points */
int init_module(void)
{
- memtrack_memtype_t i;
+ enum memtrack_memtype_t i;
int j;
unsigned long bit_mask;
/* create a cache for the memtrack_meminfo_t strcutures */
meminfo_cache = kmem_cache_create("memtrack_meminfo_t",
- sizeof(memtrack_meminfo_t), 0,
+ sizeof(struct memtrack_meminfo_t), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!meminfo_cache) {
- printk("memtrack::%s: failed to allocate meminfo cache\n", __func__);
+ printk(KERN_ERR "memtrack::%s: failed to allocate meminfo cache\n", __func__);
return -1;
}
@@ -494,49 +870,43 @@ int init_module(void)
memset(tracked_objs_arr, 0, sizeof(tracked_objs_arr));
/* create a tracking object descriptor for all required objects */
- for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES;
- ++i, bit_mask <<= 1) {
+ for (i = 0, bit_mask = 1; i < MEMTRACK_NUM_OF_MEMTYPES; ++i, bit_mask <<= 1) {
if (bit_mask & track_mask) {
- tracked_objs_arr[i] =
- vmalloc(sizeof(tracked_obj_desc_t));
+ tracked_objs_arr[i] = vmalloc(sizeof(struct tracked_obj_desc_t));
if (!tracked_objs_arr[i]) {
- printk("memtrack: failed to allocate tracking object\n");
+ printk(KERN_ERR "memtrack: failed to allocate tracking object\n");
goto undo_cache_create;
}
- memset(tracked_objs_arr[i], 0, sizeof(tracked_obj_desc_t));
+ memset(tracked_objs_arr[i], 0, sizeof(struct tracked_obj_desc_t));
spin_lock_init(&tracked_objs_arr[i]->hash_lock);
INIT_LIST_HEAD(&tracked_objs_arr[i]->tracked_objs_head);
- if (bit_mask & strict_track_mask) {
+ if (bit_mask & strict_track_mask)
tracked_objs_arr[i]->strict_track = 1;
- } else {
+ else
tracked_objs_arr[i]->strict_track = 0;
}
}
- }
- if ( create_procfs_tree() ) {
- printk("%s: create_procfs_tree() failed\n", __FILE__);
+ if (create_procfs_tree()) {
+ printk(KERN_ERR "%s: create_procfs_tree() failed\n", __FILE__);
goto undo_cache_create;
}
-
- printk("memtrack::%s done.\n", __func__);
+ printk(KERN_INFO "memtrack::%s done.\n", __func__);
return 0;
undo_cache_create:
- for (j=0; j<i; ++j) {
- if (tracked_objs_arr[j]) {
+ for (j = 0; j < i; ++j) {
+ if (tracked_objs_arr[j])
vfree(tracked_objs_arr[j]);
}
- }
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
- if (kmem_cache_destroy(meminfo_cache) != 0) {
- printk("Failed on kmem_cache_destroy !\n");
- }
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
+ if (kmem_cache_destroy(meminfo_cache) != 0)
+ printk(KERN_ERR "Failed on kmem_cache_destroy!\n");
#else
kmem_cache_destroy(meminfo_cache);
#endif
@@ -546,10 +916,10 @@ undo_cache_create:
void cleanup_module(void)
{
- memtrack_memtype_t memtype;
+ enum memtrack_memtype_t memtype;
unsigned long cur_bucket;
- memtrack_meminfo_t *cur_mem_info_p, *next_mem_info_p;
- tracked_obj_desc_t *obj_desc_p;
+ struct memtrack_meminfo_t *cur_mem_info_p, *next_mem_info_p;
+ struct tracked_obj_desc_t *obj_desc_p;
unsigned long flags;
@@ -564,15 +934,12 @@ void cleanup_module(void)
/* TBD: this may be optimized by holding a linked list of all hash items */
if (tracked_objs_arr[memtype]) {
obj_desc_p = tracked_objs_arr[memtype];
- for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ;
- cur_bucket++) {
+ for (cur_bucket = 0; cur_bucket < MEMTRACK_HASH_SZ; cur_bucket++) {
memtrack_spin_lock(&obj_desc_p->hash_lock, flags); /* protect per bucket/list */
- cur_mem_info_p =
- obj_desc_p->mem_hash[cur_bucket];
+ cur_mem_info_p = obj_desc_p->mem_hash[cur_bucket];
while (cur_mem_info_p != NULL) { /* scan bucket */
next_mem_info_p = cur_mem_info_p->next; /* save "next" pointer before the "free" */
- kmem_cache_free(meminfo_cache,
- cur_mem_info_p);
+ kmem_cache_free(meminfo_cache, cur_mem_info_p);
cur_mem_info_p = next_mem_info_p;
} /* while cur_mem_info_p */
memtrack_spin_unlock(&obj_desc_p->hash_lock, flags);
@@ -581,20 +948,11 @@ void cleanup_module(void)
}
} /* for memtype */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
- if (kmem_cache_destroy(meminfo_cache) != 0) {
- printk
- ("memtrack::cleanup_module: Failed on kmem_cache_destroy !\n");
- }
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
+ if (kmem_cache_destroy(meminfo_cache) != 0)
+ printk(KERN_ERR "memtrack::cleanup_module: Failed on kmem_cache_destroy!\n");
#else
kmem_cache_destroy(meminfo_cache);
#endif
- printk("memtrack::cleanup_module done.\n");
+ printk(KERN_INFO "memtrack::cleanup_module done.\n");
}
-
-EXPORT_SYMBOL(memtrack_alloc);
-EXPORT_SYMBOL(memtrack_free);
-
-//module_init(memtrack_init)
-//module_exit(memtrack_exit)
-
diff --git a/sys/ofed/drivers/infiniband/debug/memtrack.h b/sys/ofed/drivers/infiniband/debug/memtrack.h
index e443a31..76265ae 100644
--- a/sys/ofed/drivers/infiniband/debug/memtrack.h
+++ b/sys/ofed/drivers/infiniband/debug/memtrack.h
@@ -22,24 +22,85 @@
#ifndef H_MEMTRACK_H
#define H_MEMTRACK_H
-typedef enum {
+enum memtrack_memtype_t {
MEMTRACK_KMALLOC,
MEMTRACK_VMALLOC,
MEMTRACK_KMEM_OBJ,
+ MEMTRACK_IOREMAP, /* IO-RE/UN-MAP */
+ MEMTRACK_WORK_QUEUE, /* Handle work-queue create & destroy */
+ MEMTRACK_PAGE_ALLOC, /* Handle page allocation and free */
+ MEMTRACK_DMA_MAP_SINGLE,/* Handle ib_dma_single map and unmap */
+ MEMTRACK_DMA_MAP_PAGE, /* Handle ib_dma_page map and unmap */
+ MEMTRACK_DMA_MAP_SG, /* Handle ib_dma_sg map and unmap with and without attributes */
MEMTRACK_NUM_OF_MEMTYPES
-} memtrack_memtype_t;
+};
/* Invoke on memory allocation */
-void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr,
- unsigned long size, const char *filename,
+void memtrack_alloc(enum memtrack_memtype_t memtype, unsigned long dev,
+ unsigned long addr, unsigned long size, unsigned long addr2,
+ int direction, const char *filename,
const unsigned long line_num, int alloc_flags);
/* Invoke on memory free */
-void memtrack_free(memtrack_memtype_t memtype, unsigned long addr,
+void memtrack_free(enum memtrack_memtype_t memtype, unsigned long dev,
+ unsigned long addr, unsigned long size, int direction,
const char *filename, const unsigned long line_num);
+/*
+ * This function recognizes allocations which
+ * may be released by the kernel (e.g. skb & vnic) and
+ * are therefore not trackable by memtrack.
+ * The allocations are recognized by the name
+ * of their calling function.
+ */
+int is_non_trackable_alloc_func(const char *func_name);
+/*
+ * In some cases we need to free memory
+ * we defined as "non trackable" (see
+ * is_non_trackable_alloc_func).
+ * This function recognizes such releases
+ * by the name of their calling function.
+ */
+int is_non_trackable_free_func(const char *func_name);
+
+/* WA - this function checks whether
+   the calling function name is
+   '__ib_umem_release' or 'ib_umem_get'.
+   In that case we won't track the
+   memory, because the kernel
+   was the one who allocated it.
+   Return value:
+   1 - if the function name matches, else 0 */
+int is_umem_put_page(const char *func_name);
+
+/* Check page order/size:
+   when freeing a page allocation, verify that
+   we are trying to free the same number of pages
+   we asked to allocate (as log2(order)).
+   If an error is found it will print
+   an error msg */
+int memtrack_check_size(enum memtrack_memtype_t memtype, unsigned long addr,
+ unsigned long size, const char *filename,
+ const unsigned long line_num);
+
+/* Search the current database for a specific addr.
+   If not found it will print an error msg.
+   Return value: 0 - if addr exists, else 1 */
+int memtrack_is_new_addr(enum memtrack_memtype_t memtype, unsigned long addr, int expect_exist,
+ const char *filename, const unsigned long line_num);
+
+/* Return current page reference counter */
+int memtrack_get_page_ref_count(unsigned long addr);
+
/* Report current allocations status (for all memory types) */
/* we do not export this function since it is used by cleanup_module only */
/* void memtrack_report(void); */
+/* Allow support of error injections */
+int memtrack_inject_error(void);
+
+/* randomize allocated memory */
+int memtrack_randomize_mem(void);
+
#endif
diff --git a/sys/ofed/drivers/infiniband/debug/mtrack.h b/sys/ofed/drivers/infiniband/debug/mtrack.h
index 337d9c3..5c0cd20 100644
--- a/sys/ofed/drivers/infiniband/debug/mtrack.h
+++ b/sys/ofed/drivers/infiniband/debug/mtrack.h
@@ -1,46 +1,84 @@
#ifndef __mtrack_h_
#define __mtrack_h_
-#include <memtrack.h>
+#include "memtrack.h"
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/io.h> /* For ioremap_nocache, ioremap, iounmap */
+#include <linux/random.h>
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 27)
+# include <linux/io-mapping.h> /* For ioremap_nocache, ioremap, iounmap */
+#endif
+#include <linux/mm.h> /* For all page handling */
+#include <linux/workqueue.h> /* For all work-queue handling */
+#include <linux/scatterlist.h> /* For using scatterlists */
+#include <linux/skbuff.h> /* For skbufs handling */
+#include <asm/uaccess.h> /* For copy from/to user */
+
+#define MEMTRACK_ERROR_INJECTION_MESSAGE(file, line, func) ({ \
+ printk(KERN_ERR "%s failure injected at %s:%d\n", func, file, line); \
+ dump_stack(); \
+})
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14)
#define RDMA_KZALLOC_H
#define kzalloc(size, flags) ({ \
- void *__memtrack_kz_addr; \
+ void *__memtrack_kz_addr = NULL; \
\
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc");\
+ else \
__memtrack_kz_addr = kmalloc(size, flags); \
- if ( __memtrack_kz_addr ) { \
- memset( __memtrack_kz_addr, 0, size) ; \
+ if (__memtrack_kz_addr && !is_non_trackable_alloc_func(__func__)) { \
+ memset(__memtrack_kz_addr, 0, size); \
} \
__memtrack_kz_addr; \
})
#else
#define kzalloc(size, flags) ({ \
- void *__memtrack_addr; \
+ void *__memtrack_addr = NULL; \
\
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc");\
+ else \
__memtrack_addr = kzalloc(size, flags); \
- if ( __memtrack_addr && (size)) { \
- memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), size, __FILE__, __LINE__, flags); \
+ if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, flags); \
} \
__memtrack_addr; \
})
#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+#define kzalloc_node(size, flags, node) ({ \
+ void *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc_node"); \
+ else \
+ __memtrack_addr = kzalloc_node(size, flags, node); \
+ if (__memtrack_addr && (size) && \
+ !is_non_trackable_alloc_func(__func__)) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, flags); \
+ } \
+ __memtrack_addr; \
+})
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
#define kcalloc(n, size, flags) kzalloc((n)*(size), flags)
#else
#define kcalloc(n, size, flags) ({ \
- void *__memtrack_addr; \
+ void *__memtrack_addr = NULL; \
\
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kcalloc");\
+ else \
__memtrack_addr = kcalloc(n, size, flags); \
- if ( __memtrack_addr && (size)) { \
- memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), (n)*(size), __FILE__, __LINE__, flags); \
+ if (__memtrack_addr && (size)) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), (n)*(size), 0UL, 0, __FILE__, __LINE__, flags); \
} \
__memtrack_addr; \
})
@@ -50,76 +88,208 @@
#ifdef ZERO_OR_NULL_PTR
#define kmalloc(sz, flgs) ({ \
- void *__memtrack_addr; \
+ void *__memtrack_addr = NULL; \
\
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc");\
+ else \
__memtrack_addr = kmalloc(sz, flgs); \
- if ( !ZERO_OR_NULL_PTR(__memtrack_addr)) { \
- memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), sz, __FILE__, __LINE__, flgs); \
+ if (!ZERO_OR_NULL_PTR(__memtrack_addr)) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
+ if (memtrack_randomize_mem()) \
+ get_random_bytes(__memtrack_addr, sz); \
} \
__memtrack_addr; \
})
#else
#define kmalloc(sz, flgs) ({ \
- void *__memtrack_addr; \
+ void *__memtrack_addr = NULL; \
\
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc");\
+ else \
__memtrack_addr = kmalloc(sz, flgs); \
- if ( __memtrack_addr ) { \
- memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), sz, __FILE__, __LINE__, flgs); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
+ if (memtrack_randomize_mem()) \
+ get_random_bytes(__memtrack_addr, sz); \
} \
__memtrack_addr; \
})
#endif
+#define kmalloc_node(sz, flgs, node) ({ \
+ void *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc_node"); \
+ else \
+ __memtrack_addr = kmalloc_node(sz, flgs, node); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
+ if (memtrack_randomize_mem() && ((flgs) == GFP_KERNEL)) \
+ get_random_bytes(__memtrack_addr, sz); \
+ } \
+ __memtrack_addr; \
+})
+
+#ifdef ZERO_OR_NULL_PTR
+#define kmemdup(src, sz, flgs) ({ \
+ void *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmemdup");\
+ else \
+ __memtrack_addr = kmemdup(src, sz, flgs); \
+ if (!ZERO_OR_NULL_PTR(__memtrack_addr)) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
+ } \
+ __memtrack_addr; \
+})
+#else
+#define kmemdup(src, sz, flgs) ({ \
+ void *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmemdup");\
+ else \
+ __memtrack_addr = kmemdup(src, sz, flgs); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
+ } \
+ __memtrack_addr; \
+})
+#endif
+
#ifdef ZERO_OR_NULL_PTR
#define kfree(addr) ({ \
void *__memtrack_addr = (void *)addr; \
- if ( !ZERO_OR_NULL_PTR(__memtrack_addr) ) { \
- memtrack_free(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
+ \
+ if (!ZERO_OR_NULL_PTR(__memtrack_addr) && \
+ !is_non_trackable_free_func(__func__)) { \
+ memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
kfree(__memtrack_addr); \
})
#else
#define kfree(addr) ({ \
void *__memtrack_addr = (void *)addr; \
- if ( __memtrack_addr ) { \
- memtrack_free(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
+ \
+ if (__memtrack_addr && !is_non_trackable_free_func(__func__)) { \
+ memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
kfree(__memtrack_addr); \
})
#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) || defined (CONFIG_COMPAT_RCU)
+#ifdef kfree_rcu
+ #undef kfree_rcu
+#endif
-
-
-
+#ifdef ZERO_OR_NULL_PTR
+#define kfree_rcu(addr, rcu_head) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (!ZERO_OR_NULL_PTR(__memtrack_addr) && \
+ !is_non_trackable_free_func(__func__)) { \
+ memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ __kfree_rcu(&((addr)->rcu_head), offsetof(typeof(*(addr)), rcu_head)); \
+})
+#else
+#define kfree_rcu(addr, rcu_head) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (__memtrack_addr && !is_non_trackable_free_func(__func__)) { \
+ memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ __kfree_rcu(&((addr)->rcu_head), offsetof(typeof(*(addr)), rcu_head)); \
+})
+#endif
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) || CONFIG_COMPAT_RCU */
#define vmalloc(size) ({ \
- void *__memtrack_addr; \
+ void *__memtrack_addr = NULL; \
\
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vmalloc");\
+ else \
__memtrack_addr = vmalloc(size); \
- if ( __memtrack_addr ) { \
- memtrack_alloc(MEMTRACK_VMALLOC, (unsigned long)(__memtrack_addr), size, __FILE__, __LINE__, GFP_ATOMIC); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ if (memtrack_randomize_mem()) \
+ get_random_bytes(__memtrack_addr, size); \
+ } \
+ __memtrack_addr; \
+})
+
+#ifndef vzalloc
+#define vzalloc(size) ({ \
+ void *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vzalloc");\
+ else \
+ __memtrack_addr = vzalloc(size); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_addr; \
})
+#endif
+
+#ifndef vzalloc_node
+#define vzalloc_node(size, node) ({ \
+ void *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vzalloc_node"); \
+ else \
+ __memtrack_addr = vzalloc_node(size, node); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ if (memtrack_randomize_mem()) \
+ get_random_bytes(__memtrack_addr, size); \
+ } \
+ __memtrack_addr; \
+})
+#endif
+#define vmalloc_node(size, node) ({ \
+ void *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vmalloc_node"); \
+ else \
+ __memtrack_addr = vmalloc_node(size, node); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ if (memtrack_randomize_mem()) \
+ get_random_bytes(__memtrack_addr, size); \
+ } \
+ __memtrack_addr; \
+})
#define vfree(addr) ({ \
void *__memtrack_addr = (void *)addr; \
- if ( __memtrack_addr ) { \
- memtrack_free(MEMTRACK_VMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
+ if (__memtrack_addr) { \
+ memtrack_free(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
vfree(__memtrack_addr); \
})
#define kmem_cache_alloc(cache, flags) ({ \
- void *__memtrack_addr; \
+ void *__memtrack_addr = NULL; \
\
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmem_cache_alloc"); \
+ else \
__memtrack_addr = kmem_cache_alloc(cache, flags); \
- if ( __memtrack_addr ) { \
- memtrack_alloc(MEMTRACK_KMEM_OBJ, (unsigned long)(__memtrack_addr), 1, __FILE__, __LINE__, flags); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_KMEM_OBJ, 0UL, (unsigned long)(__memtrack_addr), 1, 0UL, 0, __FILE__, __LINE__, flags); \
} \
__memtrack_addr; \
})
@@ -127,12 +297,548 @@
#define kmem_cache_free(cache, addr) ({ \
void *__memtrack_addr = (void *)addr; \
- if ( __memtrack_addr ) { \
- memtrack_free(MEMTRACK_KMEM_OBJ, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
+ \
+ if (__memtrack_addr) { \
+ memtrack_free(MEMTRACK_KMEM_OBJ, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
kmem_cache_free(cache, __memtrack_addr); \
})
+/* All IO-MAP handling */
+#define ioremap(phys_addr, size) ({ \
+ void __iomem *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap");\
+ else \
+ __memtrack_addr = ioremap(phys_addr, size); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ __memtrack_addr; \
+})
+
+#define io_mapping_create_wc(base, size) ({ \
+ void __iomem *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "io_mapping_create_wc"); \
+ else \
+ __memtrack_addr = io_mapping_create_wc(base, size); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ __memtrack_addr; \
+})
+
+#define io_mapping_free(addr) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (__memtrack_addr) { \
+ memtrack_free(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ io_mapping_free(__memtrack_addr); \
+})
+
+#ifdef CONFIG_PPC
+#ifdef ioremap_nocache
+ #undef ioremap_nocache
+#endif
+#define ioremap_nocache(phys_addr, size) ({ \
+ void __iomem *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \
+ else \
+ __memtrack_addr = ioremap(phys_addr, size); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ __memtrack_addr; \
+})
+#else
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18) /* 2.6.16 - 2.6.17 */
+#ifdef ioremap_nocache
+ #undef ioremap_nocache
+#endif
+#define ioremap_nocache(phys_addr, size) ({ \
+ void __iomem *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \
+ else \
+ __memtrack_addr = ioremap(phys_addr, size); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ __memtrack_addr; \
+})
+#else
+#define ioremap_nocache(phys_addr, size) ({ \
+ void __iomem *__memtrack_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \
+ else \
+ __memtrack_addr = ioremap_nocache(phys_addr, size); \
+ if (__memtrack_addr) { \
+ memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ __memtrack_addr; \
+})
+#endif /* Kernel version is under 2.6.18 */
+#endif /* PPC */
+
+#define iounmap(addr) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (__memtrack_addr) { \
+ memtrack_free(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ iounmap(__memtrack_addr); \
+})
+
+
+/* All Page handlers */
+/* TODO: Catch netif_rx for page dereference */
+#define alloc_pages_node(nid, gfp_mask, order) ({ \
+ struct page *page_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_pages_node"); \
+ else \
+ page_addr = (struct page *)alloc_pages_node(nid, gfp_mask, order); \
+ if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
+ memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ page_addr; \
+})
+
+#ifdef CONFIG_NUMA
+#define alloc_pages(gfp_mask, order) ({ \
+ struct page *page_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_pages"); \
+ else \
+ page_addr = (struct page *)alloc_pages(gfp_mask, order); \
+ if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
+ memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ page_addr; \
+})
+#else
+#ifdef alloc_pages
+ #undef alloc_pages
+#endif
+#define alloc_pages(gfp_mask, order) ({ \
+ struct page *page_addr; \
+ \
+ page_addr = (struct page *)alloc_pages_node(numa_node_id(), gfp_mask, order); \
+ page_addr; \
+})
+#endif
+
+#define __get_free_pages(gfp_mask, order) ({ \
+ struct page *page_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "__get_free_pages"); \
+ else \
+ page_addr = (struct page *)__get_free_pages(gfp_mask, order); \
+ if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
+ memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ page_addr; \
+})
+
+#define get_zeroed_page(gfp_mask) ({ \
+ struct page *page_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "get_zeroed_page"); \
+ else \
+ page_addr = (struct page *)get_zeroed_page(gfp_mask); \
+ if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
+ memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ (unsigned long)page_addr; \
+})
+
+#define __free_pages(addr, order) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
+ if (!memtrack_check_size(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), (unsigned long)(order), __FILE__, __LINE__)) \
+ memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ __free_pages(addr, order); \
+})
+
+
+#define free_pages(addr, order) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
+ if (!memtrack_check_size(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), (unsigned long)(order), __FILE__, __LINE__)) \
+ memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ free_pages(addr, order); \
+})
+
+
+#define get_page(addr) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
+ if (memtrack_is_new_addr(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), 0, __FILE__, __LINE__)) { \
+ memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ } \
+ get_page(addr); \
+})
+
+#define get_user_pages_fast(start, nr_pages, write, pages) ({ \
+ int __memtrack_rc = -1; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "get_user_pages_fast"); \
+ else \
+ __memtrack_rc = get_user_pages_fast(start, nr_pages, write, pages); \
+ if (__memtrack_rc > 0 && !is_non_trackable_alloc_func(__func__)) { \
+ int __memtrack_i; \
+ \
+ for (__memtrack_i = 0; __memtrack_i < __memtrack_rc; __memtrack_i++) \
+ memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(pages[__memtrack_i]), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ __memtrack_rc; \
+})
+
+#define put_page(addr) ({ \
+ void *__memtrack_addr = (void *)addr; \
+ \
+ if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
+ /* If this is not part of a umem put_page, is not */\
+ /* a new addr, and the ref-count is 1, untrack this addr. */\
+ /* Don't change the order of these conditions. */ \
+ if (!is_umem_put_page(__func__) && \
+ !memtrack_is_new_addr(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), 1, __FILE__, __LINE__) && \
+ (memtrack_get_page_ref_count((unsigned long)(__memtrack_addr)) == 1)) { \
+ memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ } \
+ put_page(addr); \
+})
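
The get_page()/put_page() wrappers mirror the page reference count: get_page() registers an address it has not seen before, and put_page() drops the tracking entry only when the reference being released is the last one. A small hedged illustration of that rule (the entry type and untrack() helper are hypothetical):

    #include <stdio.h>

    struct tracked_page {           /* hypothetical tracking entry */
            unsigned long addr;
            int refcount;
    };

    static void untrack(unsigned long addr)
    {
            printf("untracking 0x%lx\n", addr);
    }

    /* Mirror of the put_page rule above: remove the entry only when
     * the reference about to be dropped is the last one. */
    static void tracked_put(struct tracked_page *tp)
    {
            if (tp->refcount == 1)
                    untrack(tp->addr);
            tp->refcount--;
    }

    int main(void)
    {
            struct tracked_page tp = { 0x1000UL, 2 };
            tracked_put(&tp);       /* 2 -> 1, still tracked */
            tracked_put(&tp);       /* 1 -> 0, untracked */
            return 0;
    }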
+
+
+/* Work-Queue handlers */
+#ifdef create_workqueue
+ #undef create_workqueue
+#endif
+#ifdef create_rt_workqueue
+ #undef create_rt_workqueue
+#endif
+#ifdef create_freezeable_workqueue
+ #undef create_freezeable_workqueue
+#endif
+#ifdef create_singlethread_workqueue
+ #undef create_singlethread_workqueue
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) /* 2.6.18 - 2.6.19 */
+#define create_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 0); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#define create_singlethread_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 1); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) /* 2.6.20 - 2.6.27 */
+#define create_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 0, 0); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) /* 2.6.20 - 2.6.21 */
+#define create_freezeable_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 0, 1); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+#else /* 2.6.22 - 2.6.27 */
+#define create_freezeable_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 1, 1); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+#endif /* 2.6.20 - 2.6.27 */
+
+#define create_singlethread_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 1, 0); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) /* 2.6.28 - 2.6.35 */
+
+#ifdef alloc_workqueue
+ #undef alloc_workqueue
+#endif
+
+#define alloc_workqueue(name, flags, max_active) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), (flags), (max_active), 0); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#define create_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 0, 0, 0); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#define create_rt_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_rt_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 0, 0, 1); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#define create_freezeable_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 1, 1, 0); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+
+#define create_singlethread_workqueue(name) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \
+ else \
+ wq_addr = __create_workqueue((name), 1, 0, 0); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+#else /* 2.6.36 and above */
+#ifdef alloc_workqueue
+ #undef alloc_workqueue
+#endif
+#ifdef CONFIG_LOCKDEP
+#define alloc_workqueue(name, flags, max_active) \
+({ \
+ static struct lock_class_key __key; \
+ const char *__lock_name; \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (__builtin_constant_p(name)) \
+ __lock_name = (name); \
+ else \
+ __lock_name = #name; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \
+ else \
+ wq_addr = __alloc_workqueue_key((name), (flags), (max_active), \
+ &__key, __lock_name); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+#else
+#define alloc_workqueue(name, flags, max_active) ({ \
+ struct workqueue_struct *wq_addr = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \
+ else \
+ wq_addr = __alloc_workqueue_key((name), (flags), (max_active), NULL, NULL); \
+ if (wq_addr) { \
+ memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(wq_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
+ } \
+ wq_addr; \
+})
+#endif
+
+#define create_workqueue(name) \
+ alloc_workqueue((name), WQ_RESCUER, 1)
+
+#define create_freezeable_workqueue(name) \
+ alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1)
+
+#define create_singlethread_workqueue(name) \
+ alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1)
+
+#endif /* Work-Queue Kernel Versions */
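
The workqueue section selects an implementation purely by kernel version. LINUX_VERSION_CODE and KERNEL_VERSION() pack a (major, minor, patch) triple into one integer, roughly ((a) << 16) + ((b) << 8) + (c), so the range checks are plain integer comparisons. A bare sketch of the gating used above:

    #include <linux/version.h>

    #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
    /* 2.6.18 - 2.6.19: two-argument __create_workqueue() */
    #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
    /* 2.6.20 - 2.6.27: three-argument __create_workqueue() */
    #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
    /* 2.6.28 - 2.6.35: four-argument __create_workqueue() */
    #else
    /* 2.6.36 and above: __alloc_workqueue_key() */
    #endif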
+
+#define destroy_workqueue(wq_addr) ({ \
+ void *__memtrack_addr = (void *)wq_addr; \
+ \
+ if (__memtrack_addr) { \
+ memtrack_free(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
+ } \
+ destroy_workqueue(wq_addr); \
+})
+
+/* Error injection only: functions that we don't otherwise track */
+#define alloc_skb(size, prio) ({ \
+ struct sk_buff *__memtrack_skb = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_skb"); \
+ else \
+ __memtrack_skb = alloc_skb(size, prio); \
+ __memtrack_skb; \
+})
+
+#define dev_alloc_skb(size) ({ \
+ struct sk_buff *__memtrack_skb = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "dev_alloc_skb"); \
+ else \
+ __memtrack_skb = dev_alloc_skb(size); \
+ __memtrack_skb; \
+})
+
+#define alloc_skb_fclone(size, prio) ({ \
+ struct sk_buff *__memtrack_skb = NULL; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_skb_fclone"); \
+ else \
+ __memtrack_skb = alloc_skb_fclone(size, prio); \
+ __memtrack_skb; \
+})
+
+#define copy_from_user(to, from, n) ({ \
+ int ret = n; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "copy_from_user"); \
+ else \
+ ret = copy_from_user(to, from, n); \
+ ret; \
+})
+
+#define copy_to_user(to, from, n) ({ \
+ int ret = n; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "copy_to_user"); \
+ else \
+ ret = copy_to_user(to, from, n); \
+ ret; \
+})
+
+#define sysfs_create_file(kobj, attr) ({ \
+ int ret = -ENOSYS; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_file"); \
+ else \
+ ret = sysfs_create_file(kobj, attr); \
+ ret; \
+})
+
+#define sysfs_create_link(kobj, target, name) ({ \
+ int ret = -ENOSYS; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_link"); \
+ else \
+ ret = sysfs_create_link(kobj, target, name); \
+ ret; \
+})
+
+#define sysfs_create_group(kobj, grp) ({ \
+ int ret = -ENOSYS; \
+ \
+ if (memtrack_inject_error()) \
+ MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_group"); \
+ else \
+ ret = sysfs_create_group(kobj, grp); \
+ ret; \
+})
+
#endif /* __mtrack_h_ */
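
Because every wrapper keeps the name, arguments, and value of the function it shadows, a translation unit only has to include mtrack.h to be tracked; callers do not change. A hedged kernel-style sketch of what a tracked call site looks like:

    #include "mtrack.h"

    static int example_init(void)
    {
            struct workqueue_struct *wq;

            /* Expands to the wrapper above: error-injection check,
             * the real workqueue allocation, memtrack_alloc()
             * bookkeeping, then the pointer as the macro's value. */
            wq = create_singlethread_workqueue("example");
            if (!wq)
                    return -ENOMEM;

            destroy_workqueue(wq);  /* memtrack_free() + real destroy */
            return 0;
    }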
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/Makefile b/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
deleted file mode 100644
index 7b81da0..0000000
--- a/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
+++ /dev/null
@@ -1,31 +0,0 @@
-# $FreeBSD$
-#.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
-#.PATH: ${.CURDIR}/../../../../include/linux
-
-.include <src.opts.mk>
-
-KMOD = mlx4ib
-SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
-#SRCS+= linux_compat.c linux_radix.c
-SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
-SRCS+= opt_inet.h opt_inet6.h
-
-#CFLAGS+= -I${.CURDIR}/../../ofed/include/
-CFLAGS+= -I${.CURDIR}/../../../../include
-CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
-
-.if !defined(KERNBUILDDIR)
-.if ${MK_INET_SUPPORT} != "no"
-opt_inet.h:
- @echo "#define INET 1" > ${.TARGET}
-.endif
-
-.if ${MK_INET6_SUPPORT} != "no"
-opt_inet6.h:
- @echo "#define INET6 1" > ${.TARGET}
-.endif
-.endif
-
-.include <bsd.kmod.mk>
-
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/ah.c b/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
index fe35e62..1c30fa9 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@@ -95,21 +94,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
- union ib_gid sgid;
- u8 mac[6];
- int err;
- int is_mcast;
+ int is_mcast = 0;
+ struct in6_addr in6;
u16 vlan_tag;
- err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
- if (err)
- return ERR_PTR(err);
-
- memcpy(ah->av.eth.mac, mac, 6);
- err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid);
- if (err)
- return ERR_PTR(err);
- vlan_tag = rdma_get_vlan_id(&sgid);
+ memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ if (rdma_is_multicast_addr(&in6)) {
+ is_mcast = 1;
+ resolve_mcast_mac(&in6, ah->av.eth.mac);
+ } else {
+ memcpy(ah->av.eth.mac, ah_attr->dmac, 6);
+ }
+ vlan_tag = ah_attr->vlan_id;
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
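
For the multicast branch, resolve_mcast_mac() derives the destination MAC from the GID itself. A hedged sketch of what such a helper plausibly computes for IPv6 multicast, following the standard RFC 2464 mapping (33:33 plus the low 32 bits of the group address); this is an illustration, not the driver's implementation:

    #include <stdint.h>
    #include <string.h>

    static void mcast_mac_from_in6(const uint8_t in6[16], uint8_t mac[6])
    {
            mac[0] = 0x33;
            mac[1] = 0x33;
            memcpy(&mac[2], &in6[12], 4);   /* low 32 bits of the address */
    }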
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c b/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
index 0738adc..17e646a 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -57,6 +57,7 @@ struct mlx4_alias_guid_work_context {
int query_id;
struct list_head list;
int block_num;
+ u8 method;
};
struct mlx4_next_alias_guid_work {
@@ -80,7 +81,8 @@ void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
- pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num,
+ (unsigned long long)guid_indexes);
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* The location of the specific index starts from bit number 4
@@ -144,7 +146,8 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
- pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num,
+ (unsigned long long)guid_indexes);
/*calculate the slaves and notify them*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -201,7 +204,7 @@ static void aliasguid_query_handler(int status,
{
struct mlx4_ib_dev *dev;
struct mlx4_alias_guid_work_context *cb_ctx = context;
- u8 port_index ;
+ u8 port_index;
int i;
struct mlx4_sriov_alias_guid_info_rec_det *rec;
unsigned long flags, flags1;
@@ -240,6 +243,18 @@ static void aliasguid_query_handler(int status,
for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
__be64 tmp_cur_ag;
tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+ if ((cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE)
+ && (MLX4_NOT_SET_GUID == tmp_cur_ag)) {
+ pr_debug("%s:Record num %d in block_num:%d "
+ "was deleted by SM,ownership by %d "
+ "(0 = driver, 1=sysAdmin, 2=None)\n",
+ __func__, i, guid_rec->block_num,
+ rec->ownership);
+ rec->guid_indexes = rec->guid_indexes &
+ ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ continue;
+ }
+
/* check if the SM didn't assign one of the records.
* if it didn't, if it was not sysadmin request:
* ask the SM to give a new GUID, (instead of the driver request).
@@ -379,7 +394,7 @@ static int set_guid_rec(struct ib_device *ibdev,
callback_context->port = port;
callback_context->dev = dev;
callback_context->block_num = index;
-
+ callback_context->method = rec_det->method;
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
guid_info_rec.lid = cpu_to_be16(attr.lid);
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/cm.c b/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
index 1bfbeee..3ff7600 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
@@ -33,6 +33,7 @@
#include <rdma/ib_mad.h>
#include <linux/mlx4/cmd.h>
+#include <linux/rbtree.h>
#include <linux/idr.h>
#include <rdma/ib_cm.h>
@@ -60,6 +61,11 @@ struct cm_generic_msg {
__be32 remote_comm_id;
};
+struct cm_sidr_generic_msg {
+ struct ib_mad_hdr hdr;
+ __be32 request_id;
+};
+
struct cm_req_msg {
unsigned char unused[0x60];
union ib_gid primary_path_sgid;
@@ -68,28 +74,62 @@ struct cm_req_msg {
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return;
+ } else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->local_comm_id = cpu_to_be32(cm_id);
+ }
}
static u32 get_local_comm_id(struct ib_mad *mad)
{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return -1;
+ } else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
return be32_to_cpu(msg->local_comm_id);
+ }
}
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return;
+ } else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->remote_comm_id = cpu_to_be32(cm_id);
+ }
}
static u32 get_remote_comm_id(struct ib_mad *mad)
{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return -1;
+ } else {
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
return be32_to_cpu(msg->remote_comm_id);
+ }
}
static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
@@ -285,19 +325,22 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
u32 sl_cm_id;
int pv_cm_id = -1;
- sl_cm_id = get_local_comm_id(mad);
-
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ sl_cm_id = get_local_comm_id(mad);
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
__func__, slave_id, sl_cm_id);
return PTR_ERR(id);
}
- } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
+ } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
return 0;
} else {
+ sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
}
@@ -323,7 +366,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
u32 pv_cm_id;
struct id_map_entry *id;
- if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
union ib_gid gid;
if (is_eth)
@@ -333,7 +377,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) {
mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
- (long long)gid.global.interface_id);
+ (unsigned long long)gid.global.interface_id);
return -ENOENT;
}
return 0;
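
The reason cm.c needs the second overlay struct is that the 32 bits after the MAD header mean different things per attribute: ordinary CM messages carry a local/remote comm-id pair, while SIDR messages carry a single request_id. A hedged kernel-context sketch of the reader logic, simplified to the two cases (the helper name is illustrative, and the real code above also rejects the mismatched SIDR direction):

    static u32 local_id_sketch(struct ib_mad *mad)
    {
            if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID)
                    return be32_to_cpu(
                            ((struct cm_sidr_generic_msg *)mad)->request_id);
            return be32_to_cpu(
                    ((struct cm_generic_msg *)mad)->local_comm_id);
    }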
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/cq.c b/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
index 293917a..52788c2 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
@@ -33,6 +33,7 @@
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
+#include <linux/mlx4/srq.h>
#include <linux/slab.h>
#include "mlx4_ib.h"
@@ -92,12 +93,33 @@ static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
return get_sw_cqe(cq, cq->mcq.cons_index);
}
-int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+int mlx4_ib_modify_cq(struct ib_cq *cq,
+ struct ib_cq_attr *cq_attr,
+ int cq_attr_mask)
{
+ int err = 0;
struct mlx4_ib_cq *mcq = to_mcq(cq);
struct mlx4_ib_dev *dev = to_mdev(cq->device);
- return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period);
+ if (cq_attr_mask & IB_CQ_CAP_FLAGS) {
+ if (cq_attr->cq_cap_flags & IB_CQ_TIMESTAMP)
+ return -ENOTSUPP;
+
+ if (cq_attr->cq_cap_flags & IB_CQ_IGNORE_OVERRUN) {
+ if (dev->dev->caps.cq_flags & MLX4_DEV_CAP_CQ_FLAG_IO)
+ err = mlx4_cq_ignore_overrun(dev->dev, &mcq->mcq);
+ else
+ err = -ENOSYS;
+ }
+ }
+
+ if (!err)
+ if (cq_attr_mask & IB_CQ_MODERATION)
+ err = mlx4_cq_modify(dev->dev, &mcq->mcq,
+ cq_attr->moderation.cq_count,
+ cq_attr->moderation.cq_period);
+
+ return err;
}
static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent)
@@ -173,7 +195,11 @@ err_buf:
return err;
}
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
+/* we don't support system timestamping */
+#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_TIMESTAMP
+
+struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
+ struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@@ -181,11 +207,16 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
struct mlx4_ib_cq *cq;
struct mlx4_uar *uar;
int err;
+ int entries = attr->cqe;
+ int vector = attr->comp_vector;
if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
- cq = kmalloc(sizeof *cq, GFP_KERNEL);
+ if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
+ return ERR_PTR(-EINVAL);
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@@ -195,6 +226,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+ cq->create_flags = attr->flags;
if (context) {
struct mlx4_ib_create_cq ucmd;
@@ -236,7 +268,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
vector = dev->eq_table[vector % ibdev->num_comp_vectors];
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
- cq->db.dma, &cq->mcq, vector, 0, 0);
+ cq->db.dma, &cq->mcq, vector, 0,
+ !!(cq->create_flags & IB_CQ_TIMESTAMP));
if (err)
goto err_dbmap;
@@ -331,21 +364,23 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
u32 i;
i = cq->mcq.cons_index;
- while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+ while (get_sw_cqe(cq, i))
++i;
return i - cq->mcq.cons_index;
}
-static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
+static int mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
int cqe_size = cq->buf.entry_size;
int cqe_inc = cqe_size == 64 ? 1 : 0;
+ struct mlx4_cqe *start_cqe;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
+ start_cqe = cqe;
cqe += cqe_inc;
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
@@ -357,9 +392,15 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+ if (cqe == start_cqe) {
+ pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", cq->mcq.cqn);
+ return -ENOMEM;
+ }
cqe += cqe_inc;
+
}
++cq->mcq.cons_index;
+ return 0;
}
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
@@ -374,7 +415,6 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
return -ENOSYS;
mutex_lock(&cq->resize_mutex);
-
if (entries < 1 || entries > dev->dev->caps.max_cqes) {
err = -EINVAL;
goto out;
@@ -386,6 +426,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
goto out;
}
+ if (entries > dev->dev->caps.max_cqes + 1) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (ibcq->uobject) {
err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
if (err)
@@ -425,7 +470,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
- mlx4_ib_cq_resize_copy_cqes(cq);
+ err = mlx4_ib_cq_resize_copy_cqes(cq);
tmp_buf = cq->buf;
tmp_cqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
@@ -580,7 +625,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
- unsigned tail, struct mlx4_cqe *cqe)
+ unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
@@ -590,12 +635,19 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
DMA_FROM_DEVICE);
hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
- wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
- wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
wc->dlid_path_bits = 0;
+ if (is_eth) {
+ wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
+ memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
+ memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
+ } else {
+ wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
+ wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
+ }
+
return 0;
}
@@ -607,11 +659,14 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
struct mlx4_ib_srq *srq;
+ struct mlx4_srq *msrq = NULL;
int is_send;
int is_error;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
unsigned tail = 0;
+ int timestamp_en = !!(cq->create_flags & IB_CQ_TIMESTAMP);
+
repoll:
cqe = next_cqe_sw(cq);
@@ -675,6 +730,20 @@ repoll:
wc->qp = &(*cur_qp)->ibqp;
+ if (wc->qp->qp_type == IB_QPT_XRC_TGT) {
+ u32 srq_num;
+ g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
+ srq_num = g_mlpath_rqpn & 0xffffff;
+ /* SRQ is also in the radix tree */
+ msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
+ srq_num);
+ if (unlikely(!msrq)) {
+ pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
+ cq->mcq.cqn, srq_num);
+ return -EINVAL;
+ }
+ }
+
if (is_send) {
wq = &(*cur_qp)->sq;
if (!(*cur_qp)->sq_signal_bits) {
@@ -688,6 +757,11 @@ repoll:
wqe_ctr = be16_to_cpu(cqe->wqe_index);
wc->wr_id = srq->wrid[wqe_ctr];
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
+ } else if (msrq) {
+ srq = to_mibsrq(msrq);
+ wqe_ctr = be16_to_cpu(cqe->wqe_index);
+ wc->wr_id = srq->wrid[wqe_ctr];
+ mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
tail = wq->tail & (wq->wqe_cnt - 1);
@@ -707,6 +781,7 @@ repoll:
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ /* fall through */
case MLX4_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
@@ -778,10 +853,31 @@ repoll:
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
- return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
+ return use_tunnel_data
+ (*cur_qp, cq, wc, tail, cqe,
+ rdma_port_get_link_layer
+ (wc->qp->device,
+ (*cur_qp)->port) ==
+ IB_LINK_LAYER_ETHERNET);
}
+ if (timestamp_en) {
+ /* currently, only CQ_CREATE_WITH_TIMESTAMPING_RAW is
+ * supported. CQ_CREATE_WITH_TIMESTAMPING_SYS isn't
+ * supported */
+ if (cq->create_flags & IB_CQ_TIMESTAMP_TO_SYS_TIME) {
+ wc->ts.timestamp = 0;
+ } else {
+ wc->ts.timestamp =
+ ((u64)(be32_to_cpu(cqe->timestamp_16_47)
+ + !cqe->timestamp_0_15) << 16)
+ | be16_to_cpu(cqe->timestamp_0_15);
+ wc->wc_flags |= IB_WC_WITH_TIMESTAMP;
+ }
+ } else {
+ wc->wc_flags |= IB_WC_WITH_SLID;
wc->slid = be16_to_cpu(cqe->rlid);
+ }
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
@@ -789,11 +885,27 @@ repoll:
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
+ if (!timestamp_en) {
if (rdma_port_get_link_layer(wc->qp->device,
- (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
+ (*cur_qp)->port) ==
+ IB_LINK_LAYER_ETHERNET)
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
else
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
+ wc->wc_flags |= IB_WC_WITH_SL;
+ }
+ if ((be32_to_cpu(cqe->vlan_my_qpn) &
+ MLX4_CQE_VLAN_PRESENT_MASK) && !timestamp_en) {
+ wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
+ MLX4_CQE_VID_MASK;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+ if (!timestamp_en) {
+ memcpy(wc->smac, cqe->smac, 6);
+ wc->wc_flags |= IB_WC_WITH_SMAC;
+ }
}
return 0;
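
The timestamp path in mlx4_ib_poll_one() reassembles a 48-bit counter that the CQE stores as a 32-bit high part and a 16-bit low part (both big-endian on the wire). The "+ !lo" term appears to compensate for the low half having just wrapped to zero between the two samples, which keeps the combined value monotonic. A hedged host-order sketch of the arithmetic:

    #include <stdint.h>

    /* ts_16_47 and ts_0_15 are assumed already byte-swapped to host order */
    static uint64_t cqe_timestamp(uint32_t ts_16_47, uint16_t ts_0_15)
    {
            return ((uint64_t)(ts_16_47 + !ts_0_15) << 16) | ts_0_15;
    }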
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c b/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c
index 8aee423..c517409 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/doorbell.c
@@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
- struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
@@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
- db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
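
The doorbell change only swaps where the scatterlist comes from; the address math is unchanged: the umem maps the whole page containing virt, so the doorbell's bus address is that page's DMA address plus virt's offset within the page. A small hedged sketch of the computation:

    #include <stdint.h>

    #define SKETCH_PAGE_SIZE 4096UL
    #define SKETCH_PAGE_MASK (~(SKETCH_PAGE_SIZE - 1))

    static uint64_t db_dma_sketch(uint64_t page_dma, uint64_t virt)
    {
            return page_dma + (virt & ~SKETCH_PAGE_MASK);
    }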
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
index 74bbf5c..bd36931 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
@@ -545,11 +545,32 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* adjust tunnel data */
tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
- tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
- tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+ if (is_eth) {
+ u16 vlan = 0;
+ if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
+ NULL)) {
+ if (vlan != wc->vlan_id)
+ /* VST mode and the default vlan is not the
+ * packet's vlan: drop the packet */
+ goto out;
+ else
+ /* VST mode: hide the vlan from the VF */
+ vlan = 0;
+ } else {
+ vlan = wc->vlan_id;
+ }
+
+ tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
+ memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
+ memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
+ } else {
+ tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+ tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ }
+
ib_dma_sync_single_for_device(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
sizeof (struct mlx4_rcv_tunnel_mad),
@@ -696,12 +717,11 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
be16_to_cpu(in_mad->mad_hdr.attr_id));
if (in_wc->wc_flags & IB_WC_GRH) {
pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
- (long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix),
- (long long)
- be64_to_cpu(in_grh->sgid.global.interface_id));
+ (unsigned long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix),
+ (unsigned long long)be64_to_cpu(in_grh->sgid.global.interface_id));
pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
- (long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix),
- (long long)be64_to_cpu(in_grh->dgid.global.interface_id));
+ (unsigned long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix),
+ (unsigned long long)be64_to_cpu(in_grh->dgid.global.interface_id));
}
}
@@ -946,7 +966,7 @@ int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
- MLX4_CMD_WRAPPED);
+ MLX4_CMD_NATIVE);
if (!err)
memcpy(counter, mailbox->buf, MLX4_IF_STAT_SZ(1));
@@ -961,7 +981,7 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int err;
- u32 counter_index = dev->counters[port_num - 1] & 0xffff;
+ u32 counter_index = dev->counters[port_num - 1].counter_index & 0xffff;
u8 mode;
char counter_buf[MLX4_IF_STAT_SZ(1)];
union mlx4_counter *counter = (union mlx4_counter *)
@@ -970,10 +990,16 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
return -EINVAL;
- if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0)) {
- err = IB_MAD_RESULT_FAILURE;
- } else {
+ /* in case of a default counter, IB shares the counter with ETH; */
+ /* the status could be -EEXIST or -ENOSPC */
+ if (dev->counters[port_num - 1].status) {
memset(out_mad->data, 0, sizeof out_mad->data);
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ } else {
+ if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0))
+ return IB_MAD_RESULT_FAILURE;
+
+ memset(out_mad->data, 0, sizeof(out_mad->data));
mode = counter->control.cnt_mode & 0xFF;
err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
switch (mode & 0xf) {
@@ -992,7 +1018,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
}
}
-
return err;
}
@@ -1179,6 +1204,11 @@ void handle_port_mgmt_change_event(struct work_struct *work)
u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
update_sm_ah(dev, port, lid, sl);
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_SM_CHANGE);
+ if (mlx4_is_master(dev->dev))
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
+ changed_attr & MSTR_SM_CHANGE_MASK,
+ lid, sl);
}
/* Check if it is a lid change event */
@@ -1295,8 +1325,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
- enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+ enum ib_qp_type dest_qpt, u16 pkey_index,
+ u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u8 *s_mac, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
@@ -1385,6 +1416,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
+ if (s_mac)
+ memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+
ret = ib_post_send(send_qp, &wr, &bad_wr);
out:
@@ -1512,6 +1546,11 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
if (ah_attr.ah_flags & IB_AH_GRH)
if (get_real_sgid_index(dev, slave, ctx->port, &ah_attr))
return;
+ memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ ah_attr.vlan_id = tunnel->hdr.vlan;
+ /* if slave have default vlan use it */
+ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &ah_attr.vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1519,7 +1558,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, &tunnel->mad);
+ &ah_attr, wc->smac, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1564,6 +1603,12 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->ring[i].addr,
rx_buf_size,
DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ctx->ib_dev,
+ tun_qp->ring[i].map))) {
+ mlx4_ib_warn(ctx->ib_dev, "ib_dma_map_single failed\n");
+ kfree(tun_qp->ring[i].addr);
+ goto err;
+ }
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1576,6 +1621,12 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->tx_ring[i].buf.addr,
tx_buf_size,
DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ctx->ib_dev,
+ tun_qp->tx_ring[i].buf.map))) {
+ mlx4_ib_warn(ctx->ib_dev, "ib_dma_map_single failed\n");
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ goto tx_err;
+ }
tun_qp->tx_ring[i].ah = NULL;
}
spin_lock_init(&tun_qp->tx_lock);
@@ -1664,12 +1715,12 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
(MLX4_NUM_TUNNEL_BUFS - 1));
if (ret)
pr_err("Failed reposting tunnel "
- "buf:%lld\n", (long long)wc.wr_id);
+ "buf:%lld\n", (unsigned long long)wc.wr_id);
break;
case IB_WC_SEND:
pr_debug("received tunnel send completion:"
"wrid=0x%llx, status=0x%x\n",
- (long long)wc.wr_id, wc.status);
+ (unsigned long long)wc.wr_id, wc.status);
ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
@@ -1685,7 +1736,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
} else {
pr_debug("mlx4_ib: completion error in tunnel: %d."
" status = %d, wrid = 0x%llx\n",
- ctx->slave, wc.status, (long long)wc.wr_id);
+ ctx->slave, wc.status, (unsigned long long)wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
@@ -1757,6 +1808,11 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
memset(&attr, 0, sizeof attr);
attr.qp_state = IB_QPS_INIT;
+ ret = 0;
+ if (create_tun)
+ ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
+ ctx->port, 0xFFFF, &attr.pkey_index);
+ if (ret || !create_tun)
attr.pkey_index =
to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
attr.qkey = IB_QP1_QKEY;
@@ -1837,7 +1893,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)))
pr_err("Failed reposting SQP "
- "buf:%lld\n", (long long)wc.wr_id);
+ "buf:%lld\n", (unsigned long long)wc.wr_id);
break;
default:
BUG_ON(1);
@@ -1846,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
} else {
pr_debug("mlx4_ib: completion error in tunnel: %d."
" status = %d, wrid = 0x%llx\n",
- ctx->slave, wc.status, (long long)wc.wr_id);
+ ctx->slave, wc.status, (unsigned long long)wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
ib_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
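
The VST handling added to mlx4_ib_send_to_slave() enforces the administratively set default vlan: a tunneled packet with any other tag is dropped, and a matching tag is hidden from the VF by forwarding vlan 0. A hedged sketch of that decision (names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns true if the packet may be tunneled to the slave; on
     * success *tun_vlan is the tag the VF should see. */
    static bool vst_filter(bool has_default_vlan, uint16_t default_vlan,
                           uint16_t pkt_vlan, uint16_t *tun_vlan)
    {
            if (has_default_vlan) {
                    if (pkt_vlan != default_vlan)
                            return false;   /* wrong vlan: drop */
                    *tun_vlan = 0;          /* hide the vlan from the VF */
            } else {
                    *tun_vlan = pkt_vlan;
            }
            return true;
    }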
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/main.c b/sys/ofed/drivers/infiniband/hw/mlx4/main.c
index fd0b723..bdcffbe 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/main.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/main.c
@@ -32,37 +32,37 @@
*/
#include <linux/module.h>
-
-#ifdef __linux__
-#include <linux/proc_fs.h>
-#endif
-
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_vlan.h>
-#include <linux/bitops.h>
-#include <linux/if_ether.h>
#include <linux/fs.h>
+#include <net/ipv6.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_user_verbs_exp.h>
#include <rdma/ib_addr.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
#include <linux/sched.h>
+#include <linux/page.h>
+#include <linux/printk.h>
#include "mlx4_ib.h"
+#include "mlx4_exp.h"
#include "user.h"
#include "wc.h"
#define DRV_NAME MLX4_IB_DRV_NAME
#define DRV_VERSION "1.0"
-#define DRV_RELDATE "April 4, 2008"
+#define DRV_RELDATE __DATE__
#define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib"
#define MLX4_IB_MRS_PROC_DIR_NAME "mrs"
+#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -73,20 +73,30 @@ MODULE_VERSION(DRV_VERSION);
int mlx4_ib_sm_guid_assign = 1;
-#ifdef __linux__
-struct proc_dir_entry *mlx4_mrs_dir_entry;
-static struct proc_dir_entry *mlx4_ib_driver_dir_entry;
-#endif
-
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
-static char dev_assign_str[512];
-//module_param_string(dev_assign_str, dev_assign_str, sizeof(dev_assign_str), 0644);
-MODULE_PARM_DESC(dev_assign_str, "Map all device function numbers to "
- "IB device numbers following the pattern: "
- "bb:dd.f-0,bb:dd.f-1,... (all numbers are hexadecimals)."
- " Max supported devices - 32");
+enum {
+ MAX_NUM_STR_BITMAP = 1 << 15,
+ DEFAULT_TBL_VAL = -1
+};
+
+static struct mlx4_dbdf2val_lst dev_assign_str = {
+ .name = "dev_assign_str param",
+ .num_vals = 1,
+ .def_val = {DEFAULT_TBL_VAL},
+ .range = {0, MAX_NUM_STR_BITMAP - 1}
+};
+module_param_string(dev_assign_str, dev_assign_str.str,
+ sizeof(dev_assign_str.str), 0444);
+MODULE_PARM_DESC(dev_assign_str,
+ "Map device function numbers to IB device numbers (e.g. '0000:04:00.0-0,002b:1c:0b.a-1,...').\n"
+ "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for IB device numbers (e.g. 1).\n"
+ "\t\tMax supported devices - 32");
+
+
+static unsigned long *dev_num_str_bitmap;
+static spinlock_t dev_num_str_lock;
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
@@ -106,11 +116,16 @@ struct dev_rec {
int nr;
};
-#define MAX_DR 32
-static struct dev_rec dr[MAX_DR];
+static int dr_active;
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, struct net_device*,
+ unsigned long);
+
+static u8 mlx4_ib_get_dev_port(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev);
+
static struct workqueue_struct *wq;
static void init_query_mad(struct ib_smp *mad)
@@ -123,7 +138,30 @@ static void init_query_mad(struct ib_smp *mad)
static union ib_gid zgid;
-static int mlx4_ib_query_device(struct ib_device *ibdev,
+static int check_flow_steering_support(struct mlx4_dev *dev)
+{
+ int eth_num_ports = 0;
+ int ib_num_ports = 0;
+ int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+ if (dmfs) {
+ int i;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ eth_num_ports++;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+ dmfs &= (!ib_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
+ (!eth_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
+ if (ib_num_ports && mlx4_is_mfunc(dev)) {
+ dmfs = 0;
+ }
+ }
+ return dmfs;
+}
+
+int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -174,12 +212,26 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_CROSS_CHANNEL)
+ props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
+ if (check_flow_steering_support(dev->dev))
+ props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
+
props->device_cap_flags |= IB_DEVICE_QPG;
if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
props->device_cap_flags |= IB_DEVICE_UD_RSS;
props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz;
}
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
+ if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
+ if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
+ else
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
+ }
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->pdev->device;
@@ -213,6 +265,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
+ props->hca_core_clock = dev->dev->caps.hca_core_clock;
+ if (dev->dev->caps.hca_core_clock > 0)
+ props->comp_mask |= IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK;
+ if (dev->dev->caps.cq_timestamp) {
+ props->timestamp_mask = 0xFFFFFFFFFFFF;
+ props->comp_mask |= IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK;
+ }
out:
kfree(in_mad);
@@ -334,6 +393,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
struct net_device *ndev;
enum ib_mtu tmp;
struct mlx4_cmd_mailbox *mailbox;
+ unsigned long flags;
int err = 0;
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
@@ -362,7 +422,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
- spin_lock(&iboe->lock);
+ spin_lock_irqsave(&iboe->lock, flags);
ndev = iboe->netdevs[port - 1];
if (!ndev)
goto out_unlock;
@@ -374,7 +434,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_ACTIVE : IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
out_unlock:
- spin_unlock(&iboe->lock);
+ spin_unlock_irqrestore(&iboe->lock, flags);
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
@@ -674,7 +734,9 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
-#ifdef __linux__
+
+/* XXX FreeBSD has no support for the get_unmapped_area function */
+#if 0
static unsigned long mlx4_ib_get_unmapped_area(struct file *file,
unsigned long addr,
unsigned long len, unsigned long pgoff,
@@ -732,7 +794,6 @@ full_search:
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
- int err;
/* Last 8 bits hold the command, the others are data per that command */
unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK;
@@ -758,31 +819,81 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
- } else if (command == MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) {
- /* Getting contiguous physical pages */
- unsigned long total_size = vma->vm_end - vma->vm_start;
- unsigned long page_size_order = (vma->vm_pgoff) >>
- MLX4_IB_MMAP_CMD_BITS;
- struct ib_cmem *ib_cmem;
- ib_cmem = ib_cmem_alloc_contiguous_pages(context, total_size,
- page_size_order);
- if (IS_ERR(ib_cmem)) {
- err = PTR_ERR(ib_cmem);
- return err;
- }
+ } else if (command == MLX4_IB_MMAP_GET_HW_CLOCK) {
+ struct mlx4_clock_params params;
+ int ret;
- err = ib_cmem_map_contiguous_pages_to_vma(ib_cmem, vma);
- if (err) {
- ib_cmem_release_contiguous_pages(ib_cmem);
- return err;
- }
- return 0;
+ ret = mlx4_get_internal_clock_params(dev->dev, &params);
+ if (ret)
+ return ret;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ (pci_resource_start(dev->dev->pdev,
+ params.bar) + params.offset)
+ >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
} else
return -EINVAL;
return 0;
}
+static int mlx4_ib_ioctl(struct ib_ucontext *context, unsigned int cmd,
+ unsigned long arg)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->device);
+ int ret;
+ int offset;
+
+ switch (cmd) {
+ case MLX4_IOCHWCLOCKOFFSET: {
+ struct mlx4_clock_params params;
+ int ret;
+ ret = mlx4_get_internal_clock_params(dev->dev, &params);
+ if (!ret) {
+ offset = params.offset % PAGE_SIZE;
+ ret = put_user(offset,
+ (int *)arg);
+ return sizeof(int);
+ } else {
+ return ret;
+ }
+ }
+ default: {
+ pr_err("mlx4_ib: invalid ioctl %u command with arg %lX\n",
+ cmd, arg);
+ return -ENOTTY;
+ }
+ }
+
+ return ret;
+}
+
+static int mlx4_ib_query_values(struct ib_device *device, int q_values,
+ struct ib_device_values *values)
+{
+ struct mlx4_ib_dev *dev = to_mdev(device);
+ cycle_t cycles;
+
+ values->values_mask = 0;
+ if (q_values & IBV_VALUES_HW_CLOCK) {
+ cycles = mlx4_read_clock(dev->dev);
+ if (cycles < 0) {
+ values->hwclock = cycles & CORE_CLOCK_MASK;
+ values->values_mask |= IBV_VALUES_HW_CLOCK;
+ }
+ q_values &= ~IBV_VALUES_HW_CLOCK;
+ }
+
+ if (q_values)
+ return -ENOTTY;
+
+ return 0;
+}
+
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
@@ -926,258 +1037,220 @@ struct mlx4_ib_steering {
union ib_gid gid;
};
-static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+static int parse_flow_attr(struct mlx4_dev *dev,
+ union ib_flow_spec *ib_spec,
+ struct _rule_hw *mlx4_spec)
{
- int err;
- struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
- struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u64 reg_id;
- struct mlx4_ib_steering *ib_steering = NULL;
+ enum mlx4_net_trans_rule_id type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ type = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
+ ETH_ALEN);
+ memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
+ ETH_ALEN);
+ mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
+ mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
+ break;
- if (mdev->dev->caps.steering_mode ==
- MLX4_STEERING_MODE_DEVICE_MANAGED) {
- ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
- if (!ib_steering)
- return -ENOMEM;
- }
+ case IB_FLOW_SPEC_IB:
+ type = MLX4_NET_TRANS_RULE_ID_IB;
+ mlx4_spec->ib.l3_qpn = ib_spec->ib.val.l3_type_qpn;
+ mlx4_spec->ib.qpn_mask = ib_spec->ib.mask.l3_type_qpn;
+ memcpy(&mlx4_spec->ib.dst_gid, ib_spec->ib.val.dst_gid, 16);
+ memcpy(&mlx4_spec->ib.dst_gid_msk,
+ ib_spec->ib.mask.dst_gid, 16);
+ break;
- err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
- !!(mqp->flags &
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- MLX4_PROT_IB_IPV6, &reg_id);
- if (err)
- goto err_malloc;
+ case IB_FLOW_SPEC_IPV4:
+ type = MLX4_NET_TRANS_RULE_ID_IPV4;
+ mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
+ mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
+ mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
+ mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
+ break;
- err = add_gid_entry(ibqp, gid);
- if (err)
- goto err_add;
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ type = ib_spec->type == IB_FLOW_SPEC_TCP ?
+ MLX4_NET_TRANS_RULE_ID_TCP :
+ MLX4_NET_TRANS_RULE_ID_UDP;
+ mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
+ mlx4_spec->tcp_udp.dst_port_msk =
+ ib_spec->tcp_udp.mask.dst_port;
+ mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
+ mlx4_spec->tcp_udp.src_port_msk =
+ ib_spec->tcp_udp.mask.src_port;
+ break;
- if (ib_steering) {
- memcpy(ib_steering->gid.raw, gid->raw, 16);
- ib_steering->reg_id = reg_id;
- mutex_lock(&mqp->mutex);
- list_add(&ib_steering->list, &mqp->steering_rules);
- mutex_unlock(&mqp->mutex);
+ default:
+ return -EINVAL;
}
- return 0;
-
-err_add:
- mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
-err_malloc:
- kfree(ib_steering);
-
- return err;
+ if (map_sw_to_hw_steering_id(dev, type) < 0 ||
+ hw_rule_sz(dev, type) < 0)
+ return -EINVAL;
+ mlx4_spec->id = cpu_to_be16(map_sw_to_hw_steering_id(dev, type));
+ mlx4_spec->size = hw_rule_sz(dev, type) >> 2;
+ return hw_rule_sz(dev, type);
}
-enum {
- IBV_FLOW_L4_NONE = 0,
- IBV_FLOW_L4_OTHER = 3,
- IBV_FLOW_L4_UDP = 5,
- IBV_FLOW_L4_TCP = 6
-};
-
-struct mlx4_cm_steering {
- struct list_head list;
- u64 reg_id;
- struct ib_flow_spec spec;
-};
-
-static int flow_spec_to_net_rule(struct ib_device *dev, struct ib_flow_spec *flow_spec,
- struct list_head *rule_list_h)
+static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
+ int domain,
+ enum mlx4_net_trans_promisc_mode flow_type,
+ u64 *reg_id)
{
- struct mlx4_spec_list *spec_l2, *spec_l3, *spec_l4;
- u64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
-
- spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
- if (!spec_l2)
- return -ENOMEM;
+ int ret, i;
+ int size = 0;
+ void *ib_flow;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->device);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
+ (sizeof(struct _rule_hw) * flow_attr->num_of_specs);
+
+ static const u16 __mlx4_domain[] = {
+ [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
+ [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
+ [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
+ [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
+ };
- switch (flow_spec->type) {
- case IB_FLOW_ETH:
- spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
- memcpy(spec_l2->eth.dst_mac, flow_spec->l2_id.eth.mac, ETH_ALEN);
- memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
- spec_l2->eth.ether_type = flow_spec->l2_id.eth.ethertype;
- if (flow_spec->l2_id.eth.vlan_present) {
- spec_l2->eth.vlan_id = flow_spec->l2_id.eth.vlan;
- spec_l2->eth.vlan_id_msk = cpu_to_be16(0x0fff);
- }
- break;
- case IB_FLOW_IB_UC:
- spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB;
- if(flow_spec->l2_id.ib_uc.qpn) {
- spec_l2->ib.l3_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn);
- spec_l2->ib.qpn_msk = cpu_to_be32(0xffffff);
+ if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
+ pr_err("Invalid priority value.\n");
+ return -EINVAL;
}
- break;
- case IB_FLOW_IB_MC_IPV4:
- case IB_FLOW_IB_MC_IPV6:
- spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB;
- memcpy(spec_l2->ib.dst_gid, flow_spec->l2_id.ib_mc.mgid, 16);
- memset(spec_l2->ib.dst_gid_msk, 0xff, 16);
- break;
+ if (domain >= IB_FLOW_DOMAIN_NUM) {
+ pr_err("Invalid domain value.\n");
+ return -EINVAL;
}
+ if (map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
+ return -EINVAL;
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ memset(mailbox->buf, 0, rule_size);
+ ctrl = mailbox->buf;
+
+ ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
+ flow_attr->priority);
+ ctrl->type = map_sw_to_hw_steering_mode(mdev->dev, flow_type);
+ ctrl->port = flow_attr->port;
+ ctrl->qpn = cpu_to_be32(qp->qp_num);
+
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK)
+ ctrl->flags = (1 << 3);
+
+ ib_flow = flow_attr + 1;
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ for (i = 0; i < flow_attr->num_of_specs; i++) {
+ ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ size += ret;
+ }
- list_add_tail(&spec_l2->list, rule_list_h);
+ ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (ret == -ENOMEM)
+ pr_err("mcg table is full. Failed to register network rule.\n");
+ else if (ret == -ENXIO)
+ pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
+ else if (ret)
+ pr_err("Invalid argument. Failed to register network rule.\n");
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return ret;
+}
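
__mlx4_ib_create_flow() walks the variable-length spec array that the caller appends after struct ib_flow_attr: each union ib_flow_spec begins with a type/size header, so the cursor advances by the spec's own size while parse_flow_attr() translates it into a hardware rule at mailbox->buf + size. A minimal standalone sketch of that walk — spec_hdr and its fields are illustrative, not the verbs definitions:

    /* Sketch of walking a packed, variable-length spec list; the
     * type/size prefix mirrors union ib_flow_spec, but spec_hdr and
     * the callback are illustrative, not kernel definitions. */
    #include <stddef.h>
    #include <stdint.h>

    struct spec_hdr {
        uint32_t type;
        uint16_t size;  /* total size of this spec, header included */
    };

    static int walk_specs(void *buf, int num_specs,
                          int (*emit)(struct spec_hdr *))
    {
        char *cur = buf;
        int i, ret;

        for (i = 0; i < num_specs; i++) {
            struct spec_hdr *hdr = (struct spec_hdr *)cur;

            ret = emit(hdr);     /* analogous to parse_flow_attr() */
            if (ret < 0)
                return ret;
            cur += hdr->size;    /* advance by the spec's own size */
        }
        return 0;
    }
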
- if (flow_spec->l2_id.eth.ethertype == cpu_to_be16(ETH_P_IP) ||
- flow_spec->type != IB_FLOW_ETH) {
- spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
- if (!spec_l3)
- return -ENOMEM;
+static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+ err = mlx4_cmd(dev, reg_id, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ pr_err("Fail to detach network rule. registration id = 0x%llx\n",
+ (unsigned long long)reg_id);
+ return err;
+}
- spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
- spec_l3->ipv4.src_ip = flow_spec->src_ip;
- if (flow_spec->type != IB_FLOW_IB_MC_IPV4 &&
- flow_spec->type != IB_FLOW_IB_MC_IPV6)
- spec_l3->ipv4.dst_ip = flow_spec->dst_ip;
+static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ int err = 0, i = 0;
+ struct mlx4_ib_flow *mflow;
+ enum mlx4_net_trans_promisc_mode type[2];
- if (spec_l3->ipv4.src_ip)
- spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK;
- if (spec_l3->ipv4.dst_ip)
- spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK;
+ memset(type, 0, sizeof(type));
- list_add_tail(&spec_l3->list, rule_list_h);
+ mflow = kzalloc(sizeof(struct mlx4_ib_flow), GFP_KERNEL);
+ if (!mflow) {
+ err = -ENOMEM;
+ goto err_free;
}
- if (flow_spec->l4_protocol) {
- spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL);
- if (!spec_l4)
- return -ENOMEM;
-
- spec_l4->tcp_udp.src_port = flow_spec->src_port;
- spec_l4->tcp_udp.dst_port = flow_spec->dst_port;
- if (spec_l4->tcp_udp.src_port)
- spec_l4->tcp_udp.src_port_msk =
- MLX4_BE_SHORT_MASK;
- if (spec_l4->tcp_udp.dst_port)
- spec_l4->tcp_udp.dst_port_msk =
- MLX4_BE_SHORT_MASK;
-
- switch (flow_spec->l4_protocol) {
- case IBV_FLOW_L4_UDP:
- spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ type[0] = MLX4_FS_REGULAR;
break;
- case IBV_FLOW_L4_TCP:
- spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
- break;
- default:
- dev_err(dev->dma_device,
- "Unsupported l4 protocol.\n");
- kfree(spec_l4);
- return -EPROTONOSUPPORT;
- }
- list_add_tail(&spec_l4->list, rule_list_h);
- }
- return 0;
-}
-
-static int __mlx4_ib_flow_attach(struct mlx4_ib_dev *mdev,
- struct mlx4_ib_qp *mqp,
- struct ib_flow_spec *flow_spec,
- int priority, int lock_qp)
-{
- u64 reg_id = 0;
- int err = 0;
- struct mlx4_cm_steering *cm_flow;
- struct mlx4_spec_list *spec, *tmp_spec;
- struct mlx4_net_trans_rule rule =
- { .queue_mode = MLX4_NET_TRANS_Q_FIFO,
- .exclusive = 0,
- };
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ type[0] = MLX4_FS_ALL_DEFAULT;
+ break;
- rule.promisc_mode = flow_spec->rule_type;
- rule.port = mqp->port;
- rule.qpn = mqp->mqp.qpn;
- INIT_LIST_HEAD(&rule.list);
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ type[0] = MLX4_FS_MC_DEFAULT;
+ break;
- cm_flow = kmalloc(sizeof(*cm_flow), GFP_KERNEL);
- if (!cm_flow)
- return -ENOMEM;
+ case IB_FLOW_ATTR_SNIFFER:
+ type[0] = MLX4_FS_UC_SNIFFER;
+ type[1] = MLX4_FS_MC_SNIFFER;
+ break;
- if (rule.promisc_mode == MLX4_FS_REGULAR) {
- rule.allow_loopback = !flow_spec->block_mc_loopback;
- rule.priority = MLX4_DOMAIN_UVERBS | priority;
- err = flow_spec_to_net_rule(&mdev->ib_dev, flow_spec,
- &rule.list);
- if (err)
- goto free_list;
+ default:
+ err = -EINVAL;
+ goto err_free;
}
- err = mlx4_flow_attach(mdev->dev, &rule, &reg_id);
+ while (i < ARRAY_SIZE(type) && type[i]) {
+ err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
+ &mflow->reg_id[i]);
if (err)
- goto free_list;
-
- memcpy(&cm_flow->spec, flow_spec, sizeof(*flow_spec));
- cm_flow->reg_id = reg_id;
+ goto err_free;
+ i++;
+ }
- if (lock_qp)
- mutex_lock(&mqp->mutex);
- list_add(&cm_flow->list, &mqp->rules_list);
- if (lock_qp)
- mutex_unlock(&mqp->mutex);
+ return &mflow->ibflow;
-free_list:
- list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) {
- list_del(&spec->list);
- kfree(spec);
- }
- if (err) {
- kfree(cm_flow);
- dev_err(mdev->ib_dev.dma_device,
- "Fail to attach flow steering rule\n");
- }
- return err;
+err_free:
+ kfree(mflow);
+ return ERR_PTR(err);
}
-static int __mlx4_ib_flow_detach(struct mlx4_ib_dev *mdev,
- struct mlx4_ib_qp *mqp,
- struct ib_flow_spec *spec, int priority,
- int lock_qp)
+static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
{
- struct mlx4_cm_steering *cm_flow;
- int ret;
+ int err, ret = 0;
+ int i = 0;
+ struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
+ struct mlx4_ib_flow *mflow = to_mflow(flow_id);
- if (lock_qp)
- mutex_lock(&mqp->mutex);
- list_for_each_entry(cm_flow, &mqp->rules_list, list) {
- if (!memcmp(&cm_flow->spec, spec, sizeof(*spec))) {
- list_del(&cm_flow->list);
- break;
- }
- }
- if (lock_qp)
- mutex_unlock(&mqp->mutex);
-
- if (&cm_flow->list == &mqp->rules_list) {
- dev_err(mdev->ib_dev.dma_device, "Couldn't find reg_id for flow spec. "
- "Steering rule is left attached\n");
- return -EINVAL;
+ while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+ if (err)
+ ret = err;
+ i++;
}
- ret = mlx4_flow_detach(mdev->dev, cm_flow->reg_id);
-
- kfree(cm_flow);
+ kfree(mflow);
return ret;
}
-static int mlx4_ib_flow_attach(struct ib_qp *qp, struct ib_flow_spec *flow_spec,
- int priority)
-{
- return __mlx4_ib_flow_attach(to_mdev(qp->device), to_mqp(qp),
- flow_spec, priority, 1);
-}
-
-static int mlx4_ib_flow_detach(struct ib_qp *qp, struct ib_flow_spec *spec,
- int priority)
-{
- return __mlx4_ib_flow_detach(to_mdev(qp->device), to_mqp(qp),
- spec, priority, 1);
-}
-
static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
{
struct mlx4_ib_gid_entry *ge;
@@ -1194,40 +1267,14 @@ static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
return ret;
}
-static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+
+static int del_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
- int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u8 mac[6];
- struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
- u64 reg_id = 0;
-
- if (mdev->dev->caps.steering_mode ==
- MLX4_STEERING_MODE_DEVICE_MANAGED) {
- struct mlx4_ib_steering *ib_steering;
-
- mutex_lock(&mqp->mutex);
- list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
- if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
- list_del(&ib_steering->list);
- break;
- }
- }
- mutex_unlock(&mqp->mutex);
- if (&ib_steering->list == &mqp->steering_rules) {
- pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
- return -EINVAL;
- }
- reg_id = ib_steering->reg_id;
- kfree(ib_steering);
- }
-
- err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
- if (err)
- return err;
+ struct net_device *ndev;
+ u8 mac[6];
mutex_lock(&mqp->mutex);
ge = find_gid_entry(mqp, gid->raw);
@@ -1250,8 +1297,174 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
pr_warn("could not find mgid entry\n");
mutex_unlock(&mqp->mutex);
+ return ge ? 0 : -EINVAL;
+}
+
+static int _mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid,
+ int count)
+{
+ int err;
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ u64 reg_id = 0;
+ int record_err = 0;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ struct mlx4_ib_steering *ib_steering;
+ struct mlx4_ib_steering *tmp;
+ LIST_HEAD(temp);
+
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry_safe(ib_steering, tmp, &mqp->steering_rules,
+ list) {
+ if (memcmp(ib_steering->gid.raw, gid->raw, 16))
+ continue;
+
+ if (--count < 0)
+ break;
+
+ list_del(&ib_steering->list);
+ list_add(&ib_steering->list, &temp);
+ }
+ mutex_unlock(&mqp->mutex);
+ list_for_each_entry_safe(ib_steering, tmp, &temp,
+ list) {
+ reg_id = ib_steering->reg_id;
+
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp,
+ gid->raw,
+ (ibqp->qp_type == IB_QPT_RAW_PACKET) ?
+ MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
+ reg_id);
+ if (err) {
+ record_err = record_err ?: err;
+ continue;
+ }
+
+ err = del_gid_entry(ibqp, gid);
+ if (err) {
+ record_err = record_err ?: err;
+ continue;
+ }
+
+ list_del(&ib_steering->list);
+ kfree(ib_steering);
+ }
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry_safe(ib_steering, tmp, &temp, list) {
+ list_del(&ib_steering->list);
+ list_add(&ib_steering->list, &mqp->steering_rules);
+ }
+ mutex_unlock(&mqp->mutex);
+ if (count) {
+ pr_warn("Couldn't release all reg_ids for mgid. Steering rule is left attached\n");
+ return -EINVAL;
+ }
+
+ } else {
+ if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 &&
+ ibqp->qp_type == IB_QPT_RAW_PACKET)
+ gid->raw[5] = mqp->port;
+
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ (ibqp->qp_type == IB_QPT_RAW_PACKET) ?
+ MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
+ reg_id);
+ if (err)
+ return err;
+
+ err = del_gid_entry(ibqp, gid);
+
+ if (err)
+ return err;
+ }
+
+ return record_err;
+}
+
+static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ int count = (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) ?
+ mdev->dev->caps.num_ports : 1;
+
+ return _mlx4_ib_mcg_detach(ibqp, gid, lid, count);
+}
+
+static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ int err = -ENODEV;
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ DECLARE_BITMAP(ports, MLX4_MAX_PORTS);
+ int i = 0;
+
+ if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 &&
+ ibqp->qp_type == IB_QPT_RAW_PACKET)
+ gid->raw[5] = mqp->port;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ bitmap_fill(ports, mdev->dev->caps.num_ports);
+ } else {
+ if (mqp->port <= mdev->dev->caps.num_ports) {
+ bitmap_zero(ports, mdev->dev->caps.num_ports);
+ set_bit(0, ports);
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ for (; i < mdev->dev->caps.num_ports; i++) {
+ u64 reg_id;
+ struct mlx4_ib_steering *ib_steering = NULL;
+ if (!test_bit(i, ports))
+ continue;
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
+ if (!ib_steering)
+ goto err_add;
+ }
+
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp,
+ gid->raw, i + 1,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ (ibqp->qp_type == IB_QPT_RAW_PACKET) ?
+ MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
+ &reg_id);
+ if (err) {
+ kfree(ib_steering);
+ goto err_add;
+ }
+
+ err = add_gid_entry(ibqp, gid);
+ if (err) {
+ mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ (ibqp->qp_type == IB_QPT_RAW_PACKET) ?
+ MLX4_PROT_ETH : MLX4_PROT_IB_IPV6,
+ reg_id);
+ kfree(ib_steering);
+ goto err_add;
+ }
+
+ if (ib_steering) {
+ memcpy(ib_steering->gid.raw, gid->raw, 16);
+ ib_steering->reg_id = reg_id;
+ mutex_lock(&mqp->mutex);
+ list_add(&ib_steering->list, &mqp->steering_rules);
+ mutex_unlock(&mqp->mutex);
+ }
+ }
+
return 0;
+
+err_add:
+ if (i > 0)
+ _mlx4_ib_mcg_detach(ibqp, gid, lid, i);
+
+ return err;
}
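
The err_add path unwinds a partial attach: a failure at port i calls _mlx4_ib_mcg_detach() with count = i, so that exactly the attachments that already succeeded are released. A generic sketch of the pattern, with placeholder attach()/detach() callbacks standing in for the driver calls:

    /* Attach-all-or-unwind: on failure at index i, release exactly the
     * i resources acquired so far. attach()/detach() are placeholders. */
    static int attach_all(int n, int (*attach)(int), void (*detach)(int))
    {
        int i, err;

        for (i = 0; i < n; i++) {
            err = attach(i);
            if (err)
                goto unwind;
        }
        return 0;

    unwind:
        while (i-- > 0)     /* releases indices i-1 down to 0 */
            detach(i);
        return err;
    }
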
static int init_node_data(struct mlx4_ib_dev *dev)
@@ -1327,27 +1540,39 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
dev->dev->board_id);
}
+static ssize_t show_vsd(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_dev *dev =
+ container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+ ssize_t len = MLX4_VSD_LEN;
+
+ if (dev->dev->vsd_vendor_id == PCI_VENDOR_ID_MELLANOX)
+ len = sprintf(buf, "%.*s\n", MLX4_VSD_LEN, dev->dev->vsd);
+ else
+ memcpy(buf, dev->dev->vsd, MLX4_VSD_LEN);
+
+ return len;
+}
+
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static DEVICE_ATTR(vsd, S_IRUGO, show_vsd, NULL);
static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
- &dev_attr_board_id
+ &dev_attr_board_id,
+ &dev_attr_vsd
};
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
+static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev, u8 port)
{
-#ifdef __linux__
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
-#else
memcpy(eui, IF_LLADDR(dev), 3);
memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
-#endif
if (vlan_id < 0x1000) {
eui[3] = vlan_id >> 8;
eui[4] = vlan_id & 0xff;
@@ -1366,191 +1591,352 @@ static void update_gids_task(struct work_struct *work)
int err;
struct mlx4_dev *dev = gw->dev->dev;
+
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
- return;
+ goto free;
}
gids = mailbox->buf;
memcpy(gids, gw->gids, sizeof gw->gids);
- err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+ if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
+
if (err)
pr_warn("set port command failed\n");
- else {
- memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
- mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+ else
+ mlx4_ib_dispatch_event(gw->dev, gw->port,
+ IB_EVENT_GID_CHANGE);
+ }
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+free:
+ kfree(gw);
+}
+
+static void reset_gids_task(struct work_struct *work)
+{
+ struct update_gid_work *gw =
+ container_of(work, struct update_gid_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids;
+ int err;
+ struct mlx4_dev *dev = gw->dev->dev;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ pr_warn("reset gid table failed\n");
+ goto free;
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, gw->gids, sizeof(gw->gids));
+
+ if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 1) ==
+ IB_LINK_LAYER_ETHERNET &&
+ dev->caps.num_ports > 0) {
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | 1,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ pr_warn("set port 1 command failed\n");
+ }
+
+ if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 2) ==
+ IB_LINK_LAYER_ETHERNET &&
+ dev->caps.num_ports > 1) {
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | 2,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ pr_warn("set port 2 command failed\n");
}
mlx4_free_cmd_mailbox(dev, mailbox);
+free:
kfree(gw);
}
-static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
+static int update_gid_table(struct mlx4_ib_dev *dev, int port,
+ union ib_gid *gid, int clear, int default_gid)
{
- struct net_device *ndev = dev->iboe.netdevs[port - 1];
struct update_gid_work *work;
- struct net_device *tmp;
int i;
- u8 *hits;
- union ib_gid gid;
- int index_free;
- int found;
int need_update = 0;
+ int free = -1;
+ int found = -1;
int max_gids;
- u16 vid;
-
- work = kzalloc(sizeof *work, GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- hits = kzalloc(128, GFP_ATOMIC);
- if (!hits) {
- kfree(work);
- return -ENOMEM;
- }
+ int start_index = !default_gid;
max_gids = dev->dev->caps.gid_table_len[port];
-
-#ifdef __linux__
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, tmp) {
-#else
- IFNET_RLOCK();
- TAILQ_FOREACH(tmp, &V_ifnet, if_link) {
-#endif
- if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
- gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- vid = rdma_vlan_dev_vlan_id(tmp);
- mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
- found = 0;
- index_free = -1;
- for (i = 0; i < max_gids; ++i) {
- if (index_free < 0 &&
- !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- index_free = i;
- if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
- hits[i] = 1;
- found = 1;
+ for (i = start_index; i < max_gids; ++i) {
+ if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
+ sizeof(*gid)))
+ found = i;
+
+ if (clear) {
+ if (found >= 0) {
+ need_update = 1;
+ dev->iboe.gid_table[port - 1][found] = zgid;
break;
}
- }
+ } else {
+ if (found >= 0)
+ break;
- if (!found) {
- if (tmp == ndev &&
- (memcmp(&dev->iboe.gid_table[port - 1][0],
- &gid, sizeof gid) ||
- !memcmp(&dev->iboe.gid_table[port - 1][0],
- &zgid, sizeof gid))) {
- dev->iboe.gid_table[port - 1][0] = gid;
- ++need_update;
- hits[0] = 1;
- } else if (index_free >= 0) {
- dev->iboe.gid_table[port - 1][index_free] = gid;
- hits[index_free] = 1;
- ++need_update;
+ if (free < 0 &&
+ !memcmp(&dev->iboe.gid_table[port - 1][i],
+ &zgid, sizeof(*gid)))
+ free = i;
}
}
+
+ if (found == -1 && !clear && free < 0) {
+ pr_err("GID table of port %d is full. Can't add "GID_PRINT_FMT"\n",
+ port, GID_PRINT_ARGS(gid));
+ return -ENOMEM;
}
-#ifdef __linux__
+ if (found == -1 && clear) {
+ pr_err(GID_PRINT_FMT" is not in GID table of port %d\n", GID_PRINT_ARGS(gid), port);
+ return -EINVAL;
}
- rcu_read_unlock();
-#else
+ if (found == -1 && !clear && free >= 0) {
+ dev->iboe.gid_table[port - 1][free] = *gid;
+ need_update = 1;
}
- IFNET_RUNLOCK();
-#endif
- for (i = 0; i < max_gids; ++i)
- if (!hits[i]) {
- if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- ++need_update;
- dev->iboe.gid_table[port - 1][i] = zgid;
- }
+ if (!need_update)
+ return 0;
+
+ work = kzalloc(sizeof *work, GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
- if (need_update) {
- memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
+ memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
INIT_WORK(&work->work, update_gids_task);
work->port = port;
work->dev = dev;
queue_work(wq, &work->work);
- } else
- kfree(work);
- kfree(hits);
return 0;
}
-static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
+static int reset_gid_table(struct mlx4_ib_dev *dev)
{
- switch (event) {
- case NETDEV_UP:
-#ifdef __linux__
- case NETDEV_CHANGEADDR:
+ struct update_gid_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table));
+ memset(work->gids, 0, sizeof(work->gids));
+ INIT_WORK(&work->work, reset_gids_task);
+ work->dev = dev;
+ queue_work(wq, &work->work);
+ return 0;
+}
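
Both update_gid_table() and reset_gid_table() only snapshot the software table and queue the firmware update onto wq; the kzalloc uses GFP_ATOMIC because these helpers can be reached from notifier callbacks and under non-sleepable locks, while the mailbox allocation and mlx4_cmd() in the work handlers may sleep. A minimal sketch of this snapshot-and-defer pattern (names here are illustrative, not the driver's):

    #include <linux/slab.h>
    #include <linux/workqueue.h>
    #include <rdma/ib_verbs.h>      /* union ib_gid */

    struct gid_update {
        struct work_struct work;
        union ib_gid snapshot[128];
    };

    static void gid_update_fn(struct work_struct *work)
    {
        struct gid_update *up = container_of(work, struct gid_update, work);

        /* sleeping operations (mailbox alloc, firmware command) are
         * safe here, in process context */
        kfree(up);
    }

    static int queue_gid_update(struct workqueue_struct *wq,
                                union ib_gid *table)
    {
        struct gid_update *up = kzalloc(sizeof(*up), GFP_ATOMIC);

        if (!up)
            return -ENOMEM;
        memcpy(up->snapshot, table, sizeof(up->snapshot));
        INIT_WORK(&up->work, gid_update_fn);
        queue_work(wq, &up->work);
        return 0;
    }
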
+
+/* XXX BOND related - stub (no support for these flags in FBSD) */
+static inline int netif_is_bond_master(struct net_device *dev)
+{
+#if 0
+ return (dev->flags & IFF_MASTER) && (dev->priv_flags & IFF_BONDING);
#endif
- update_ipv6_gids(dev, port, 0);
+ return 0;
+}
+
+static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid, u8 port)
+{
+ gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev, port);
+}
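
mlx4_make_default_gid() composes the IPv6 link-local prefix (fe80::/64) with an interface ID that mlx4_addrconf_ifid_eui48() derives from the port MAC; the 0xffff vlan_id argument selects the no-VLAN filler bytes. For comparison, a hedged sketch of the standard RFC 4291 modified EUI-64 expansion, which uses a fixed ff:fe filler where the driver's variant can embed a VLAN ID:

    /* RFC 4291 modified EUI-64 from a 48-bit MAC. Plain C, no kernel
     * dependencies; shown only to contrast with the VLAN-aware variant. */
    #include <stdint.h>
    #include <string.h>

    static void mac_to_eui64(const uint8_t mac[6], uint8_t eui[8])
    {
        memcpy(eui, mac, 3);
        eui[3] = 0xff;          /* fixed filler bytes */
        eui[4] = 0xfe;
        memcpy(eui + 5, mac + 3, 3);
        eui[0] ^= 0x02;         /* flip the universal/local bit */
    }
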
+
+static u8 mlx4_ib_get_dev_port(struct net_device *dev, struct mlx4_ib_dev *ibdev)
+{
+ u8 port = 0;
+ struct mlx4_ib_iboe *iboe;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
+ rdma_vlan_dev_real_dev(dev) : dev;
+
+ iboe = &ibdev->iboe;
+
+ for (port = 1; port <= MLX4_MAX_PORTS; ++port)
+ if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1])))
break;
- case NETDEV_DOWN:
- update_ipv6_gids(dev, port, 1);
- dev->iboe.netdevs[port - 1] = NULL;
+ return port > MLX4_MAX_PORTS ? 0 : port;
+}
+
+static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port)
+{
+ struct ifaddr *ifa;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct inet6_dev *in6_dev;
+ union ib_gid *pgid;
+ struct inet6_ifaddr *ifp;
+#endif
+ union ib_gid gid;
+
+ if ((port == 0) || (port > MLX4_MAX_PORTS))
+ return;
+
+ /* IPv4 gids */
+ TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
+ ipv6_addr_set_v4mapped(
+ ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr,
+ (struct in6_addr *)&gid);
+ update_gid_table(ibdev, port, &gid, 0, 0);
+ }
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ /* IPv6 gids */
+ in6_dev = in6_dev_get(dev);
+ if (in6_dev) {
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ pgid = (union ib_gid *)&ifp->addr;
+ update_gid_table(ibdev, port, pgid, 0, 0);
}
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ }
+#endif
}
-static void netdev_added(struct mlx4_ib_dev *dev, int port)
+static void mlx4_set_default_gid(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev, u8 port)
{
- update_ipv6_gids(dev, port, 0);
+ union ib_gid gid;
+
+ mlx4_make_default_gid(dev, &gid, port);
+ update_gid_table(ibdev, port, &gid, 0, 1);
}
-static void netdev_removed(struct mlx4_ib_dev *dev, int port)
+static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
{
- update_ipv6_gids(dev, port, 1);
+ struct net_device *dev;
+
+ if (reset_gid_table(ibdev))
+ return -1;
+
+ IFNET_RLOCK_NOSLEEP();
+ TAILQ_FOREACH(dev, &V_ifnet, if_link) {
+ u8 port = mlx4_ib_get_dev_port(dev, ibdev);
+ if (port) {
+ if (!rdma_vlan_dev_real_dev(dev) &&
+ !netif_is_bond_master(dev))
+ mlx4_set_default_gid(ibdev, dev, port);
+ mlx4_ib_get_dev_addr(dev, ibdev, port);
+ }
+ }
+
+ IFNET_RUNLOCK_NOSLEEP();
+
+ return 0;
}
-static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
- void *ptr)
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev, unsigned long event)
{
- struct net_device *dev = ptr;
- struct mlx4_ib_dev *ibdev;
- struct net_device *oldnd;
struct mlx4_ib_iboe *iboe;
int port;
+ int init = 0;
+ unsigned long flags;
-#ifdef __linux__
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-#endif
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
iboe = &ibdev->iboe;
- spin_lock(&iboe->lock);
+ spin_lock_irqsave(&iboe->lock, flags);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- oldnd = iboe->netdevs[port - 1];
+ struct net_device *old_netdev = iboe->netdevs[port - 1];
+/* XXX BOND related */
+#if 0
+ struct net_device *old_master = iboe->masters[port - 1];
+#endif
+ iboe->masters[port - 1] = NULL;
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
- if (oldnd != iboe->netdevs[port - 1]) {
- if (iboe->netdevs[port - 1])
- netdev_added(ibdev, port);
- else
- netdev_removed(ibdev, port);
- }
+
+ if (old_netdev != iboe->netdevs[port - 1])
+ init = 1;
+ if (dev == iboe->netdevs[port - 1] &&
+ event == NETDEV_CHANGEADDR)
+ init = 1;
+/* XXX BOND related */
+#if 0
+ if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1]))
+ iboe->masters[port - 1] = iboe->netdevs[port - 1]->master;
+
+ /* If bonding is used, the device may be added to masters only after
+ an IP address is assigned to the bonding net interface. */
+ if (old_master != iboe->masters[port - 1])
+ init = 1;
+#endif
}
- if (dev == iboe->netdevs[0] ||
- (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
- handle_en_event(ibdev, 1, event);
- else if (dev == iboe->netdevs[1]
- || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
- handle_en_event(ibdev, 2, event);
+ spin_unlock_irqrestore(&iboe->lock, flags);
+
+ if (init)
+ if (mlx4_ib_init_gid_table(ibdev))
+ pr_warn("Fail to reset gid table\n");
+}
+
+static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ptr;
+ struct mlx4_ib_dev *ibdev;
- spin_unlock(&iboe->lock);
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+
+ mlx4_ib_scan_netdevs(ibdev, dev, event);
return NOTIFY_DONE;
}
+/* This function initializes the gid table only if the event_netdev real device
+ * is an iboe device; it is invoked by the inet/inet6 events */
+static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *event_netdev = ptr;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
+ rdma_vlan_dev_real_dev(event_netdev) :
+ event_netdev;
+ struct mlx4_ib_dev *ibdev;
+ struct mlx4_ib_iboe *ibdev_iboe;
+ int port = 0;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
+ ibdev_iboe = &ibdev->iboe;
+
+ port = mlx4_ib_get_dev_port(real_dev, ibdev);
+
+ /* Perform init_gid_table if the event real_dev is the net_device which
+ * represents this port; otherwise the event is unrelated and is ignored. */
+ if (port && (real_dev == ibdev_iboe->netdevs[port - 1]))
+ if (mlx4_ib_init_gid_table(ibdev))
+ pr_warn("Fail to reset gid table\n");
+
+ return NOTIFY_DONE;
+}
+
+
static void init_pkeys(struct mlx4_ib_dev *ibdev)
{
int port;
@@ -1615,7 +2001,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
eq = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
- snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j,
+ sprintf(name, "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j,
pci_get_domain(dev->pdev->dev.bsddev),
pci_get_bus(dev->pdev->dev.bsddev),
PCI_SLOT(dev->pdev->devfn),
@@ -1779,89 +2165,61 @@ static struct attribute_group diag_counters_group = {
.attrs = diag_rprt_attrs
};
-#ifdef __linux__
-static int mlx4_ib_proc_init(void)
+static void init_dev_assign(void)
{
- /* Creating procfs directories /proc/drivers/mlx4_ib/ &&
- /proc/drivers/mlx4_ib/mrs for further use by the driver.
- */
- int err;
+ int i = 1;
- mlx4_ib_driver_dir_entry = proc_mkdir(MLX4_IB_DRIVER_PROC_DIR_NAME,
- NULL);
- if (!mlx4_ib_driver_dir_entry) {
- pr_err("mlx4_ib_proc_init has failed for %s\n",
- MLX4_IB_DRIVER_PROC_DIR_NAME);
- err = -ENODEV;
- goto error;
+ spin_lock_init(&dev_num_str_lock);
+ if (mlx4_fill_dbdf2val_tbl(&dev_assign_str))
+ return;
+ dev_num_str_bitmap =
+ kmalloc(BITS_TO_LONGS(MAX_NUM_STR_BITMAP) * sizeof(long),
+ GFP_KERNEL);
+ if (!dev_num_str_bitmap) {
+ pr_warn("bitmap alloc failed -- cannot apply dev_assign_str parameter\n");
+ return;
}
-
- mlx4_mrs_dir_entry = proc_mkdir(MLX4_IB_MRS_PROC_DIR_NAME,
- mlx4_ib_driver_dir_entry);
- if (!mlx4_mrs_dir_entry) {
- pr_err("mlx4_ib_proc_init has failed for %s\n",
- MLX4_IB_MRS_PROC_DIR_NAME);
- err = -ENODEV;
- goto remove_entry;
+ bitmap_zero(dev_num_str_bitmap, MAX_NUM_STR_BITMAP);
+ while ((i < MLX4_DEVS_TBL_SIZE) && (dev_assign_str.tbl[i].dbdf !=
+ MLX4_ENDOF_TBL)) {
+ if (bitmap_allocate_region(dev_num_str_bitmap,
+ dev_assign_str.tbl[i].val[0], 0))
+ goto err;
+ i++;
}
+ dr_active = 1;
+ return;
- return 0;
-
-remove_entry:
- remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME,
- NULL);
-error:
- return err;
+err:
+ kfree(dev_num_str_bitmap);
+ dev_num_str_bitmap = NULL;
+ pr_warn("mlx4_ib: The value of 'dev_assign_str' parameter "
+ "is incorrect. The parameter value is discarded!");
}
-#endif
-static void init_dev_assign(void)
+static int mlx4_ib_dev_idx(struct mlx4_dev *dev)
{
- int bus, slot, fn, ib_idx;
- char *p = dev_assign_str, *t;
- char curr_val[32] = {0};
- int ret;
- int j, i = 0;
-
- memset(dr, 0, sizeof dr);
-
- if (dev_assign_str[0] == 0)
- return;
-
- while (strlen(p)) {
- ret = sscanf(p, "%02x:%02x.%x-%x", &bus, &slot, &fn, &ib_idx);
- if (ret != 4 || ib_idx < 0)
- goto err;
-
- for (j = 0; j < i; j++)
- if (dr[j].nr == ib_idx)
- goto err;
-
- dr[i].bus = bus;
- dr[i].dev = slot;
- dr[i].func = fn;
- dr[i].nr = ib_idx;
-
- t = strchr(p, ',');
- sprintf(curr_val, "%02x:%02x.%x-%x", bus, slot, fn, ib_idx);
- if ((!t) && strlen(p) == strlen(curr_val))
- return;
-
- if (!t || (t + 1) >= dev_assign_str + sizeof dev_assign_str)
- goto err;
-
- ++i;
- if (i >= MAX_DR)
- goto err;
-
- p = t + 1;
+ int i, val;
+
+ if (!dr_active)
+ return -1;
+ if (!dev)
+ return -1;
+ if (mlx4_get_val(dev_assign_str.tbl, dev->pdev, 0, &val))
+ return -1;
+
+ if (val != DEFAULT_TBL_VAL) {
+ dev->flags |= MLX4_FLAG_DEV_NUM_STR;
+ return val;
}
- return;
-err:
- memset(dr, 0, sizeof dr);
- printk(KERN_WARNING "mlx4_ib: The value of 'dev_assign_str' parameter "
- "is incorrect. The parameter value is discarded!");
+ spin_lock(&dev_num_str_lock);
+ i = bitmap_find_free_region(dev_num_str_bitmap, MAX_NUM_STR_BITMAP, 0);
+ spin_unlock(&dev_num_str_lock);
+ if (i >= 0)
+ return i;
+
+ return -1;
}
static void *mlx4_ib_add(struct mlx4_dev *dev)
@@ -1871,8 +2229,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int i, j;
int err;
struct mlx4_ib_iboe *iboe;
+ int dev_idx;
- printk(KERN_INFO "%s", mlx4_ib_version);
+ pr_info_once("%s", mlx4_ib_version);
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
@@ -1905,7 +2264,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->dev = dev;
+ dev_idx = mlx4_ib_dev_idx(dev);
+ if (dev_idx >= 0)
+ sprintf(ibdev->ib_dev.name, "mlx4_%d", dev_idx);
+ else
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
+
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
@@ -1942,10 +2306,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_FLOW) |
- (1ull << IB_USER_VERBS_CMD_DETACH_FLOW) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP);
ibdev->ib_dev.query_device = mlx4_ib_query_device;
ibdev->ib_dev.query_port = mlx4_ib_query_port;
@@ -1957,7 +2318,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
ibdev->ib_dev.mmap = mlx4_ib_mmap;
-#ifdef __linux__
+/* XXX FBSD has no support for get_unmapped_area function */
+#if 0
ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area;
#endif
ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
@@ -1990,9 +2352,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
- ibdev->ib_dev.attach_flow = mlx4_ib_flow_attach;
- ibdev->ib_dev.detach_flow = mlx4_ib_flow_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
+ ibdev->ib_dev.ioctl = mlx4_ib_ioctl;
+ ibdev->ib_dev.query_values = mlx4_ib_query_values;
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
@@ -2001,6 +2363,16 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
}
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) {
+ ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
+ ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
+ ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
+
+ ibdev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ }
+
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
@@ -2009,6 +2381,29 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ /*
+ * Set experimental data
+ */
+ ibdev->ib_dev.uverbs_exp_cmd_mask =
+ (1ull << IB_USER_VERBS_EXP_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_EXP_CMD_MODIFY_CQ) |
+ (1ull << IB_USER_VERBS_EXP_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_EXP_CMD_CREATE_CQ);
+ ibdev->ib_dev.exp_create_qp = mlx4_ib_exp_create_qp;
+ ibdev->ib_dev.exp_query_device = mlx4_ib_exp_query_device;
+ if (check_flow_steering_support(dev)) {
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
+ ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
+ } else {
+ pr_debug("Device managed flow steering is unavailable for this configuration.\n");
+ }
+ /*
+ * End of experimental data
+ */
+
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
@@ -2019,18 +2414,29 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (i = 0; i < ibdev->num_ports; ++i) {
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
- err = mlx4_counter_alloc(ibdev->dev, i + 1, &ibdev->counters[i]);
- if (err)
- ibdev->counters[i] = -1;
- } else
- ibdev->counters[i] = -1;
+ if (mlx4_is_slave(dev)) {
+ ibdev->counters[i].status = mlx4_counter_alloc(ibdev->dev,
+ i + 1,
+ &ibdev->counters[i].counter_index);
+ } else {
+ /* allocating the PF IB default counter indices
+ * reserved in mlx4_init_counters_table */
+ ibdev->counters[i].counter_index = ((i + 1) << 1) - 1;
+ ibdev->counters[i].status = 0;
+ }
+
+ dev_info(&dev->pdev->dev,
+ "%s: allocated counter index %d for port %d\n",
+ __func__, ibdev->counters[i].counter_index, i+1);
+ } else {
+ ibdev->counters[i].counter_index = MLX4_SINK_COUNTER_INDEX;
+ ibdev->counters[i].status = -ENOSPC;
+ }
}
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
- !mlx4_is_slave(dev)) {
+ !mlx4_is_mfunc(dev)) {
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0);
@@ -2063,20 +2469,32 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
- if (err)
- goto err_sriov;
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notify;
+ }
+ }
+ if (!iboe->nb_inet.notifier_call) {
+ iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
+ err = register_inetaddr_notifier(&iboe->nb_inet);
+ if (err) {
+ iboe->nb_inet.notifier_call = NULL;
+ goto err_notify;
+ }
+ }
+ mlx4_ib_scan_netdevs(ibdev, NULL, 0);
}
-
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
mlx4_class_attributes[j]))
- goto err_notif;
+ goto err_notify;
}
if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
- goto err_notif;
+ goto err_notify;
ibdev->ib_active = true;
@@ -2094,12 +2512,24 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
}
return ibdev;
-err_notif:
+err_notify:
+ for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
+ device_remove_file(&ibdev->ib_dev.dev,
+ mlx4_class_attributes[j]);
+ }
+
+ if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb.notifier_call = NULL;
+ }
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
flush_workqueue(wq);
-err_sriov:
mlx4_ib_close_sriov(ibdev);
err_mad:
@@ -2116,9 +2546,14 @@ err_steer_qp_release:
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
err_counter:
- for (; i; --i)
- if (ibdev->counters[i - 1] != -1)
- mlx4_counter_free(ibdev->dev, i, ibdev->counters[i - 1]);
+ for (; i; --i) {
+ if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i) ==
+ IB_LINK_LAYER_ETHERNET) {
+ mlx4_counter_free(ibdev->dev,
+ i,
+ ibdev->counters[i - 1].counter_index);
+ }
+ }
err_map:
iounmap(ibdev->priv_uar.map);
@@ -2167,30 +2602,71 @@ void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach)
{
- struct ib_flow_spec spec = {
- .type = IB_FLOW_IB_UC,
- .l2_id.ib_uc.qpn = mqp->ibqp.qp_num,
- };
-
- return is_attach ?
- __mlx4_ib_flow_attach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0)
- : __mlx4_ib_flow_detach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0);
+ int err;
+ size_t flow_size;
+ struct ib_flow_attr *flow = NULL;
+ struct ib_flow_spec_ib *ib_spec;
+
+ if (is_attach) {
+ flow_size = sizeof(struct ib_flow_attr) +
+ sizeof(struct ib_flow_spec_ib);
+ flow = kzalloc(flow_size, GFP_KERNEL);
+ if (!flow)
+ return -ENOMEM;
+ flow->port = mqp->port;
+ flow->num_of_specs = 1;
+ flow->size = flow_size;
+ ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
+ ib_spec->type = IB_FLOW_SPEC_IB;
+ ib_spec->size = sizeof(struct ib_flow_spec_ib);
+ ib_spec->val.l3_type_qpn = mqp->ibqp.qp_num;
+ ib_spec->mask.l3_type_qpn = MLX4_IB_FLOW_QPN_MASK;
+
+ err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
+ IB_FLOW_DOMAIN_NIC,
+ MLX4_FS_REGULAR,
+ &mqp->reg_id);
+ } else {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+ }
+ kfree(flow);
+ return err;
}
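
In the attach case above, the flow description is built as a single allocation: a struct ib_flow_attr header immediately followed by its specs, with (flow + 1) addressing the first trailing record — the same layout __mlx4_ib_create_flow() walks. A minimal sketch of this header-plus-trailing-records idiom, with illustrative types:

    /* Header followed by trailing records in one allocation; (h + 1)
     * points just past the header. Types here are illustrative. */
    #include <linux/slab.h>

    struct hdr  { int num; size_t size; };
    struct spec { int type; size_t size; };

    static struct hdr *build_one_spec(void)
    {
        struct hdr *h = kzalloc(sizeof(*h) + sizeof(struct spec), GFP_KERNEL);
        struct spec *s;

        if (!h)
            return NULL;
        h->num = 1;
        h->size = sizeof(*h) + sizeof(struct spec);
        s = (struct spec *)(h + 1);   /* first record after the header */
        s->size = sizeof(*s);
        return h;
    }
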
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
- int p,j;
+ int p, j;
+ int dev_idx, ret;
+
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
mlx4_ib_close_sriov(ibdev);
sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group);
mlx4_ib_mad_cleanup(ibdev);
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
- device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]);
+ device_remove_file(&ibdev->ib_dev.dev,
+ mlx4_class_attributes[j]);
}
+
+ dev_idx = -1;
+ if (dr_active && !(ibdev->dev->flags & MLX4_FLAG_DEV_NUM_STR)) {
+ ret = sscanf(ibdev->ib_dev.name, "mlx4_%d", &dev_idx);
+ if (ret != 1)
+ dev_idx = -1;
+ }
ib_unregister_device(&ibdev->ib_dev);
+ if (dev_idx >= 0) {
+ spin_lock(&dev_num_str_lock);
+ bitmap_release_region(dev_num_str_bitmap, dev_idx, 0);
+ spin_unlock(&dev_num_str_lock);
+ }
if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
@@ -2204,9 +2680,16 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
ibdev->iboe.nb.notifier_call = NULL;
}
iounmap(ibdev->priv_uar.map);
- for (p = 0; p < ibdev->num_ports; ++p)
- if (ibdev->counters[p] != -1)
- mlx4_counter_free(ibdev->dev, p + 1, ibdev->counters[p]);
+
+ for (p = 0; p < ibdev->num_ports; ++p) {
+ if (mlx4_ib_port_link_layer(&ibdev->ib_dev, p + 1) ==
+ IB_LINK_LAYER_ETHERNET) {
+ mlx4_counter_free(ibdev->dev,
+ p + 1,
+ ibdev->counters[p].counter_index);
+ }
+ }
+
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
@@ -2355,12 +2838,6 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
-#ifdef __linux__
- err = mlx4_ib_proc_init();
- if (err)
- goto clean_wq;
-#endif
-
err = mlx4_ib_mcg_init();
if (err)
goto clean_proc;
@@ -2377,13 +2854,6 @@ clean_mcg:
mlx4_ib_mcg_destroy();
clean_proc:
-#ifdef __linux__
- remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME,
- mlx4_ib_driver_dir_entry);
- remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL);
-
-clean_wq:
-#endif
destroy_workqueue(wq);
return err;
}
@@ -2394,13 +2864,7 @@ static void __exit mlx4_ib_cleanup(void)
mlx4_ib_mcg_destroy();
destroy_workqueue(wq);
- /* Remove proc entries */
-#ifdef __linux__
- remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME,
- mlx4_ib_driver_dir_entry);
- remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL);
-#endif
-
+ kfree(dev_num_str_bitmap);
}
module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE);
@@ -2417,7 +2881,7 @@ static moduledata_t mlx4ib_mod = {
.evhand = mlx4ib_evhand,
};
-DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);
+DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
MODULE_DEPEND(mlx4ib, linuxapi, 1, 1, 1);
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c b/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
index e70dfe9..07d5c87 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
@@ -36,6 +36,7 @@
#include <rdma/ib_sa.h>
#include <linux/mlx4/cmd.h>
+#include <linux/rbtree.h>
#include <linux/delay.h>
#include "mlx4_ib.h"
@@ -53,6 +54,7 @@
#define mcg_error_group(group, format, arg...) \
pr_err(" %16s: " format, (group)->name, ## arg)
+
static union ib_gid mgid0;
static struct workqueue_struct *clean_wq;
@@ -214,7 +216,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
spin_unlock(&dev->sm_lock);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
- IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
+ IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, NULL, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
@@ -567,7 +569,7 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
group->state = MCAST_IDLE;
atomic_inc(&group->refcount);
- queue_work(group->demux->mcg_wq, &group->work);
+ if (!queue_work(group->demux->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
mutex_unlock(&group->lock);
@@ -656,8 +658,9 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
method = group->response_sa_mad.mad_hdr.method;
if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
- (long long unsigned int)be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
- (long long unsigned int)be64_to_cpu(group->last_req_tid));
+ (long long)be64_to_cpu(
+ group->response_sa_mad.mad_hdr.tid),
+ (long long)be64_to_cpu(group->last_req_tid));
group->state = group->prev_state;
goto process_requests;
}
@@ -752,8 +755,8 @@ static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx
if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
group->rec.mgid = *new_mgid;
sprintf(group->name, "%016llx%016llx",
- (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
- (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
+ (long long)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
+ (long long)be64_to_cpu(group->rec.mgid.global.interface_id));
list_del_init(&group->mgid0_list);
cur_group = mcast_insert(ctx, group);
if (cur_group) {
@@ -834,8 +837,10 @@ static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
mutex_init(&group->lock);
sprintf(group->name, "%016llx%016llx",
- (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
- (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
+ (long long)be64_to_cpu(
+ group->rec.mgid.global.subnet_prefix),
+ (long long)be64_to_cpu(
+ group->rec.mgid.global.interface_id));
sysfs_attr_init(&group->dentry.attr);
group->dentry.show = sysfs_show_group;
group->dentry.store = NULL;
@@ -871,7 +876,7 @@ static void queue_req(struct mcast_req *req)
list_add_tail(&req->group_list, &group->pending_list);
list_add_tail(&req->func_list, &group->func[req->func].pending);
/* calls mlx4_ib_mcg_work_handler */
- queue_work(group->demux->mcg_wq, &group->work);
+ if (!queue_work(group->demux->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
}
@@ -907,7 +912,7 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
group->state = MCAST_RESP_READY;
/* calls mlx4_ib_mcg_work_handler */
atomic_inc(&group->refcount);
- queue_work(ctx->mcg_wq, &group->work);
+ if (!queue_work(ctx->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
mutex_unlock(&group->lock);
release_group(group, 0);
@@ -998,13 +1003,14 @@ static ssize_t sysfs_show_group(struct device *dev,
else
sprintf(state_str, "%s(TID=0x%llx)",
get_state_string(group->state),
- (long long unsigned int)be64_to_cpu(group->last_req_tid));
+ (long long)be64_to_cpu(group->last_req_tid));
if (list_empty(&group->pending_list)) {
sprintf(pending_str, "No");
} else {
req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
sprintf(pending_str, "Yes(TID=0x%llx)",
- (long long unsigned int)be64_to_cpu(req->sa_mad.mad_hdr.tid));
+ (long long)be64_to_cpu(
+ req->sa_mad.mad_hdr.tid));
}
len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
group->rec.scope_join_state & 0xf,
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c
new file mode 100644
index 0000000..b6a6962
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4_ib.h"
+#include "mlx4_exp.h"
+#include <linux/mlx4/qp.h>
+
+int mlx4_ib_exp_query_device(struct ib_device *ibdev,
+ struct ib_exp_device_attr *props)
+{
+ struct ib_device_attr *base = &props->base;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int ret = mlx4_ib_query_device(ibdev, &props->base);
+
+ props->exp_comp_mask = IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
+ props->inline_recv_sz = dev->dev->caps.max_rq_sg * sizeof(struct mlx4_wqe_data_seg);
+ props->device_cap_flags2 = 0;
+
+ /* move RSS device cap from device_cap to device_cap_flags2 */
+ if (base->device_cap_flags & IB_DEVICE_QPG) {
+ props->device_cap_flags2 |= IB_EXP_DEVICE_QPG;
+ if (base->device_cap_flags & IB_DEVICE_UD_RSS)
+ props->device_cap_flags2 |= IB_EXP_DEVICE_UD_RSS;
+ }
+ base->device_cap_flags &= ~(IB_DEVICE_QPG |
+ IB_DEVICE_UD_RSS |
+ IB_DEVICE_UD_TSS);
+
+ if (base->max_rss_tbl_sz > 0) {
+ props->max_rss_tbl_sz = base->max_rss_tbl_sz;
+ props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
+ } else {
+ props->max_rss_tbl_sz = 0;
+ props->exp_comp_mask &= ~IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
+ }
+
+ if (props->device_cap_flags2)
+ props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2;
+
+ return ret;
+}
+
+/*
+ * Experimental functions
+ */
+struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd,
+ struct ib_exp_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int rwqe_size;
+ struct ib_qp *qp;
+ struct mlx4_ib_qp *mqp;
+ int use_inlr;
+ struct mlx4_ib_dev *dev;
+
+ if (init_attr->max_inl_recv && !udata)
+ return ERR_PTR(-EINVAL);
+
+ use_inlr = mlx4_ib_qp_has_rq((struct ib_qp_init_attr *)init_attr) &&
+ init_attr->max_inl_recv && pd;
+ if (use_inlr) {
+ rwqe_size = roundup_pow_of_two(max(1U, init_attr->cap.max_recv_sge)) *
+ sizeof(struct mlx4_wqe_data_seg);
+ if (rwqe_size < init_attr->max_inl_recv) {
+ dev = to_mdev(pd->device);
+ init_attr->max_inl_recv = min(init_attr->max_inl_recv,
+ (u32)(dev->dev->caps.max_rq_sg *
+ sizeof(struct mlx4_wqe_data_seg)));
+ init_attr->cap.max_recv_sge = roundup_pow_of_two(init_attr->max_inl_recv) /
+ sizeof(struct mlx4_wqe_data_seg);
+ }
+ } else {
+ init_attr->max_inl_recv = 0;
+ }
+ qp = mlx4_ib_create_qp(pd, (struct ib_qp_init_attr *)init_attr, udata);
+ if (IS_ERR(qp))
+ return qp;
+
+ if (use_inlr) {
+ mqp = to_mqp(qp);
+ mqp->max_inlr_data = 1 << mqp->rq.wqe_shift;
+ init_attr->max_inl_recv = mqp->max_inlr_data;
+ }
+
+ return qp;
+}
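
A worked example of the inline-receive sizing above, assuming sizeof(struct mlx4_wqe_data_seg) is 16 bytes (its byte_count/lkey/addr layout): with cap.max_recv_sge = 5, roundup_pow_of_two(5) = 8, so rwqe_size = 128 bytes, and a larger max_inl_recv request makes the driver raise max_recv_sge instead. A small standalone sketch of the arithmetic:

    #include <stdint.h>

    /* roundup_pow_of_two equivalent, for the example only */
    static uint32_t rup_pow2(uint32_t v)
    {
        uint32_t p = 1;

        while (p < v)
            p <<= 1;
        return p;
    }

    static uint32_t rwqe_bytes(uint32_t max_recv_sge, uint32_t seg_size)
    {
        return rup_pow2(max_recv_sge ? max_recv_sge : 1) * seg_size;
    }
    /* rwqe_bytes(5, 16) == 128: a request for more than 128 inline bytes
     * makes the driver grow max_recv_sge rather than fail. */
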
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h
new file mode 100644
index 0000000..58675a4
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_exp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_EXP_H
+#define MLX4_EXP_H
+
+#include <rdma/ib_verbs_exp.h>
+#include "mlx4_ib.h"
+
+struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd,
+ struct ib_exp_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_exp_query_device(struct ib_device *ibdev,
+ struct ib_exp_device_attr *props);
+
+#endif /* MLX4_EXP_H */
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 2435df5..ddf5236 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
+#include <linux/rbtree.h>
#include <linux/notifier.h>
#include <rdma/ib_verbs.h>
@@ -47,7 +48,6 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
-#include <linux/rbtree.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
@@ -72,9 +72,7 @@ enum {
/*module param to indicate if SM assigns the alias_GUID*/
extern int mlx4_ib_sm_guid_assign;
-#ifdef __linux__
extern struct proc_dir_entry *mlx4_mrs_dir_entry;
-#endif
#define MLX4_IB_UC_STEER_QPN_ALIGN 1
#define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024)
@@ -128,6 +126,7 @@ struct mlx4_ib_cq {
struct mutex resize_mutex;
struct ib_umem *umem;
struct ib_umem *resize_umem;
+ int create_flags;
};
struct mlx4_ib_mr {
@@ -135,6 +134,13 @@ struct mlx4_ib_mr {
struct mlx4_mr mmr;
struct ib_umem *umem;
struct mlx4_shared_mr_info *smr_info;
+ atomic_t invalidated;
+ struct completion invalidation_comp;
+};
+
+struct mlx4_ib_mw {
+ struct ib_mw ibmw;
+ struct mlx4_mw mmw;
};
struct mlx4_ib_fast_reg_page_list {
@@ -148,6 +154,12 @@ struct mlx4_ib_fmr {
struct mlx4_fmr mfmr;
};
+struct mlx4_ib_flow {
+ struct ib_flow ibflow;
+ /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
+ u64 reg_id[2];
+};
+
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
@@ -163,6 +175,9 @@ struct mlx4_ib_wq {
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX4_IB_QP_CAP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
+ MLX4_IB_QP_CAP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
+ MLX4_IB_QP_CAP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
@@ -179,6 +194,7 @@ enum mlx4_ib_mmap_cmd {
MLX4_IB_MMAP_UAR_PAGE = 0,
MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1,
MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2,
+ MLX4_IB_MMAP_GET_HW_CLOCK = 3,
};
enum mlx4_ib_qp_type {
@@ -319,8 +335,14 @@ struct mlx4_ib_qp {
struct mlx4_roce_smac_vlan_info pri;
struct mlx4_roce_smac_vlan_info alt;
struct list_head rules_list;
+ u64 reg_id;
int max_inline_data;
struct mlx4_bf bf;
+
+ /*
+ * Experimental data
+ */
+ int max_inlr_data;
};
struct mlx4_ib_srq {
@@ -354,6 +376,12 @@ struct mlx4_ib_ah {
#define MLX4_NOT_SET_GUID (0x00LL)
#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
+/****************************************/
+/* ioctl codes */
+/****************************************/
+#define MLX4_IOC_MAGIC 'm'
+#define MLX4_IOCHWCLOCKOFFSET _IOR(MLX4_IOC_MAGIC, 1, int)
+
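
MLX4_IOCHWCLOCKOFFSET is declared with _IOR, i.e. a read-direction ioctl that copies an int back to the caller — presumably the page offset to pass to mmap() together with MLX4_IB_MMAP_GET_HW_CLOCK above. A hedged userspace sketch; the device path is a placeholder, not a documented node:

    /* Hypothetical userspace caller of the hw-clock-offset ioctl; the
     * device path is a placeholder and error handling is minimal. */
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>

    #define MLX4_IOC_MAGIC 'm'
    #define MLX4_IOCHWCLOCKOFFSET _IOR(MLX4_IOC_MAGIC, 1, int)

    int main(void)
    {
        int offset = 0;
        int fd = open("/dev/uverbs0", O_RDWR);  /* hypothetical node */

        if (fd < 0)
            return 1;
        if (ioctl(fd, MLX4_IOCHWCLOCKOFFSET, &offset) == 0)
            printf("hw clock offset: %d\n", offset);
        close(fd);
        return 0;
    }
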
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
@@ -478,7 +506,9 @@ struct mlx4_ib_sriov {
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
+ struct net_device *masters[MLX4_MAX_PORTS];
struct notifier_block nb;
+ struct notifier_block nb_inet;
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
@@ -518,6 +548,11 @@ struct mlx4_ib_iov_port {
struct mlx4_ib_iov_sysfs_attr mcg_dentry;
};
+struct mlx4_ib_counter {
+ int counter_index;
+ int status;
+};
+
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@@ -534,7 +569,7 @@ struct mlx4_ib_dev {
struct mutex cap_mask_mutex;
bool ib_active;
struct mlx4_ib_iboe iboe;
- int counters[MLX4_MAX_PORTS];
+ struct mlx4_ib_counter counters[MLX4_MAX_PORTS];
int *eq_table;
int eq_added;
struct kobject *iov_parent;
@@ -595,6 +630,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx4_ib_mr, ibmr);
}
+static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct mlx4_ib_mw, ibmw);
+}
+
static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
{
return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
@@ -604,6 +644,12 @@ static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
}
+
+static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
+{
+ return container_of(ibflow, struct mlx4_ib_flow, ibflow);
+}
+
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
@@ -646,16 +692,23 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata, int mr_id);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
int page_list_len);
void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int mlx4_ib_modify_cq(struct ib_cq *cq,
+ struct ib_cq_attr *cq_attr,
+ int cq_attr_mask);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq);
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
+struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
+ struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
int mlx4_ib_destroy_cq(struct ib_cq *cq);
@@ -730,6 +783,13 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
return !!(ah->av.ib.g_slid & 0x80);
}
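+/* XRC initiator/target QPs have no receive queue, and a QP attached
+ * to an SRQ receives through the SRQ rather than its own RQ.
+ */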
+static inline int mlx4_ib_qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
+ return 0;
+
+ return !attr->srq;
+}
int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
@@ -757,7 +817,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
struct ib_grh *grh, struct ib_mad *mad);
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
+ u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
@@ -799,5 +859,7 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
+int mlx4_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props);
#endif /* MLX4_IB_H */
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mr.c b/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
index 9ea4901..61c2088 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
@@ -35,11 +35,6 @@
#include <linux/module.h>
#include <linux/sched.h>
-#ifdef __linux__
-#include <linux/proc_fs.h>
-#include <linux/cred.h>
-#endif
-
#include "mlx4_ib.h"
static u32 convert_access(int acc)
@@ -48,9 +43,11 @@ static u32 convert_access(int acc)
(acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
+ (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
MLX4_PERM_LOCAL_READ;
}
-#ifdef __linux__
+/* No support for the Shared MR feature */
+#if 0
static ssize_t shared_mr_proc_read(struct file *file,
char __user *buffer,
size_t len,
@@ -129,7 +126,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
return &mr->ibmr;
err_mr:
- mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+ (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_free:
kfree(mr);
@@ -159,7 +156,7 @@ static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
if (len & (mtt_size-1ULL)) {
WARN(1 ,
"write_block: len %llx is not aligned to mtt_size %llx\n",
- (long long)len, (long long)mtt_size);
+ (unsigned long long)len, (unsigned long long)mtt_size);
return -EINVAL;
}
@@ -203,8 +200,6 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
- struct ib_umem_chunk *chunk;
- int j;
u64 len = 0;
int err = 0;
u64 mtt_size;
@@ -212,6 +207,8 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
u64 mtt_shift;
int start_index = 0;
int npages = 0;
+ struct scatterlist *sg;
+ int i;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
@@ -220,12 +217,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
mtt_shift = mtt->page_shift;
mtt_size = 1ULL << mtt_shift;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
if (cur_start_addr + len ==
- sg_dma_address(&chunk->page_list[j])) {
+ sg_dma_address(sg)) {
/* still the same block */
- len += sg_dma_len(&chunk->page_list[j]);
+ len += sg_dma_len(sg);
continue;
}
/* A new block is started ...*/
@@ -242,8 +238,8 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
goto out;
cur_start_addr =
- sg_dma_address(&chunk->page_list[j]);
- len = sg_dma_len(&chunk->page_list[j]);
+ sg_dma_address(sg);
+ len = sg_dma_len(sg);
}
/* Handle the last block */
@@ -319,8 +315,6 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 start_va,
int *num_of_mtts)
{
- struct ib_umem_chunk *chunk;
- int j;
u64 block_shift = MLX4_MAX_MTT_SHIFT;
u64 current_block_len = 0;
u64 current_block_start = 0;
@@ -330,14 +324,18 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 total_len = 0;
u64 last_block_aligned_end = 0;
u64 min_shift = ilog2(umem->page_size);
+ struct scatterlist *sg;
+ int i;
+ u64 next_block_start;
+ u64 current_block_end;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
/* Initialization - save the first chunk start as
the current_block_start - block means contiguous pages.
*/
if (current_block_len == 0 && current_block_start == 0) {
first_block_start = current_block_start =
- sg_dma_address(&chunk->page_list[0]);
+ sg_dma_address(sg);
/* Find the bits that are different between
the physical address and the virtual
address for the start of the MR.
@@ -361,13 +359,12 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
, block_shift);
}
- /* Go over the scatter entries in the current chunk, check
+ /* Go over the scatter entries and check
if they continue the previous scatter entry.
*/
- for (j = 0; j < chunk->nmap; ++j) {
- u64 next_block_start =
- sg_dma_address(&chunk->page_list[j]);
- u64 current_block_end = current_block_start
+ next_block_start =
+ sg_dma_address(sg);
+ current_block_end = current_block_start
+ current_block_len;
/* If we have a split (non-contig.) between two block*/
if (current_block_end != next_block_start) {
@@ -392,7 +389,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
/* Start a new block */
current_block_start = next_block_start;
current_block_len =
- sg_dma_len(&chunk->page_list[j]);
+ sg_dma_len(sg);
continue;
}
/* The scatter entry is another part of
@@ -402,8 +399,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
which merge some blocks together.
*/
current_block_len +=
- sg_dma_len(&chunk->page_list[j]);
- }
+ sg_dma_len(sg);
}
/* Account for the last block in the total len */
@@ -416,7 +412,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
WARN((total_len & ((1ULL<<block_shift)-1ULL)),
" misaligned total length detected (%llu, %llu)!",
- (long long)total_len, (long long)block_shift);
+ (unsigned long long)total_len, (unsigned long long)block_shift);
*num_of_mtts = total_len >> block_shift;
end:
@@ -426,16 +422,19 @@ end:
*/
WARN(1,
"mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
- (long long)block_shift);
+ (unsigned long long)block_shift);
block_shift = min_shift;
}
return block_shift;
}
-#ifdef __linux__
+/* No support for Shared MR */
+#if 0
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
struct proc_dir_entry *mr_proc_entry;
mode_t mode = S_IFREG;
char name_buff[16];
@@ -475,8 +474,51 @@ static int is_shared_mr(int access_flags)
IB_ACCESS_SHARED_MR_OTHER_WRITE));
}
+
+static void free_smr_info(struct mlx4_ib_mr *mr)
+{
+	/* Once the master/parent shared MR is deregistered it can no
+	   longer be shared - its mr_id is returned to the kernel as
+	   part of ib_uverbs_dereg_mr and may be handed out again by a
+	   later reg_mr.
+	*/
+ char name_buff[16];
+
+ sprintf(name_buff, "%X", mr->smr_info->mr_id);
+	/* remove_proc_entry() checks internally that no operation has
+	   started on that procfs file; if one is in progress, the
+	   current process waits until it completes. That is why no
+	   extra synchronization is needed when we release the shared
+	   umem below.
+	*/
+ remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
+ kfree(mr->smr_info);
+ mr->smr_info = NULL;
+}
#endif
+static void mlx4_invalidate_umem(void *invalidation_cookie,
+ struct ib_umem *umem,
+ unsigned long addr, size_t size)
+{
+ struct mlx4_ib_mr *mr = (struct mlx4_ib_mr *)invalidation_cookie;
+
+	/* This function is called under the peer client's lock, so its resources are protected against races */
+ if (atomic_inc_return(&mr->invalidated) > 1) {
+ umem->invalidation_ctx->inflight_invalidation = 1;
+ goto end;
+ }
+
+ umem->invalidation_ctx->peer_callback = 1;
+ mlx4_mr_free(to_mdev(mr->ibmr.device)->dev, &mr->mmr);
+ ib_umem_release(umem);
+ complete(&mr->invalidation_comp);
+
+end:
+ return;
+}
+
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata,
@@ -487,18 +529,20 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int shift;
int err;
int n;
+ struct ib_peer_memory_client *ib_peer_mem;
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
+ mr->umem = ib_umem_get_ex(pd->uobject->context, start, length,
+ access_flags, 0, 1);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
+ ib_peer_mem = mr->umem->ib_peer_mem;
n = ib_umem_page_count(mr->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
&n);
@@ -516,7 +560,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
-#ifdef __linux__
+/* No support for Shared MR */
+#if 0
/* Check whether MR should be shared */
if (is_shared_mr(access_flags)) {
/* start address and length must be aligned to page size in order
@@ -531,10 +576,32 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
}
#endif
+ if (ib_peer_mem) {
+ if (access_flags & IB_ACCESS_MW_BIND) {
+			/* Prevent binding MWs on peer-client memory.
+			 * mlx4_invalidate_umem must be void,
+			 * therefore mlx4_mr_free must not fail
+			 * when peer clients are used. */
+ err = -ENOSYS;
+			pr_err("MW is not supported with peer memory client\n");
+ goto err_smr;
+ }
+ init_completion(&mr->invalidation_comp);
+ ib_umem_activate_invalidation_notifier(mr->umem,
+ mlx4_invalidate_umem, mr);
+ }
+
+ atomic_set(&mr->invalidated, 0);
return &mr->ibmr;
+err_smr:
+/* No support for Shared MR */
+#if 0
+ if (mr->smr_info)
+ free_smr_info(mr);
+#endif
err_mr:
- mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+ (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_umem:
ib_umem_release(mr->umem);
@@ -545,41 +612,106 @@ err_free:
return ERR_PTR(err);
}
-
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
+ struct ib_umem *umem = mr->umem;
+ int ret;
- mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
- if (mr->smr_info) {
- /* When master/parent shared mr is dereged there is
- no ability to share this mr any more - its mr_id will be
- returned to the kernel as part of ib_uverbs_dereg_mr
- and may be allocated again as part of other reg_mr.
- */
- char name_buff[16];
-
- sprintf(name_buff, "%X", mr->smr_info->mr_id);
- /* Remove proc entry is checking internally that no operation
- was strated on that proc fs file and if in the middle
- current process will wait till end of operation.
- That's why no sync mechanism is needed when we release
- below the shared umem.
- */
-#ifdef __linux__
- remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
- kfree(mr->smr_info);
+/* No support for Shared MR */
+#if 0
+ if (mr->smr_info)
+ free_smr_info(mr);
#endif
+
+ if (atomic_inc_return(&mr->invalidated) > 1) {
+ wait_for_completion(&mr->invalidation_comp);
+ goto end;
+ }
+
+ ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+ if (ret) {
+		/* No error is expected here, except when memory windows
+		 * are bound to the MR, which is not supported with
+		 * peer memory clients */
+ atomic_set(&mr->invalidated, 0);
+ return ret;
}
- if (mr->umem)
+ if (!umem)
+ goto end;
+
ib_umem_release(mr->umem);
+end:
kfree(mr);
return 0;
}
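+/* Allocate a memory window of the requested type on the given PD and
+ * enable it; the consumer rkey comes from the new hardware MW.
+ */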
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_mw *mw;
+ int err;
+
+ mw = kmalloc(sizeof(*mw), GFP_KERNEL);
+ if (!mw)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, (enum mlx4_mw_type)type, &mw->mmw);
+ if (err)
+ goto err_free;
+
+ err = mlx4_mw_enable(dev->dev, &mw->mmw);
+ if (err)
+ goto err_mw;
+
+ mw->ibmw.rkey = mw->mmw.key;
+
+ return &mw->ibmw;
+
+err_mw:
+ mlx4_mw_free(dev->dev, &mw->mmw);
+
+err_free:
+ kfree(mw);
+
+ return ERR_PTR(err);
+}
+
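+/* Bind the window by posting an IB_WR_BIND_MW work request on the QP;
+ * on success the MW's rkey is advanced to the newly generated key.
+ */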
+int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind)
+{
+ struct ib_send_wr wr;
+ struct ib_send_wr *bad_wr;
+ int ret;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.opcode = IB_WR_BIND_MW;
+ wr.wr_id = mw_bind->wr_id;
+ wr.send_flags = mw_bind->send_flags;
+ wr.wr.bind_mw.mw = mw;
+ wr.wr.bind_mw.bind_info = mw_bind->bind_info;
+ wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);
+
+ ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
+ if (!ret)
+ mw->rkey = wr.wr.bind_mw.rkey;
+
+ return ret;
+}
+
+int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct mlx4_ib_mw *mw = to_mmw(ibmw);
+
+ mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
+ kfree(mw);
+
+ return 0;
+}
+
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
{
@@ -606,7 +738,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
return &mr->ibmr;
err_mr:
- mlx4_mr_free(dev->dev, &mr->mmr);
+ (void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
kfree(mr);
@@ -685,7 +817,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
return &fmr->ibfmr;
err_mr:
- mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
+ (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
err_free:
kfree(fmr);
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
index c5ebe6b..b3d9695 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
@@ -45,13 +45,11 @@
#include <linux/mlx4/driver.h>
#include <linux/io.h>
-#ifndef __linux__
-#define asm __asm
-#endif
-
#include "mlx4_ib.h"
#include "user.h"
+#define asm __asm
+
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
};
@@ -111,6 +109,8 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
+	[IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
#ifndef wc_wmb
@@ -263,7 +263,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
/* Pad the remainder of the WQE with an inline data segment. */
if (size > s) {
inl = wqe + s;
- inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl));
+ inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
}
ctrl->srcrb_flags = 0;
ctrl->fence_size = size / 16;
@@ -274,7 +274,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
wmb();
ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
- (n & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0);
+ (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
}
@@ -573,6 +573,12 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(dev,
+ qp->sqp_proxy_rcv[i].map))) {
+ pr_warn("ib_dma_map_single failed\n");
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ goto err;
+ }
}
return 0;
@@ -602,15 +608,6 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
kfree(qp->sqp_proxy_rcv);
}
-static int qp_has_rq(struct ib_qp_init_attr *attr)
-{
- if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
- return 0;
-
- return !attr->srq;
-}
-
-#ifdef __linux__
static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp,
struct ib_qp_init_attr *attr, int *qpn)
{
@@ -644,7 +641,7 @@ static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp,
err = mlx4_ib_steer_qp_alloc(dev, tss_align_num, &tss_base);
else
err = mlx4_qp_reserve_range(dev->dev, tss_align_num,
- tss_align_num, &tss_base, 1);
+ tss_align_num, &tss_base, MLX4_RESERVE_BF_QP);
if (err)
goto err1;
@@ -791,7 +788,6 @@ static void free_qpg_qpn(struct mlx4_ib_qp *mqp, int qpn)
break;
}
}
-#endif
static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
struct ib_qp_init_attr *attr, int *qpn)
@@ -800,10 +796,12 @@ static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
switch (attr->qpg_type) {
case IB_QPG_NONE:
- /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
- * BlueFlame setup flow wrongly causes VLAN insertion. */
+ /* Raw packet QPNs may not have bits 6,7 set in their qp_num;
+ * otherwise, the WQE BlueFlame setup flow wrongly causes
+ * VLAN insertion. */
if (attr->qp_type == IB_QPT_RAW_PACKET) {
- err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 1);
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn,
+ MLX4_RESERVE_BF_QP);
} else {
if(qp->flags & MLX4_IB_QP_NETIF)
err = mlx4_ib_steer_qp_alloc(dev, 1, qpn);
@@ -812,15 +810,11 @@ static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
}
break;
case IB_QPG_PARENT:
-#ifdef __linux__
err = init_qpg_parent(dev, qp, attr, qpn);
-#endif
break;
case IB_QPG_CHILD_TX:
case IB_QPG_CHILD_RX:
-#ifdef __linux__
err = alloc_qpg_qpn(attr, qp, qpn);
-#endif
break;
default:
qp->qpg_type = IB_QPG_NONE;
@@ -844,15 +838,11 @@ static void free_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_release_range(dev->dev, qpn, 1);
break;
case IB_QPG_PARENT:
-#ifdef __linux__
free_qpg_parent(dev, qp);
-#endif
break;
case IB_QPG_CHILD_TX:
case IB_QPG_CHILD_RX:
-#ifdef __linux__
free_qpg_qpn(qp, qpn);
-#endif
break;
default:
break;
@@ -881,10 +871,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct mlx4_ib_qp *qp;
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
-#ifndef __linux__
- init_attr->qpg_type = IB_QPG_NONE;
-#endif
-
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
if (mlx4_is_mfunc(dev->dev) &&
@@ -941,6 +927,23 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->mlx4_ib_qp_type = qp_type;
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
+ if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ qp->flags |= MLX4_IB_QP_LSO;
+
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (dev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ !mlx4_is_mfunc(dev->dev))
+ qp->flags |= MLX4_IB_QP_NETIF;
+ else {
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -952,7 +955,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, mlx4_ib_qp_has_rq(init_attr), qp);
if (err)
goto err;
@@ -961,11 +964,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
int shift;
int n;
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ if (!udata || ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
err = -EFAULT;
goto err;
}
+ if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+ qp->flags |= MLX4_IB_QP_CAP_CROSS_CHANNEL;
+
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+ qp->flags |= MLX4_IB_QP_CAP_MANAGED_SEND;
+
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+ qp->flags |= MLX4_IB_QP_CAP_MANAGED_RECV;
+
qp->sq_no_prefetch = ucmd.sq_no_prefetch;
err = set_user_sq_size(dev, qp, &ucmd);
@@ -990,7 +1002,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
- if (qp_has_rq(init_attr)) {
+ if (mlx4_ib_qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &qp->db);
if (err)
@@ -999,23 +1011,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
} else {
qp->sq_no_prefetch = 0;
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
- qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
-
- if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
- qp->flags |= MLX4_IB_QP_LSO;
-
- if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP &&
- dev->dev->caps.steering_mode ==
- MLX4_STEERING_MODE_DEVICE_MANAGED &&
- !mlx4_is_mfunc(dev->dev))
- qp->flags |= MLX4_IB_QP_NETIF;
-
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
- if (qp_has_rq(init_attr)) {
+ if (mlx4_ib_qp_has_rq(init_attr)) {
err = mlx4_db_alloc(dev->dev, &qp->db, 0);
if (err)
goto err;
@@ -1097,7 +1097,7 @@ err_proxy:
free_proxy_bufs(pd->device, qp);
err_wrid:
if (pd->uobject) {
- if (qp_has_rq(init_attr))
+ if (mlx4_ib_qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
kfree(qp->sq.wrid);
@@ -1114,7 +1114,7 @@ err_buf:
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
- if (!pd->uobject && qp_has_rq(init_attr))
+ if (!pd->uobject && mlx4_ib_qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
if (qp->max_inline_data)
@@ -1145,7 +1145,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
{
if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- (void) __acquire(&recv_cq->lock);
+ __acquire(&recv_cq->lock);
} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
@@ -1159,7 +1159,7 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re
__releases(&send_cq->lock) __releases(&recv_cq->lock)
{
if (send_cq == recv_cq) {
- (void) __release(&recv_cq->lock);
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
@@ -1300,14 +1300,14 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
-#ifdef __linux__
static int check_qpg_attr(struct mlx4_ib_dev *dev,
struct ib_qp_init_attr *attr)
{
if (attr->qpg_type == IB_QPG_NONE)
return 0;
- if (attr->qp_type != IB_QPT_UD)
+ if (attr->qp_type != IB_QPT_UD &&
+ attr->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
if (attr->qpg_type == IB_QPG_PARENT) {
@@ -1346,7 +1346,6 @@ static int check_qpg_attr(struct mlx4_ib_dev *dev,
}
return 0;
}
-#endif
#define RESERVED_FLAGS_MASK ((((unsigned int)IB_QP_CREATE_RESERVED_END - 1) | IB_QP_CREATE_RESERVED_END) \
& ~(IB_QP_CREATE_RESERVED_START - 1))
@@ -1364,6 +1363,15 @@ static enum mlx4_ib_qp_flags to_mlx4_ib_qp_flags(enum ib_qp_create_flags ib_qp_f
if (ib_qp_flags & IB_QP_CREATE_NETIF_QP)
mlx4_ib_qp_flags |= MLX4_IB_QP_NETIF;
+ if (ib_qp_flags & IB_QP_CREATE_CROSS_CHANNEL)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_CROSS_CHANNEL;
+
+ if (ib_qp_flags & IB_QP_CREATE_MANAGED_SEND)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_MANAGED_SEND;
+
+ if (ib_qp_flags & IB_QP_CREATE_MANAGED_RECV)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_MANAGED_RECV;
+
/* reserved flags */
mlx4_ib_qp_flags |= (ib_qp_flags & RESERVED_FLAGS_MASK);
@@ -1387,6 +1395,9 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
* and only for kernel UD QPs.
*/
if (mlx4_qp_flags & ~(MLX4_IB_QP_LSO |
+ MLX4_IB_QP_CAP_CROSS_CHANNEL |
+ MLX4_IB_QP_CAP_MANAGED_SEND |
+ MLX4_IB_QP_CAP_MANAGED_RECV |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF))
@@ -1397,19 +1408,30 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
- if (init_attr->create_flags &&
- (udata ||
- ((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) &&
+ if ((mlx4_qp_flags &
+ (MLX4_IB_QP_CAP_CROSS_CHANNEL |
+ MLX4_IB_QP_CAP_MANAGED_SEND |
+ MLX4_IB_QP_CAP_MANAGED_RECV)) &&
+ !(to_mdev(device)->dev->caps.flags &
+ MLX4_DEV_CAP_FLAG_CROSS_CHANNEL)) {
+		pr_debug("%s does not support cross-channel operations\n",
+ to_mdev(device)->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if ((init_attr->create_flags &
+ ~(IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) &&
+ (((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type != IB_QPT_UD) ||
((mlx4_qp_flags & MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type > IB_QPT_GSI)))
return ERR_PTR(-EINVAL);
-#ifdef __linux__
err = check_qpg_attr(to_mdev(device), init_attr);
if (err)
return ERR_PTR(err);
-#endif
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
@@ -1559,32 +1581,42 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
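+/* Translate an IB static rate into the mlx4 encoding: 0 selects the
+ * current port rate, out-of-range values are rejected, and otherwise
+ * we step down to the nearest rate the device reports support for.
+ */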
+static int ib_rate_to_mlx4(struct mlx4_ib_dev *dev, u8 rate)
+{
+ if (rate == IB_RATE_PORT_CURRENT) {
+ return 0;
+ } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
+ return -EINVAL;
+ } else {
+ while (rate != IB_RATE_2_5_GBPS &&
+ !(1 << (rate + MLX4_STAT_RATE_OFFSET) &
+ dev->dev->caps.stat_rate_support))
+ --rate;
+ }
+
+ return rate + MLX4_STAT_RATE_OFFSET;
+}
+
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
- struct mlx4_ib_qp *qp, struct mlx4_qp_path *path,
- u8 port, int is_primary)
+ u8 *smac, u16 vlan_id, struct mlx4_ib_qp *qp,
+ struct mlx4_qp_path *path, u8 port, int is_primary)
{
- struct net_device *ndev;
- int err;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
- u8 mac[6];
- int is_mcast;
u16 vlan_tag;
int vidx;
int smac_index;
+ int err;
u64 u64_mac;
- u8 *smac;
struct mlx4_roce_smac_vlan_info *smac_info;
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
- if (ah->static_rate) {
- path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
- while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
- !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
- --path->static_rate;
- } else
- path->static_rate = 0;
+
+ err = ib_rate_to_mlx4(dev, ah->static_rate);
+ if (err < 0)
+ return err;
+ path->static_rate = err;
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
@@ -1614,7 +1646,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
else
smac_info = &qp->alt;
- vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
+ vlan_tag = vlan_id;
if (vlan_tag < 0x1000) {
if (smac_info->vid < 0x1000) {
/* both valid vlan ids */
@@ -1653,28 +1685,13 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
}
}
- err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
- if (err)
- return err;
/* get smac_index for RoCE use.
* If no smac was yet assigned, register one.
* If one was already assigned, but the new mac differs,
* unregister the old one and register the new one.
*/
- spin_lock(&dev->iboe.lock);
- ndev = dev->iboe.netdevs[port - 1];
- if (ndev) {
-#ifdef __linux__
- smac = ndev->dev_addr; /* fixme: cache this value */
-#else
- smac = IF_LLADDR(ndev); /* fixme: cache this value */
-#endif
-
u64_mac = mlx4_mac_to_u64(smac);
- } else
- u64_mac = dev->dev->caps.def_mac[port];
- spin_unlock(&dev->iboe.lock);
if (!smac_info->smac || smac_info->smac != u64_mac) {
/* register candidate now, unreg if needed, after success */
@@ -1688,7 +1705,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
} else
smac_index = smac_info->smac_index;
- memcpy(path->dmac, mac, 6);
+ memcpy(path->dmac, ah->dmac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* put MAC table smac index for IBoE */
path->grh_mylmc = (u8) (smac_index) | 0x80 ;
@@ -1712,24 +1729,21 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
-static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, const u8 *smac,
struct mlx4_qp_context *context)
{
struct net_device *ndev;
u64 u64_mac;
- u8 *smac;
int smac_index;
+
ndev = dev->iboe.netdevs[qp->port - 1];
if (ndev) {
-#ifdef __linux__
- smac = ndev->dev_addr; /* fixme: cache this value */
-#else
- smac = IF_LLADDR(ndev); /* fixme: cache this value */
-#endif
+ smac = IF_LLADDR(ndev);
u64_mac = mlx4_mac_to_u64(smac);
- } else
+ } else {
u64_mac = dev->dev->caps.def_mac[qp->port];
+ }
context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
if (!qp->pri.smac) {
@@ -1783,6 +1797,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
+ if (qp->max_inlr_data)
+ context->param3 |= cpu_to_be32(1 << 25);
+
if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
@@ -1834,12 +1851,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
- if (dev->counters[qp->port - 1] != -1) {
+ if (dev->counters[qp->port - 1].counter_index != -1) {
context->pri_path.counter_index =
- dev->counters[qp->port - 1];
+ dev->counters[qp->port - 1].counter_index;
optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
- } else
+ } else {
context->pri_path.counter_index = 0xff;
+ }
if (qp->flags & MLX4_IB_QP_NETIF &&
(qp->qpg_type == IB_QPG_NONE || qp->qpg_type == IB_QPG_PARENT)) {
@@ -1855,8 +1873,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
- if (attr_mask & IB_QP_AV) {
- if (mlx4_set_path(dev, &attr->ah_attr, qp, &context->pri_path,
+ if ((attr_mask & IB_QP_AV) && (ibqp->qp_type != IB_QPT_RAW_PACKET)) {
+ if (mlx4_set_path(dev, &attr->ah_attr, (u8 *)attr->smac,
+ attr_mask & IB_QP_VID ?
+				  attr->vlan_id : 0xffff,
+ qp, &context->pri_path,
attr_mask & IB_QP_PORT ?
attr->port_num : qp->port, 1))
goto out;
@@ -1879,12 +1900,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
- if (mlx4_set_path(dev, &attr->alt_ah_attr, qp, &context->alt_path,
+ if (mlx4_set_path(dev, &attr->alt_ah_attr, (u8 *)attr->smac,
+ attr_mask & IB_QP_ALT_VID ?
+ attr->alt_vlan_id : 0xffff,
+ qp, &context->alt_path,
attr->alt_port_num, 0))
goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
context->alt_path.ackto = attr->alt_timeout << 3;
+ context->alt_path.counter_index = dev->counters[attr->alt_port_num - 1].counter_index;
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
@@ -1943,6 +1968,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_M_EXT_CLASS_3)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_RQ);
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ context->params2 |= (qp->flags & MLX4_IB_QP_CAP_CROSS_CHANNEL ?
+ cpu_to_be32(MLX4_QP_BIT_COLL_MASTER) : 0);
+ context->params2 |= (qp->flags & MLX4_IB_QP_CAP_MANAGED_SEND ?
+ cpu_to_be32(MLX4_QP_BIT_COLL_MASTER | MLX4_QP_BIT_COLL_SYNC_SQ) : 0);
+ context->params2 |= (qp->flags & MLX4_IB_QP_CAP_MANAGED_RECV ?
+ cpu_to_be32(MLX4_QP_BIT_COLL_MASTER | MLX4_QP_BIT_COLL_SYNC_RQ) : 0);
+ }
+
if (ibqp->srq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
@@ -1997,6 +2031,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
+ if (ibqp->qp_type == IB_QPT_RAW_PACKET &&
+ (attr_mask & IB_QP_AV)) {
+ context->pri_path.sched_queue |=
+ ((attr->ah_attr.sl & 0xf) << 3);
+ context->pri_path.feup = 1 << 6;
+ }
is_eth = rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET;
if (is_eth) {
@@ -2007,13 +2047,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
- err = handle_eth_ud_smac_index(dev, qp, context);
+ err = handle_eth_ud_smac_index(dev, qp, (const u8 *)attr->smac, context);
if (err)
return -EINVAL;
}
}
}
+	if (ibqp->qp_type == IB_QPT_UD && is_eth &&
+	    (new_state == IB_QPS_RTR)) {
+ context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
+ optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
+ }
+
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
@@ -2072,7 +2118,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
for (i = 0; i < qp->sq.wqe_cnt; ++i) {
ctrl = get_send_wqe(qp, i);
- ctrl->owner_opcode = cpu_to_be32(1U << 31);
+ ctrl->owner_opcode = cpu_to_be32(1 << 31);
if (qp->sq_max_wqes_per_wr == 1)
ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
@@ -2080,6 +2126,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
+ if ((qp->port && rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET) && (qp->ibqp.qp_type == IB_QPT_RAW_PACKET))
+ context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
+ MLX4_IB_LINK_TYPE_ETH;
+
err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
to_mlx4_state(new_state), context, optpar,
sqd_event, &qp->mqp);
@@ -2268,14 +2319,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
+ int ll;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ ll = IB_LINK_LAYER_UNSPECIFIED;
+ } else {
+ int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+ }
+
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask & ~IB_M_QP_MOD_VEND_MASK)) {
+ attr_mask & ~IB_M_QP_MOD_VEND_MASK, ll)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
@@ -2299,11 +2358,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
- (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
- IB_LINK_LAYER_ETHERNET))
- goto out;
-
if (attr_mask & IB_QP_PKEY_INDEX) {
int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
@@ -2421,11 +2475,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (header_size <= spc) {
- inl->byte_count = cpu_to_be32(1U << 31 | header_size);
+ inl->byte_count = cpu_to_be32(1 << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
i = 1;
} else {
- inl->byte_count = cpu_to_be32(1U << 31 | spc);
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
memcpy(inl + 1, sqp->header_buf, spc);
inl = (void *) (inl + 1) + spc;
@@ -2444,7 +2498,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
* of 16 mod 64.
*/
wmb();
- inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
+ inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
i = 2;
}
@@ -2470,7 +2524,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
int is_eth;
int is_vlan = 0;
int is_grh;
- u16 vlan = 0;
+ u16 uninitialized_var(vlan);
int err = 0;
send_size = 0;
@@ -2497,8 +2551,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
return err;
}
- vlan = rdma_get_vlan_id(&sgid);
- is_vlan = vlan < 0x1000;
+ if (is_eth && ah->av.eth.vlan != 0xffff) {
+		vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
+ is_vlan = 1;
+ }
}
ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
@@ -2565,7 +2621,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
}
if (is_eth) {
- u8 smac[6];
+ u8 *smac;
struct in6_addr in6;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
@@ -2577,8 +2633,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
- rdma_get_ll_mac(&in6, smac);
+
+ if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
+ smac = IF_LLADDR(to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]);
+ else
+ smac = ah->av.eth.s_mac; /* use the src mac of the tunnel */
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
+
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
@@ -2628,11 +2689,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (header_size <= spc) {
- inl->byte_count = cpu_to_be32(1U << 31 | header_size);
+ inl->byte_count = cpu_to_be32(1 << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
i = 1;
} else {
- inl->byte_count = cpu_to_be32(1U << 31 | spc);
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
memcpy(inl + 1, sqp->header_buf, spc);
inl = (void *) (inl + 1) + spc;
@@ -2651,7 +2712,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
* of 16 mod 64.
*/
wmb();
- inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
+ inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
i = 2;
}
@@ -2679,9 +2740,12 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
static __be32 convert_access(int acc)
{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ?
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ?
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ?
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
}
@@ -2707,6 +2771,24 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
fseg->reserved[1] = 0;
}
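+/* Build the bind-MW WQE segment from the work request: mask the access
+ * flags down to the permitted bind permissions, flag type-2 and
+ * zero-based windows, and copy in the rkey, lkey, address and length.
+ */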
+static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
+{
+ bseg->flags1 =
+ convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
+ MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
+ MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
+ bseg->flags2 = 0;
+ if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
+ bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
+ if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
+ bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
+ bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
+ bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
+ bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
+ bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
+}
+
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
iseg->mem_key = cpu_to_be32(rkey);
@@ -2792,23 +2874,25 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_
hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(hdr.mac, ah->av.eth.mac, 6);
+ hdr.vlan = cpu_to_be16(ah->av.eth.vlan);
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (sizeof (hdr) <= spc) {
memcpy(inl + 1, &hdr, sizeof (hdr));
wmb();
- inl->byte_count = cpu_to_be32(1U << 31 | sizeof (hdr));
+ inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
i = 1;
} else {
memcpy(inl + 1, &hdr, spc);
wmb();
- inl->byte_count = cpu_to_be32(1U << 31 | spc);
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
inl = (void *) (inl + 1) + spc;
memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
wmb();
- inl->byte_count = cpu_to_be32(1U << 31 | (sizeof (hdr) - spc));
+ inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
i = 2;
}
@@ -2833,7 +2917,7 @@ static void set_mlx_icrc_seg(void *dseg)
*/
wmb();
- iseg->byte_count = cpu_to_be32((1U << 31) | 4);
+ iseg->byte_count = cpu_to_be32((1 << 31) | 4);
}
static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
@@ -2901,7 +2985,7 @@ static void add_zero_len_inline(void *wqe)
{
struct mlx4_wqe_inline_seg *inl = wqe;
memset(wqe, 0, 16);
- inl->byte_count = cpu_to_be32(1U << 31);
+ inl->byte_count = cpu_to_be32(1 << 31);
}
static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr,
@@ -3102,6 +3186,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
break;
+ case IB_WR_BIND_MW:
+ ctrl->srcrb_flags |=
+ cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
+ set_bind_seg(wqe, wr);
+ wqe += sizeof(struct mlx4_wqe_bind_seg);
+			size += sizeof(struct mlx4_wqe_bind_seg) / 16;
+			break;
default:
/* No extra segments required for sends */
break;
@@ -3246,14 +3336,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*/
wmb();
- if (wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
*bad_wr = wr;
err = -EINVAL;
goto out;
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
- (ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh;
+ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
@@ -3576,6 +3666,15 @@ done:
qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ if (qp->flags & MLX4_IB_QP_CAP_CROSS_CHANNEL)
+ qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+
+ if (qp->flags & MLX4_IB_QP_CAP_MANAGED_SEND)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+
+ if (qp->flags & MLX4_IB_QP_CAP_MANAGED_RECV)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
qp_init_attr->qpg_type = ibqp->qpg_type;
if (ibqp->qpg_type == IB_QPG_PARENT)
qp_init_attr->cap.qpg_tss_mask_sz = qp->qpg_data->qpg_tss_mask_sz;
@@ -3586,4 +3685,3 @@ out:
mutex_unlock(&qp->mutex);
return err;
}
-
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c b/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
index 6837b86..df4549f 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
@@ -56,8 +56,8 @@ static ssize_t show_admin_alias_guid(struct device *dev,
record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
- return sprintf(buf, "%llx\n", (long long)
- be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
+ return sprintf(buf, "%llx\n",
+ (long long)be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[8 * guid_index_in_rec]));
diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
index 088e440..3fed07c 100644
--- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -672,8 +672,8 @@ static int mthca_destroy_qp(struct ib_qp *qp)
return 0;
}
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
- int comp_vector,
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
+ struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@@ -681,6 +681,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
struct mthca_cq *cq;
int nent;
int err;
+ int entries = attr->cqe;
if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
return ERR_PTR(-EINVAL);
@@ -1010,12 +1011,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata, int mr_id)
{
struct mthca_dev *dev = to_mdev(pd->device);
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
int write_mtt_size;
@@ -1044,10 +1045,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mr->umem->page_size) - 1;
- n = 0;
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- n += chunk->nents;
-
+	n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt);
@@ -1064,25 +1062,27 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- mr->umem->page_size * k;
- /*
- * Be friendly to write_mtt and pass it chunks
- * of appropriate size.
- */
- if (i == write_mtt_size) {
- err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
- if (err)
- goto mtt_done;
- n += i;
- i = 0;
- }
+ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ mr->umem->page_size * k;
+ /*
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
+ */
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
}
}
+ }
+
if (i)
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
index 2264bcd..b4c70b4 100644
--- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -870,7 +870,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile b/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile
deleted file mode 100644
index 3090100..0000000
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o
-
-ib_ipoib-y := ipoib_main.o \
- ipoib_ib.o \
- ipoib_multicast.o \
- ipoib_verbs.o \
- ipoib_vlan.o \
- ipoib_ethtool.o
-ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
-ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
-
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
index 7d5e175..eb269a4 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -80,6 +80,7 @@
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/mutex.h>
+#include <linux/rbtree.h>
#include <asm/atomic.h>
@@ -313,6 +314,7 @@ struct ipoib_ethtool_st {
*/
struct ipoib_dev_priv {
spinlock_t lock;
+ spinlock_t drain_lock;
struct ifnet *dev;
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 4fb39b4..814938c 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -383,6 +383,7 @@ ipoib_poll(struct ipoib_dev_priv *priv)
int n, i;
poll_more:
+ spin_lock(&priv->drain_lock);
for (;;) {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
@@ -401,6 +402,7 @@ poll_more:
if (n != IPOIB_NUM_WC)
break;
}
+ spin_unlock(&priv->drain_lock);
if (ib_req_notify_cq(priv->recv_cq,
IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS))
@@ -707,6 +709,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv)
{
int i, n;
+ spin_lock(&priv->drain_lock);
do {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
@@ -727,6 +730,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv)
ipoib_ib_handle_rx_wc(priv, priv->ibwc + i);
}
} while (n == IPOIB_NUM_WC);
+ spin_unlock(&priv->drain_lock);
spin_lock(&priv->lock);
while (ipoib_poll_tx(priv))
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 695621f..35e16417 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -832,6 +832,7 @@ ipoib_priv_alloc(void)
priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
spin_lock_init(&priv->lock);
+ spin_lock_init(&priv->drain_lock);
mutex_init(&priv->vlan_mutex);
INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs);
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 9c7bcec..4c04da1 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -466,12 +466,20 @@ void ipoib_mcast_join_task(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, mcast_task.work);
struct ifnet *dev = priv->dev;
+ struct ib_port_attr attr;
ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);
if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
return;
+ if (ib_query_port(priv->ca, priv->port, &attr) ||
+ attr.state != IB_PORT_ACTIVE) {
+		ipoib_dbg(priv, "%s: port state is not ACTIVE (state = %d); suspending task.\n",
+ __func__, attr.state);
+ return;
+ }
+
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
index 2e91d85..ace705c 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -129,7 +129,7 @@ sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
/* rdma_bind_addr handles bind races. */
SDP_WUNLOCK(ssk);
if (ssk->id == NULL)
- ssk->id = rdma_create_id(sdp_cma_handler, ssk, RDMA_PS_SDP);
+ ssk->id = rdma_create_id(sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
if (ssk->id == NULL) {
SDP_WLOCK(ssk);
return (ENOMEM);
@@ -1702,11 +1702,15 @@ int sdp_mod_usec = 0;
void
sdp_set_default_moderation(struct sdp_sock *ssk)
{
+ struct ib_cq_attr attr;
if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
return;
- ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
-}
+ memset(&attr, 0, sizeof(attr));
+ attr.moderation.cq_count = sdp_mod_count;
+ attr.moderation.cq_period = sdp_mod_usec;
+ ib_modify_cq(ssk->rx_ring.cq, &attr, IB_CQ_MODERATION);
+}
static void
sdp_dev_add(struct ib_device *device)
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
index 4e581ab..0b78212 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
@@ -736,7 +736,7 @@ sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
}
rx_cq = ib_create_cq(device, sdp_rx_irq, sdp_rx_cq_event_handler,
- ssk->socket, SDP_RX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED);
+ ssk->socket, SDP_RX_SIZE, 0);
if (IS_ERR(rx_cq)) {
rc = PTR_ERR(rx_cq);
diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
index f7d84be..6f54331 100644
--- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
+++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
@@ -438,7 +438,7 @@ sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
}
tx_cq = ib_create_cq(device, sdp_tx_irq, sdp_tx_cq_event_handler,
- ssk, SDP_TX_SIZE, IB_CQ_VECTOR_LEAST_ATTACHED);
+ ssk, SDP_TX_SIZE, 0);
if (IS_ERR(tx_cq)) {
rc = PTR_ERR(tx_cq);
diff --git a/sys/ofed/drivers/net/mlx4/Makefile b/sys/ofed/drivers/net/mlx4/Makefile
index dc0e2a3..05338e8 100644
--- a/sys/ofed/drivers/net/mlx4/Makefile
+++ b/sys/ofed/drivers/net/mlx4/Makefile
@@ -30,4 +30,4 @@ opt_inet6.h:
.include <bsd.kmod.mk>
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith
diff --git a/sys/ofed/drivers/net/mlx4/en_rx.c b/sys/ofed/drivers/net/mlx4/en_rx.c
index fa26326..39688a5 100644
--- a/sys/ofed/drivers/net/mlx4/en_rx.c
+++ b/sys/ofed/drivers/net/mlx4/en_rx.c
@@ -492,7 +492,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
if (nr)
mb->m_next = mb_list[nr];
mb = mb_list[nr];
- mb->m_len = frag_info[nr].frag_size;
+ mb->m_len = frag_info->frag_size;
dma = be64_to_cpu(rx_desc->data[nr].addr);
/* Allocate a replacement page */
diff --git a/sys/ofed/include/linux/device.h b/sys/ofed/include/linux/device.h
index f7bb0fb..87cf0e8 100644
--- a/sys/ofed/include/linux/device.h
+++ b/sys/ofed/include/linux/device.h
@@ -431,17 +431,6 @@ static inline char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
return p;
}
-static inline char *kasprintf(gfp_t gfp, const char *fmt, ...)
-{
- va_list ap;
- char *p;
-
- va_start(ap, fmt);
- p = kvasprintf(gfp, fmt, ap);
- va_end(ap);
-
- return p;
-}
-
+char *kasprintf(gfp_t, const char *, ...);
#endif /* _LINUX_DEVICE_H_ */
diff --git a/sys/ofed/include/linux/linux_compat.c b/sys/ofed/include/linux/linux_compat.c
index 081177d..5dc881d 100644
--- a/sys/ofed/include/linux/linux_compat.c
+++ b/sys/ofed/include/linux/linux_compat.c
@@ -712,6 +712,20 @@ vunmap(void *addr)
kfree(vmmap);
}
+
+char *
+kasprintf(gfp_t gfp, const char *fmt, ...)
+{
+ va_list ap;
+ char *p;
+
+ va_start(ap, fmt);
+ p = kvasprintf(gfp, fmt, ap);
+ va_end(ap);
+
+ return p;
+}
+
static void
linux_compat_init(void)
{
diff --git a/sys/ofed/include/linux/printk.h b/sys/ofed/include/linux/printk.h
new file mode 100644
index 0000000..3c97ae7
--- /dev/null
+++ b/sys/ofed/include/linux/printk.h
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FBSD_PRINTK_H_
+#define _FBSD_PRINTK_H_
+
+/* GID printing macros */
+#define GID_PRINT_FMT "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x"
+#define GID_PRINT_ARGS(gid_raw) htons(((u16 *)gid_raw)[0]), htons(((u16 *)gid_raw)[1]),\
+ htons(((u16 *)gid_raw)[2]), htons(((u16 *)gid_raw)[3]),\
+ htons(((u16 *)gid_raw)[4]), htons(((u16 *)gid_raw)[5]),\
+ htons(((u16 *)gid_raw)[6]), htons(((u16 *)gid_raw)[7])
+
+#endif /* _FBSD_PRINTK_H_ */
diff --git a/sys/ofed/include/rdma/ib_addr.h b/sys/ofed/include/rdma/ib_addr.h
index b711510..b564415 100644
--- a/sys/ofed/include/rdma/ib_addr.h
+++ b/sys/ofed/include/rdma/ib_addr.h
@@ -31,17 +31,20 @@
* SOFTWARE.
*/
-#if !defined(IB_ADDR_H)
+#ifndef IB_ADDR_H
#define IB_ADDR_H
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
#include <linux/socket.h>
+#include <linux/if_vlan.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
-#include <linux/if_vlan.h>
+#include <net/if_inet6.h>
+#include <net/ipv6.h>
struct rdma_addr_client {
atomic_t refcount;
@@ -72,7 +75,8 @@ struct rdma_dev_addr {
* rdma_translate_ip - Translate a local IP address to an RDMA hardware
* address.
*/
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr);
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+ u16 *vlan_id);
/**
* rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -101,6 +105,9 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr);
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
+ u16 *vlan_id);
static inline int ip_addr_size(struct sockaddr *addr)
{
@@ -130,50 +137,56 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
}
-static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid)
-{
- memset(gid->raw, 0, 16);
- *((u32 *)gid->raw) = cpu_to_be32(0xfe800000);
- if (vid < 0x1000) {
- gid->raw[12] = vid & 0xff;
- gid->raw[11] = vid >> 8;
- } else {
- gid->raw[12] = 0xfe;
- gid->raw[11] = 0xff;
- }
-
- memcpy(gid->raw + 13, mac + 3, 3);
- memcpy(gid->raw + 8, mac, 3);
- gid->raw[8] ^= 2;
-}
-
static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
{
-#ifdef __linux__
- return dev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_vlan_id(dev) : 0xffff;
-#else
uint16_t tag;
if (VLAN_TAG(__DECONST(struct ifnet *, dev), &tag) != 0)
return 0xffff;
return tag;
-#endif
}
-static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
- union ib_gid *gid)
+static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
{
- struct net_device *dev;
- u16 vid = 0xffff;
+ switch (addr->sa_family) {
+ case AF_INET:
+ ipv6_addr_set_v4mapped(((struct sockaddr_in *)addr)->sin_addr.s_addr,
+ (struct in6_addr *)gid);
+ break;
+ case AF_INET6:
+ memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr,
+ 16);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
- dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (dev) {
- vid = rdma_vlan_dev_vlan_id(dev);
- dev_put(dev);
+/* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
+static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
+{
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
+ struct sockaddr_in *out_in = (struct sockaddr_in *)out;
+ memset(out_in, 0, sizeof(*out_in));
+ out_in->sin_len = sizeof(*out_in);
+ out_in->sin_family = AF_INET;
+ memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4);
+ } else {
+ struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out;
+ memset(out_in, 0, sizeof(*out_in));
+ out_in->sin6_family = AF_INET6;
+ memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
}
+ return 0;
+}
- iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid);
+/* This function is called only for the loopback IP address (127.0.0.1)
+ * case, in which the sgid is not relevant.
+ */
+static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
+ union ib_gid *gid)
+{
}
static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
@@ -223,27 +236,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
return 0;
}
-#ifdef __linux__
-static inline int iboe_get_rate(struct net_device *dev)
-{
- struct ethtool_cmd cmd;
-
- if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings ||
- dev->ethtool_ops->get_settings(dev, &cmd))
- return IB_RATE_PORT_CURRENT;
-
- if (cmd.speed >= 40000)
- return IB_RATE_40_GBPS;
- else if (cmd.speed >= 30000)
- return IB_RATE_30_GBPS;
- else if (cmd.speed >= 20000)
- return IB_RATE_20_GBPS;
- else if (cmd.speed >= 10000)
- return IB_RATE_10_GBPS;
- else
- return IB_RATE_PORT_CURRENT;
-}
-#else
static inline int iboe_get_rate(struct net_device *dev)
{
if (dev->if_baudrate >= IF_Gbps(40))
@@ -257,11 +249,10 @@ static inline int iboe_get_rate(struct net_device *dev)
else
return IB_RATE_PORT_CURRENT;
}
-#endif
static inline int rdma_link_local_addr(struct in6_addr *addr)
{
- if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) &&
+ if (addr->s6_addr32[0] == htonl(0xfe800000) &&
addr->s6_addr32[1] == 0)
return 1;
@@ -280,6 +271,20 @@ static inline int rdma_is_multicast_addr(struct in6_addr *addr)
return addr->s6_addr[0] == 0xff;
}
+static inline void resolve_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+ if (addr->s6_addr[0] != 0xff)
+ return;
+
+#ifdef DUAL_MODE_MCAST_MAC
+ if (addr->s6_addr[1] == 0x0e) /* IPv4 */
+ ip_eth_mc_map(addr->s6_addr32[3], mac);
+ else
+#endif
+ ipv6_eth_mc_map(addr, mac);
+}
+
+
static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
{
int i;
@@ -300,12 +305,7 @@ static inline u16 rdma_get_vlan_id(union ib_gid *dgid)
static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev)
{
-#ifdef __linux__
- return dev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(dev) : 0;
-#else
return VLAN_TRUNKDEV(__DECONST(struct ifnet *, dev));
-#endif
}
#endif /* IB_ADDR_H */
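A hedged sketch of the new address/GID conversion helpers added above, round-tripping an IPv4 address through an IPv4-mapped GID; the loopback address is only illustrative:

static int
v4_gid_roundtrip(void)
{
	struct sockaddr_in in4;
	struct sockaddr_storage out;
	union ib_gid gid;

	memset(&in4, 0, sizeof(in4));
	in4.sin_len = sizeof(in4);
	in4.sin_family = AF_INET;
	in4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	/* Embed the IPv4 address as an IPv4-mapped GID... */
	if (rdma_ip2gid((struct sockaddr *)&in4, &gid) != 0)
		return (-EINVAL);
	/* ...and recover it; rdma_gid2ip() picks AF_INET because the GID
	 * is v4-mapped, so "out" must be large enough for either a
	 * sockaddr_in or a sockaddr_in6. */
	return (rdma_gid2ip((struct sockaddr *)&out, &gid));
}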
diff --git a/sys/ofed/include/rdma/ib_cache.h b/sys/ofed/include/rdma/ib_cache.h
index 00a2b8e..ad9a3c2 100644
--- a/sys/ofed/include/rdma/ib_cache.h
+++ b/sys/ofed/include/rdma/ib_cache.h
@@ -101,6 +101,22 @@ int ib_find_cached_pkey(struct ib_device *device,
u16 *index);
/**
+ * ib_find_exact_cached_pkey - Returns the PKey table index where a specified
+ * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit)
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the cached PKey table where the PKey was found.
+ *
+ * ib_find_exact_cached_pkey() searches the specified PKey table in
+ * the local software cache.
+ */
+int ib_find_exact_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ u16 pkey,
+ u16 *index);
+
+/**
* ib_get_cached_lmc - Returns a cached lmc table entry
* @device: The device to query.
* @port_num: The port number of the device to query.
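The difference from ib_find_cached_pkey() is the membership bit: the exact variant compares all 16 bits, so a full-member PKey (bit 15 set) will not match a limited-member table entry. A hedged sketch (the wrapper is hypothetical):

static int
find_full_member_pkey(struct ib_device *device, u8 port_num, u16 *index)
{
	/* Bit 15 set selects full membership; the exact lookup will not
	 * fall back to a 0x7fff limited-member entry. */
	return (ib_find_exact_cached_pkey(device, port_num, 0xffff, index));
}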
diff --git a/sys/ofed/include/rdma/ib_cm.h b/sys/ofed/include/rdma/ib_cm.h
index 40c24b6..a7ffaf9 100644
--- a/sys/ofed/include/rdma/ib_cm.h
+++ b/sys/ofed/include/rdma/ib_cm.h
@@ -497,7 +497,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
* message.
* @cm_id: Connection identifier associated with the connection message.
* @service_timeout: The lower 5-bits specify the maximum time required for
- * the sender to reply to to the connection message. The upper 3-bits
+ * the sender to reply to the connection message. The upper 3-bits
* specify additional control flags.
* @private_data: Optional user-defined private data sent with the
* message receipt acknowledgement.
@@ -601,4 +601,6 @@ struct ib_cm_sidr_rep_param {
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_param *param);
+int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
+
#endif /* IB_CM_H */
diff --git a/sys/ofed/include/rdma/ib_mad.h b/sys/ofed/include/rdma/ib_mad.h
index 32f8114..3d81b90 100644
--- a/sys/ofed/include/rdma/ib_mad.h
+++ b/sys/ofed/include/rdma/ib_mad.h
@@ -77,6 +77,15 @@
#define IB_MGMT_MAX_METHODS 128
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS 0x0000
+#define IB_MGMT_MAD_STATUS_BUSY 0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c
+
/* RMPP information */
#define IB_MGMT_RMPP_VERSION 1
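Note that the low bits of the MAD status word form a code field rather than fully independent flags (0x000c and 0x001c overlap 0x0004/0x0008), so wider codes should be tested first. A hedged classifier sketch:

static const char *
mad_status_str(u16 status)
{
	if (status == IB_MGMT_MAD_STATUS_SUCCESS)
		return ("success");
	if (status & IB_MGMT_MAD_STATUS_BUSY)
		return ("busy");
	if (status & IB_MGMT_MAD_STATUS_REDIRECT_REQD)
		return ("redirect required");
	/* Multi-bit codes before their single-bit subsets. */
	if ((status & IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE) ==
	    IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
		return ("invalid attribute value");
	if ((status & IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB) ==
	    IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
		return ("unsupported method/attribute");
	if (status & IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD)
		return ("unsupported method");
	if (status & IB_MGMT_MAD_STATUS_BAD_VERSION)
		return ("bad version");
	return ("unknown");
}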
diff --git a/sys/ofed/include/rdma/ib_pack.h b/sys/ofed/include/rdma/ib_pack.h
index af615a4..1678be7 100644
--- a/sys/ofed/include/rdma/ib_pack.h
+++ b/sys/ofed/include/rdma/ib_pack.h
@@ -263,7 +263,5 @@ int ib_ud_header_pack(struct ib_ud_header *header,
int ib_ud_header_unpack(void *buf,
struct ib_ud_header *header);
-int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf);
-int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh);
#endif /* IB_PACK_H */
diff --git a/sys/ofed/include/rdma/ib_peer_mem.h b/sys/ofed/include/rdma/ib_peer_mem.h
new file mode 100644
index 0000000..b2a8a4a
--- /dev/null
+++ b/sys/ofed/include/rdma/ib_peer_mem.h
@@ -0,0 +1,59 @@
+#if !defined(IB_PEER_MEM_H)
+#define IB_PEER_MEM_H
+
+#include <rdma/peer_mem.h>
+
+
+struct invalidation_ctx;
+struct ib_ucontext;
+
+struct ib_peer_memory_statistics {
+ unsigned long num_alloc_mrs;
+ unsigned long num_dealloc_mrs;
+ unsigned long num_reg_pages;
+ unsigned long num_dereg_pages;
+ unsigned long num_free_callbacks;
+};
+
+struct ib_peer_memory_client {
+ const struct peer_memory_client *peer_mem;
+
+ struct list_head core_peer_list;
+ struct list_head core_ticket_list;
+ unsigned long last_ticket;
+#ifdef __FreeBSD__
+ int holdcount;
+ int needwakeup;
+ struct cv peer_cv;
+#else
+ struct srcu_struct peer_srcu;
+#endif
+ struct mutex lock;
+ struct kobject *kobj;
+ struct attribute_group peer_mem_attr_group;
+ struct ib_peer_memory_statistics stats;
+};
+
+struct core_ticket {
+ unsigned long key;
+ void *context;
+ struct list_head ticket_list;
+};
+
+struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
+ size_t size, void **peer_client_context,
+ int *srcu_key);
+
+void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
+ void *peer_client_context,
+ int srcu_key);
+
+unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client,
+ void *context);
+int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client,
+ unsigned long key);
+struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
+ unsigned long key);
+#endif
+
+
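A hedged sketch of the ticket API declared above: a peer-memory context is filed under a key, looked up, and released. Any locking against ib_peer_client->lock is assumed to live inside the helpers:

static void *
ticket_roundtrip(struct ib_peer_memory_client *ib_peer_client, void *ctx)
{
	struct core_ticket *ticket;
	unsigned long key;
	void *found;

	key = ib_peer_insert_context(ib_peer_client, ctx);
	ticket = ib_peer_search_context(ib_peer_client, key);
	if (ticket == NULL)
		return (NULL);
	found = ticket->context;	/* == ctx */
	ib_peer_remove_context(ib_peer_client, key);
	return (found);
}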
diff --git a/sys/ofed/include/rdma/ib_sa.h b/sys/ofed/include/rdma/ib_sa.h
index 61588d9..65f1a00 100644
--- a/sys/ofed/include/rdma/ib_sa.h
+++ b/sys/ofed/include/rdma/ib_sa.h
@@ -154,6 +154,9 @@ struct ib_sa_path_rec {
u8 packet_life_time_selector;
u8 packet_life_time;
u8 preference;
+ u8 smac[ETH_ALEN];
+ u8 dmac[6];
+ __be16 vlan_id;
};
#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
@@ -251,127 +254,6 @@ struct ib_sa_service_rec {
u64 data64[2];
};
-enum {
- IB_SA_EVENT_TYPE_FATAL = 0x0,
- IB_SA_EVENT_TYPE_URGENT = 0x1,
- IB_SA_EVENT_TYPE_SECURITY = 0x2,
- IB_SA_EVENT_TYPE_SM = 0x3,
- IB_SA_EVENT_TYPE_INFO = 0x4,
- IB_SA_EVENT_TYPE_EMPTY = 0x7F,
- IB_SA_EVENT_TYPE_ALL = 0xFFFF
-};
-
-enum {
- IB_SA_EVENT_PRODUCER_TYPE_CA = 0x1,
- IB_SA_EVENT_PRODUCER_TYPE_SWITCH = 0x2,
- IB_SA_EVENT_PRODUCER_TYPE_ROUTER = 0x3,
- IB_SA_EVENT_PRODUCER_TYPE_CLASS_MANAGER = 0x4,
- IB_SA_EVENT_PRODUCER_TYPE_ALL = 0xFFFFFF
-};
-
-enum {
- IB_SA_SM_TRAP_GID_IN_SERVICE = 64,
- IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65,
- IB_SA_SM_TRAP_CREATE_MC_GROUP = 66,
- IB_SA_SM_TRAP_DELETE_MC_GROUP = 67,
- IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128,
- IB_SA_SM_TRAP_LINK_INTEGRITY = 129,
- IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130,
- IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131,
- IB_SA_SM_TRAP_BAD_M_KEY = 256,
- IB_SA_SM_TRAP_BAD_P_KEY = 257,
- IB_SA_SM_TRAP_BAD_Q_KEY = 258,
- IB_SA_SM_TRAP_SWITCH_BAD_P_KEY = 259,
- IB_SA_SM_TRAP_ALL = 0xFFFF
-};
-
-struct ib_sa_inform {
- union ib_gid gid;
- __be16 lid_range_begin;
- __be16 lid_range_end;
- u8 is_generic;
- u8 subscribe;
- __be16 type;
- union {
- struct {
- __be16 trap_num;
- __be32 qpn;
- u8 resp_time;
- __be32 producer_type;
- } generic;
- struct {
- __be16 device_id;
- __be32 qpn;
- u8 resp_time;
- __be32 vendor_id;
- } vendor;
- } trap;
-};
-
-struct ib_sa_notice {
- u8 is_generic;
- u8 type;
- union {
- struct {
- __be32 producer_type;
- __be16 trap_num;
- } generic;
- struct {
- __be32 vendor_id;
- __be16 device_id;
- } vendor;
- } trap;
- __be16 issuer_lid;
- __be16 notice_count;
- u8 notice_toggle;
- /*
- * Align data 16 bits off 64 bit field to match InformInfo definition.
- * Data contained within this field will then align properly.
- * See IB spec 1.2, sections 13.4.8.2 and 14.2.5.1.
- */
- u8 reserved[5];
- u8 data_details[54];
- union ib_gid issuer_gid;
-};
-
-/*
- * SM notice data details for:
- *
- * IB_SA_SM_TRAP_GID_IN_SERVICE = 64
- * IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65
- * IB_SA_SM_TRAP_CREATE_MC_GROUP = 66
- * IB_SA_SM_TRAP_DELETE_MC_GROUP = 67
- */
-struct ib_sa_notice_data_gid {
- u8 reserved[6];
- u8 gid[16];
- u8 padding[32];
-};
-
-/*
- * SM notice data details for:
- *
- * IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128
- */
-struct ib_sa_notice_data_port_change {
- __be16 lid;
- u8 padding[52];
-};
-
-/*
- * SM notice data details for:
- *
- * IB_SA_SM_TRAP_LINK_INTEGRITY = 129
- * IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130
- * IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131
- */
-struct ib_sa_notice_data_port_error {
- u8 reserved[2];
- __be16 lid;
- u8 port_num;
- u8 padding[49];
-};
-
#define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0)
#define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1)
#define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2)
@@ -528,56 +410,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
*/
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
-struct ib_inform_info {
- void *context;
- int (*callback)(int status,
- struct ib_inform_info *info,
- struct ib_sa_notice *notice);
- u16 trap_number;
-};
-
-/**
- * ib_sa_register_inform_info - Registers to receive notice events.
- * @device: Device associated with the registration.
- * @port_num: Port on the specified device to associate with the registration.
- * @trap_number: InformInfo trap number to register for.
- * @gfp_mask: GFP mask for memory allocations.
- * @callback: User callback invoked once the registration completes and to
- * report noticed events.
- * @context: User specified context stored with the ib_inform_reg structure.
- *
- * This call initiates a registration request with the SA for the specified
- * trap number. If the operation is started successfully, it returns
- * an ib_inform_info structure that is used to track the registration operation.
- * Users must free this structure by calling ib_unregister_inform_info,
- * even if the operation later fails. (The callback status is non-zero.)
- *
- * If the registration fails; status will be non-zero. If the registration
- * succeeds, the callback status will be zero, but the notice parameter will
- * be NULL. If the notice parameter is not NULL, a trap or notice is being
- * reported to the user.
- *
- * A status of -ENETRESET indicates that an error occurred which requires
- * reregisteration.
- */
-struct ib_inform_info *
-ib_sa_register_inform_info(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- u16 trap_number, gfp_t gfp_mask,
- int (*callback)(int status,
- struct ib_inform_info *info,
- struct ib_sa_notice *notice),
- void *context);
-
-/**
- * ib_sa_unregister_inform_info - Releases an InformInfo registration.
- * @info: InformInfo registration tracking structure.
- *
- * This call blocks until the registration request is destroyed. It may
- * not be called from within the registration callback.
- */
-void ib_sa_unregister_inform_info(struct ib_inform_info *info);
-
+/* Support GuidInfoRecord */
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
@@ -588,6 +421,4 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context),
void *context,
struct ib_sa_query **sa_query);
-
-
#endif /* IB_SA_H */
diff --git a/sys/ofed/include/rdma/ib_umem.h b/sys/ofed/include/rdma/ib_umem.h
index a825111..82f6cfa 100644
--- a/sys/ofed/include/rdma/ib_umem.h
+++ b/sys/ofed/include/rdma/ib_umem.h
@@ -37,9 +37,26 @@
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/dma-attrs.h>
+#include <linux/completion.h>
+#include <rdma/ib_peer_mem.h>
struct ib_ucontext;
-struct vm_area_struct;
+struct ib_umem;
+
+typedef void (*umem_invalidate_func_t)(void *invalidation_cookie,
+ struct ib_umem *umem,
+ unsigned long addr, size_t size);
+
+struct invalidation_ctx {
+ struct ib_umem *umem;
+ umem_invalidate_func_t func;
+ void *cookie;
+ unsigned long context_ticket;
+ int peer_callback;
+ int inflight_invalidation;
+ int peer_invalidated;
+ struct completion comp;
+};
struct ib_umem {
struct ib_ucontext *context;
@@ -48,55 +65,29 @@ struct ib_umem {
int page_size;
int writable;
int hugetlb;
- struct list_head chunk_list;
-#ifdef __linux__
struct work_struct work;
- struct mm_struct *mm;
-#else
- unsigned long start;
-#endif
unsigned long diff;
-};
-
-struct ib_cmem {
-
- struct ib_ucontext *context;
- size_t length;
- /* Link list of contiguous blocks being part of that cmem */
- struct list_head ib_cmem_block;
-
- /* Order of cmem block, 2^ block_order will equal number
- of physical pages per block
- */
- unsigned long block_order;
- /* Refernce counter for that memory area
- - When value became 0 pages will be returned to the kernel.
- */
- struct kref refcount;
-};
-
-
-struct ib_umem_chunk {
- struct list_head list;
- int nents;
+ unsigned long start;
+ struct sg_table sg_head;
int nmap;
- struct dma_attrs attrs;
- struct scatterlist page_list[0];
+ int npages;
+ /* peer memory that manages this umem */
+ struct ib_peer_memory_client *ib_peer_mem;
+ struct invalidation_ctx *invalidation_ctx;
+ int peer_mem_srcu_key;
+ /* peer memory private context */
+ void *peer_mem_client_context;
};
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync);
+struct ib_umem *ib_umem_get_ex(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access, int dmasync,
+ int invalidation_supported);
+void ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
+ umem_invalidate_func_t func,
+ void *cookie);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
-int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
- struct vm_area_struct *vma);
-struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
- unsigned long total_size,
- unsigned long page_size_order);
-void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem);
-int ib_umem_map_to_vma(struct ib_umem *umem,
- struct vm_area_struct *vma);
-
-
#endif /* IB_UMEM_H */
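A hedged sketch of the invalidation flow these declarations imply: pin with ib_umem_get_ex() requesting invalidation support, then arm the notifier. It assumes the usual ERR_PTR() return convention; names prefixed my_ are hypothetical:

static void
my_invalidate_cb(void *cookie, struct ib_umem *umem, unsigned long addr,
    size_t size)
{
	/* Tear down hardware mappings covering [addr, addr + size). */
}

static struct ib_umem *
my_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size,
    void *cookie)
{
	struct ib_umem *umem;

	umem = ib_umem_get_ex(context, addr, size, IB_ACCESS_LOCAL_WRITE,
	    0 /* dmasync */, 1 /* invalidation supported */);
	if (IS_ERR(umem))
		return (umem);
	ib_umem_activate_invalidation_notifier(umem, my_invalidate_cb, cookie);
	return (umem);
}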
diff --git a/sys/ofed/include/rdma/ib_user_verbs.h b/sys/ofed/include/rdma/ib_user_verbs.h
index 670d6e8..a07de88 100644
--- a/sys/ofed/include/rdma/ib_user_verbs.h
+++ b/sys/ofed/include/rdma/ib_user_verbs.h
@@ -43,6 +43,13 @@
* compatibility are made.
*/
#define IB_USER_VERBS_ABI_VERSION 6
+#define IB_USER_VERBS_CMD_THRESHOLD 50
+
+/*
+ * To support 6 legacy commands using the old extension style
+ */
+#define IB_USER_VERBS_LEGACY_CMD_FIRST 52
+#define IB_USER_VERBS_LEGACY_EX_CMD_LAST 56
enum {
IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,17 +92,15 @@ enum {
IB_USER_VERBS_CMD_OPEN_XRCD,
IB_USER_VERBS_CMD_CLOSE_XRCD,
IB_USER_VERBS_CMD_CREATE_XSRQ,
- IB_USER_VERBS_CMD_OPEN_QP,
- IB_USER_VERBS_CMD_ATTACH_FLOW,
- IB_USER_VERBS_CMD_DETACH_FLOW,
- IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
- IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP,
- IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP,
- IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP,
- IB_USER_VERBS_CMD_REG_XRC_RCV_QP,
- IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP,
+ IB_USER_VERBS_CMD_OPEN_QP
+};
+
+enum {
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW
};
+
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
@@ -125,12 +130,33 @@ struct ib_uverbs_comp_event_desc {
* the rest of the command struct based on these value.
*/
+#define IBV_RESP_TO_VERBS_RESP_EX_RAW(ex_ptr, ex_type, ibv_type, field) \
+ ((ibv_type *)((void *)(ex_ptr) + offsetof(ex_type, \
+ field) + sizeof((ex_ptr)->field)))
+
+#define IBV_RESP_TO_VERBS_RESP_EX(ex_ptr, ex_type, ibv_type) \
+ IBV_RESP_TO_VERBS_RESP_EX_RAW(ex_ptr, ex_type, ibv_type, comp_mask)
+
+
+#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
+#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
+#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
+
+#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
+
struct ib_uverbs_cmd_hdr {
__u32 command;
__u16 in_words;
__u16 out_words;
};
+struct ib_uverbs_ex_cmd_hdr {
+ __u64 response;
+ __u16 provider_in_words;
+ __u16 provider_out_words;
+ __u32 cmd_hdr_reserved;
+};
+
struct ib_uverbs_get_context {
__u64 response;
__u64 driver_data[0];
@@ -146,6 +172,11 @@ struct ib_uverbs_query_device {
__u64 driver_data[0];
};
+struct ib_uverbs_query_device_ex {
+ __u64 comp_mask;
+ __u64 driver_data[0];
+};
+
struct ib_uverbs_query_device_resp {
__u64 fw_ver;
__be64 node_guid;
@@ -269,6 +300,22 @@ struct ib_uverbs_dereg_mr {
__u32 mr_handle;
};
+struct ib_uverbs_alloc_mw {
+ __u64 response;
+ __u32 pd_handle;
+ __u8 mw_type;
+ __u8 reserved[3];
+};
+
+struct ib_uverbs_alloc_mw_resp {
+ __u32 mw_handle;
+ __u32 rkey;
+};
+
+struct ib_uverbs_dealloc_mw {
+ __u32 mw_handle;
+};
+
struct ib_uverbs_create_comp_channel {
__u64 response;
};
@@ -292,6 +339,30 @@ struct ib_uverbs_create_cq_resp {
__u32 cqe;
};
+enum ib_uverbs_create_cq_ex_comp_mask {
+ IB_UVERBS_CREATE_CQ_EX_CAP_FLAGS = (u64)1 << 0,
+};
+
+struct ib_uverbs_create_cq_ex {
+ __u64 comp_mask;
+ __u64 user_handle;
+ __u32 cqe;
+ __u32 comp_vector;
+ __s32 comp_channel;
+ __u32 reserved;
+ __u64 create_flags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_modify_cq_ex {
+ __u64 comp_mask;
+ __u32 cq_handle;
+ __u32 attr_mask;
+ __u16 cq_count;
+ __u16 cq_period;
+ __u32 cq_cap_flags;
+};
+
struct ib_uverbs_resize_cq {
__u64 response;
__u32 cq_handle;
@@ -543,6 +614,42 @@ struct ib_uverbs_modify_qp {
__u64 driver_data[0];
};
+enum ib_uverbs_modify_qp_ex_comp_mask {
+ IB_UVERBS_QP_ATTR_DCT_KEY = 1ULL << 0,
+};
+
+struct ib_uverbs_modify_qp_ex {
+ __u32 comp_mask;
+ struct ib_uverbs_qp_dest dest;
+ struct ib_uverbs_qp_dest alt_dest;
+ __u32 qp_handle;
+ __u32 attr_mask;
+ __u32 qkey;
+ __u32 rq_psn;
+ __u32 sq_psn;
+ __u32 dest_qp_num;
+ __u32 qp_access_flags;
+ __u16 pkey_index;
+ __u16 alt_pkey_index;
+ __u8 qp_state;
+ __u8 cur_qp_state;
+ __u8 path_mtu;
+ __u8 path_mig_state;
+ __u8 en_sqd_async_notify;
+ __u8 max_rd_atomic;
+ __u8 max_dest_rd_atomic;
+ __u8 min_rnr_timer;
+ __u8 port_num;
+ __u8 timeout;
+ __u8 retry_cnt;
+ __u8 rnr_retry;
+ __u8 alt_port_num;
+ __u8 alt_timeout;
+ __u8 reserved[2];
+ __u64 dct_key;
+ __u64 driver_data[0];
+};
+
struct ib_uverbs_modify_qp_resp {
};
@@ -599,16 +706,6 @@ struct ib_uverbs_send_wr {
} wr;
};
-struct ibv_uverbs_flow_spec {
- __u32 type;
- __be32 src_ip;
- __be32 dst_ip;
- __be16 src_port;
- __be16 dst_port;
- __u8 l4_protocol;
- __u8 block_mc_loopback;
-};
-
struct ib_uverbs_post_send {
__u64 response;
__u32 qp_handle;
@@ -686,43 +783,117 @@ struct ib_uverbs_detach_mcast {
__u64 driver_data[0];
};
-struct ibv_kern_flow_spec {
+struct ib_uverbs_flow_spec_hdr {
__u32 type;
- __u32 reserved1;
+ __u16 size;
+ __u16 reserved;
+ /* followed by flow_spec */
+ __u64 flow_spec_data[0];
+};
+
+struct ib_kern_eth_filter {
+ __u8 dst_mac[6];
+ __u8 src_mac[6];
+ __be16 ether_type;
+ __be16 vlan_tag;
+};
+
+struct ib_uverbs_flow_spec_eth {
union {
+ struct ib_uverbs_flow_spec_hdr hdr;
struct {
- __be16 ethertype;
- __be16 vlan;
- __u8 vlan_present;
- __u8 mac[6];
- __u8 port;
- } eth;
- struct {
- __be32 qpn;
- } ib_uc;
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_kern_eth_filter val;
+ struct ib_kern_eth_filter mask;
+};
+
+struct ib_kern_ib_filter {
+ __be32 l3_type_qpn;
+ __u8 dst_gid[16];
+};
+
+struct ib_uverbs_flow_spec_ib {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
struct {
- __u8 mgid[16];
- } ib_mc;
- } l2_id;
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_kern_ib_filter val;
+ struct ib_kern_ib_filter mask;
+};
+
+struct ib_kern_ipv4_filter {
__be32 src_ip;
__be32 dst_ip;
- __be16 src_port;
+};
+
+struct ib_uverbs_flow_spec_ipv4 {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_kern_ipv4_filter val;
+ struct ib_kern_ipv4_filter mask;
+};
+
+struct ib_kern_tcp_udp_filter {
__be16 dst_port;
- __u8 l4_protocol;
- __u8 block_mc_loopback;
+ __be16 src_port;
+};
+
+struct ib_uverbs_flow_spec_tcp_udp {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_kern_tcp_udp_filter val;
+ struct ib_kern_tcp_udp_filter mask;
+};
+
+struct ib_uverbs_flow_attr {
+ __u32 type;
+ __u16 size;
+ __u16 priority;
+ __u8 num_of_specs;
__u8 reserved[2];
+ __u8 port;
+ __u32 flags;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+ struct ib_uverbs_flow_spec_hdr flow_specs[0];
};
-struct ib_uverbs_attach_flow {
+struct ib_uverbs_create_flow {
+ __u32 comp_mask;
__u32 qp_handle;
- __u32 priority;
- struct ibv_kern_flow_spec spec;
+ struct ib_uverbs_flow_attr flow_attr;
};
-struct ib_uverbs_detach_flow {
- __u32 qp_handle;
- __u32 priority;
- struct ibv_kern_flow_spec spec;
+struct ib_uverbs_create_flow_resp {
+ __u32 comp_mask;
+ __u32 flow_handle;
+};
+
+struct ib_uverbs_destroy_flow {
+ __u32 comp_mask;
+ __u32 flow_handle;
};
struct ib_uverbs_create_srq {
@@ -788,95 +959,22 @@ struct ib_uverbs_destroy_srq_resp {
__u32 events_reported;
};
-struct ib_uverbs_open_xrc_domain {
- __u64 response;
- __u32 fd;
- __u32 oflags;
- __u64 driver_data[0];
-};
-
-struct ib_uverbs_open_xrc_domain_resp {
- __u32 xrcd_handle;
-};
-
-struct ib_uverbs_close_xrc_domain {
- __u64 response;
- __u32 xrcd_handle;
- __u32 reserved;
- __u64 driver_data[0];
-};
-
-struct ib_uverbs_create_xrc_rcv_qp {
- __u64 response;
- __u64 user_handle;
- __u32 xrc_domain_handle;
- __u32 max_send_wr;
- __u32 max_recv_wr;
- __u32 max_send_sge;
- __u32 max_recv_sge;
- __u32 max_inline_data;
- __u8 sq_sig_all;
- __u8 qp_type;
- __u8 reserved[6];
- __u64 driver_data[0];
-};
-
-struct ib_uverbs_create_xrc_rcv_qp_resp {
- __u32 qpn;
- __u32 reserved;
-};
-struct ib_uverbs_modify_xrc_rcv_qp {
- __u32 xrc_domain_handle;
- __u32 qp_num;
- struct ib_uverbs_qp_dest dest;
- struct ib_uverbs_qp_dest alt_dest;
- __u32 attr_mask;
- __u32 qkey;
- __u32 rq_psn;
- __u32 sq_psn;
- __u32 dest_qp_num;
- __u32 qp_access_flags;
- __u16 pkey_index;
- __u16 alt_pkey_index;
- __u8 qp_state;
- __u8 cur_qp_state;
- __u8 path_mtu;
- __u8 path_mig_state;
- __u8 en_sqd_async_notify;
- __u8 max_rd_atomic;
- __u8 max_dest_rd_atomic;
- __u8 min_rnr_timer;
- __u8 port_num;
- __u8 timeout;
- __u8 retry_cnt;
- __u8 rnr_retry;
- __u8 alt_port_num;
- __u8 alt_timeout;
- __u8 reserved[6];
- __u64 driver_data[0];
+/*
+ * Legacy extended verbs related structures
+ */
+struct ib_uverbs_ex_cmd_hdr_legacy {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u16 provider_in_words;
+ __u16 provider_out_words;
+ __u32 cmd_hdr_reserved;
};
-struct ib_uverbs_query_xrc_rcv_qp {
+struct ib_uverbs_ex_cmd_resp1_legacy {
+ __u64 comp_mask;
__u64 response;
- __u32 xrc_domain_handle;
- __u32 qp_num;
- __u32 attr_mask;
- __u32 reserved;
- __u64 driver_data[0];
-};
-
-struct ib_uverbs_reg_xrc_rcv_qp {
- __u32 xrc_domain_handle;
- __u32 qp_num;
- __u64 driver_data[0];
};
-struct ib_uverbs_unreg_xrc_rcv_qp {
- __u32 xrc_domain_handle;
- __u32 qp_num;
- __u64 driver_data[0];
-};
-
-
#endif /* IB_USER_VERBS_H */
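A hedged sketch of how the command word is decoded under the new scheme: the low byte carries the command index and the top byte carries flags, of which IB_USER_VERBS_CMD_FLAG_EXTENDED selects the extended header format:

static int
uverbs_cmd_is_extended(__u32 command)
{
	__u32 flags;

	flags = (command & IB_USER_VERBS_CMD_FLAGS_MASK) >>
	    IB_USER_VERBS_CMD_FLAGS_SHIFT;
	/* The command index itself is
	 * command & IB_USER_VERBS_CMD_COMMAND_MASK. */
	return ((flags & IB_USER_VERBS_CMD_FLAG_EXTENDED) != 0);
}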
diff --git a/sys/ofed/include/rdma/ib_user_verbs_exp.h b/sys/ofed/include/rdma/ib_user_verbs_exp.h
new file mode 100644
index 0000000..557d4ba
--- /dev/null
+++ b/sys/ofed/include/rdma/ib_user_verbs_exp.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_VERBS_EXP_H
+#define IB_USER_VERBS_EXP_H
+
+#include <rdma/ib_user_verbs.h>
+
+enum {
+ IB_USER_VERBS_EXP_CMD_FIRST = 64
+};
+
+enum {
+ IB_USER_VERBS_EXP_CMD_CREATE_QP,
+ IB_USER_VERBS_EXP_CMD_MODIFY_CQ,
+ IB_USER_VERBS_EXP_CMD_MODIFY_QP,
+ IB_USER_VERBS_EXP_CMD_CREATE_CQ,
+ IB_USER_VERBS_EXP_CMD_QUERY_DEVICE,
+ IB_USER_VERBS_EXP_CMD_CREATE_DCT,
+ IB_USER_VERBS_EXP_CMD_DESTROY_DCT,
+ IB_USER_VERBS_EXP_CMD_QUERY_DCT,
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * Specifically:
+ * - Do not use pointer types -- pass pointers in __u64 instead.
+ * - Make sure that any structure larger than 4 bytes is padded to a
+ * multiple of 8 bytes. Otherwise the structure size will be
+ * different between 32-bit and 64-bit architectures.
+ */
+
+enum ib_uverbs_exp_create_qp_comp_mask {
+ IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS = (1ULL << 0),
+ IB_UVERBS_EXP_CREATE_QP_INL_RECV = (1ULL << 1),
+ IB_UVERBS_EXP_CREATE_QP_QPG = (1ULL << 2)
+};
+
+struct ib_uverbs_qpg_init_attrib {
+ __u32 tss_child_count;
+ __u32 rss_child_count;
+};
+
+struct ib_uverbs_qpg {
+ __u32 qpg_type;
+ union {
+ struct {
+ __u32 parent_handle;
+ __u32 reserved;
+ };
+ struct ib_uverbs_qpg_init_attrib parent_attrib;
+ };
+ __u32 reserved2;
+};
+
+struct ib_uverbs_exp_create_qp {
+ __u64 comp_mask;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 send_cq_handle;
+ __u32 recv_cq_handle;
+ __u32 srq_handle;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
+ __u8 sq_sig_all;
+ __u8 qp_type;
+ __u8 is_srq;
+ __u8 reserved;
+ __u64 qp_cap_flags;
+ __u32 max_inl_recv;
+ __u32 reserved1;
+ struct ib_uverbs_qpg qpg;
+ __u64 driver_data[0];
+};
+
+enum ib_uverbs_exp_create_qp_resp_comp_mask {
+ IB_UVERBS_EXP_CREATE_QP_RESP_INL_RECV = (1ULL << 0),
+};
+
+struct ib_uverbs_exp_create_qp_resp {
+ __u64 comp_mask;
+ __u32 qp_handle;
+ __u32 qpn;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
+ __u32 max_inl_recv;
+};
+
+struct ib_uverbs_create_dct {
+ __u64 comp_mask;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 cq_handle;
+ __u32 srq_handle;
+ __u32 access_flags;
+ __u32 flow_label;
+ __u64 dc_key;
+ __u8 min_rnr_timer;
+ __u8 tclass;
+ __u8 port;
+ __u8 pkey_index;
+ __u8 gid_index;
+ __u8 hop_limit;
+ __u8 mtu;
+ __u8 rsvd;
+ __u32 create_flags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_dct_resp {
+ __u32 dct_handle;
+ __u32 dctn;
+};
+
+struct ib_uverbs_destroy_dct {
+ __u64 comp_mask;
+ __u64 user_handle;
+};
+
+struct ib_uverbs_destroy_dct_resp {
+ __u64 reserved;
+};
+
+struct ib_uverbs_query_dct {
+ __u64 comp_mask;
+ __u64 dct_handle;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_dct_resp {
+ __u64 dc_key;
+ __u32 access_flags;
+ __u32 flow_label;
+ __u32 key_violations;
+ __u8 port;
+ __u8 min_rnr_timer;
+ __u8 tclass;
+ __u8 mtu;
+ __u8 pkey_index;
+ __u8 gid_index;
+ __u8 hop_limit;
+ __u8 state;
+ __u32 rsvd;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_exp_query_device {
+ __u64 comp_mask;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_exp_query_device_resp {
+ __u64 comp_mask;
+ struct ib_uverbs_query_device_resp base;
+ __u64 timestamp_mask;
+ __u64 hca_core_clock;
+ __u64 device_cap_flags2;
+ __u32 dc_rd_req;
+ __u32 dc_rd_res;
+ __u32 inline_recv_sz;
+ __u32 max_rss_tbl_sz;
+};
+
+#endif /* IB_USER_VERBS_EXP_H */
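A hedged sketch of the numbering convention above: experimental commands occupy their own space starting at IB_USER_VERBS_EXP_CMD_FIRST, so the on-the-wire command is the base plus the enum value (e.g. IB_USER_VERBS_EXP_CMD_CREATE_QP maps to 64):

static __u32
exp_cmd_to_wire(__u32 exp_cmd)
{
	return (IB_USER_VERBS_EXP_CMD_FIRST + exp_cmd);
}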
diff --git a/sys/ofed/include/rdma/ib_verbs.h b/sys/ofed/include/rdma/ib_verbs.h
index d167e42..d2607c8 100644
--- a/sys/ofed/include/rdma/ib_verbs.h
+++ b/sys/ofed/include/rdma/ib_verbs.h
@@ -48,10 +48,10 @@
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
+#include <linux/if_ether.h>
+#include <linux/mutex.h>
#include <asm/uaccess.h>
-#include <linux/rbtree.h>
-#include <linux/mutex.h>
extern struct workqueue_struct *ib_wq;
@@ -68,12 +68,14 @@ enum rdma_node_type {
RDMA_NODE_IB_CA = 1,
RDMA_NODE_IB_SWITCH,
RDMA_NODE_IB_ROUTER,
- RDMA_NODE_RNIC
+ RDMA_NODE_RNIC,
+ RDMA_NODE_MIC
};
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
- RDMA_TRANSPORT_IWARP
+ RDMA_TRANSPORT_IWARP,
+ RDMA_TRANSPORT_SCIF
};
enum rdma_transport_type
@@ -83,6 +85,7 @@ enum rdma_link_layer {
IB_LINK_LAYER_UNSPECIFIED,
IB_LINK_LAYER_INFINIBAND,
IB_LINK_LAYER_ETHERNET,
+ IB_LINK_LAYER_SCIF
};
enum ib_device_cap_flags {
@@ -120,7 +123,29 @@ enum ib_device_cap_flags {
IB_DEVICE_SHARED_MR = (1<<24),
IB_DEVICE_QPG = (1<<25),
IB_DEVICE_UD_RSS = (1<<26),
- IB_DEVICE_UD_TSS = (1<<27)
+ IB_DEVICE_UD_TSS = (1<<27),
+ IB_DEVICE_CROSS_CHANNEL = (1<<28),
+ IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
+ /*
+ * A device can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or
+ * IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B
+ * memory windows. It can set neither to indicate it doesn't support
+ * type 2 windows at all.
+ */
+ IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<30),
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<31),
+ IB_DEVICE_SIGNATURE_HANDOVER = (1LL<<32)
+};
+
+enum ib_signature_prot_cap {
+ IB_PROT_T10DIF_TYPE_1 = 1,
+ IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+ IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+ IB_GUARD_T10DIF_CRC = 1,
+ IB_GUARD_T10DIF_CSUM = 1 << 1,
};
enum ib_atomic_cap {
@@ -129,6 +154,12 @@ enum ib_atomic_cap {
IB_ATOMIC_GLOB
};
+enum ib_cq_create_flags {
+ IB_CQ_CREATE_CROSS_CHANNEL = 1 << 0,
+ IB_CQ_TIMESTAMP = 1 << 1,
+ IB_CQ_TIMESTAMP_TO_SYS_TIME = 1 << 2
+};
+
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
@@ -139,7 +170,7 @@ struct ib_device_attr {
u32 hw_ver;
int max_qp;
int max_qp_wr;
- int device_cap_flags;
+ u64 device_cap_flags;
int max_sge;
int max_sge_rd;
int max_cq;
@@ -171,6 +202,16 @@ struct ib_device_attr {
int max_rss_tbl_sz;
u16 max_pkeys;
u8 local_ca_ack_delay;
+ int comp_mask;
+ uint64_t timestamp_mask;
+ uint64_t hca_core_clock;
+ unsigned int sig_prot_cap;
+ unsigned int sig_guard_cap;
+};
+
+enum ib_device_attr_comp_mask {
+ IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK = 1ULL << 1,
+ IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = 1ULL << 2
};
enum ib_mtu {
@@ -199,7 +240,8 @@ enum ib_port_state {
IB_PORT_INIT = 2,
IB_PORT_ARMED = 3,
IB_PORT_ACTIVE = 4,
- IB_PORT_ACTIVE_DEFER = 5
+ IB_PORT_ACTIVE_DEFER = 5,
+ IB_PORT_DUMMY = -1 /* force enum signed */
};
enum ib_port_cap_flags {
@@ -326,7 +368,6 @@ struct ib_port_attr {
u8 active_width;
u8 active_speed;
u8 phys_state;
- enum rdma_link_layer link_layer;
};
enum ib_device_modify_flags {
@@ -373,10 +414,6 @@ enum ib_event_type {
IB_EVENT_GID_CHANGE,
};
-enum ib_event_flags {
- IB_XRC_QP_EVENT_FLAG = 0x80000000,
-};
-
struct ib_event {
struct ib_device *device;
union {
@@ -384,7 +421,6 @@ struct ib_event {
struct ib_qp *qp;
struct ib_srq *srq;
u8 port_num;
- u32 xrc_qp_num;
} element;
enum ib_event_type event;
};
@@ -450,6 +486,22 @@ enum ib_rate {
IB_RATE_300_GBPS = 18
};
+enum ib_mr_create_flags {
+ IB_MR_SIGNATURE_EN = 1,
+};
+
+/**
+ * ib_mr_init_attr - Memory region init attributes passed to routine
+ * ib_create_mr.
+ * @max_reg_descriptors: max number of registration descriptors that
+ * may be used with registration work requests.
+ * @flags: MR creation flags bit mask.
+ */
+struct ib_mr_init_attr {
+ int max_reg_descriptors;
+ u32 flags;
+};
+
/**
* ib_rate_to_mult - Convert the IB rate enum to a multiple of the
* base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be
@@ -465,6 +517,120 @@ int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
*/
int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+struct ib_cq_init_attr {
+ int cqe;
+ int comp_vector;
+ u32 flags;
+};
+
+enum ib_signature_type {
+ IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * T10-DIF Signature types
+ * T10-DIF types are defined by SCSI
+ * specifications.
+ */
+enum ib_t10_dif_type {
+ IB_T10DIF_NONE,
+ IB_T10DIF_TYPE1,
+ IB_T10DIF_TYPE2,
+ IB_T10DIF_TYPE3
+};
+
+/**
+ * Signature T10-DIF block-guard types
+ * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+ IB_T10DIF_CRC,
+ IB_T10DIF_CSUM
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ * domain.
+ * @type: T10-DIF type (0|1|2|3)
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @type3_inc_reftag: T10-DIF type 3 does not define the reference
+ * tag; it is the user's choice whether to increment it
+ * or not.
+ */
+struct ib_t10_dif_domain {
+ enum ib_t10_dif_type type;
+ enum ib_t10_dif_bg_type bg_type;
+ u32 pi_interval;
+ u16 bg;
+ u16 app_tag;
+ u32 ref_tag;
+ bool type3_inc_reftag;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signature type
+ * @sig: union of all signature domain attributes that may
+ * be used to set domain layout.
+ */
+struct ib_sig_domain {
+ enum ib_signature_type sig_type;
+ union {
+ struct ib_t10_dif_domain dif;
+ } sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ */
+struct ib_sig_attrs {
+ u8 check_mask;
+ struct ib_sig_domain mem;
+ struct ib_sig_domain wire;
+};
+
+enum ib_sig_err_type {
+ IB_SIG_BAD_GUARD,
+ IB_SIG_BAD_REFTAG,
+ IB_SIG_BAD_APPTAG,
+};
+
+/**
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+ enum ib_sig_err_type err_type;
+ u32 expected;
+ u32 actual;
+ u64 sig_err_offset;
+ u32 key;
+};
+
+enum ib_mr_status_check {
+ IB_MR_CHECK_SIG_STATUS = 1,
+};
+
+/**
+ * struct ib_mr_status - Memory region status container
+ *
+ * @fail_status: Bitmask of MR checks status. For each
+ * failed check a corresponding status bit is set.
+ * @sig_err: Additional info for IB_MR_CHECK_SIG_STATUS
+ * failure.
+ */
+struct ib_mr_status {
+ u32 fail_status;
+ struct ib_sig_err sig_err;
+};
+
/**
* mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
* enum.
@@ -480,6 +646,8 @@ struct ib_ah_attr {
u8 static_rate;
u8 ah_flags;
u8 port_num;
+ u8 dmac[6];
+ u16 vlan_id;
};
enum ib_wc_status {
@@ -532,6 +700,11 @@ enum ib_wc_flags {
IB_WC_WITH_IMM = (1<<1),
IB_WC_WITH_INVALIDATE = (1<<2),
IB_WC_IP_CSUM_OK = (1<<3),
+ IB_WC_WITH_SL = (1<<4),
+ IB_WC_WITH_SLID = (1<<5),
+ IB_WC_WITH_TIMESTAMP = (1<<6),
+ IB_WC_WITH_SMAC = (1<<7),
+ IB_WC_WITH_VLAN = (1<<8),
};
struct ib_wc {
@@ -553,6 +726,11 @@ struct ib_wc {
u8 dlid_path_bits;
u8 port_num; /* valid only for DR SMPs on switches */
int csum_ok;
+ struct {
+ uint64_t timestamp; /* timestamp = 0 indicates error */
+ } ts;
+ u8 smac[6];
+ u16 vlan_id;
};
enum ib_cq_notify_flags {
@@ -618,19 +796,37 @@ enum ib_qp_type {
IB_QPT_RC,
IB_QPT_UC,
IB_QPT_UD,
- IB_QPT_XRC,
IB_QPT_RAW_IPV6,
IB_QPT_RAW_ETHERTYPE,
IB_QPT_RAW_PACKET = 8,
IB_QPT_XRC_INI = 9,
IB_QPT_XRC_TGT,
+ IB_QPT_DC_INI,
IB_QPT_MAX,
+ /* Reserve a range for qp types internal to the low level driver.
+ * These qp types will not be visible at the IB core layer, so the
+ * IB_QPT_MAX usages should not be affected in the core layer
+ */
+ IB_QPT_RESERVED1 = 0x1000,
+ IB_QPT_RESERVED2,
+ IB_QPT_RESERVED3,
+ IB_QPT_RESERVED4,
+ IB_QPT_RESERVED5,
+ IB_QPT_RESERVED6,
+ IB_QPT_RESERVED7,
+ IB_QPT_RESERVED8,
+ IB_QPT_RESERVED9,
+ IB_QPT_RESERVED10,
};
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
- IB_QP_CREATE_NETIF_QP = 1 << 2,
+ IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
+ IB_QP_CREATE_MANAGED_SEND = 1 << 3,
+ IB_QP_CREATE_MANAGED_RECV = 1 << 4,
+ IB_QP_CREATE_NETIF_QP = 1 << 5,
+ IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -659,7 +855,7 @@ struct ib_qp_init_attr {
union {
struct ib_qp *qpg_parent; /* see qpg_type */
struct ib_qpg_init_attrib parent_attrib;
- } pp;
+ };
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
enum ib_qp_create_flags create_flags;
@@ -667,6 +863,43 @@ struct ib_qp_init_attr {
u8 port_num; /* special QP types only */
};
+enum {
+ IB_DCT_CREATE_FLAG_RCV_INLINE = 1 << 0,
+ IB_DCT_CREATE_FLAGS_MASK = IB_DCT_CREATE_FLAG_RCV_INLINE,
+};
+
+struct ib_dct_init_attr {
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_srq *srq;
+ u64 dc_key;
+ u8 port;
+ u32 access_flags;
+ u8 min_rnr_timer;
+ u8 tclass;
+ u32 flow_label;
+ enum ib_mtu mtu;
+ u8 pkey_index;
+ u8 gid_index;
+ u8 hop_limit;
+ u32 create_flags;
+};
+
+struct ib_dct_attr {
+ u64 dc_key;
+ u8 port;
+ u32 access_flags;
+ u8 min_rnr_timer;
+ u8 tclass;
+ u32 flow_label;
+ enum ib_mtu mtu;
+ u8 pkey_index;
+ u8 gid_index;
+ u8 hop_limit;
+ u32 key_violations;
+ u8 state;
+};
+
struct ib_qp_open_attr {
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
@@ -731,7 +964,12 @@ enum ib_qp_attr_mask {
IB_QP_PATH_MIG_STATE = (1<<18),
IB_QP_CAP = (1<<19),
IB_QP_DEST_QPN = (1<<20),
- IB_QP_GROUP_RSS = (1<<21)
+ IB_QP_GROUP_RSS = (1<<21),
+ IB_QP_DC_KEY = (1<<22),
+ IB_QP_SMAC = (1<<23),
+ IB_QP_ALT_SMAC = (1<<24),
+ IB_QP_VID = (1<<25),
+ IB_QP_ALT_VID = (1<<26)
};
enum ib_qp_state {
@@ -741,7 +979,8 @@ enum ib_qp_state {
IB_QPS_RTS,
IB_QPS_SQD,
IB_QPS_SQE,
- IB_QPS_ERR
+ IB_QPS_ERR,
+ IB_QPS_DUMMY = -1 /* force enum signed */
};
enum ib_mig_state {
@@ -750,6 +989,11 @@ enum ib_mig_state {
IB_MIG_ARMED
};
+enum ib_mw_type {
+ IB_MW_TYPE_1 = 1,
+ IB_MW_TYPE_2 = 2
+};
+
struct ib_qp_attr {
enum ib_qp_state qp_state;
enum ib_qp_state cur_qp_state;
@@ -776,6 +1020,40 @@ struct ib_qp_attr {
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
+ u8 smac[ETH_ALEN];
+ u8 alt_smac[ETH_ALEN];
+ u16 vlan_id;
+ u16 alt_vlan_id;
+
+};
+
+struct ib_qp_attr_ex {
+ enum ib_qp_state qp_state;
+ enum ib_qp_state cur_qp_state;
+ enum ib_mtu path_mtu;
+ enum ib_mig_state path_mig_state;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qp_num;
+ int qp_access_flags;
+ struct ib_qp_cap cap;
+ struct ib_ah_attr ah_attr;
+ struct ib_ah_attr alt_ah_attr;
+ u16 pkey_index;
+ u16 alt_pkey_index;
+ u8 en_sqd_async_notify;
+ u8 sq_draining;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 min_rnr_timer;
+ u8 port_num;
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 alt_port_num;
+ u8 alt_timeout;
+ u64 dct_key;
};
enum ib_wr_opcode {
@@ -787,13 +1065,27 @@ enum ib_wr_opcode {
IB_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD,
IB_WR_LSO,
- IB_WR_BIG_LSO,
IB_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV,
IB_WR_LOCAL_INV,
IB_WR_FAST_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ IB_WR_BIND_MW,
+ IB_WR_REG_SIG_MR,
+ /* reserve values for low level drivers' internal use.
+ * These values will not be used at all in the ib core layer.
+ */
+ IB_WR_RESERVED1 = 0xf0,
+ IB_WR_RESERVED2,
+ IB_WR_RESERVED3,
+ IB_WR_RESERVED4,
+ IB_WR_RESERVED5,
+ IB_WR_RESERVED6,
+ IB_WR_RESERVED7,
+ IB_WR_RESERVED8,
+ IB_WR_RESERVED9,
+ IB_WR_RESERVED10,
};
enum ib_send_flags {
@@ -801,21 +1093,12 @@ enum ib_send_flags {
IB_SEND_SIGNALED = (1<<1),
IB_SEND_SOLICITED = (1<<2),
IB_SEND_INLINE = (1<<3),
- IB_SEND_IP_CSUM = (1<<4)
-};
+ IB_SEND_IP_CSUM = (1<<4),
-enum ib_flow_types {
- IB_FLOW_ETH = 0,
- IB_FLOW_IB_UC = 1,
- IB_FLOW_IB_MC_IPV4 = 2,
- IB_FLOW_IB_MC_IPV6 = 3
-};
-
-enum {
- IB_FLOW_L4_NONE = 0,
- IB_FLOW_L4_OTHER = 3,
- IB_FLOW_L4_UDP = 5,
- IB_FLOW_L4_TCP = 6
+ /* reserve bits 26-31 for low level drivers' internal use */
+ IB_SEND_RESERVED_START = (1 << 26),
+ IB_SEND_RESERVED_END = (1 << 31),
+ IB_SEND_UMR_UNREG = (1<<5)
};
struct ib_sge {
@@ -830,6 +1113,23 @@ struct ib_fast_reg_page_list {
unsigned int max_page_list_len;
};
+/**
+ * struct ib_mw_bind_info - Parameters for a memory window bind operation.
+ * @mr: A memory region to bind the memory window to.
+ * @addr: The address where the memory window should begin.
+ * @length: The length of the memory window, in bytes.
+ * @mw_access_flags: Access flags from enum ib_access_flags for the window.
+ *
+ * This struct contains the shared parameters for type 1 and type 2
+ * memory window bind operations.
+ */
+struct ib_mw_bind_info {
+ struct ib_mr *mr;
+ u64 addr;
+ u64 length;
+ int mw_access_flags;
+};
+
struct ib_send_wr {
struct ib_send_wr *next;
u64 wr_id;
@@ -874,10 +1174,26 @@ struct ib_send_wr {
u32 rkey;
} fast_reg;
struct {
- struct ib_unpacked_lrh *lrh;
- u32 eth_type;
- u8 static_rate;
- } raw_ety;
+ int npages;
+ int access_flags;
+ u32 mkey;
+ struct ib_pd *pd;
+ u64 virt_addr;
+ u64 length;
+ int page_shift;
+ } umr;
+ struct {
+ struct ib_mw *mw;
+ /* The new rkey for the memory window. */
+ u32 rkey;
+ struct ib_mw_bind_info bind_info;
+ } bind_mw;
+ struct {
+ struct ib_sig_attrs *sig_attrs;
+ struct ib_mr *sig_mr;
+ int access_flags;
+ struct ib_sge *prot;
+ } sig_handover;
} wr;
u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
@@ -896,13 +1212,7 @@ enum ib_access_flags {
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
IB_ACCESS_MW_BIND = (1<<4),
IB_ACCESS_ALLOCATE_MR = (1<<5),
- IB_ACCESS_SHARED_MR_USER_READ = (1<<6),
- IB_ACCESS_SHARED_MR_USER_WRITE = (1<<7),
- IB_ACCESS_SHARED_MR_GROUP_READ = (1<<8),
- IB_ACCESS_SHARED_MR_GROUP_WRITE = (1<<9),
- IB_ACCESS_SHARED_MR_OTHER_READ = (1<<10),
- IB_ACCESS_SHARED_MR_OTHER_WRITE = (1<<11)
-
+ IB_ZERO_BASED = (1<<13)
};
struct ib_phys_buf {
@@ -925,13 +1235,16 @@ enum ib_mr_rereg_flags {
IB_MR_REREG_ACCESS = (1<<2)
};
+/**
+ * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
+ * @wr_id: Work request id.
+ * @send_flags: Flags from ib_send_flags enum.
+ * @bind_info: More parameters of the bind operation.
+ */
struct ib_mw_bind {
- struct ib_mr *mr;
u64 wr_id;
- u64 addr;
- u32 length;
int send_flags;
- int mw_access_flags;
+ struct ib_mw_bind_info bind_info;
};
struct ib_fmr_attr {
@@ -950,7 +1263,11 @@ struct ib_ucontext {
struct list_head srq_list;
struct list_head ah_list;
struct list_head xrcd_list;
+ struct list_head rule_list;
+ struct list_head dct_list;
int closing;
+ void *peer_mem_private_data;
+ char *peer_mem_name;
};
struct ib_uobject {
@@ -964,19 +1281,22 @@ struct ib_uobject {
int live;
};
+struct ib_udata;
+struct ib_udata_ops {
+ int (*copy_from)(void *dest, struct ib_udata *udata,
+ size_t len);
+ int (*copy_to)(struct ib_udata *udata, void *src,
+ size_t len);
+};
+
struct ib_udata {
+ struct ib_udata_ops *ops;
void __user *inbuf;
void __user *outbuf;
size_t inlen;
size_t outlen;
};
-struct ib_uxrc_rcv_object {
- struct list_head list; /* link to context's list */
- u32 qp_num;
- u32 domain_handle;
-};
-
struct ib_pd {
struct ib_device *device;
struct ib_uobject *uobject;
@@ -985,10 +1305,8 @@ struct ib_pd {
struct ib_xrcd {
struct ib_device *device;
- struct ib_uobject *uobject;
atomic_t usecnt; /* count all exposed resources */
struct inode *inode;
- struct rb_node node;
struct mutex tgt_qp_mutex;
struct list_head tgt_qp_list;
@@ -1000,6 +1318,23 @@ struct ib_ah {
struct ib_uobject *uobject;
};
+enum ib_cq_attr_mask {
+ IB_CQ_MODERATION = (1 << 0),
+ IB_CQ_CAP_FLAGS = (1 << 1)
+};
+
+enum ib_cq_cap_flags {
+ IB_CQ_IGNORE_OVERRUN = (1 << 0)
+};
+
+struct ib_cq_attr {
+ struct {
+ u16 cq_count;
+ u16 cq_period;
+ } moderation;
+ u32 cq_cap_flags;
+};
+
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
struct ib_cq {
@@ -1038,7 +1373,8 @@ struct ib_qp {
struct ib_srq *srq;
struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct list_head xrcd_list;
- atomic_t usecnt; /* count times opened, mcast attaches */
+ /* count times opened, mcast attaches, flow attaches */
+ atomic_t usecnt;
struct list_head open_list;
struct ib_qp *real_qp;
struct ib_uobject *uobject;
@@ -1047,6 +1383,16 @@ struct ib_qp {
u32 qp_num;
enum ib_qp_type qp_type;
enum ib_qpg_type qpg_type;
+ u8 port_num;
+};
+
+struct ib_dct {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_srq *srq;
+ u32 dct_num;
};
struct ib_mr {
@@ -1063,6 +1409,7 @@ struct ib_mw {
struct ib_pd *pd;
struct ib_uobject *uobject;
u32 rkey;
+ enum ib_mw_type type;
};
struct ib_fmr {
@@ -1073,30 +1420,128 @@ struct ib_fmr {
u32 rkey;
};
-struct ib_flow_spec {
- enum ib_flow_types type;
- union {
- struct {
- __be16 ethertype;
- __be16 vlan;
- u8 vlan_present;
- u8 mac[6];
- u8 port;
- } eth;
- struct {
- __be32 qpn;
- } ib_uc;
- struct {
- u8 mgid[16];
- } ib_mc;
- } l2_id;
+/* Supported steering options */
+enum ib_flow_attr_type {
+ /* steering according to rule specifications */
+ IB_FLOW_ATTR_NORMAL = 0x0,
+ /* default unicast and multicast rule -
+ * receive all Eth traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_ALL_DEFAULT = 0x1,
+ /* default multicast rule -
+ * receive all Eth multicast traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_MC_DEFAULT = 0x2,
+ /* sniffer rule - receive all port traffic */
+ IB_FLOW_ATTR_SNIFFER = 0x3
+};
+
+/* Supported steering header types */
+enum ib_flow_spec_type {
+ /* L2 headers */
+ IB_FLOW_SPEC_ETH = 0x20,
+ IB_FLOW_SPEC_IB = 0x21,
+ /* L3 header */
+ IB_FLOW_SPEC_IPV4 = 0x30,
+ /* L4 headers */
+ IB_FLOW_SPEC_TCP = 0x40,
+ IB_FLOW_SPEC_UDP = 0x41
+};
+
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+
+/* Flow steering rule priority is set according to its domain.
+ * Lower domain value means higher priority.
+ */
+enum ib_flow_domain {
+ IB_FLOW_DOMAIN_USER,
+ IB_FLOW_DOMAIN_ETHTOOL,
+ IB_FLOW_DOMAIN_RFS,
+ IB_FLOW_DOMAIN_NIC,
+ IB_FLOW_DOMAIN_NUM /* Must be last */
+};
+
+enum ib_flow_flags {
+ IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1
+};
+
+struct ib_flow_eth_filter {
+ u8 dst_mac[6];
+ u8 src_mac[6];
+ __be16 ether_type;
+ __be16 vlan_tag;
+};
+
+struct ib_flow_spec_eth {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_eth_filter val;
+ struct ib_flow_eth_filter mask;
+};
+
+struct ib_flow_ib_filter {
+ __be32 l3_type_qpn;
+ u8 dst_gid[16];
+};
+
+struct ib_flow_spec_ib {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_ib_filter val;
+ struct ib_flow_ib_filter mask;
+};
+
+struct ib_flow_ipv4_filter {
__be32 src_ip;
__be32 dst_ip;
- __be16 src_port;
+};
+
+struct ib_flow_spec_ipv4 {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_ipv4_filter val;
+ struct ib_flow_ipv4_filter mask;
+};
+
+struct ib_flow_tcp_udp_filter {
__be16 dst_port;
- u8 l4_protocol;
- u8 block_mc_loopback;
- u8 rule_type;
+ __be16 src_port;
+};
+
+struct ib_flow_spec_tcp_udp {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_tcp_udp_filter val;
+ struct ib_flow_tcp_udp_filter mask;
+};
+
+union ib_flow_spec {
+ struct {
+ enum ib_flow_spec_type type;
+ u16 size;
+ };
+ struct ib_flow_spec_ib ib;
+ struct ib_flow_spec_eth eth;
+ struct ib_flow_spec_ipv4 ipv4;
+ struct ib_flow_spec_tcp_udp tcp_udp;
+};
+
+struct ib_flow_attr {
+ enum ib_flow_attr_type type;
+ u16 size;
+ u16 priority;
+ u8 num_of_specs;
+ u8 port;
+ u32 flags;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+};
+
+struct ib_flow {
+ struct ib_qp *qp;
+ struct ib_uobject *uobject;
};
struct ib_mad;
@@ -1125,6 +1570,15 @@ struct ib_cache {
u8 *lmc_cache;
};
+enum verbs_values_mask {
+ IBV_VALUES_HW_CLOCK = 1 << 0
+};
+
+struct ib_device_values {
+ int values_mask;
+ uint64_t hwclock;
+};
+
struct ib_dma_mapping_ops {
int (*mapping_error)(struct ib_device *dev,
u64 dma_addr);
@@ -1169,6 +1623,8 @@ struct ib_dma_mapping_ops {
};
struct iw_cm_verbs;
+struct ib_exp_device_attr;
+struct ib_exp_qp_init_attr;
struct ib_device {
struct device *dma_device;
@@ -1257,12 +1713,13 @@ struct ib_device {
int (*post_recv)(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
- int comp_vector,
+ struct ib_cq * (*create_cq)(struct ib_device *device,
+ struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
- int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
- u16 cq_period);
+ int (*modify_cq)(struct ib_cq *cq,
+ struct ib_cq_attr *cq_attr,
+ int cq_attr_mask);
int (*destroy_cq)(struct ib_cq *cq);
int (*resize_cq)(struct ib_cq *cq, int cqe,
struct ib_udata *udata);
@@ -1289,6 +1746,9 @@ struct ib_device {
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
+ int (*destroy_mr)(struct ib_mr *mr);
+ struct ib_mr * (*create_mr)(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr);
struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
@@ -1301,7 +1761,8 @@ struct ib_device {
int num_phys_buf,
int mr_access_flags,
u64 *iova_start);
- struct ib_mw * (*alloc_mw)(struct ib_pd *pd);
+ struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
+ enum ib_mw_type type);
int (*bind_mw)(struct ib_qp *qp,
struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
@@ -1327,43 +1788,28 @@ struct ib_device {
struct ib_grh *in_grh,
struct ib_mad *in_mad,
struct ib_mad *out_mad);
- struct ib_srq * (*create_xrc_srq)(struct ib_pd *pd,
- struct ib_cq *xrc_cq,
- struct ib_xrcd *xrcd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata);
struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
struct ib_ucontext *ucontext,
struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
- int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr,
- u32 *qp_num);
- int (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd,
- u32 qp_num,
- struct ib_qp_attr *attr,
- int attr_mask);
- int (*query_xrc_rcv_qp)(struct ib_xrcd *xrcd,
- u32 qp_num,
- struct ib_qp_attr *attr,
- int attr_mask,
- struct ib_qp_init_attr *init_attr);
- int (*reg_xrc_rcv_qp)(struct ib_xrcd *xrcd,
- void *context,
- u32 qp_num);
- int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd,
- void *context,
- u32 qp_num);
- int (*attach_flow)(struct ib_qp *qp,
- struct ib_flow_spec *spec,
- int priority);
- int (*detach_flow)(struct ib_qp *qp,
- struct ib_flow_spec *spec,
- int priority);
+ struct ib_flow * (*create_flow)(struct ib_qp *qp,
+ struct ib_flow_attr
+ *flow_attr,
+ int domain);
+ int (*destroy_flow)(struct ib_flow *flow_id);
+ int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status);
unsigned long (*get_unmapped_area)(struct file *file,
unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
+ int (*ioctl)(struct ib_ucontext *context,
+ unsigned int cmd,
+ unsigned long arg);
+ int (*query_values)(struct ib_device *device,
+ int q_values,
+ struct ib_device_values *values);
struct ib_dma_mapping_ops *dma_ops;
struct module *owner;
@@ -1379,14 +1825,33 @@ struct ib_device {
int uverbs_abi_ver;
u64 uverbs_cmd_mask;
+ u64 uverbs_ex_cmd_mask;
char node_desc[64];
__be64 node_guid;
u32 local_dma_lkey;
u8 node_type;
u8 phys_port_cnt;
- struct rb_root ib_uverbs_xrcd_table;
- struct mutex xrcd_table_mutex;
+ int cmd_perf;
+ u64 cmd_avg;
+ u32 cmd_n;
+ spinlock_t cmd_perf_lock;
+
+ /*
+ * Experimental data and functions
+ */
+ int (*exp_query_device)(struct ib_device *device,
+ struct ib_exp_device_attr *device_attr);
+ struct ib_qp * (*exp_create_qp)(struct ib_pd *pd,
+ struct ib_exp_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+ struct ib_dct * (*exp_create_dct)(struct ib_pd *pd,
+ struct ib_dct_init_attr *attr,
+ struct ib_udata *udata);
+ int (*exp_destroy_dct)(struct ib_dct *dct);
+ int (*exp_query_dct)(struct ib_dct *dct, struct ib_dct_attr *attr);
+
+ u64 uverbs_exp_cmd_mask;
};
struct ib_client {
@@ -1414,12 +1879,12 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
- return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+ return udata->ops->copy_from(dest, udata, len);
}
static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
{
- return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+ return udata->ops->copy_to(udata, src, len);
}
/**
@@ -1430,6 +1895,7 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
+ * @ll: link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
@@ -1438,7 +1904,8 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
* and that the attribute mask supplied is allowed for the transition.
*/
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask);
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll);
int ib_register_event_handler (struct ib_event_handler *event_handler);
int ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -1552,26 +2019,6 @@ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
int ib_destroy_ah(struct ib_ah *ah);
/**
- * ib_create_xrc_srq - Creates an XRC SRQ associated with the specified
- * protection domain, cq, and xrc domain.
- * @pd: The protection domain associated with the SRQ.
- * @xrc_cq: The cq to be associated with the XRC SRQ.
- * @xrcd: The XRC domain to be associated with the XRC SRQ.
- * @srq_init_attr: A list of initial attributes required to create the
- * XRC SRQ. If XRC SRQ creation succeeds, then the attributes are updated
- * to the actual capabilities of the created XRC SRQ.
- *
- * srq_attr->max_wr and srq_attr->max_sge are read the determine the
- * requested size of the XRC SRQ, and set to the actual values allocated
- * on return. If ib_create_xrc_srq() succeeds, then max_wr and max_sge
- * will always be at least as large as the requested values.
- */
-struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
- struct ib_cq *xrc_cq,
- struct ib_xrcd *xrcd,
- struct ib_srq_init_attr *srq_init_attr);
-
-/**
* ib_create_srq - Creates a SRQ associated with the specified protection
* domain.
* @pd: The protection domain associated with the SRQ.
@@ -1732,13 +2179,6 @@ static inline int ib_post_recv(struct ib_qp *qp,
return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
}
-/*
- * IB_CQ_VECTOR_LEAST_ATTACHED: The constant specifies that
- * the CQ will be attached to the completion vector that has
- * the least number of CQs already attached to it.
- */
-#define IB_CQ_VECTOR_LEAST_ATTACHED 0xffffffff
-
/**
* ib_create_cq - Creates a CQ on the specified device.
* @device: The device on which to create the CQ.
@@ -1769,13 +2209,16 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
int ib_resize_cq(struct ib_cq *cq, int cqe);
/**
- * ib_modify_cq - Modifies moderation params of the CQ
+ * ib_modify_cq - Modifies the attributes for the specified CQ and then
+ * transitions the CQ to the given state.
* @cq: The CQ to modify.
- * @cq_count: number of CQEs that will trigger an event
- * @cq_period: max period of time in usec before triggering an event
- *
+ * @cq_attr: specifies the CQ attributes to modify.
+ * @cq_attr_mask: A bit-mask used to specify which attributes of the CQ
+ * are being modified.
*/
-int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int ib_modify_cq(struct ib_cq *cq,
+ struct ib_cq_attr *cq_attr,
+ int cq_attr_mask);
/**
* ib_destroy_cq - Destroys the specified CQ.
@@ -2179,9 +2622,30 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
* ib_dereg_mr - Deregisters a memory region and removes it from the
* HCA translation table.
* @mr: The memory region to deregister.
+ *
+ * This function can fail if the memory region has memory windows bound to it.
*/
int ib_dereg_mr(struct ib_mr *mr);
+
+/**
+ * ib_create_mr - Allocates a memory region that may be used for
+ * signature handover operations.
+ * @pd: The protection domain associated with the region.
+ * @mr_init_attr: memory region init attributes.
+ */
+struct ib_mr *ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr);
+
+/**
+ * ib_destroy_mr - Destroys a memory region that was created using
+ * ib_create_mr and removes it from HW translation tables.
+ * @mr: The memory region to destroy.
+ *
+ * This function can fail if the memory region has memory windows bound to it.
+ */
+int ib_destroy_mr(struct ib_mr *mr);
+
/**
* ib_alloc_fast_reg_mr - Allocates memory region usable with the
* IB_WR_FAST_REG_MR send work request.
@@ -2231,10 +2695,22 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
}
/**
+ * ib_inc_rkey - increments the key portion of the given rkey. Can be used
+ * for calculating a new rkey for type 2 memory windows.
+ * @rkey: the rkey to increment.
+ */
+static inline u32 ib_inc_rkey(u32 rkey)
+{
+ const u32 mask = 0x000000ff;
+ return ((rkey + 1) & mask) | (rkey & ~mask);
+}
+
+/**
* ib_alloc_mw - Allocates a memory window.
* @pd: The protection domain associated with the memory window.
+ * @type: The type of the memory window (1 or 2).
*/
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
/**
* ib_bind_mw - Posts a work request to the send queue of the specified
@@ -2244,6 +2720,10 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
* @mw: The memory window to bind.
* @mw_bind: Specifies information about the memory window, including
* its address range, remote access rights, and associated memory region.
+ *
+ * If there is no immediate error, the function will update the rkey member
+ * of the mw parameter to its new value. The bind operation can still fail
+ * asynchronously.
*/
static inline int ib_bind_mw(struct ib_qp *qp,
struct ib_mw *mw,
@@ -2334,7 +2814,77 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
*/
int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
-int ib_attach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority);
-int ib_detach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority);
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr, int domain);
+int ib_destroy_flow(struct ib_flow *flow_id);
+
+struct ib_dct *ib_create_dct(struct ib_pd *pd, struct ib_dct_init_attr *attr,
+ struct ib_udata *udata);
+int ib_destroy_dct(struct ib_dct *dct);
+int ib_query_dct(struct ib_dct *dct, struct ib_dct_attr *attr);
+
+int ib_query_values(struct ib_device *device,
+ int q_values, struct ib_device_values *values);
+
+static inline void ib_active_speed_enum_to_rate(u8 active_speed,
+ int *rate,
+ char **speed)
+{
+ switch (active_speed) {
+ case IB_SPEED_DDR:
+ *speed = " DDR";
+ *rate = 50;
+ break;
+ case IB_SPEED_QDR:
+ *speed = " QDR";
+ *rate = 100;
+ break;
+ case IB_SPEED_FDR10:
+ *speed = " FDR10";
+ *rate = 100;
+ break;
+ case IB_SPEED_FDR:
+ *speed = " FDR";
+ *rate = 140;
+ break;
+ case IB_SPEED_EDR:
+ *speed = " EDR";
+ *rate = 250;
+ break;
+	case IB_SPEED_SDR:
+	default:		/* default to SDR for invalid rates */
+		*speed = " SDR";
+		*rate = 25;
+		break;
+	}
+}
+
+static inline int ib_check_mr_access(int flags)
+{
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+ !(flags & IB_ACCESS_LOCAL_WRITE))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ib_check_mr_status - lightweight check of MR status.
+ * This routine may provide status checks on a selected
+ * ib_mr.  First use is for the signature status check.
+ *
+ * @mr: A memory region.
+ * @check_mask: Bitmask of which checks to perform from
+ * ib_mr_status_check enumeration.
+ * @mr_status: The container of relevant status checks.
+ * Failed checks will be indicated in the status bitmask
+ * and the relevant info will be in the error item.
+ */
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status);
#endif /* IB_VERBS_H */
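The ib_verbs.h hunks above replace the flow attach/detach pair with create_flow/destroy_flow, widen ib_modify_cq and ib_alloc_mw, and introduce ib_inc_rkey, ib_check_mr_access and ib_check_mr_status. A minimal sketch of the new memory-window path under these declarations; pd is assumed to exist, and IB_MW_TYPE_2 is the type-2 enumerator from the enum ib_mw_type referenced by the new ib_alloc_mw prototype:

    /* Remote write requires local write, per ib_check_mr_access(). */
    int access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
    struct ib_mw *mw;
    u32 next_rkey;

    if (ib_check_mr_access(access) != 0)
            return (-EINVAL);

    mw = ib_alloc_mw(pd, IB_MW_TYPE_2);     /* pd assumed to exist */
    if (IS_ERR(mw))
            return (PTR_ERR(mw));

    /* Type 2 windows take a fresh key portion on every bind. */
    next_rkey = ib_inc_rkey(mw->rkey);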
diff --git a/sys/ofed/include/rdma/ib_verbs_exp.h b/sys/ofed/include/rdma/ib_verbs_exp.h
new file mode 100644
index 0000000..ca5b84b
--- /dev/null
+++ b/sys/ofed/include/rdma/ib_verbs_exp.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_VERBS_EXP_H
+#define IB_VERBS_EXP_H
+
+#include <rdma/ib_verbs.h>
+
+
+enum ib_exp_device_cap_flags2 {
+ IB_EXP_DEVICE_DC_TRANSPORT = 1 << 0,
+ IB_EXP_DEVICE_QPG = 1 << 1,
+ IB_EXP_DEVICE_UD_RSS = 1 << 2,
+ IB_EXP_DEVICE_UD_TSS = 1 << 3
+};
+
+enum ib_exp_device_attr_comp_mask {
+ IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK = 1ULL << 1,
+ IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = 1ULL << 2,
+ IB_EXP_DEVICE_ATTR_CAP_FLAGS2 = 1ULL << 3,
+ IB_EXP_DEVICE_ATTR_DC_REQ_RD = 1ULL << 4,
+ IB_EXP_DEVICE_ATTR_DC_RES_RD = 1ULL << 5,
+ IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ = 1ULL << 6,
+ IB_EXP_DEVICE_ATTR_RSS_TBL_SZ = 1ULL << 7,
+};
+
+struct ib_exp_device_attr {
+ struct ib_device_attr base;
+ /* Use IB_EXP_DEVICE_ATTR_... for exp_comp_mask */
+ uint32_t exp_comp_mask;
+ uint64_t device_cap_flags2;
+ uint32_t dc_rd_req;
+ uint32_t dc_rd_res;
+ uint32_t inline_recv_sz;
+ uint32_t max_rss_tbl_sz;
+};
+
+struct ib_exp_qp_init_attr {
+ void (*event_handler)(struct ib_event *, void *);
+ void *qp_context;
+ struct ib_cq *send_cq;
+ struct ib_cq *recv_cq;
+ struct ib_srq *srq;
+ struct ib_xrcd *xrcd; /* XRC TGT QPs only */
+ struct ib_qp_cap cap;
+ union {
+ struct ib_qp *qpg_parent; /* see qpg_type */
+ struct ib_qpg_init_attrib parent_attrib;
+ };
+ enum ib_sig_type sq_sig_type;
+ enum ib_qp_type qp_type;
+ enum ib_qp_create_flags create_flags;
+ enum ib_qpg_type qpg_type;
+ u8 port_num; /* special QP types only */
+ u32 max_inl_recv;
+};
+
+int ib_exp_query_device(struct ib_device *device,
+ struct ib_exp_device_attr *device_attr);
+
+#endif /* IB_VERBS_EXP_H */
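A sketch of how a consumer might use the experimental query added by this new header; device is assumed to be a valid ib_device, and the comp-mask check follows the convention noted in the struct comment above:

    struct ib_exp_device_attr exp_attr;
    int error;

    error = ib_exp_query_device(device, &exp_attr);
    if (error != 0)
            return (error);

    /* Only trust fields whose bit is set in exp_comp_mask. */
    if ((exp_attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_CAP_FLAGS2) != 0 &&
        (exp_attr.device_cap_flags2 & IB_EXP_DEVICE_DC_TRANSPORT) != 0)
            printf("DC transport supported\n");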
diff --git a/sys/ofed/include/rdma/iw_cm.h b/sys/ofed/include/rdma/iw_cm.h
index 412320e..271c2f8 100644
--- a/sys/ofed/include/rdma/iw_cm.h
+++ b/sys/ofed/include/rdma/iw_cm.h
@@ -46,24 +46,17 @@ enum iw_cm_event_type {
IW_CM_EVENT_CLOSE /* close complete */
};
-enum iw_cm_event_status {
- IW_CM_EVENT_STATUS_OK = 0, /* request successful */
- IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */
- IW_CM_EVENT_STATUS_REJECTED, /* connect request rejected */
- IW_CM_EVENT_STATUS_TIMEOUT, /* the operation timed out */
- IW_CM_EVENT_STATUS_RESET, /* reset from remote peer */
- IW_CM_EVENT_STATUS_EINVAL, /* asynchronous failure for bad parm */
-};
-
struct iw_cm_event {
enum iw_cm_event_type event;
- enum iw_cm_event_status status;
+ int status;
struct sockaddr_in local_addr;
struct sockaddr_in remote_addr;
void *private_data;
- u8 private_data_len;
void *provider_data;
+ u8 private_data_len;
struct socket *so;
+ u8 ord;
+ u8 ird;
};
/**
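With the iw_cm_event change above, status becomes a plain errno-style int in place of the removed enum, and the peer's ORD/IRD limits now travel in the event. A hypothetical handler sketch (the handler name is illustrative; the signature matches the iw_cm_handler typedef in this header):

    static int
    example_iw_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event)
    {
            if (event->event == IW_CM_EVENT_CONNECT_REPLY &&
                event->status != 0)
                    return (event->status); /* e.g. -ECONNREFUSED */
            if (event->event == IW_CM_EVENT_CONNECT_REQUEST)
                    printf("peer ord %d ird %d\n", event->ord, event->ird);
            return (0);
    }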
diff --git a/sys/ofed/include/rdma/peer_mem.h b/sys/ofed/include/rdma/peer_mem.h
new file mode 100644
index 0000000..8565883
--- /dev/null
+++ b/sys/ofed/include/rdma/peer_mem.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(PEER_MEM_H)
+#define PEER_MEM_H
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/scatterlist.h>
+#include <linux/mutex.h>
+
+
+#define IB_PEER_MEMORY_NAME_MAX 64
+#define IB_PEER_MEMORY_VER_MAX 16
+
+struct peer_memory_client {
+ char name[IB_PEER_MEMORY_NAME_MAX];
+ char version[IB_PEER_MEMORY_VER_MAX];
+	/* acquire return code: 1 - mine, 0 - not mine */
+ int (*acquire) (unsigned long addr, size_t size, void *peer_mem_private_data,
+ char *peer_mem_name, void **client_context);
+ int (*get_pages) (unsigned long addr,
+ size_t size, int write, int force,
+ struct sg_table *sg_head,
+ void *client_context, void *core_context);
+ int (*dma_map) (struct sg_table *sg_head, void *client_context,
+ struct device *dma_device, int dmasync, int *nmap);
+ int (*dma_unmap) (struct sg_table *sg_head, void *client_context,
+ struct device *dma_device);
+ void (*put_pages) (struct sg_table *sg_head, void *client_context);
+ unsigned long (*get_page_size) (void *client_context);
+ void (*release) (void *client_context);
+};
+
+typedef int (*invalidate_peer_memory)(void *reg_handle,
+ void *core_context);
+
+void *ib_register_peer_memory_client(struct peer_memory_client *peer_client,
+ invalidate_peer_memory *invalidate_callback);
+void ib_unregister_peer_memory_client(void *reg_handle);
+
+#endif
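A registration sketch for the new peer-memory interface; every example_* symbol is a placeholder for a driver-supplied callback matching the corresponding function-pointer type in the struct above:

    static struct peer_memory_client example_client = {
            .name          = "example_peer",
            .version       = "1.0",
            .acquire       = example_acquire,   /* 1 - mine, 0 - not mine */
            .get_pages     = example_get_pages,
            .dma_map       = example_dma_map,
            .dma_unmap     = example_dma_unmap,
            .put_pages     = example_put_pages,
            .get_page_size = example_get_page_size,
            .release       = example_release,
    };

    static invalidate_peer_memory example_invalidate;
    static void *example_reg_handle;

    static int
    example_register(void)
    {
            example_reg_handle = ib_register_peer_memory_client(
                &example_client, &example_invalidate);
            return (example_reg_handle == NULL ? -EINVAL : 0);
    }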
diff --git a/sys/ofed/include/rdma/rdma_cm.h b/sys/ofed/include/rdma/rdma_cm.h
index c6b2962..d699261 100644
--- a/sys/ofed/include/rdma/rdma_cm.h
+++ b/sys/ofed/include/rdma/rdma_cm.h
@@ -59,15 +59,26 @@ enum rdma_cm_event_type {
RDMA_CM_EVENT_MULTICAST_JOIN,
RDMA_CM_EVENT_MULTICAST_ERROR,
RDMA_CM_EVENT_ADDR_CHANGE,
- RDMA_CM_EVENT_TIMEWAIT_EXIT
+ RDMA_CM_EVENT_TIMEWAIT_EXIT,
+ RDMA_CM_EVENT_ALT_ROUTE_RESOLVED,
+ RDMA_CM_EVENT_ALT_ROUTE_ERROR,
+ RDMA_CM_EVENT_LOAD_ALT_PATH,
+ RDMA_CM_EVENT_ALT_PATH_LOADED,
};
enum rdma_port_space {
RDMA_PS_SDP = 0x0001,
RDMA_PS_IPOIB = 0x0002,
+ RDMA_PS_IB = 0x013F,
RDMA_PS_TCP = 0x0106,
RDMA_PS_UDP = 0x0111,
- RDMA_PS_SCTP = 0x0183
+};
+
+enum alt_path_type {
+ RDMA_ALT_PATH_NONE,
+ RDMA_ALT_PATH_PORT,
+ RDMA_ALT_PATH_LID,
+ RDMA_ALT_PATH_BEST
};
struct rdma_addr {
@@ -101,6 +112,7 @@ struct rdma_ud_param {
struct ib_ah_attr ah_attr;
u32 qp_num;
u32 qkey;
+ u8 alt_path_index;
};
struct rdma_cm_event {
@@ -112,6 +124,20 @@ struct rdma_cm_event {
} param;
};
+enum rdma_cm_state {
+ RDMA_CM_IDLE,
+ RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY,
+ RDMA_CM_ROUTE_RESOLVED,
+ RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT,
+ RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN,
+ RDMA_CM_DEVICE_REMOVAL,
+ RDMA_CM_DESTROYING
+};
+
struct rdma_cm_id;
/**
@@ -131,7 +157,9 @@ struct rdma_cm_id {
rdma_cm_event_handler event_handler;
struct rdma_route route;
enum rdma_port_space ps;
+ enum ib_qp_type qp_type;
u8 port_num;
+ void *ucontext;
};
/**
@@ -141,9 +169,11 @@ struct rdma_cm_id {
* returned rdma_id.
* @context: User specified context associated with the id.
* @ps: RDMA port space.
+ * @qp_type: type of queue pair associated with the id.
*/
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps);
+ void *context, enum rdma_port_space ps,
+ enum ib_qp_type qp_type);
/**
* rdma_destroy_id - Destroys an RDMA identifier.
@@ -192,6 +222,19 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
/**
+ * rdma_enable_apm - Get ready to use APM for the given ID.
+ * Actual alternate path discovery and load will take place only
+ * after a connection has been established.
+ *
+ * Calling this function only has an effect on the connection's client side.
+ * It should be called after rdma_resolve_route and before rdma_connect.
+ *
+ * @id: RDMA identifier.
+ * @alt_type: Alternate path type to resolve.
+ */
+int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type);
+
+/**
* rdma_create_qp - Allocate a QP and associate it with the specified RDMA
* identifier.
*
@@ -330,4 +373,32 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
*/
void rdma_set_service_type(struct rdma_cm_id *id, int tos);
+/**
+ * rdma_set_reuseaddr - Allow the reuse of local addresses when binding
+ * the rdma_cm_id.
+ * @id: Communication identifier to configure.
+ * @reuse: Value indicating if the bound address is reusable.
+ *
+ * Reuse must be set before an address is bound to the id.
+ */
+int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
+
+/**
+ * rdma_set_afonly - Specify that listens are restricted to the
+ * bound address family only.
+ * @id: Communication identifier to configure.
+ * @afonly: Value indicating if listens are restricted.
+ *
+ * Must be set before identifier is in the listening state.
+ */
+int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
+
+/**
+ * rdma_set_timeout - Set the QP timeout associated with a connection
+ * identifier.
+ * @id: Communication identifier to associate with the QP timeout.
+ * @timeout: QP timeout value.
+ */
+void rdma_set_timeout(struct rdma_cm_id *id, int timeout);
+
#endif /* RDMA_CM_H */
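A client-side call-order sketch tying the rdma_cm.h additions together: the id now carries its QP type at creation, and per the rdma_enable_apm comment the call sits between route resolution and connect. Event waits between the asynchronous steps are elided, and example_handler, dst_addr and conn_param are assumed; rdma_connect comes from elsewhere in this header:

    struct rdma_cm_id *id;

    id = rdma_create_id(example_handler, NULL, RDMA_PS_TCP, IB_QPT_RC);
    if (IS_ERR(id))
            return (PTR_ERR(id));
    rdma_resolve_addr(id, NULL, dst_addr, 2000);
    /* ... wait for RDMA_CM_EVENT_ADDR_RESOLVED ... */
    rdma_resolve_route(id, 2000);
    /* ... wait for RDMA_CM_EVENT_ROUTE_RESOLVED ... */
    rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
    rdma_set_timeout(id, 14);       /* QP timeout of 4.096us * 2^14 */
    rdma_connect(id, &conn_param);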
diff --git a/sys/ofed/include/rdma/rdma_user_cm.h b/sys/ofed/include/rdma/rdma_user_cm.h
index 1d16502..4d99099 100644
--- a/sys/ofed/include/rdma/rdma_user_cm.h
+++ b/sys/ofed/include/rdma/rdma_user_cm.h
@@ -77,7 +77,8 @@ struct rdma_ucm_create_id {
__u64 uid;
__u64 response;
__u16 ps;
- __u8 reserved[6];
+ __u8 qp_type;
+ __u8 reserved[5];
};
struct rdma_ucm_create_id_resp {
@@ -222,7 +223,11 @@ enum {
/* Option details */
enum {
RDMA_OPTION_ID_TOS = 0,
- RDMA_OPTION_IB_PATH = 1
+ RDMA_OPTION_ID_REUSEADDR = 1,
+ RDMA_OPTION_ID_AFONLY = 2,
+
+ RDMA_OPTION_IB_PATH = 1,
+ RDMA_OPTION_IB_APM = 2,
};
struct rdma_ucm_set_option {
diff --git a/sys/powerpc/powerpc/db_trace.c b/sys/powerpc/powerpc/db_trace.c
index f85918c..7dfbb84 100644
--- a/sys/powerpc/powerpc/db_trace.c
+++ b/sys/powerpc/powerpc/db_trace.c
@@ -267,6 +267,9 @@ db_backtrace(struct thread *td, db_addr_t fp, int count)
db_printf("%-10s r1=%#zx cr=%#x xer=%#x ctr=%#zx",
"", tf->fixreg[1], (uint32_t)tf->cr,
(uint32_t)tf->xer, tf->ctr);
+#ifdef __powerpc64__
+ db_printf(" r2=%#zx", tf->fixreg[2]);
+#endif
if (tf->exc == EXC_DSI)
db_printf(" sr=%#x",
(uint32_t)tf->cpu.aim.dsisr);
diff --git a/sys/sparc64/include/asm.h b/sys/sparc64/include/asm.h
index dea06c5..69a53be 100644
--- a/sys/sparc64/include/asm.h
+++ b/sys/sparc64/include/asm.h
@@ -103,6 +103,16 @@ CNAME(x):
#define END(x) .size x, . - x
/*
+ * WEAK_REFERENCE(): create a weak reference alias for sym.
+ * The macro is not a general asm macro that takes arbitrary names,
+ * but one that takes only C names. It does the non-null name
+ * translation inside the macro.
+ */
+#define WEAK_REFERENCE(sym, alias) \
+ .weak CNAME(alias); \
+ .equ CNAME(alias),CNAME(sym)
+
+/*
* Kernel RCS ID tag and copyright macros
*/
diff --git a/sys/sys/fbio.h b/sys/sys/fbio.h
index c47304a..034d052 100644
--- a/sys/sys/fbio.h
+++ b/sys/sys/fbio.h
@@ -115,6 +115,7 @@ struct fb_info;
typedef int fb_enter_t(void *priv);
typedef int fb_leave_t(void *priv);
+typedef int fb_setblankmode_t(void *priv, int mode);
struct fb_info {
/* Raw copy of fbtype. Do not change. */
@@ -129,6 +130,7 @@ struct fb_info {
fb_enter_t *enter;
fb_leave_t *leave;
+ fb_setblankmode_t *setblankmode;
intptr_t fb_pbase; /* For FB mmap. */
intptr_t fb_vbase; /* if NULL, use fb_write/fb_read. */
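A driver-side sketch of the new blank-mode hook; example_softc and example_hw_blank are hypothetical driver pieces, and only the setblankmode member comes from the header change above:

    static int
    example_setblankmode(void *priv, int mode)
    {
            struct example_softc *sc = priv;

            /* Program the hardware for the requested blanking mode. */
            return (example_hw_blank(sc, mode));
    }

    /* ... in attach: sc->fb_info.setblankmode = example_setblankmode; ... */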
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 2e94d2a..4729320 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -184,8 +184,16 @@ struct mbuf {
* Compile-time assertions in uipc_mbuf.c test these values to ensure
* that they are correct.
*/
- struct mbuf *m_next; /* next buffer in chain */
- struct mbuf *m_nextpkt; /* next chain in queue/record */
+ union { /* next buffer in chain */
+ struct mbuf *m_next;
+ SLIST_ENTRY(mbuf) m_slist;
+ STAILQ_ENTRY(mbuf) m_stailq;
+ };
+ union { /* next chain in queue/record */
+ struct mbuf *m_nextpkt;
+ SLIST_ENTRY(mbuf) m_slistpkt;
+ STAILQ_ENTRY(mbuf) m_stailqpkt;
+ };
caddr_t m_data; /* location of data */
int32_t m_len; /* amount of data in this mbuf */
uint32_t m_type:8, /* type of data in this mbuf */
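With the anonymous unions above, an mbuf packet chain can sit directly on a queue(3) list through m_stailqpkt instead of hand-rolled m_nextpkt links. A sketch, where the queue head name is illustrative and m is a struct mbuf *:

    STAILQ_HEAD(, mbuf) example_q = STAILQ_HEAD_INITIALIZER(example_q);

    STAILQ_INSERT_TAIL(&example_q, m, m_stailqpkt); /* enqueue packet */
    m = STAILQ_FIRST(&example_q);
    STAILQ_REMOVE_HEAD(&example_q, m_stailqpkt);    /* dequeue packet */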
diff --git a/sys/sys/param.h b/sys/sys/param.h
index aba65ac..c92ee7b 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1100059 /* Master, propagated to newvers */
+#define __FreeBSD_version 1100060 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/taskqueue.h b/sys/sys/taskqueue.h
index bf23ac1..9a00ada 100644
--- a/sys/sys/taskqueue.h
+++ b/sys/sys/taskqueue.h
@@ -36,6 +36,7 @@
#include <sys/queue.h>
#include <sys/_task.h>
#include <sys/_callout.h>
+#include <sys/_cpuset.h>
struct taskqueue;
struct thread;
@@ -71,6 +72,8 @@ struct taskqueue *taskqueue_create(const char *name, int mflags,
void *context);
int taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
const char *name, ...) __printflike(4, 5);
+int taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count,
+ int pri, cpuset_t *mask, const char *name, ...) __printflike(5, 6);
int taskqueue_start_threads_pinned(struct taskqueue **tqp, int count,
int pri, int cpu_id, const char *name,
...) __printflike(5, 6);
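A sketch of the new cpuset variant, which generalizes taskqueue_start_threads_pinned from a single CPU to an arbitrary mask; the queue name, CPU choices and priority are illustrative:

    static struct taskqueue *example_tq;

    static void
    example_init(void)
    {
            cpuset_t mask;

            CPU_ZERO(&mask);
            CPU_SET(0, &mask);
            CPU_SET(1, &mask);
            example_tq = taskqueue_create("example", M_WAITOK,
                taskqueue_thread_enqueue, &example_tq);
            taskqueue_start_threads_cpuset(&example_tq, 2, PWAIT, &mask,
                "%s taskq", "example");
    }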
diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c
index ec372c1..5d483b1 100644
--- a/sys/vm/memguard.c
+++ b/sys/vm/memguard.c
@@ -89,9 +89,7 @@ memguard_sysctl_desc(SYSCTL_HANDLER_ARGS)
return (error);
mtx_lock(&malloc_mtx);
- /*
- * If mtp is NULL, it will be initialized in memguard_cmp().
- */
+ /* If mtp is NULL, it will be initialized in memguard_cmp() */
vm_memguard_mtype = malloc_desc2type(desc);
strlcpy(vm_memguard_desc, desc, sizeof(vm_memguard_desc));
mtx_unlock(&malloc_mtx);
@@ -502,7 +500,7 @@ int
memguard_cmp_zone(uma_zone_t zone)
{
- if ((memguard_options & MG_GUARD_NOFREE) == 0 &&
+ if ((memguard_options & MG_GUARD_NOFREE) == 0 &&
zone->uz_flags & UMA_ZONE_NOFREE)
return (0);
diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c
index 74522be..e652419 100644
--- a/sys/x86/acpica/acpi_wakeup.c
+++ b/sys/x86/acpica/acpi_wakeup.c
@@ -270,6 +270,7 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result,
initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
+ lapic_xapic_mode();
#ifdef SMP
if (!CPU_EMPTY(&suspcpus))
acpi_wakeup_cpus(sc);
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index 1809fa6..1b66674 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -527,7 +527,6 @@ native_lapic_xapic_mode(void)
saveintr = intr_disable();
if (x2apic_mode)
native_lapic_enable_x2apic();
- native_lapic_disable();
intr_restore(saveintr);
}
diff --git a/sys/x86/xen/pvcpu_enum.c b/sys/x86/xen/pvcpu_enum.c
index 8d54bdd..5b8b053 100644
--- a/sys/x86/xen/pvcpu_enum.c
+++ b/sys/x86/xen/pvcpu_enum.c
@@ -81,6 +81,7 @@ madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr)
{
enum intr_trigger trig;
enum intr_polarity pol;
+ int ret;
if (acpi_quirks & ACPI_Q_MADT_IRQ0 && intr->SourceIrq == 0 &&
intr->GlobalIrq == 2) {
@@ -101,7 +102,9 @@ madt_parse_interrupt_override(ACPI_MADT_INTERRUPT_OVERRIDE *intr)
acpi_OverrideInterruptLevel(intr->GlobalIrq);
/* Register the IRQ with the polarity and trigger mode found. */
- xen_register_pirq(intr->GlobalIrq, trig, pol);
+ ret = xen_register_pirq(intr->GlobalIrq, trig, pol);
+ if (ret != 0)
+ panic("Unable to register interrupt override");
}
/*
@@ -175,7 +178,7 @@ xenpv_setup_io(void)
{
if (xen_initial_domain()) {
- int i;
+ int i, ret;
/* Map MADT */
madt_physaddr = acpi_find_table(ACPI_SIG_MADT);
@@ -201,8 +204,21 @@ xenpv_setup_io(void)
if (!madt_found_sci_override) {
printf(
"MADT: Forcing active-low polarity and level trigger for SCI\n");
- xen_register_pirq(AcpiGbl_FADT.SciInterrupt,
+ ret = xen_register_pirq(AcpiGbl_FADT.SciInterrupt,
INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
+ if (ret != 0)
+ panic("Unable to register SCI IRQ");
+ }
+
+ /* Register legacy ISA IRQs */
+ for (i = 1; i < 16; i++) {
+ if (intr_lookup_source(i) != NULL)
+ continue;
+ ret = xen_register_pirq(i, INTR_TRIGGER_EDGE,
+ INTR_POLARITY_LOW);
+ if (ret != 0 && bootverbose)
+ printf("Unable to register legacy IRQ#%d: %d\n",
+ i, ret);
}
acpi_SetDefaultIntrModel(ACPI_INTR_APIC);
diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c
index 03ccd7c..a964fac 100644
--- a/sys/x86/xen/xen_intr.c
+++ b/sys/x86/xen/xen_intr.c
@@ -126,7 +126,6 @@ struct xenisrc {
int xi_virq;
void *xi_cookie;
u_int xi_close:1; /* close on unbind? */
- u_int xi_shared:1; /* Shared with other domains. */
u_int xi_activehi:1;
u_int xi_edgetrigger:1;
};
@@ -579,11 +578,12 @@ xen_intr_handle_upcall(struct trapframe *trap_frame)
/* process port */
port = (l1i * LONG_BIT) + l2i;
- synch_clear_bit(port, &s->evtchn_pending[0]);
isrc = xen_intr_port_to_isrc[port];
- if (__predict_false(isrc == NULL))
+ if (__predict_false(isrc == NULL)) {
+ synch_clear_bit(port, &s->evtchn_pending[0]);
continue;
+ }
/* Make sure we are firing on the right vCPU */
KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)),
@@ -932,6 +932,9 @@ out:
static void
xen_intr_disable_source(struct intsrc *isrc, int eoi)
{
+
+ if (eoi == PIC_EOI)
+ xen_intr_eoi_source(isrc);
}
/*
@@ -950,8 +953,13 @@ xen_intr_enable_source(struct intsrc *isrc)
* \param isrc The interrupt source to EOI.
*/
static void
-xen_intr_eoi_source(struct intsrc *isrc)
+xen_intr_eoi_source(struct intsrc *base_isrc)
{
+ struct xenisrc *isrc;
+
+ isrc = (struct xenisrc *)base_isrc;
+ synch_clear_bit(isrc->xi_port,
+ &HYPERVISOR_shared_info->evtchn_pending[0]);
}
/*
@@ -981,8 +989,9 @@ xen_intr_pirq_disable_source(struct intsrc *base_isrc, int eoi)
struct xenisrc *isrc;
isrc = (struct xenisrc *)base_isrc;
- evtchn_mask_port(isrc->xi_port);
+ if (isrc->xi_edgetrigger == 0)
+ evtchn_mask_port(isrc->xi_port);
if (eoi == PIC_EOI)
xen_intr_pirq_eoi_source(base_isrc);
}
@@ -998,7 +1007,9 @@ xen_intr_pirq_enable_source(struct intsrc *base_isrc)
struct xenisrc *isrc;
isrc = (struct xenisrc *)base_isrc;
- evtchn_unmask_port(isrc->xi_port);
+
+ if (isrc->xi_edgetrigger == 0)
+ evtchn_unmask_port(isrc->xi_port);
}
/*
@@ -1010,13 +1021,19 @@ static void
xen_intr_pirq_eoi_source(struct intsrc *base_isrc)
{
struct xenisrc *isrc;
+ int error;
- /* XXX Use shared page of flags for this. */
isrc = (struct xenisrc *)base_isrc;
+
+ synch_clear_bit(isrc->xi_port,
+ &HYPERVISOR_shared_info->evtchn_pending[0]);
if (test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map)) {
struct physdev_eoi eoi = { .irq = isrc->xi_pirq };
- (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
+ error = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
+ if (error != 0)
+ panic("Unable to EOI PIRQ#%d: %d\n",
+ isrc->xi_pirq, error);
}
}
@@ -1361,7 +1378,6 @@ int
xen_register_pirq(int vector, enum intr_trigger trig, enum intr_polarity pol)
{
struct physdev_map_pirq map_pirq;
- struct physdev_irq alloc_pirq;
struct xenisrc *isrc;
int error;
@@ -1382,14 +1398,6 @@ xen_register_pirq(int vector, enum intr_trigger trig, enum intr_polarity pol)
return (error);
}
- alloc_pirq.irq = vector;
- alloc_pirq.vector = 0;
- error = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &alloc_pirq);
- if (error) {
- printf("xen: unable to alloc PIRQ for IRQ#%d\n", vector);
- return (error);
- }
-
mtx_lock(&xen_intr_isrc_lock);
isrc = xen_intr_alloc_isrc(EVTCHN_TYPE_PIRQ, vector);
mtx_unlock(&xen_intr_isrc_lock);
@@ -1432,6 +1440,8 @@ xen_register_msi(device_t dev, int vector, int count)
KASSERT(isrc != NULL,
("xen: unable to allocate isrc for interrupt"));
isrc->xi_pirq = msi_irq.pirq + i;
+ /* MSI interrupts are always edge triggered */
+ isrc->xi_edgetrigger = 1;
}
mtx_unlock(&xen_intr_isrc_lock);
@@ -1573,10 +1583,9 @@ xen_intr_dump_port(struct xenisrc *isrc)
isrc->xi_port, xen_intr_print_type(isrc->xi_type));
if (isrc->xi_type == EVTCHN_TYPE_PIRQ) {
db_printf("\tPirq: %d ActiveHi: %d EdgeTrigger: %d "
- "NeedsEOI: %d Shared: %d\n",
+ "NeedsEOI: %d\n",
isrc->xi_pirq, isrc->xi_activehi, isrc->xi_edgetrigger,
- !!test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map),
- isrc->xi_shared);
+ !!test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map));
}
if (isrc->xi_type == EVTCHN_TYPE_VIRQ)
db_printf("\tVirq: %d\n", isrc->xi_virq);
diff --git a/usr.bin/compress/compress.c b/usr.bin/compress/compress.c
index 1f458e5..2d2efb3 100644
--- a/usr.bin/compress/compress.c
+++ b/usr.bin/compress/compress.c
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include <errno.h>
+#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -360,14 +361,14 @@ err: if (ofp) {
static void
setfile(const char *name, struct stat *fs)
{
- static struct timeval tv[2];
+ static struct timespec tspec[2];
fs->st_mode &= S_ISUID|S_ISGID|S_IRWXU|S_IRWXG|S_IRWXO;
- TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atim);
- TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtim);
- if (utimes(name, tv))
- cwarn("utimes: %s", name);
+ tspec[0] = fs->st_atim;
+ tspec[1] = fs->st_mtim;
+ if (utimensat(AT_FDCWD, name, tspec, 0))
+ cwarn("utimensat: %s", name);
/*
* Changing the ownership probably won't succeed, unless we're root
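The pattern here (and in gzip below) replaces the lossy timespec-to-timeval conversion with utimensat(2)/futimens(2), preserving nanosecond timestamps. The core of it, with sb from a prior stat(2) and path standing in for the file name:

    struct timespec ts[2];

    ts[0] = sb.st_atim;     /* access time, full resolution */
    ts[1] = sb.st_mtim;     /* modification time */
    if (utimensat(AT_FDCWD, path, ts, 0) != 0)
            warn("utimensat: %s", path);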
diff --git a/usr.bin/gcore/elfcore.c b/usr.bin/gcore/elfcore.c
index d487b21..2d1acb8 100644
--- a/usr.bin/gcore/elfcore.c
+++ b/usr.bin/gcore/elfcore.c
@@ -511,7 +511,8 @@ readmap(pid_t pid)
((pflags & PFLAGS_FULL) == 0 &&
kve->kve_type != KVME_TYPE_DEFAULT &&
kve->kve_type != KVME_TYPE_VNODE &&
- kve->kve_type != KVME_TYPE_SWAP))
+ kve->kve_type != KVME_TYPE_SWAP &&
+ kve->kve_type != KVME_TYPE_PHYS))
continue;
ent = calloc(1, sizeof(*ent));
diff --git a/usr.bin/gzip/gzip.c b/usr.bin/gzip/gzip.c
index 9f92b6a..f197f18 100644
--- a/usr.bin/gzip/gzip.c
+++ b/usr.bin/gzip/gzip.c
@@ -1070,7 +1070,7 @@ out2:
static void
copymodes(int fd, const struct stat *sbp, const char *file)
{
- struct timeval times[2];
+ struct timespec times[2];
struct stat sb;
/*
@@ -1098,10 +1098,10 @@ copymodes(int fd, const struct stat *sbp, const char *file)
if (fchmod(fd, sb.st_mode) < 0)
maybe_warn("couldn't fchmod: %s", file);
- TIMESPEC_TO_TIMEVAL(&times[0], &sb.st_atim);
- TIMESPEC_TO_TIMEVAL(&times[1], &sb.st_mtim);
- if (futimes(fd, times) < 0)
- maybe_warn("couldn't utimes: %s", file);
+ times[0] = sb.st_atim;
+ times[1] = sb.st_mtim;
+ if (futimens(fd, times) < 0)
+ maybe_warn("couldn't futimens: %s", file);
/* only try flags if they exist already */
if (sb.st_flags != 0 && fchflags(fd, sb.st_flags) < 0)
diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c
index b8e5903..fe7bcd2 100644
--- a/usr.bin/kdump/kdump.c
+++ b/usr.bin/kdump/kdump.c
@@ -1155,7 +1155,7 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags)
print_number(ip, narg, c);
print_number(ip, narg, c);
putchar(',');
- sendfileflagsname(*ip);
+ sendfileflagsname(*(int *)ip);
ip++;
narg--;
break;
diff --git a/usr.bin/timeout/timeout.c b/usr.bin/timeout/timeout.c
index 06dc086..a682dde 100644
--- a/usr.bin/timeout/timeout.c
+++ b/usr.bin/timeout/timeout.c
@@ -172,6 +172,7 @@ main(int argc, char **argv)
double second_kill;
bool timedout = false;
bool do_second_kill = false;
+ bool child_done = false;
struct sigaction signals;
struct procctl_reaper_status info;
struct procctl_reaper_kill killemall;
@@ -187,7 +188,6 @@ main(int argc, char **argv)
foreground = preserve = 0;
second_kill = 0;
- cpid = -1;
const struct option longopts[] = {
{ "preserve-status", no_argument, &preserve, 1 },
@@ -281,20 +281,26 @@ main(int argc, char **argv)
if (sig_chld) {
sig_chld = 0;
- while (((cpid = wait(&status)) < 0) && errno == EINTR)
- continue;
- if (cpid == pid) {
- pstat = status;
- if (!foreground)
- break;
+ while ((cpid = waitpid(-1, &status, WNOHANG)) != 0) {
+ if (cpid < 0) {
+ if (errno == EINTR)
+ continue;
+ else
+ break;
+ } else if (cpid == pid) {
+ pstat = status;
+ child_done = true;
+ }
}
- if (!foreground) {
- procctl(P_PID, getpid(), PROC_REAP_STATUS,
- &info);
- if (info.rs_children == 0) {
- cpid = pid;
+ if (child_done) {
+ if (foreground) {
break;
+ } else {
+ procctl(P_PID, getpid(),
+ PROC_REAP_STATUS, &info);
+ if (info.rs_children == 0)
+ break;
}
}
} else if (sig_alrm) {
@@ -336,7 +342,7 @@ main(int argc, char **argv)
}
}
- while (cpid != pid && wait(&pstat) == -1) {
+ while (!child_done && wait(&pstat) == -1) {
if (errno != EINTR)
err(EX_OSERR, "waitpid()");
}
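The rewritten loop above drains every exited child without blocking and remembers only the status of the direct child. The reaping pattern in isolation, assuming pid, pstat and child_done from the surrounding function:

    while ((cpid = waitpid(-1, &status, WNOHANG)) != 0) {
            if (cpid < 0) {
                    if (errno == EINTR)
                            continue;
                    break;          /* ECHILD: nothing left to reap */
            }
            if (cpid == pid) {      /* the child we launched */
                    pstat = status;
                    child_done = true;
            }
    }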
diff --git a/usr.bin/touch/touch.c b/usr.bin/touch/touch.c
index c8bfd28..804af7e 100644
--- a/usr.bin/touch/touch.c
+++ b/usr.bin/touch/touch.c
@@ -76,8 +76,8 @@ main(int argc, char *argv[])
myname = basename(argv[0]);
Aflag = aflag = cflag = mflag = timeset = 0;
atflag = 0;
- if (clock_gettime(CLOCK_REALTIME, &ts[0]) == -1)
- err(1, "clock_gettime(CLOCK_REALTIME)");
+ ts[0].tv_sec = ts[1].tv_sec = 0;
+ ts[0].tv_nsec = ts[1].tv_nsec = UTIME_NOW;
while ((ch = getopt(argc, argv, "A:acd:fhmr:t:")) != -1)
switch(ch) {
@@ -152,6 +152,11 @@ main(int argc, char *argv[])
ts[1] = ts[0];
}
+ if (!aflag)
+ ts[0].tv_nsec = UTIME_OMIT;
+ if (!mflag)
+ ts[1].tv_nsec = UTIME_OMIT;
+
if (*argv == NULL)
usage(myname);
@@ -183,11 +188,6 @@ main(int argc, char *argv[])
continue;
}
- if (!aflag)
- ts[0] = sb.st_atim;
- if (!mflag)
- ts[1] = sb.st_mtim;
-
/*
* We're adjusting the times based on the file times, not a
* specified time (that gets handled above).
@@ -203,26 +203,9 @@ main(int argc, char *argv[])
}
}
- /* Try utimensat(2). */
if (!utimensat(AT_FDCWD, *argv, ts, atflag))
continue;
- /* If the user specified a time, nothing else we can do. */
- if (timeset || Aflag) {
- rval = 1;
- warn("%s", *argv);
- continue;
- }
-
- /*
- * System V and POSIX 1003.1 require that a NULL argument
- * set the access/modification times to the current time.
- * The permission checks are different, too, in that the
- * ability to write the file is sufficient. Take a shot.
- */
- if (!utimensat(AT_FDCWD, *argv, NULL, atflag))
- continue;
-
rval = 1;
warn("%s", *argv);
}
@@ -238,8 +221,8 @@ stime_arg1(const char *arg, struct timespec *tvp)
struct tm *t;
int yearset;
char *p;
- /* Start with the current time. */
- now = tvp[0].tv_sec;
+
+ now = time(NULL);
if ((t = localtime(&now)) == NULL)
err(1, "localtime");
/* [[CC]YY]MMDDhhmm[.SS] */
@@ -300,8 +283,8 @@ stime_arg2(const char *arg, int year, struct timespec *tvp)
{
time_t now;
struct tm *t;
- /* Start with the current time. */
- now = tvp[0].tv_sec;
+
+ now = time(NULL);
if ((t = localtime(&now)) == NULL)
err(1, "localtime");
diff --git a/usr.sbin/binmiscctl/binmiscctl.8 b/usr.sbin/binmiscctl/binmiscctl.8
index 8eb5552..4bc9986 100644
--- a/usr.sbin/binmiscctl/binmiscctl.8
+++ b/usr.sbin/binmiscctl/binmiscctl.8
@@ -180,6 +180,17 @@ Look up and list the record for the
image activator:
.Dl # binmiscctl lookup llvmbc
.Pp
+Add QEMU bsd-user program as an image activator for ARM AArch64 binaries:
+.Bd -literal -offset indent
+# binmiscctl add arm64 \e
+ --interpreter "/usr/local/bin/qemu-aarch64-static" \e
+ --magic "\ex7f\ex45\ex4c\ex46\ex02\ex01\ex01\ex00\ex00\ex00\e
+ \ex00\ex00\ex00\ex00\ex00\ex00\ex02\ex00\exb7\ex00" \e
+ --mask "\exff\exff\exff\exff\exff\exff\exff\ex00\exff\exff\e
+ \exff\exff\exff\exff\exff\exff\exfe\exff\exff\exff" \e
+ --size 20 --set-enabled
+.Ed
+.Pp
Add QEMU bsd-user program as an image activator for ARM little-endian binaries:
.Bd -literal -offset indent
# binmiscctl add armelf \e
diff --git a/usr.sbin/binmiscctl/binmiscctl.c b/usr.sbin/binmiscctl/binmiscctl.c
index 44122eb..5ab82e4 100644
--- a/usr.sbin/binmiscctl/binmiscctl.c
+++ b/usr.sbin/binmiscctl/binmiscctl.c
@@ -371,8 +371,10 @@ add_cmd(__unused int argc, char *argv[], ximgact_binmisc_entry_t *xbe)
}
int
-name_cmd(__unused int argc, char *argv[], ximgact_binmisc_entry_t *xbe)
+name_cmd(int argc, char *argv[], ximgact_binmisc_entry_t *xbe)
{
+ if (argc == 0)
+ usage("Required argument missing\n");
if (strlen(argv[0]) > IBE_NAME_MAX)
usage("'%s' string length longer than IBE_NAME_MAX (%d)",
IBE_NAME_MAX);
diff --git a/usr.sbin/devctl/Makefile b/usr.sbin/devctl/Makefile
index 5a6e19d..a7deb37 100644
--- a/usr.sbin/devctl/Makefile
+++ b/usr.sbin/devctl/Makefile
@@ -2,7 +2,6 @@
PROG= devctl
MAN= devctl.8
-MAN=
LIBADD= devctl
diff --git a/usr.sbin/pw/tests/pw_useradd.sh b/usr.sbin/pw/tests/pw_useradd.sh
index 2930c41..1934bbc 100755
--- a/usr.sbin/pw/tests/pw_useradd.sh
+++ b/usr.sbin/pw/tests/pw_useradd.sh
@@ -63,6 +63,108 @@ user_add_comments_invalid_noupdate_body() {
atf_check -s exit:1 -o empty grep "^test:.*" $HOME/master.passwd
}
+# Test add user with alternate homedir
+atf_test_case user_add_homedir
+user_add_homedir_body() {
+ populate_etc_skel
+
+ atf_check -s exit:0 ${PW} useradd test -d /foo/bar
+ atf_check -s exit:0 -o match:"^test:\*:.*::0:0:User &:/foo/bar:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with account expiration as an epoch date
+atf_test_case user_add_account_expiration_epoch
+user_add_account_expiration_epoch_body() {
+ populate_etc_skel
+
+ DATE=`date -j -v+1d "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -e ${DATE}
+ atf_check -s exit:0 -o match:"^test:\*:.*::0:${DATE}:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with account expiration as a DD-MM-YYYY date
+atf_test_case user_add_account_expiration_date_numeric
+user_add_account_expiration_date_numeric_body() {
+ populate_etc_skel
+
+ DATE=`date -j -v+1d "+%d-%m-%Y"`
+ EPOCH=`date -j -f "%d-%m-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -e ${DATE}
+ atf_check -s exit:0 -o match:"^test:\*:.*::0:${EPOCH}:User &:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with account expiration as a DD-MM-YYYY date
+atf_test_case user_add_account_expiration_date_month
+user_add_account_expiration_date_month_body() {
+ populate_etc_skel
+
+ DATE=`date -j -v+1d "+%d-%b-%Y"`
+ EPOCH=`date -j -f "%d-%b-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -e ${DATE}
+ atf_check -s exit:0 -o match:"^test:\*:.*::0:${EPOCH}:User &:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with account expiration as a relative date
+atf_test_case user_add_account_expiration_date_relative
+user_add_account_expiration_date_relative_body() {
+ populate_etc_skel
+
+ EPOCH=`date -j -v+13m "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -e +13o
+ atf_check -s exit:0 -o match:"^test:\*:.*::0:${EPOCH}:User &:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with password expiration as an epoch date
+atf_test_case user_add_password_expiration_epoch
+user_add_password_expiration_epoch_body() {
+ populate_etc_skel
+
+ DATE=`date -j -v+1d "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -p ${DATE}
+ atf_check -s exit:0 -o match:"^test:\*:.*::${DATE}:0:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with password expiration as a DD-MM-YYYY date
+atf_test_case user_add_password_expiration_date_numeric
+user_add_password_expiration_date_numeric_body() {
+ populate_etc_skel
+
+ DATE=`date -j -v+1d "+%d-%m-%Y"`
+ EPOCH=`date -j -f "%d-%m-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -p ${DATE}
+ atf_check -s exit:0 -o match:"^test:\*:.*::${EPOCH}:0:User &:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with password expiration as a DD-MMM-YYYY date
+atf_test_case user_add_password_expiration_date_month
+user_add_password_expiration_date_month_body() {
+ populate_etc_skel
+
+ DATE=`date -j -v+1d "+%d-%b-%Y"`
+ EPOCH=`date -j -f "%d-%b-%Y %H:%M:%S" "${DATE} 00:00:00" "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -p ${DATE}
+ atf_check -s exit:0 -o match:"^test:\*:.*::${EPOCH}:0:User &:.*" \
+ ${PW} usershow test
+}
+
+# Test add user with password expiration as a relative date
+atf_test_case user_add_password_expiration_date_relative
+user_add_password_expiration_date_relative_body() {
+ populate_etc_skel
+
+ EPOCH=`date -j -v+13m "+%s"`
+ atf_check -s exit:0 ${PW} useradd test -p +13o
+ atf_check -s exit:0 -o match:"^test:\*:.*::${EPOCH}:0:User &:.*" \
+ ${PW} usershow test
+}
+
atf_init_test_cases() {
atf_add_test_case user_add
atf_add_test_case user_add_noupdate
@@ -70,4 +172,13 @@ atf_init_test_cases() {
atf_add_test_case user_add_comments_noupdate
atf_add_test_case user_add_comments_invalid
atf_add_test_case user_add_comments_invalid_noupdate
+ atf_add_test_case user_add_homedir
+ atf_add_test_case user_add_account_expiration_epoch
+ atf_add_test_case user_add_account_expiration_date_numeric
+ atf_add_test_case user_add_account_expiration_date_month
+ atf_add_test_case user_add_account_expiration_date_relative
+ atf_add_test_case user_add_password_expiration_epoch
+ atf_add_test_case user_add_password_expiration_date_numeric
+ atf_add_test_case user_add_password_expiration_date_month
+ atf_add_test_case user_add_password_expiration_date_relative
}
diff --git a/usr.sbin/pw/tests/pw_usernext.sh b/usr.sbin/pw/tests/pw_usernext.sh
index 1cdadee..89f938e 100755
--- a/usr.sbin/pw/tests/pw_usernext.sh
+++ b/usr.sbin/pw/tests/pw_usernext.sh
@@ -8,14 +8,15 @@ atf_test_case usernext
usernext_body() {
populate_etc_skel
- var0=1
- LIMIT=`jot -r 1 2 10`
- while [ "$var0" -lt "$LIMIT" ]
+ CURRENT=`${PW} usernext | sed -e 's/:.*//'`
+ RANDOM=`jot -r 1 1 150`
+ MAX=`expr ${CURRENT} + ${RANDOM}`
+ while [ "${CURRENT}" -lt "${MAX}" ]
do
- atf_check -s exit:0 ${PW} useradd test$var0
- var0=`expr $var0 + 1`
+ atf_check -s exit:0 ${PW} useradd test${CURRENT}
+ CURRENT=`expr ${CURRENT} + 1`
done
- atf_check -s exit:0 -o match:"100${LIMIT}:100${LIMIT}" \
+ atf_check -s exit:0 -o match:"${CURRENT}:${CURRENT}" \
${PW} usernext
}
@@ -25,14 +26,16 @@ atf_test_case usernext_assigned_group
usernext_assigned_group_body() {
populate_etc_skel
- var0=1
- LIMIT=`jot -r 1 2 10`
- while [ "$var0" -lt "$LIMIT" ]
+ CURRENT=`${PW} usernext | sed -e 's/:.*//'`
+ CURRENTGID=`${PW} groupnext`
+ RANDOM=`jot -r 1 1 150`
+ MAX=`expr ${CURRENT} + ${RANDOM}`
+ while [ "${CURRENT}" -lt "${MAX}" ]
do
- atf_check -s exit:0 ${PW} useradd -n test$var0 -g 0
- var0=`expr $var0 + 1`
+ atf_check -s exit:0 ${PW} useradd -n test${CURRENT} -g 0
+ CURRENT=`expr ${CURRENT} + 1`
done
- atf_check -s exit:0 -o match:"100${LIMIT}:1001}" \
+ atf_check -s exit:0 -o match:"${CURRENT}:${CURRENTGID}" \
${PW} usernext
}