Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/apic_vector.S | 16
-rw-r--r--  sys/amd64/amd64/mp_machdep.c | 1034
-rw-r--r--  sys/amd64/conf/GENERIC | 4
-rw-r--r--  sys/amd64/conf/NOTES | 2
-rw-r--r--  sys/amd64/include/smp.h | 39
-rw-r--r--  sys/amd64/include/vm.h | 2
-rw-r--r--  sys/amd64/include/vmm.h | 6
-rw-r--r--  sys/amd64/include/xen/xenfunc.h | 9
-rw-r--r--  sys/amd64/include/xen/xenpmap.h | 227
-rw-r--r--  sys/amd64/include/xen/xenvar.h | 61
-rw-r--r--  sys/amd64/vmm/amd/amdv.c | 1
-rw-r--r--  sys/amd64/vmm/amd/svm.c | 1
-rw-r--r--  sys/amd64/vmm/amd/svm_msr.c | 24
-rw-r--r--  sys/amd64/vmm/amd/vmcb.c | 1
-rw-r--r--  sys/amd64/vmm/intel/vmx_msr.c | 16
-rw-r--r--  sys/amd64/vmm/io/vatpic.c | 1
-rw-r--r--  sys/amd64/vmm/io/vatpit.c | 1
-rw-r--r--  sys/amd64/vmm/io/vhpet.c | 1
-rw-r--r--  sys/amd64/vmm/io/vioapic.c | 1
-rw-r--r--  sys/amd64/vmm/io/vlapic.c | 20
-rw-r--r--  sys/amd64/vmm/io/vpmtmr.c | 1
-rw-r--r--  sys/amd64/vmm/io/vrtc.c | 118
-rw-r--r--  sys/amd64/vmm/vmm.c | 8
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.c | 129
-rw-r--r--  sys/amd64/vmm/vmm_ioport.c | 6
-rw-r--r--  sys/amd64/vmm/vmm_stat.c | 1
-rw-r--r--  sys/amd64/vmm/x86.c | 4
-rw-r--r--  sys/arm/allwinner/std.a10 | 2
-rw-r--r--  sys/arm/amlogic/aml8726/aml8726_mmc.c | 318
-rw-r--r--  sys/arm/amlogic/aml8726/aml8726_mmc.h | 14
-rw-r--r--  sys/arm/amlogic/aml8726/files.aml8726 | 3
-rw-r--r--  sys/arm/amlogic/aml8726/files.smp | 4
-rw-r--r--  sys/arm/amlogic/aml8726/std.aml8726 | 9
-rw-r--r--  sys/arm/amlogic/aml8726/std.odroidc1 | 17
-rw-r--r--  sys/arm/amlogic/aml8726/std.vsatv102-m6 | 17
-rw-r--r--  sys/arm/arm/busdma_machdep-v6.c | 4
-rw-r--r--  sys/arm/arm/busdma_machdep.c | 4
-rw-r--r--  sys/arm/arm/cpufunc.c | 3
-rw-r--r--  sys/arm/arm/locore-v4.S | 74
-rw-r--r--  sys/arm/arm/locore-v6.S | 125
-rw-r--r--  sys/arm/arm/pmap-v6-new.c | 9
-rw-r--r--  sys/arm/broadcom/bcm2835/bcm2835_mbox.c | 3
-rw-r--r--  sys/arm/broadcom/bcm2835/bcm2835_sdhci.c | 13
-rw-r--r--  sys/arm/broadcom/bcm2835/std.bcm2836 | 2
-rw-r--r--  sys/arm/conf/AML8726 | 143
-rw-r--r--  sys/arm/conf/EFIKA_MX | 2
-rw-r--r--  sys/arm/conf/IMX53 | 2
-rw-r--r--  sys/arm/conf/IMX6 | 2
-rw-r--r--  sys/arm/conf/ODROIDC1 | 121
-rw-r--r--  sys/arm/conf/RK3188 | 4
-rw-r--r--  sys/arm/conf/RPI2 | 4
-rw-r--r--  sys/arm/conf/VSATV102 | 121
-rw-r--r--  sys/arm/freescale/imx/files.imx5 (renamed from sys/arm/freescale/imx/files.imx53) | 5
-rw-r--r--  sys/arm/freescale/imx/files.imx51 | 49
-rw-r--r--  sys/arm/freescale/imx/std.imx51 | 3
-rw-r--r--  sys/arm/freescale/imx/std.imx53 | 3
-rw-r--r--  sys/arm64/arm64/bus_machdep.c | 16
-rw-r--r--  sys/arm64/arm64/bus_space_asm.S | 6
-rw-r--r--  sys/arm64/arm64/db_disasm.c | 41
-rw-r--r--  sys/arm64/arm64/db_interface.c | 168
-rw-r--r--  sys/arm64/arm64/db_trace.c | 152
-rw-r--r--  sys/arm64/arm64/debug_monitor.c | 487
-rw-r--r--  sys/arm64/arm64/nexus.c | 6
-rw-r--r--  sys/arm64/arm64/pmap.c | 12
-rw-r--r--  sys/arm64/include/pmap.h | 2
-rw-r--r--  sys/boot/common/md.c | 2
-rw-r--r--  sys/boot/fdt/dts/arm/bcm2836.dtsi | 14
-rw-r--r--  sys/boot/fdt/dts/arm/rpi2.dts | 20
-rw-r--r--  sys/boot/forth/loader.conf | 10
-rw-r--r--  sys/boot/i386/common/edd.h | 2
-rw-r--r--  sys/boot/libstand32/Makefile | 36
-rw-r--r--  sys/boot/userboot/libstand/Makefile | 39
-rw-r--r--  sys/cddl/contrib/opensolaris/common/nvpair/nvpair.c | 1
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c | 13
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c | 32
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c | 5
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c | 24
-rw-r--r--  sys/cddl/dev/dtrace/amd64/dtrace_asm.S | 208
-rw-r--r--  sys/cddl/dev/dtrace/arm/dtrace_asm.S | 18
-rw-r--r--  sys/cddl/dev/dtrace/dtrace_hacks.c | 3
-rw-r--r--  sys/cddl/dev/dtrace/i386/dtrace_asm.S | 164
-rw-r--r--  sys/cddl/dev/dtrace/mips/dtrace_asm.S | 44
-rw-r--r--  sys/cddl/dev/dtrace/powerpc/dtrace_asm.S | 40
-rw-r--r--  sys/cddl/dev/profile/profile.c | 6
-rw-r--r--  sys/compat/linux/linux_getcwd.c | 413
-rw-r--r--  sys/conf/files | 58
-rw-r--r--  sys/conf/files.amd64 | 10
-rw-r--r--  sys/conf/files.arm64 | 4
-rw-r--r--  sys/conf/files.i386 | 47
-rw-r--r--  sys/conf/files.pc98 | 1
-rw-r--r--  sys/conf/files.powerpc | 7
-rw-r--r--  sys/conf/kern.mk | 2
-rw-r--r--  sys/conf/kern.pre.mk | 2
-rw-r--r--  sys/conf/options | 2
-rw-r--r--  sys/conf/options.amd64 | 2
-rw-r--r--  sys/conf/options.arm | 3
-rw-r--r--  sys/conf/options.arm64 | 2
-rw-r--r--  sys/conf/options.i386 | 4
-rw-r--r--  sys/dev/acpica/acpi.c | 2
-rw-r--r--  sys/dev/atkbdc/atkbd.c | 82
-rw-r--r--  sys/dev/bxe/bxe.h | 2
-rw-r--r--  sys/dev/cxgbe/t4_main.c | 41
-rw-r--r--  sys/dev/e1000/if_igb.c | 79
-rw-r--r--  sys/dev/hyperv/include/hyperv.h | 167
-rw-r--r--  sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c | 801
-rw-r--r--  sys/dev/hyperv/storvsc/hv_vstorage.h | 16
-rw-r--r--  sys/dev/hyperv/utilities/hv_kvp.c | 11
-rw-r--r--  sys/dev/hyperv/utilities/hv_util.c | 9
-rw-r--r--  sys/dev/hyperv/vmbus/hv_channel.c | 98
-rw-r--r--  sys/dev/hyperv/vmbus/hv_channel_mgmt.c | 265
-rw-r--r--  sys/dev/hyperv/vmbus/hv_connection.c | 286
-rw-r--r--  sys/dev/hyperv/vmbus/hv_hv.c | 66
-rw-r--r--  sys/dev/hyperv/vmbus/hv_ring_buffer.c | 76
-rw-r--r--  sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 364
-rw-r--r--  sys/dev/hyperv/vmbus/hv_vmbus_priv.h | 71
-rw-r--r--  sys/dev/ichsmb/ichsmb_pci.c | 4
-rw-r--r--  sys/dev/iicbus/pcf8563.c | 67
-rw-r--r--  sys/dev/ipmi/ipmi.c | 23
-rw-r--r--  sys/dev/ipmi/ipmi_kcs.c | 1
-rw-r--r--  sys/dev/ipmi/ipmi_smic.c | 1
-rw-r--r--  sys/dev/ipmi/ipmivars.h | 1
-rw-r--r--  sys/dev/ixgbe/if_ix.c | 1073
-rw-r--r--  sys/dev/ixgbe/if_ixv.c | 9
-rw-r--r--  sys/dev/ixgbe/ix_txrx.c | 57
-rw-r--r--  sys/dev/ixgbe/ixgbe.h | 39
-rw-r--r--  sys/dev/ixgbe/ixgbe_82598.c | 7
-rw-r--r--  sys/dev/ixgbe/ixgbe_82598.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_82599.c | 6
-rw-r--r--  sys/dev/ixgbe/ixgbe_82599.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_api.c | 122
-rw-r--r--  sys/dev/ixgbe/ixgbe_api.h | 11
-rw-r--r--  sys/dev/ixgbe/ixgbe_common.c | 6
-rw-r--r--  sys/dev/ixgbe/ixgbe_common.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_dcb.c | 12
-rw-r--r--  sys/dev/ixgbe/ixgbe_dcb.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_dcb_82598.c | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_dcb_82598.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_dcb_82599.c | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_dcb_82599.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_mbx.c | 4
-rw-r--r--  sys/dev/ixgbe/ixgbe_mbx.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_osdep.h | 9
-rw-r--r--  sys/dev/ixgbe/ixgbe_phy.c | 228
-rw-r--r--  sys/dev/ixgbe/ixgbe_phy.h | 23
-rw-r--r--  sys/dev/ixgbe/ixgbe_type.h | 265
-rw-r--r--  sys/dev/ixgbe/ixgbe_vf.c | 3
-rw-r--r--  sys/dev/ixgbe/ixgbe_vf.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_x540.c | 12
-rw-r--r--  sys/dev/ixgbe/ixgbe_x540.h | 2
-rw-r--r--  sys/dev/ixgbe/ixgbe_x550.c | 3191
-rw-r--r--  sys/dev/ixgbe/ixgbe_x550.h | 109
-rw-r--r--  sys/dev/ixl/ixl.h | 2
-rw-r--r--  sys/dev/mii/brgphy.c | 28
-rw-r--r--  sys/dev/mxge/if_mxge.c | 2
-rw-r--r--  sys/dev/pccbb/pccbb_pci.c | 52
-rw-r--r--  sys/dev/pci/pci.c | 120
-rw-r--r--  sys/dev/pci/pci_pci.c | 66
-rw-r--r--  sys/dev/pci/pci_subr.c | 2
-rw-r--r--  sys/dev/pci/pcib_private.h | 3
-rw-r--r--  sys/dev/pci/pcivar.h | 10
-rw-r--r--  sys/dev/smbus/smb.c | 148
-rw-r--r--  sys/dev/smbus/smb.h | 37
-rw-r--r--  sys/dev/smbus/smbconf.h | 33
-rw-r--r--  sys/dev/smbus/smbus.c | 88
-rw-r--r--  sys/dev/smbus/smbus.h | 4
-rw-r--r--  sys/dev/smbus/smbus_if.m | 17
-rw-r--r--  sys/dev/sound/pcm/dsp.c | 12
-rw-r--r--  sys/dev/usb/controller/dwc_otg.c | 16
-rw-r--r--  sys/dev/usb/usbdevs | 1
-rw-r--r--  sys/dev/usb/wlan/if_urtwn.c | 61
-rw-r--r--  sys/dev/virtio/network/if_vtnet.c | 5
-rw-r--r--  sys/dev/vt/hw/fb/vt_fb.c | 54
-rw-r--r--  sys/dev/vt/hw/vga/vt_vga.c | 16
-rw-r--r--  sys/dev/vt/vt_core.c | 20
-rw-r--r--  sys/dev/xen/balloon/balloon.c | 48
-rw-r--r--  sys/dev/xen/blkback/blkback.c | 13
-rw-r--r--  sys/dev/xen/control/control.c | 130
-rw-r--r--  sys/dev/xen/grant_table/grant_table.c | 70
-rw-r--r--  sys/dev/xen/netback/netback.c | 14
-rw-r--r--  sys/dev/xen/netfront/netfront.c | 10
-rw-r--r--  sys/fs/nfs/nfsport.h | 2
-rw-r--r--  sys/fs/nfsclient/nfs_clvfsops.c | 8
-rw-r--r--  sys/fs/nfsserver/nfs_nfsdkrpc.c | 3
-rw-r--r--  sys/geom/uncompress/g_uncompress.c | 2
-rw-r--r--  sys/geom/uzip/g_uzip.c | 2
-rw-r--r--  sys/i386/conf/DEFAULTS | 1
-rw-r--r--  sys/i386/conf/GENERIC | 4
-rw-r--r--  sys/i386/conf/XEN | 96
-rw-r--r--  sys/i386/i386/apic_vector.s | 24
-rw-r--r--  sys/i386/i386/genassym.c | 5
-rw-r--r--  sys/i386/i386/machdep.c | 318
-rw-r--r--  sys/i386/i386/minidump_machdep.c | 28
-rw-r--r--  sys/i386/i386/mp_machdep.c | 1002
-rw-r--r--  sys/i386/i386/support.s | 2
-rw-r--r--  sys/i386/i386/swtch.s | 22
-rw-r--r--  sys/i386/i386/sys_machdep.c | 103
-rw-r--r--  sys/i386/i386/vm_machdep.c | 27
-rw-r--r--  sys/i386/include/asmacros.h | 31
-rw-r--r--  sys/i386/include/cpufunc.h | 46
-rw-r--r--  sys/i386/include/intr_machdep.h | 8
-rw-r--r--  sys/i386/include/pcpu.h | 32
-rw-r--r--  sys/i386/include/pmap.h | 81
-rw-r--r--  sys/i386/include/segments.h | 6
-rw-r--r--  sys/i386/include/smp.h | 44
-rw-r--r--  sys/i386/include/vm.h | 2
-rw-r--r--  sys/i386/include/vmparam.h | 8
-rw-r--r--  sys/i386/include/xen/features.h | 22
-rw-r--r--  sys/i386/include/xen/hypercall.h | 6
-rw-r--r--  sys/i386/include/xen/xen-os.h | 99
-rw-r--r--  sys/i386/include/xen/xenfunc.h | 1
-rw-r--r--  sys/i386/include/xen/xenpmap.h | 237
-rw-r--r--  sys/i386/include/xen/xenstored.h | 89
-rw-r--r--  sys/i386/include/xen/xenvar.h | 85
-rw-r--r--  sys/i386/isa/npx.c | 9
-rw-r--r--  sys/i386/pci/pci_cfgreg.c | 10
-rw-r--r--  sys/i386/pci/pci_pir.c | 9
-rw-r--r--  sys/i386/xen/clock.c | 570
-rw-r--r--  sys/i386/xen/exception.s | 494
-rw-r--r--  sys/i386/xen/locore.s | 360
-rw-r--r--  sys/i386/xen/mp_machdep.c | 1300
-rw-r--r--  sys/i386/xen/mptable.c | 109
-rw-r--r--  sys/i386/xen/pmap.c | 4420
-rw-r--r--  sys/i386/xen/xen_machdep.c | 1236
-rw-r--r--  sys/kern/imgact_elf.c | 14
-rw-r--r--  sys/kern/kern_descrip.c | 38
-rw-r--r--  sys/kern/kern_exec.c | 2
-rw-r--r--  sys/kern/kern_exit.c | 8
-rw-r--r--  sys/kern/kern_gzio.c | 3
-rw-r--r--  sys/kern/kern_intr.c | 5
-rw-r--r--  sys/kern/kern_jail.c | 22
-rw-r--r--  sys/kern/kern_proc.c | 2
-rw-r--r--  sys/kern/kern_racct.c | 98
-rw-r--r--  sys/kern/kern_rctl.c | 69
-rw-r--r--  sys/kern/kern_shutdown.c | 3
-rw-r--r--  sys/kern/kern_synch.c | 9
-rw-r--r--  sys/kern/kern_thr.c | 26
-rw-r--r--  sys/kern/link_elf.c | 2
-rw-r--r--  sys/kern/link_elf_obj.c | 2
-rw-r--r--  sys/kern/sched_4bsd.c | 2
-rw-r--r--  sys/kern/subr_dnvlist.c | 166
-rw-r--r--  sys/kern/subr_nvlist.c | 807
-rw-r--r--  sys/kern/subr_nvpair.c | 379
-rw-r--r--  sys/kern/subr_param.c | 4
-rw-r--r--  sys/kern/subr_trap.c | 22
-rw-r--r--  sys/kern/sysv_msg.c | 42
-rw-r--r--  sys/kern/sysv_sem.c | 25
-rw-r--r--  sys/kern/sysv_shm.c | 80
-rw-r--r--  sys/kern/uipc_shm.c | 2
-rw-r--r--  sys/kern/vfs_aio.c | 206
-rw-r--r--  sys/kern/vfs_bio.c | 107
-rw-r--r--  sys/libkern/zlib.c (renamed from sys/net/zlib.c) | 2
-rw-r--r--  sys/mips/mips/busdma_machdep.c | 4
-rw-r--r--  sys/modules/ix/Makefile | 2
-rw-r--r--  sys/modules/ixv/Makefile | 2
-rw-r--r--  sys/modules/oce/Makefile | 2
-rw-r--r--  sys/modules/zlib/Makefile | 2
-rw-r--r--  sys/net/if_vlan.c | 21
-rw-r--r--  sys/net/netisr.c | 15
-rw-r--r--  sys/netgraph/ng_deflate.c | 3
-rw-r--r--  sys/netinet/in_kdtrace.c | 3
-rw-r--r--  sys/netinet/in_kdtrace.h | 3
-rw-r--r--  sys/netinet/ip_fw.h | 5
-rw-r--r--  sys/netinet/ip_ipsec.c | 11
-rw-r--r--  sys/netinet/libalias/libalias.3 | 8
-rw-r--r--  sys/netinet/sctp_indata.c | 11
-rw-r--r--  sys/netinet/siftr.c | 4
-rw-r--r--  sys/netinet/tcp_subr.c | 1
-rw-r--r--  sys/netinet/tcp_timewait.c | 7
-rw-r--r--  sys/netinet6/ip6_forward.c | 9
-rw-r--r--  sys/netinet6/ip6_ipsec.c | 12
-rw-r--r--  sys/netipsec/ipsec.c | 3
-rw-r--r--  sys/netipsec/ipsec.h | 1
-rw-r--r--  sys/netipsec/ipsec_input.c | 1
-rw-r--r--  sys/netipsec/ipsec_output.c | 22
-rw-r--r--  sys/netipsec/xform_ah.c | 7
-rw-r--r--  sys/netipsec/xform_esp.c | 7
-rw-r--r--  sys/netipsec/xform_ipcomp.c | 7
-rw-r--r--  sys/netpfil/ipfw/ip_fw2.c | 6
-rw-r--r--  sys/netpfil/ipfw/ip_fw_nat.c | 26
-rw-r--r--  sys/netpfil/ipfw/ip_fw_private.h | 113
-rw-r--r--  sys/netpfil/ipfw/ip_fw_sockopt.c | 564
-rw-r--r--  sys/netpfil/ipfw/ip_fw_table.c | 573
-rw-r--r--  sys/netpfil/ipfw/ip_fw_table.h | 13
-rw-r--r--  sys/opencrypto/cryptodeflate.c | 4
-rw-r--r--  sys/opencrypto/deflate.h | 2
-rw-r--r--  sys/powerpc/aim/aim_machdep.c (renamed from sys/powerpc/aim/machdep.c) | 345
-rw-r--r--  sys/powerpc/aim/mmu_oea64.c | 5
-rw-r--r--  sys/powerpc/booke/booke_machdep.c (renamed from sys/powerpc/booke/machdep.c) | 369
-rw-r--r--  sys/powerpc/booke/pmap.c | 7
-rw-r--r--  sys/powerpc/conf/GENERIC | 1
-rw-r--r--  sys/powerpc/ofw/ofw_machdep.c | 23
-rw-r--r--  sys/powerpc/powerpc/busdma_machdep.c | 4
-rw-r--r--  sys/powerpc/powerpc/machdep.c | 502
-rw-r--r--  sys/powerpc/powerpc/uma_machdep.c (renamed from sys/powerpc/aim/uma_machdep.c) | 0
-rw-r--r--  sys/sparc64/pci/apb.c | 10
-rw-r--r--  sys/sys/cdefs.h | 24
-rw-r--r--  sys/sys/nv.h | 124
-rw-r--r--  sys/sys/nv_impl.h | 51
-rw-r--r--  sys/sys/nvlist_impl.h | 4
-rw-r--r--  sys/sys/param.h | 16
-rw-r--r--  sys/sys/procctl.h | 2
-rw-r--r--  sys/sys/racct.h | 4
-rw-r--r--  sys/sys/seq.h | 8
-rw-r--r--  sys/sys/systm.h | 1
-rw-r--r--  sys/sys/zlib.h (renamed from sys/net/zlib.h) | 0
-rw-r--r--  sys/sys/zutil.h (renamed from sys/net/zutil.h) | 2
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c | 78
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c | 1
-rw-r--r--  sys/ufs/ufs/inode.h | 2
-rw-r--r--  sys/vm/sg_pager.c | 2
-rw-r--r--  sys/vm/swap_pager.c | 12
-rw-r--r--  sys/vm/uma_int.h | 2
-rw-r--r--  sys/vm/vm_fault.c | 4
-rw-r--r--  sys/vm/vm_map.c | 55
-rw-r--r--  sys/vm/vm_mmap.c | 32
-rw-r--r--  sys/vm/vm_page.c | 186
-rw-r--r--  sys/vm/vm_pageout.c | 59
-rw-r--r--  sys/vm/vm_unix.c | 81
-rw-r--r--  sys/x86/acpica/srat.c | 2
-rw-r--r--  sys/x86/include/apicvar.h | 1
-rw-r--r--  sys/x86/include/mca.h | 2
-rw-r--r--  sys/x86/include/segments.h | 8
-rw-r--r--  sys/x86/pci/qpi.c | 2
-rw-r--r--  sys/x86/x86/busdma_bounce.c | 9
-rw-r--r--  sys/x86/x86/cpu_machdep.c | 53
-rw-r--r--  sys/x86/x86/identcpu.c | 4
-rw-r--r--  sys/x86/x86/intr_machdep.c | 7
-rw-r--r--  sys/x86/x86/local_apic.c | 6
-rw-r--r--  sys/x86/x86/mca.c | 2
-rw-r--r--  sys/x86/x86/mp_x86.c | 1120
-rw-r--r--  sys/x86/xen/xen_intr.c | 4
-rw-r--r--  sys/x86/xen/xen_nexus.c | 6
335 files changed, 13087 insertions, 18513 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index c3aac33..4455cab 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -174,6 +174,22 @@ IDTVEC(xen_intr_upcall)
jmp doreti
#endif
+#ifdef HYPERV
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * It is only used when the kernel is running on Hyper-V.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(hv_vmbus_callback)
+ PUSH_FRAME
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp, %rdi
+ call hv_vector_handler
+ MEXITCOUNT
+ jmp doreti
+#endif
+
#ifdef SMP
/*
* Global address space TLB shootdown.
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index c81495a..83ca548 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -81,28 +81,11 @@ __FBSDID("$FreeBSD$");
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)
-/* lock region used by kernel profiling */
-int mcount_lock;
-
-int mp_naps; /* # of Applications processors */
-int boot_cpu_id = -1; /* designated BSP */
-
-extern struct pcpu __pcpu[];
-
-/* AP uses this during bootstrap. Do not staticize. */
-char *bootSTK;
-int bootAP;
-
-/* Free these after use */
-void *bootstacks[MAXCPU];
+extern struct pcpu __pcpu[];
/* Temporary variables for init_secondary() */
char *doublefault_stack;
char *nmi_stack;
-void *dpcpu;
-
-struct pcb stoppcbs[MAXCPU];
-struct susppcb **susppcbs;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr2;
@@ -112,309 +95,16 @@ uint64_t pcid_cr3;
pmap_t smp_tlb_pmap;
extern int invpcid_works;
-#ifdef COUNT_IPIS
-/* Interrupt counts. */
-static u_long *ipi_preempt_counts[MAXCPU];
-static u_long *ipi_ast_counts[MAXCPU];
-u_long *ipi_invltlb_counts[MAXCPU];
-u_long *ipi_invlrng_counts[MAXCPU];
-u_long *ipi_invlpg_counts[MAXCPU];
-u_long *ipi_invlcache_counts[MAXCPU];
-u_long *ipi_rendezvous_counts[MAXCPU];
-static u_long *ipi_hardclock_counts[MAXCPU];
-#endif
-
-/* Default cpu_ops implementation. */
-struct cpu_ops cpu_ops;
-
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
-extern int pmap_pcid_enabled;
-
/*
* Local data and functions.
*/
-static volatile cpuset_t ipi_nmi_pending;
-
-/* used to hold the AP's until we are ready to release them */
-struct mtx ap_boot_mtx;
-
-/* Set to 1 once we're ready to let the APs out of the pen. */
-static volatile int aps_ready = 0;
-
-/*
- * Store data from cpu_add() until later in the boot when we actually setup
- * the APs.
- */
-struct cpu_info {
- int cpu_present:1;
- int cpu_bsp:1;
- int cpu_disabled:1;
- int cpu_hyperthread:1;
-} static cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
-int apic_cpuids[MAX_APIC_ID + 1];
-
-/* Holds pending bitmap based IPIs per CPU */
-volatile u_int cpu_ipi_pending[MAXCPU];
-
-static u_int boot_address;
-static int cpu_logical; /* logical cpus per core */
-static int cpu_cores; /* cores per package */
-
-static void assign_cpu_ids(void);
-static void set_interrupt_apic_ids(void);
static int start_ap(int apic_id);
-static void release_aps(void *dummy);
-static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
-static int hyperthreading_allowed = 1;
static u_int bootMP_size;
-
-static void
-mem_range_AP_init(void)
-{
- if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
- mem_range_softc.mr_op->initAP(&mem_range_softc);
-}
-
-static void
-topo_probe_amd(void)
-{
- int core_id_bits;
- int id;
-
- /* AMD processors do not support HTT. */
- cpu_logical = 1;
-
- if ((amd_feature2 & AMDID2_CMP) == 0) {
- cpu_cores = 1;
- return;
- }
-
- core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
- AMDID_COREID_SIZE_SHIFT;
- if (core_id_bits == 0) {
- cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
- return;
- }
-
- /* Fam 10h and newer should get here. */
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
- continue;
- cpu_cores++;
- }
-}
-
-/*
- * Round up to the next power of two, if necessary, and then
- * take log2.
- * Returns -1 if argument is zero.
- */
-static __inline int
-mask_width(u_int x)
-{
-
- return (fls(x << (1 - powerof2(x))) - 1);
-}
-
-static void
-topo_probe_0x4(void)
-{
- u_int p[4];
- int pkg_id_bits;
- int core_id_bits;
- int max_cores;
- int max_logical;
- int id;
-
- /* Both zero and one here mean one logical processor per package. */
- max_logical = (cpu_feature & CPUID_HTT) != 0 ?
- (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
- if (max_logical <= 1)
- return;
-
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP. Further, we count number of
- * logical processors that belong to the same core
- * as BSP thus deducing number of threads per core.
- */
- if (cpu_high >= 0x4) {
- cpuid_count(0x04, 0, p);
- max_cores = ((p[0] >> 26) & 0x3f) + 1;
- } else
- max_cores = 1;
- core_id_bits = mask_width(max_logical/max_cores);
- if (core_id_bits < 0)
- return;
- pkg_id_bits = core_id_bits + mask_width(max_cores);
-
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
- continue;
- cpu_cores++;
- /* Check if logical CPU has the same package and core IDs. */
- if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
- cpu_logical++;
- }
-
- KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
- ("topo_probe_0x4 couldn't find BSP"));
-
- cpu_cores /= cpu_logical;
- hyperthreading_cpus = cpu_logical;
-}
-
-static void
-topo_probe_0xb(void)
-{
- u_int p[4];
- int bits;
- int cnt;
- int i;
- int logical;
- int type;
- int x;
-
- /* We only support three levels for now. */
- for (i = 0; i < 3; i++) {
- cpuid_count(0x0b, i, p);
-
- /* Fall back if CPU leaf 11 doesn't really exist. */
- if (i == 0 && p[1] == 0) {
- topo_probe_0x4();
- return;
- }
-
- bits = p[0] & 0x1f;
- logical = p[1] &= 0xffff;
- type = (p[2] >> 8) & 0xff;
- if (type == 0 || logical == 0)
- break;
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP.
- */
- for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
- if (!cpu_info[x].cpu_present ||
- cpu_info[x].cpu_disabled)
- continue;
- if (x >> bits == boot_cpu_id >> bits)
- cnt++;
- }
- if (type == CPUID_TYPE_SMT)
- cpu_logical = cnt;
- else if (type == CPUID_TYPE_CORE)
- cpu_cores = cnt;
- }
- if (cpu_logical == 0)
- cpu_logical = 1;
- cpu_cores /= cpu_logical;
-}
-
-/*
- * Both topology discovery code and code that consumes topology
- * information assume top-down uniformity of the topology.
- * That is, all physical packages must be identical and each
- * core in a package must have the same number of threads.
- * Topology information is queried only on BSP, on which this
- * code runs and for which it can query CPUID information.
- * Then topology is extrapolated on all packages using the
- * uniformity assumption.
- */
-static void
-topo_probe(void)
-{
- static int cpu_topo_probed = 0;
-
- if (cpu_topo_probed)
- return;
-
- CPU_ZERO(&logical_cpus_mask);
- if (mp_ncpus <= 1)
- cpu_cores = cpu_logical = 1;
- else if (cpu_vendor_id == CPU_VENDOR_AMD)
- topo_probe_amd();
- else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
- /*
- * See Intel(R) 64 Architecture Processor
- * Topology Enumeration article for details.
- *
- * Note that 0x1 <= cpu_high < 4 case should be
- * compatible with topo_probe_0x4() logic when
- * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
- * or it should trigger the fallback otherwise.
- */
- if (cpu_high >= 0xb)
- topo_probe_0xb();
- else if (cpu_high >= 0x1)
- topo_probe_0x4();
- }
-
- /*
- * Fallback: assume each logical CPU is in separate
- * physical package. That is, no multi-core, no SMT.
- */
- if (cpu_cores == 0 || cpu_logical == 0)
- cpu_cores = cpu_logical = 1;
- cpu_topo_probed = 1;
-}
-
-struct cpu_group *
-cpu_topo(void)
-{
- int cg_flags;
-
- /*
- * Determine whether any threading flags are
- * necessry.
- */
- topo_probe();
- if (cpu_logical > 1 && hyperthreading_cpus)
- cg_flags = CG_FLAG_HTT;
- else if (cpu_logical > 1)
- cg_flags = CG_FLAG_SMT;
- else
- cg_flags = 0;
- if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
- printf("WARNING: Non-uniform processors.\n");
- printf("WARNING: Using suboptimal topology.\n");
- return (smp_topo_none());
- }
- /*
- * No multi-core or hyper-threaded.
- */
- if (cpu_logical * cpu_cores == 1)
- return (smp_topo_none());
- /*
- * Only HTT no multi-core.
- */
- if (cpu_logical > 1 && cpu_cores == 1)
- return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
- /*
- * Only multi-core no HTT.
- */
- if (cpu_cores > 1 && cpu_logical == 1)
- return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
- /*
- * Both HTT and multi-core.
- */
- return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
- CG_SHARE_L1, cpu_logical, cg_flags));
-}
+static u_int boot_address;
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -433,85 +123,6 @@ mp_bootaddress(u_int basemem)
return mptramp_pagetables;
}
-void
-cpu_add(u_int apic_id, char boot_cpu)
-{
-
- if (apic_id > MAX_APIC_ID) {
- panic("SMP: APIC ID %d too high", apic_id);
- return;
- }
- KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
- apic_id));
- cpu_info[apic_id].cpu_present = 1;
- if (boot_cpu) {
- KASSERT(boot_cpu_id == -1,
- ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
- boot_cpu_id));
- boot_cpu_id = apic_id;
- cpu_info[apic_id].cpu_bsp = 1;
- }
- if (mp_ncpus < MAXCPU) {
- mp_ncpus++;
- mp_maxid = mp_ncpus - 1;
- }
- if (bootverbose)
- printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
- "AP");
-}
-
-void
-cpu_mp_setmaxid(void)
-{
-
- /*
- * mp_maxid should be already set by calls to cpu_add().
- * Just sanity check its value here.
- */
- if (mp_ncpus == 0)
- KASSERT(mp_maxid == 0,
- ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
- else if (mp_ncpus == 1)
- mp_maxid = 0;
- else
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__,
- mp_maxid, mp_ncpus));
-}
-
-int
-cpu_mp_probe(void)
-{
-
- /*
- * Always record BSP in CPU map so that the mbuf init code works
- * correctly.
- */
- CPU_SETOF(0, &all_cpus);
- if (mp_ncpus == 0) {
- /*
- * No CPUs were found, so this must be a UP system. Setup
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- mp_maxid = 0;
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
-}
-
/*
* Initialize the IPI handlers and start up the AP's.
*/
@@ -575,47 +186,6 @@ cpu_mp_start(void)
/*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
- const char *hyperthread;
- int i;
-
- printf("FreeBSD/SMP: %d package(s) x %d core(s)",
- mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
- if (hyperthreading_cpus > 1)
- printf(" x %d HTT threads", cpu_logical);
- else if (cpu_logical > 1)
- printf(" x %d SMT threads", cpu_logical);
- printf("\n");
-
- /* List active CPUs first. */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1; i < mp_ncpus; i++) {
- if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
- cpu_apic_ids[i]);
- }
-
- /* List disabled CPUs last. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
- continue;
- if (cpu_info[i].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
- i);
- }
-}
-
-/*
* AP CPU's call this to initialize themselves.
*/
void
@@ -624,7 +194,6 @@ init_secondary(void)
struct pcpu *pc;
struct nmi_pcpu *np;
u_int64_t msr, cr0;
- u_int cpuid;
int cpu, gsel_tss, x;
struct region_descriptor ap_gdt;
@@ -712,94 +281,7 @@ init_secondary(void)
while (!aps_ready)
ia32_pause();
- /*
- * On real hardware, switch to x2apic mode if possible. Do it
- * after aps_ready was signalled, to avoid manipulating the
- * mode while BSP might still want to send some IPI to us
- * (second startup IPI is ignored on modern hardware etc).
- */
- lapic_xapic_mode();
-
- /* Initialize the PAT MSR. */
- pmap_init_pat();
-
- /* set up CPU registers and state */
- cpu_setregs();
-
- /* set up SSE/NX */
- initializecpu();
-
- /* set up FPU state on the AP */
- fpuinit();
-
- if (cpu_ops.cpu_init)
- cpu_ops.cpu_init();
-
- /* A quick check from sanity claus */
- cpuid = PCPU_GET(cpuid);
- if (PCPU_GET(apic_id) != lapic_id()) {
- printf("SMP: cpuid = %d\n", cpuid);
- printf("SMP: actual apic_id = %d\n", lapic_id());
- printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- panic("cpuid mismatch! boom!!");
- }
-
- /* Initialize curthread. */
- KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- mca_init();
-
- mtx_lock_spin(&ap_boot_mtx);
-
- /* Init local apic for irq's */
- lapic_setup(1);
-
- /* Set memory range attributes for this CPU to match the BSP */
- mem_range_AP_init();
-
- smp_cpus++;
-
- CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
- printf("SMP: AP CPU #%d Launched!\n", cpuid);
-
- /* Determine if we are a logical CPU. */
- /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
- if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
- CPU_SET(cpuid, &logical_cpus_mask);
-
- if (bootverbose)
- lapic_dump("AP");
-
- if (smp_cpus == mp_ncpus) {
- /* enable IPI's, tlb shootdown, freezes etc */
- atomic_store_rel_int(&smp_started, 1);
- }
-
- /*
- * Enable global pages TLB extension
- * This also implicitly flushes the TLB
- */
-
- load_cr4(rcr4() | CR4_PGE);
- if (pmap_pcid_enabled)
- load_cr4(rcr4() | CR4_PCIDE);
- load_ds(_udatasel);
- load_es(_udatasel);
- load_fs(_ufssel);
- mtx_unlock_spin(&ap_boot_mtx);
-
- /* Wait until all the AP's are up. */
- while (smp_started == 0)
- ia32_pause();
-
- /* Start per-CPU event timers. */
- cpu_initclocks_ap();
-
- sched_throw(NULL);
-
- panic("scheduler returned us to %s", __func__);
- /* NOTREACHED */
+ init_secondary_tail();
}
/*******************************************************************
@@ -807,108 +289,6 @@ init_secondary(void)
*/
/*
- * We tell the I/O APIC code about all the CPUs we want to receive
- * interrupts. If we don't want certain CPUs to receive IRQs we
- * can simply not tell the I/O APIC code about them in this function.
- * We also do not tell it about the BSP since it tells itself about
- * the BSP internally to work with UP kernels and on UP machines.
- */
-static void
-set_interrupt_apic_ids(void)
-{
- u_int i, apic_id;
-
- for (i = 0; i < MAXCPU; i++) {
- apic_id = cpu_apic_ids[i];
- if (apic_id == -1)
- continue;
- if (cpu_info[apic_id].cpu_bsp)
- continue;
- if (cpu_info[apic_id].cpu_disabled)
- continue;
-
- /* Don't let hyperthreads service interrupts. */
- if (cpu_logical > 1 &&
- apic_id % cpu_logical != 0)
- continue;
-
- intr_add_cpu(i);
- }
-}
-
-/*
- * Assign logical CPU IDs to local APICs.
- */
-static void
-assign_cpu_ids(void)
-{
- u_int i;
-
- TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
- &hyperthreading_allowed);
-
- /* Check for explicitly disabled CPUs. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
- continue;
-
- if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
- cpu_info[i].cpu_hyperthread = 1;
-
- /*
- * Don't use HT CPU if it has been disabled by a
- * tunable.
- */
- if (hyperthreading_allowed == 0) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", i)) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
- hyperthreading_cpus = 0;
- cpu_logical = 1;
- }
-
- /*
- * Assign CPU IDs to local APIC IDs and disable any CPUs
- * beyond MAXCPU. CPU 0 is always assigned to the BSP.
- *
- * To minimize confusion for userland, we attempt to number
- * CPUs such that all threads and cores in a package are
- * grouped together. For now we assume that the BSP is always
- * the first thread in a package and just start adding APs
- * starting with the BSP's APIC ID.
- */
- mp_ncpus = 1;
- cpu_apic_ids[0] = boot_cpu_id;
- apic_cpuids[boot_cpu_id] = 0;
- for (i = boot_cpu_id + 1; i != boot_cpu_id;
- i == MAX_APIC_ID ? i = 0 : i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
- cpu_info[i].cpu_disabled)
- continue;
-
- if (mp_ncpus < MAXCPU) {
- cpu_apic_ids[mp_ncpus] = i;
- apic_cpuids[i] = mp_ncpus;
- mp_ncpus++;
- } else
- cpu_info[i].cpu_disabled = 1;
- }
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
- mp_ncpus));
-}
-
-/*
* start each AP in our list
*/
int
@@ -1026,129 +406,6 @@ start_ap(int apic_id)
return 0; /* return FAILURE */
}
-#ifdef COUNT_XINVLTLB_HITS
-u_int xhits_gbl[MAXCPU];
-u_int xhits_pg[MAXCPU];
-u_int xhits_rng[MAXCPU];
-static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
- sizeof(xhits_gbl), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
- sizeof(xhits_pg), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
- sizeof(xhits_rng), "IU", "");
-
-u_int ipi_global;
-u_int ipi_page;
-u_int ipi_range;
-u_int ipi_range_size;
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW,
- &ipi_range_size, 0, "");
-
-u_int ipi_masked_global;
-u_int ipi_masked_page;
-u_int ipi_masked_range;
-u_int ipi_masked_range_size;
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
- &ipi_masked_global, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
- &ipi_masked_page, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
- &ipi_masked_range, 0, "");
-SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
- &ipi_masked_range_size, 0, "");
-#endif /* COUNT_XINVLTLB_HITS */
-
-/*
- * Init and startup IPI.
- */
-void
-ipi_startup(int apic_id, int vector)
-{
-
- /*
- * This attempts to follow the algorithm described in the
- * Intel Multiprocessor Specification v1.4 in section B.4.
- * For each IPI, we allow the local APIC ~20us to deliver the
- * IPI. If that times out, we panic.
- */
-
- /*
- * first we do an INIT IPI: this INIT IPI might be run, resetting
- * and running the target CPU. OR this INIT IPI might be latched (P5
- * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
- * ignored.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
- lapic_ipi_wait(100);
-
- /* Explicitly deassert the INIT IPI. */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
- apic_id);
-
- DELAY(10000); /* wait ~10mS */
-
- /*
- * next we do a STARTUP IPI: the previous INIT IPI might still be
- * latched, (P5 bug) this 1st STARTUP would then terminate
- * immediately, and the previously started INIT IPI would continue. OR
- * the previous INIT IPI has already run. and this STARTUP IPI will
- * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
- * will run.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- if (!lapic_ipi_wait(100))
- panic("Failed to deliver first STARTUP IPI to APIC %d",
- apic_id);
- DELAY(200); /* wait ~200uS */
-
- /*
- * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
- * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
- * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
- * recognized after hardware RESET or INIT IPI.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- if (!lapic_ipi_wait(100))
- panic("Failed to deliver second STARTUP IPI to APIC %d",
- apic_id);
-
- DELAY(200); /* wait ~200uS */
-}
-
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (old_pending)
- return;
- }
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
/*
* Flush the TLB on all other CPU's
*/
@@ -1228,26 +485,6 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
}
void
-smp_cache_flush(void)
-{
-
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
-}
-
-void
-smp_invltlb(pmap_t pmap)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
- ipi_global++;
-#endif
- }
-}
-
-void
smp_invlpg(pmap_t pmap, vm_offset_t addr)
{
@@ -1312,210 +549,23 @@ smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
}
void
-ipi_bitmap_handler(struct trapframe frame)
-{
- struct trapframe *oldframe;
- struct thread *td;
- int cpu = PCPU_GET(cpuid);
- u_int ipi_bitmap;
-
- critical_enter();
- td = curthread;
- td->td_intr_nesting_level++;
- oldframe = td->td_intr_frame;
- td->td_intr_frame = &frame;
- ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
- if (ipi_bitmap & (1 << IPI_PREEMPT)) {
-#ifdef COUNT_IPIS
- (*ipi_preempt_counts[cpu])++;
-#endif
- sched_preempt(td);
- }
- if (ipi_bitmap & (1 << IPI_AST)) {
-#ifdef COUNT_IPIS
- (*ipi_ast_counts[cpu])++;
-#endif
- /* Nothing to do for AST */
- }
- if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
-#ifdef COUNT_IPIS
- (*ipi_hardclock_counts[cpu])++;
-#endif
- hardclockintr();
- }
- td->td_intr_frame = oldframe;
- td->td_intr_nesting_level--;
- critical_exit();
-}
-
-/*
- * send an IPI to a set of cpus.
- */
-void
-ipi_selected(cpuset_t cpus, u_int ipi)
-{
- int cpu;
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
-
- while ((cpu = CPU_FFS(&cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &cpus);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
- }
-}
-
-/*
- * send an IPI to a specific CPU.
- */
-void
-ipi_cpu(int cpu, u_int ipi)
-{
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
-
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
-}
-
-/*
- * send an IPI to all CPUs EXCEPT myself
- */
-void
-ipi_all_but_self(u_int ipi)
+smp_cache_flush(void)
{
- cpuset_t other_cpus;
-
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- if (IPI_IS_BITMAPED(ipi)) {
- ipi_selected(other_cpus, ipi);
- return;
- }
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+ if (smp_started)
+ smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
}
-int
-ipi_nmi_handler()
-{
- u_int cpuid;
-
- /*
- * As long as there is not a simple way to know about a NMI's
- * source, if the bitmask for the current CPU is present in
- * the global pending bitword an IPI_STOP_HARD has been issued
- * and should be handled.
- */
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
- return (1);
-
- CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
- cpustop_handler();
- return (0);
-}
-
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
void
-cpustop_handler(void)
-{
- u_int cpu;
-
- cpu = PCPU_GET(cpuid);
-
- savectx(&stoppcbs[cpu]);
-
- /* Indicate that we are stopped */
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
-
- /* Wait for restart */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
+smp_invltlb(pmap_t pmap)
+{
-#ifdef DDB
- amd64_db_resume_dbreg();
+ if (smp_started) {
+ smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+ ipi_global++;
#endif
-
- if (cpu == 0 && cpustop_restartfunc != NULL) {
- cpustop_restartfunc();
- cpustop_restartfunc = NULL;
- }
-}
-
-/*
- * Handle an IPI_SUSPEND by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpususpend_handler(void)
-{
- u_int cpu;
-
- mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
-
- cpu = PCPU_GET(cpuid);
- if (savectx(&susppcbs[cpu]->sp_pcb)) {
- fpususpend(susppcbs[cpu]->sp_fpususpend);
- wbinvd();
- CPU_SET_ATOMIC(cpu, &suspended_cpus);
- } else {
- fpuresume(susppcbs[cpu]->sp_fpususpend);
- pmap_init_pat();
- initializecpu();
- PCPU_SET(switchtime, 0);
- PCPU_SET(switchticks, ticks);
-
- /* Indicate that we are resumed */
- CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
-
- /* Wait for resume */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- if (cpu_ops.cpu_resume)
- cpu_ops.cpu_resume();
- if (vmm_resume_p)
- vmm_resume_p();
-
- /* Resume MCA and local APIC */
- lapic_xapic_mode();
- mca_resume();
- lapic_setup(0);
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- /* Indicate that we are resumed */
- CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/*
@@ -1678,63 +728,3 @@ invlrng_handler(void)
atomic_add_int(&smp_tlb_wait, 1);
}
-
-void
-invlcache_handler(void)
-{
-#ifdef COUNT_IPIS
- (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- wbinvd();
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-/*
- * This is called once the rest of the system is up and running and we're
- * ready to let the AP's out of the pen.
- */
-static void
-release_aps(void *dummy __unused)
-{
-
- if (mp_ncpus == 1)
- return;
- atomic_store_rel_int(&aps_ready, 1);
- while (smp_started == 0)
- ia32_pause();
-}
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-
-#ifdef COUNT_IPIS
-/*
- * Setup interrupt counters for IPI handlers.
- */
-static void
-mp_ipi_intrcnt(void *dummy)
-{
- char buf[64];
- int i;
-
- CPU_FOREACH(i) {
- snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
- intrcnt_add(buf, &ipi_invltlb_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
- intrcnt_add(buf, &ipi_invlrng_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
- intrcnt_add(buf, &ipi_invlpg_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
- intrcnt_add(buf, &ipi_invlcache_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
- intrcnt_add(buf, &ipi_preempt_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:ast", i);
- intrcnt_add(buf, &ipi_ast_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
- intrcnt_add(buf, &ipi_rendezvous_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
- intrcnt_add(buf, &ipi_hardclock_counts[i]);
- }
-}
-SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
-#endif
-
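
Editorial note: the bulk of the deletions in this file are a code motion, not a removal. The topology probing, cpu_add()/cpu_mp_*() glue, the IPI senders, and the stop/suspend handlers become shared x86 code; the diffstat above shows the new sys/x86/x86/mp_x86.c gaining 1120 lines, which is presumably where this machine-independent half lands, leaving amd64's mp_machdep.c with only the machine-dependent bring-up. A minimal sketch of the resulting shape of init_secondary() (illustrative, not the literal committed code):

    /* sys/amd64/amd64/mp_machdep.c -- machine-dependent half only (sketch) */
    void
    init_secondary(void)
    {
            /* amd64-specific per-AP setup (GDT, TSS, MSRs, stacks) stays here. */

            /* Spin until the BSP releases the APs. */
            while (!aps_ready)
                    ia32_pause();

            /* Everything past this point is common x86 code. */
            init_secondary_tail();          /* see the smp.h hunk below */
    }
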
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index bdaca33..c24dd5a 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -340,7 +340,9 @@ device virtio_blk # VirtIO Block device
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
-# HyperV drivers
+# HyperV drivers and enhancement support
+# NOTE: HYPERV depends on hyperv. They must be added or removed together.
+options HYPERV # Hyper-V kernel infrastructure
device hyperv # HyperV drivers
# Xen HVM Guest Optimizations
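
A note on the pairing above: options HYPERV gates the kernel-side infrastructure (including the hv_vmbus_callback vector added to apic_vector.S earlier in this diff), while device hyperv pulls in the vmbus, storvsc, and utilities drivers from sys/dev/hyperv touched below, and per the NOTE the two must move together. A custom kernel configuration tracking this change would carry them as a pair; a hypothetical stanza (MYKERNEL is a placeholder ident):

    ident           MYKERNEL
    # ... the rest of the configuration ...
    options         HYPERV          # Hyper-V kernel infrastructure
    device          hyperv          # Hyper-V drivers (vmbus, storvsc, utilities)
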
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index 9b697f0..e0fe465 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -494,6 +494,8 @@ device virtio_balloon # VirtIO Memory Balloon device
device virtio_random # VirtIO Entropy device
device virtio_console # VirtIO Console device
+# Microsoft Hyper-V enhancement support
+options HYPERV # Hyper-V kernel infrastructure
device hyperv # HyperV drivers
# Xen HVM Guest Optimizations
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 3a4b6b3..034a693 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -35,6 +35,39 @@ extern int mp_naps;
extern int boot_cpu_id;
extern struct pcb stoppcbs[];
extern int cpu_apic_ids[];
+extern int bootAP;
+extern void *dpcpu;
+extern char *bootSTK;
+extern void *bootstacks[];
+extern volatile u_int cpu_ipi_pending[];
+extern volatile int aps_ready;
+extern struct mtx ap_boot_mtx;
+extern int cpu_logical;
+extern int cpu_cores;
+extern int pmap_pcid_enabled;
+extern u_int xhits_gbl[];
+extern u_int xhits_pg[];
+extern u_int xhits_rng[];
+extern u_int ipi_global;
+extern u_int ipi_page;
+extern u_int ipi_range;
+extern u_int ipi_range_size;
+extern u_int ipi_masked_global;
+extern u_int ipi_masked_page;
+extern u_int ipi_masked_range;
+extern u_int ipi_masked_range_size;
+
+extern volatile int smp_tlb_wait;
+
+struct cpu_info {
+ int cpu_present:1;
+ int cpu_bsp:1;
+ int cpu_disabled:1;
+ int cpu_hyperthread:1;
+};
+extern struct cpu_info cpu_info[];
+
#ifdef COUNT_IPIS
extern u_long *ipi_invltlb_counts[MAXCPU];
extern u_long *ipi_invlrng_counts[MAXCPU];
@@ -60,9 +93,11 @@ inthand_t
struct pmap;
/* functions in mp_machdep.c */
+void assign_cpu_ids(void);
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
void cpususpend_handler(void);
+void init_secondary_tail(void);
void invltlb_handler(void);
void invltlb_pcid_handler(void);
void invlpg_handler(void);
@@ -77,6 +112,7 @@ void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
+void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
void smp_invlpg(struct pmap *pmap, vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
@@ -87,6 +123,9 @@ void smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
void smp_invltlb(struct pmap *pmap);
void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
int native_start_all_aps(void);
+void mem_range_AP_init(void);
+void topo_probe(void);
+void ipi_send_cpu(int cpu, u_int ipi);
#endif /* !LOCORE */
#endif /* SMP */
diff --git a/sys/amd64/include/vm.h b/sys/amd64/include/vm.h
index 6573e37..22d2eca 100644
--- a/sys/amd64/include/vm.h
+++ b/sys/amd64/include/vm.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Copyright (c) 2009 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 52294bd..7c617be 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -204,13 +204,12 @@ int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vm *vm, int vcpu);
-cpuset_t vm_active_cpus(struct vm *vm);
-cpuset_t vm_suspended_cpus(struct vm *vm);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+#ifdef _SYS__CPUSET_H_
/*
* Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
* The rendezvous 'func(arg)' is not allowed to do anything that will
@@ -228,6 +227,9 @@ void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
vm_rendezvous_func_t func, void *arg);
+cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_suspended_cpus(struct vm *vm);
+#endif /* _SYS__CPUSET_H_ */
static __inline int
vcpu_rendezvous_pending(void *rendezvous_cookie)
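
The reordering above is the substance of this hunk: every prototype that names cpuset_t now sits under #ifdef _SYS__CPUSET_H_, the include guard of the header that defines cpuset_t, so vmm.h no longer obliges every consumer to drag in the cpuset definitions. That is exactly why several hunks below can drop #include <sys/cpuset.h> outright. A sketch of the consumer-side effect (example_activate() is a hypothetical function, not part of the patch):

    #include <machine/vmm.h>        /* usable without any cpuset header */

    static int
    example_activate(struct vm *vm, int vcpu)
    {
            /*
             * vm_activate_cpu() is always declared.  vm_active_cpus() and
             * vm_suspended_cpus() are declared only when the header that
             * defines _SYS__CPUSET_H_ (and cpuset_t) was included first.
             */
            return (vm_activate_cpu(vm, vcpu));
    }
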
diff --git a/sys/amd64/include/xen/xenfunc.h b/sys/amd64/include/xen/xenfunc.h
index d03d4f6..d8a6b5c 100644
--- a/sys/amd64/include/xen/xenfunc.h
+++ b/sys/amd64/include/xen/xenfunc.h
@@ -29,12 +29,7 @@
#ifndef _XEN_XENFUNC_H_
#define _XEN_XENFUNC_H_
-#ifdef XENHVM
#include <machine/xen/xenvar.h>
-#else
-#include <machine/xen/xenpmap.h>
-#include <machine/segments.h>
-#endif
#define BKPT __asm__("int3");
#define XPQ_CALL_DEPTH 5
@@ -64,10 +59,6 @@ void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line);
#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0)
#endif
-#ifndef XENHVM
-void xen_update_descriptor(union descriptor *, union descriptor *);
-#endif
-
extern struct mtx balloon_lock;
#if 0
#define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags)
diff --git a/sys/amd64/include/xen/xenpmap.h b/sys/amd64/include/xen/xenpmap.h
deleted file mode 100644
index d768dad..0000000
--- a/sys/amd64/include/xen/xenpmap.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _XEN_XENPMAP_H_
-#define _XEN_XENPMAP_H_
-
-#include <machine/xen/features.h>
-
-void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
-void xen_pt_switch(vm_paddr_t);
-void xen_set_ldt(vm_paddr_t, unsigned long);
-void xen_pgdpt_pin(vm_paddr_t);
-void xen_pgd_pin(vm_paddr_t);
-void xen_pgd_unpin(vm_paddr_t);
-void xen_pt_pin(vm_paddr_t);
-void xen_pt_unpin(vm_paddr_t);
-void xen_flush_queue(void);
-void xen_check_queue(void);
-#if 0
-void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
-#endif
-
-#ifdef INVARIANTS
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
-#else
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
-#endif
-
-#ifdef PMAP_DEBUG
-#define PMAP_REF pmap_ref
-#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
-#define PMAP_MARK_PRIV pmap_mark_privileged
-#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
-#else
-#define PMAP_MARK_PRIV(a)
-#define PMAP_MARK_UNPRIV(a)
-#define PMAP_REF(a, b)
-#define PMAP_DEC_REF_PAGE(a)
-#endif
-
-#define ALWAYS_SYNC 0
-
-#ifdef PT_DEBUG
-#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
-#else
-#define PT_LOG()
-#endif
-
-#define INVALID_P2M_ENTRY (~0UL)
-
-#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-
-#define SH_PD_SET_VA 1
-#define SH_PD_SET_VA_MA 2
-#define SH_PD_SET_VA_CLEAR 3
-
-struct pmap;
-void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
-#ifdef notyet
-static vm_paddr_t
-vptetomachpte(vm_paddr_t *pte)
-{
- vm_offset_t offset, ppte;
- vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
- int pgindex;
-
- ppte = (vm_offset_t)pte;
- pgoffset = (ppte & PAGE_MASK);
- offset = ppte - (vm_offset_t)PTmap;
- pgindex = ppte >> PDRSHIFT;
-
- pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
- retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
- return (retval);
-}
-#endif
-#define PT_GET(_ptp) \
- (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
-
-#ifdef WRITABLE_PAGETABLES
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- PT_LOG(); \
- *(_ptp) = xpmap_ptom((_npte)); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- PT_LOG(); \
- *(_ptp) = (_npte); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- PT_LOG(); \
- *(_ptp) = 0; \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#else /* !WRITABLE_PAGETABLES */
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- xen_queue_pt_update(vtomach(_ptp), \
- xpmap_ptom(_npte)); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- xen_queue_pt_update(vtomach(_ptp), _npte); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- xen_queue_pt_update(vtomach(_ptp), 0); \
- if (sync || ALWAYS_SYNC) \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#endif
-
-#define PT_SET_MA(_va, _ma) \
-do { \
- PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
- (_ma), \
- UVMF_INVLPG| UVMF_ALL) < 0); \
-} while (/*CONSTCOND*/0)
-
-#define PT_UPDATES_FLUSH() do { \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-static __inline vm_paddr_t
-xpmap_mtop(vm_paddr_t mpa)
-{
- vm_paddr_t tmp = (mpa & PG_FRAME);
-
- return machtophys(tmp) | (mpa & ~PG_FRAME);
-}
-
-static __inline vm_paddr_t
-xpmap_ptom(vm_paddr_t ppa)
-{
- vm_paddr_t tmp = (ppa & PG_FRAME);
-
- return phystomach(tmp) | (ppa & ~PG_FRAME);
-}
-
-static __inline void
-set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-#ifdef notyet
- PANIC_IF(max_mapnr && pfn >= max_mapnr);
-#endif
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef notyet
- PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
-#endif
- return;
- }
- xen_phys_machine[pfn] = mfn;
-}
-
-
-
-
-#endif /* _XEN_XENPMAP_H_ */
diff --git a/sys/amd64/include/xen/xenvar.h b/sys/amd64/include/xen/xenvar.h
index d9dbc5d..110a351 100644
--- a/sys/amd64/include/xen/xenvar.h
+++ b/sys/amd64/include/xen/xenvar.h
@@ -48,68 +48,7 @@ if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__
#define TRACE_DEBUG(argflags, _f, _a...)
#endif
-#ifdef XENHVM
-
-static inline vm_paddr_t
-phystomach(vm_paddr_t pa)
-{
-
- return (pa);
-}
-
-static inline vm_paddr_t
-machtophys(vm_paddr_t ma)
-{
-
- return (ma);
-}
-
#define vtomach(va) pmap_kextract((vm_offset_t) (va))
-#define PFNTOMFN(pa) (pa)
-#define MFNTOPFN(ma) (ma)
-
-#define set_phys_to_machine(pfn, mfn) ((void)0)
-#define phys_to_machine_mapping_valid(pfn) (TRUE)
-#define PT_UPDATES_FLUSH() ((void)0)
-
-#else
-
-extern xen_pfn_t *xen_phys_machine;
-
-
-extern xen_pfn_t *xen_machine_phys;
-/* Xen starts physical pages after the 4MB ISA hole -
- * FreeBSD doesn't
- */
-
-
-#undef ADD_ISA_HOLE /* XXX */
-
-#ifdef ADD_ISA_HOLE
-#define ISA_INDEX_OFFSET 1024
-#define ISA_PDR_OFFSET 1
-#else
-#define ISA_INDEX_OFFSET 0
-#define ISA_PDR_OFFSET 0
-#endif
-
-
-#define PFNTOMFN(i) (xen_phys_machine[(i)])
-#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
-
-#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
-#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
-
-#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
-#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT)
-
-#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
-#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
-#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
-
-#endif
void xpq_init(void);
diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c
index acb3a3d..3157e21 100644
--- a/sys/amd64/vmm/amd/amdv.c
+++ b/sys/amd64/vmm/amd/amdv.c
@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
-#include <sys/smp.h>
#include <machine/vmm.h>
#include "io/iommu.h"
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index f505ea1..7cc13ca 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -802,6 +802,7 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
case CPU_MODE_REAL:
vmexit->u.inst_emul.cs_base = seg.base;
vmexit->u.inst_emul.cs_d = 0;
+ break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
vmexit->u.inst_emul.cs_base = seg.base;
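The single added break is the entire fix: CPU_MODE_REAL previously fell
through into the protected-mode cases and overwrote cs_d. A reduced
sketch of the pattern, with illustrative names, not the vmm code itself:

enum cpu_mode { CPU_MODE_REAL, CPU_MODE_PROTECTED };

static int
cs_default_size(enum cpu_mode mode, int seg_d_bit)
{
	switch (mode) {
	case CPU_MODE_REAL:
		return (0);	/* without a break/return here, real mode
				   incorrectly took the value below */
	case CPU_MODE_PROTECTED:
		return (seg_d_bit);
	}
	return (0);
}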
diff --git a/sys/amd64/vmm/amd/svm_msr.c b/sys/amd64/vmm/amd/svm_msr.c
index 100af4b..d3a6fe8 100644
--- a/sys/amd64/vmm/amd/svm_msr.c
+++ b/sys/amd64/vmm/amd/svm_msr.c
@@ -27,12 +27,17 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/types.h>
+#include <sys/param.h>
#include <sys/errno.h>
+#include <sys/systm.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include "svm.h"
+#include "vmcb.h"
+#include "svm_softc.h"
#include "svm_msr.h"
#ifndef MSR_AMDK8_IPM
@@ -105,6 +110,14 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result,
int error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ case MSR_SYSCFG:
+ *result = 0;
+ break;
case MSR_AMDK8_IPM:
*result = 0;
break;
@@ -122,6 +135,15 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu)
int error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ vm_inject_gp(sc->vm, vcpu);
+ break;
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ case MSR_SYSCFG:
+ break; /* Ignore writes */
case MSR_AMDK8_IPM:
/*
* Ignore writes to the "Interrupt Pending Message" MSR.
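Both the SVM hunks here and the VMX hunks below apply the same MTRR
policy: reads of the capability and range registers return zero, guest
writes are silently dropped, and a write to MSR_MTRRcap (architecturally
read-only) injects #GP. A condensed sketch of that policy; the 'is_mtrr'
argument is a hypothetical stand-in for the case ranges above:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#define MSR_MTRRcap	0x0fe		/* IA32_MTRRCAP, read-only */

static int
emulate_mtrr_msr(unsigned int num, bool is_mtrr, bool write, uint64_t *valp)
{
	if (!is_mtrr)
		return (ENOENT);	/* not ours; try the next handler */
	if (!write) {
		*valp = 0;		/* read-as-zero: no MTRRs exposed */
		return (0);
	}
	if (num == MSR_MTRRcap)
		return (EPERM);		/* caller injects #GP */
	return (0);			/* other MTRR writes are ignored */
}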
diff --git a/sys/amd64/vmm/amd/vmcb.c b/sys/amd64/vmm/amd/vmcb.c
index fb4b2c8..d860169 100644
--- a/sys/amd64/vmm/amd/vmcb.c
+++ b/sys/amd64/vmm/amd/vmcb.c
@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index e517778..526b0d1 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -31,7 +31,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
@@ -396,6 +395,13 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ *val = 0;
+ break;
case MSR_IA32_MISC_ENABLE:
*val = misc_enable;
break;
@@ -427,6 +433,14 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
error = 0;
switch (num) {
+ case MSR_MTRRcap:
+ vm_inject_gp(vmx->vm, vcpuid);
+ break;
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ break; /* Ignore writes */
case MSR_IA32_MISC_ENABLE:
changed = val ^ misc_enable;
/*
diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c
index 0df6e7c..6e94f5b 100644
--- a/sys/amd64/vmm/io/vatpic.c
+++ b/sys/amd64/vmm/io/vatpic.c
@@ -30,7 +30,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
diff --git a/sys/amd64/vmm/io/vatpit.c b/sys/amd64/vmm/io/vatpit.c
index 842253d..173ef1f 100644
--- a/sys/amd64/vmm/io/vatpit.c
+++ b/sys/amd64/vmm/io/vatpit.c
@@ -31,7 +31,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c
index a4c96cd..1db1c51 100644
--- a/sys/amd64/vmm/io/vhpet.c
+++ b/sys/amd64/vmm/io/vhpet.c
@@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <dev/acpica/acpi_hpet.h>
diff --git a/sys/amd64/vmm/io/vioapic.c b/sys/amd64/vmm/io/vioapic.c
index 411887d..e6b8b5a 100644
--- a/sys/amd64/vmm/io/vioapic.c
+++ b/sys/amd64/vmm/io/vioapic.c
@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 7097248..3451e1e 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -547,6 +547,8 @@ vlapic_update_ppr(struct vlapic *vlapic)
VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
+static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");
+
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
@@ -557,11 +559,7 @@ vlapic_process_eoi(struct vlapic *vlapic)
isrptr = &lapic->isr0;
tmrptr = &lapic->tmr0;
- /*
- * The x86 architecture reserves the the first 32 vectors for use
- * by the processor.
- */
- for (i = 7; i > 0; i--) {
+ for (i = 7; i >= 0; i--) {
idx = i * 4;
bitpos = fls(isrptr[idx]);
if (bitpos-- != 0) {
@@ -570,17 +568,21 @@ vlapic_process_eoi(struct vlapic *vlapic)
vlapic->isrvec_stk_top);
}
isrptr[idx] &= ~(1 << bitpos);
+ vector = i * 32 + bitpos;
+ VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d",
+ vector);
VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
vlapic->isrvec_stk_top--;
vlapic_update_ppr(vlapic);
if ((tmrptr[idx] & (1 << bitpos)) != 0) {
- vector = i * 32 + bitpos;
vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
vector);
}
return;
}
}
+ VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "Gratuitous EOI");
+ vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_GRATUITOUS_EOI, 1);
}
static __inline int
@@ -1092,11 +1094,7 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
irrptr = &lapic->irr0;
- /*
- * The x86 architecture reserves the the first 32 vectors for use
- * by the processor.
- */
- for (i = 7; i > 0; i--) {
+ for (i = 7; i >= 0; i--) {
idx = i * 4;
val = atomic_load_acq_int(&irrptr[idx]);
bitpos = fls(val);
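The loop-bound change from 'i > 0' to 'i >= 0' is the substance of both
vlapic hunks: the 256 vectors live in eight 32-bit registers, so starting
the scan at index 1 silently ignored vectors 0-31. The deleted comment
about the processor reserving those vectors described delivery policy,
not the ISR/IRR scan, which must cover the whole range. A standalone
sketch of the priority scan, using a plain array in place of the
16-byte-strided hardware layout (hence idx = i * 4 in the driver):

#include <strings.h>	/* fls(), as in the kernel's libkern */

static int
highest_vector(const unsigned int regs[8])
{
	int i, bitpos;

	for (i = 7; i >= 0; i--) {	/* 'i > 0' lost vectors 0-31 */
		bitpos = fls(regs[i]);
		if (bitpos != 0)
			return (i * 32 + bitpos - 1);
	}
	return (-1);			/* nothing pending / in service */
}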
diff --git a/sys/amd64/vmm/io/vpmtmr.c b/sys/amd64/vmm/io/vpmtmr.c
index 09f763f..1e7bb93 100644
--- a/sys/amd64/vmm/io/vpmtmr.c
+++ b/sys/amd64/vmm/io/vpmtmr.c
@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
diff --git a/sys/amd64/vmm/io/vrtc.c b/sys/amd64/vmm/io/vrtc.c
index ab9cabb..18ebc4b 100644
--- a/sys/amd64/vmm/io/vrtc.c
+++ b/sys/amd64/vmm/io/vrtc.c
@@ -30,7 +30,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
@@ -63,9 +62,12 @@ struct rtcdev {
uint8_t reg_b;
uint8_t reg_c;
uint8_t reg_d;
- uint8_t nvram[128 - 14];
+ uint8_t nvram[36];
+ uint8_t century;
+ uint8_t nvram2[128 - 51];
} __packed;
CTASSERT(sizeof(struct rtcdev) == 128);
+CTASSERT(offsetof(struct rtcdev, century) == RTC_CENTURY);
struct vrtc {
struct vm *vm;
@@ -139,20 +141,23 @@ update_enabled(struct vrtc *vrtc)
}
static time_t
-vrtc_curtime(struct vrtc *vrtc)
+vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime)
{
sbintime_t now, delta;
- time_t t;
+ time_t t, secs;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
t = vrtc->base_rtctime;
+ *basetime = vrtc->base_uptime;
if (update_enabled(vrtc)) {
now = sbinuptime();
delta = now - vrtc->base_uptime;
KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
"%#lx to %#lx", vrtc->base_uptime, now));
- t += delta / SBT_1S;
+ secs = delta / SBT_1S;
+ t += secs;
+ *basetime += secs * SBT_1S;
}
return (t);
}
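Returning the base uptime alongside the current time lets callers hand a
consistent (time, basetime) pair to vrtc_time_update(). Note basetime is
advanced only by whole elapsed seconds, so the sub-second remainder keeps
accumulating against the same base instead of being discarded on every
call. A small sketch of that arithmetic, with int64_t standing in for
sbintime_t (FreeBSD's 32.32 fixed-point time):

#include <stdint.h>

typedef int64_t sbintime_t;
#define SBT_1S	((sbintime_t)1 << 32)

static int64_t
rtc_now(sbintime_t uptime, sbintime_t *basetime, int64_t rtcsecs)
{
	sbintime_t delta = uptime - *basetime;
	int64_t secs = delta / SBT_1S;

	/* Advance the base by whole seconds only; keep the remainder. */
	*basetime += secs * SBT_1S;
	return (rtcsecs + secs);
}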
@@ -245,6 +250,7 @@ secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
rtc->day_of_month = rtcset(rtc, ct.day);
rtc->month = rtcset(rtc, ct.mon);
rtc->year = rtcset(rtc, ct.year % 100);
+ rtc->century = rtcset(rtc, ct.year / 100);
}
static int
@@ -274,7 +280,7 @@ rtc_to_secs(struct vrtc *vrtc)
struct timespec ts;
struct rtcdev *rtc;
struct vm *vm;
- int error, hour, pm, year;
+ int century, error, hour, pm, year;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
@@ -358,10 +364,14 @@ rtc_to_secs(struct vrtc *vrtc)
VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
goto fail;
}
- if (year >= 70)
- ct.year = 1900 + year;
- else
- ct.year = 2000 + year;
+
+ error = rtcget(rtc, rtc->century, &century);
+ ct.year = century * 100 + year;
+ if (error || ct.year < POSIX_BASE_YEAR) {
+ VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century,
+ ct.year);
+ goto fail;
+ }
error = clock_ct_to_ts(&ct, &ts);
if (error || ts.tv_sec < 0) {
@@ -373,13 +383,19 @@ rtc_to_secs(struct vrtc *vrtc)
}
return (ts.tv_sec); /* success */
fail:
- return (VRTC_BROKEN_TIME); /* failure */
+ /*
+ * Stop updating the RTC if the date/time fields programmed by
+ * the guest are invalid.
+ */
+ VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected");
+ return (VRTC_BROKEN_TIME);
}
static int
-vrtc_time_update(struct vrtc *vrtc, time_t newtime)
+vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
{
struct rtcdev *rtc;
+ sbintime_t oldbase;
time_t oldtime;
uint8_t alarm_sec, alarm_min, alarm_hour;
@@ -391,16 +407,21 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime)
alarm_hour = rtc->alarm_hour;
oldtime = vrtc->base_rtctime;
- VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx",
+ VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx",
oldtime, newtime);
+ oldbase = vrtc->base_uptime;
+ VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx",
+ oldbase, newbase);
+ vrtc->base_uptime = newbase;
+
if (newtime == oldtime)
return (0);
/*
* If 'newtime' indicates that RTC updates are disabled then just
* record that and return. There is no need to do alarm interrupt
- * processing or update 'base_uptime' in this case.
+ * processing in this case.
*/
if (newtime == VRTC_BROKEN_TIME) {
vrtc->base_rtctime = VRTC_BROKEN_TIME;
@@ -446,8 +467,6 @@ vrtc_time_update(struct vrtc *vrtc, time_t newtime)
if (uintr_enabled(vrtc))
vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE);
- vrtc->base_uptime = sbinuptime();
-
return (0);
}
@@ -518,7 +537,7 @@ static void
vrtc_callout_handler(void *arg)
{
struct vrtc *vrtc = arg;
- sbintime_t freqsbt;
+ sbintime_t freqsbt, basetime;
time_t rtctime;
int error;
@@ -540,8 +559,8 @@ vrtc_callout_handler(void *arg)
vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
- rtctime = vrtc_curtime(vrtc);
- error = vrtc_time_update(vrtc, rtctime);
+ rtctime = vrtc_curtime(vrtc, &basetime);
+ error = vrtc_time_update(vrtc, rtctime, basetime);
KASSERT(error == 0, ("%s: vrtc_time_update error %d",
__func__, error));
}
@@ -606,7 +625,7 @@ static int
vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
{
struct rtcdev *rtc;
- sbintime_t oldfreq, newfreq;
+ sbintime_t oldfreq, newfreq, basetime;
time_t curtime, rtctime;
int error;
uint8_t oldval, changed;
@@ -627,19 +646,13 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
if (changed & RTCSB_HALT) {
if ((newval & RTCSB_HALT) == 0) {
rtctime = rtc_to_secs(vrtc);
+ basetime = sbinuptime();
if (rtctime == VRTC_BROKEN_TIME) {
- /*
- * Stop updating the RTC if the date/time
- * programmed by the guest is not correct.
- */
- VM_CTR0(vrtc->vm, "Invalid RTC date/time "
- "programming detected");
-
if (rtc_flag_broken_time)
return (-1);
}
} else {
- curtime = vrtc_curtime(vrtc);
+ curtime = vrtc_curtime(vrtc, &basetime);
KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch "
"between vrtc basetime (%#lx) and curtime (%#lx)",
__func__, vrtc->base_rtctime, curtime));
@@ -658,7 +671,7 @@ vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
rtctime = VRTC_BROKEN_TIME;
rtc->reg_b &= ~RTCSB_UINTR;
}
- error = vrtc_time_update(vrtc, rtctime);
+ error = vrtc_time_update(vrtc, rtctime, basetime);
KASSERT(error == 0, ("vrtc_time_update error %d", error));
}
@@ -738,7 +751,7 @@ vrtc_set_time(struct vm *vm, time_t secs)
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- error = vrtc_time_update(vrtc, secs);
+ error = vrtc_time_update(vrtc, secs, sbinuptime());
VRTC_UNLOCK(vrtc);
if (error) {
@@ -755,11 +768,12 @@ time_t
vrtc_get_time(struct vm *vm)
{
struct vrtc *vrtc;
+ sbintime_t basetime;
time_t t;
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
- t = vrtc_curtime(vrtc);
+ t = vrtc_curtime(vrtc, &basetime);
VRTC_UNLOCK(vrtc);
return (t);
@@ -777,7 +791,7 @@ vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
* Don't allow writes to RTC control registers or the date/time fields.
*/
if (offset < offsetof(struct rtcdev, nvram[0]) ||
- offset >= sizeof(struct rtcdev)) {
+ offset == RTC_CENTURY || offset >= sizeof(struct rtcdev)) {
VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
offset);
return (EINVAL);
@@ -796,6 +810,7 @@ int
vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
{
struct vrtc *vrtc;
+ sbintime_t basetime;
time_t curtime;
uint8_t *ptr;
@@ -811,8 +826,8 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
/*
* Update RTC date/time fields if necessary.
*/
- if (offset < 10) {
- curtime = vrtc_curtime(vrtc);
+ if (offset < 10 || offset == RTC_CENTURY) {
+ curtime = vrtc_curtime(vrtc, &basetime);
secs_to_rtc(curtime, vrtc, 0);
}
@@ -852,6 +867,7 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
{
struct vrtc *vrtc;
struct rtcdev *rtc;
+ sbintime_t basetime;
time_t curtime;
int error, offset;
@@ -869,16 +885,20 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
error = 0;
- curtime = vrtc_curtime(vrtc);
- vrtc_time_update(vrtc, curtime);
+ curtime = vrtc_curtime(vrtc, &basetime);
+ vrtc_time_update(vrtc, curtime, basetime);
- if (in) {
- /*
- * Update RTC date/time fields if necessary.
- */
- if (offset < 10)
- secs_to_rtc(curtime, vrtc, 0);
+ /*
+ * Update RTC date/time fields if necessary.
+ *
+ * This is not just for reads of the RTC. The side-effect of writing
+ * the century byte requires other RTC date/time fields (e.g. sec)
+ * to be updated here.
+ */
+ if (offset < 10 || offset == RTC_CENTURY)
+ secs_to_rtc(curtime, vrtc, 0);
+ if (in) {
if (offset == 12) {
/*
* XXX
@@ -922,6 +942,18 @@ vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
*((uint8_t *)rtc + offset) = *val;
break;
}
+
+ /*
+ * XXX some guests (e.g. OpenBSD) write the century byte
+ * outside of RTCSB_HALT so re-calculate the RTC date/time.
+ */
+ if (offset == RTC_CENTURY && !rtc_halted(vrtc)) {
+ curtime = rtc_to_secs(vrtc);
+ error = vrtc_time_update(vrtc, curtime, sbinuptime());
+ KASSERT(!error, ("vrtc_time_update error %d", error));
+ if (curtime == VRTC_BROKEN_TIME && rtc_flag_broken_time)
+ error = -1;
+ }
}
VRTC_UNLOCK(vrtc);
return (error);
@@ -971,7 +1003,7 @@ vrtc_init(struct vm *vm)
VRTC_LOCK(vrtc);
vrtc->base_rtctime = VRTC_BROKEN_TIME;
- vrtc_time_update(vrtc, curtime);
+ vrtc_time_update(vrtc, curtime, sbinuptime());
secs_to_rtc(curtime, vrtc, 0);
VRTC_UNLOCK(vrtc);
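With the century byte wired up, the guest-visible year becomes
century * 100 + year instead of the old 70-based pivot guess. A minimal
sketch of both directions, assuming BCD has already been handled (the
driver's rtcget()/rtcset() do that):

#define POSIX_BASE_YEAR	1970

static int
rtc_fields_to_year(int century, int year)	/* e.g. 20, 15 -> 2015 */
{
	int full = century * 100 + year;

	return (full >= POSIX_BASE_YEAR ? full : -1);	/* -1: reject */
}

static void
year_to_rtc_fields(int full, int *century, int *year)
{
	*century = full / 100;		/* 2015 -> 20 */
	*year = full % 100;		/*      -> 15 */
}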
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 6bd5bce..bca9b98 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1293,8 +1293,12 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
else if (error != 0)
panic("%s: vmm_fetch_instruction error %d", __func__, error);
- if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
- return (EFAULT);
+ if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
+ VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
+ vme->rip + cs_base);
+ *retu = true; /* dump instruction bytes in userspace */
+ return (0);
+ }
/*
* If the instruction length was not specified then update it now
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 0b50e92..7172365 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -72,6 +72,8 @@ enum {
VIE_OP_TYPE_POP,
VIE_OP_TYPE_MOVS,
VIE_OP_TYPE_GROUP1,
+ VIE_OP_TYPE_STOS,
+ VIE_OP_TYPE_BITTEST,
VIE_OP_TYPE_LAST
};
@@ -91,6 +93,11 @@ static const struct vie_op two_byte_opcodes[256] = {
.op_byte = 0xB7,
.op_type = VIE_OP_TYPE_MOVZX,
},
+ [0xBA] = {
+ .op_byte = 0xBA,
+ .op_type = VIE_OP_TYPE_BITTEST,
+ .op_flags = VIE_OP_F_IMM8,
+ },
[0xBE] = {
.op_byte = 0xBE,
.op_type = VIE_OP_TYPE_MOVSX,
@@ -146,6 +153,16 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_MOVS,
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
},
+ [0xAA] = {
+ .op_byte = 0xAA,
+ .op_type = VIE_OP_TYPE_STOS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
+ [0xAB] = {
+ .op_byte = 0xAB,
+ .op_type = VIE_OP_TYPE_STOS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
[0xC6] = {
/* XXX Group 11 extended opcode - not just MOV */
.op_byte = 0xC6,
@@ -803,6 +820,68 @@ done:
}
static int
+emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+ int error, opsize, repeat;
+ uint64_t val;
+ uint64_t rcx, rdi, rflags;
+
+ opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize;
+ repeat = vie->repz_present | vie->repnz_present;
+
+ if (repeat) {
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+ /*
+ * The count register is %rcx, %ecx or %cx depending on the
+ * address size of the instruction.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+ return (0);
+ }
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+ KASSERT(!error, ("%s: error %d getting rax", __func__, error));
+
+ error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ if (rflags & PSL_D)
+ rdi -= opsize;
+ else
+ rdi += opsize;
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+ vie->addrsize);
+ KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+ if (repeat) {
+ rcx = rcx - 1;
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+ rcx, vie->addrsize);
+ KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+ /*
+ * Repeat the instruction if the count register is not zero.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+ vm_restart_instruction(vm, vcpuid);
+ }
+
+ return (0);
+}
+
+static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
@@ -1262,6 +1341,48 @@ emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
+static int
+emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+{
+ uint64_t val, rflags;
+ int error, bitmask, bitoff;
+
+ /*
+ * 0F BA is a Group 8 extended opcode.
+ *
+ * Currently we only emulate the 'Bit Test' instruction which is
+ * identified by a ModR/M:reg encoding of 100b.
+ */
+ if ((vie->reg & 7) != 4)
+ return (EINVAL);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg);
+ if (error)
+ return (error);
+
+ /*
+ * Intel SDM, Vol 2, Table 3-2:
+ * "Range of Bit Positions Specified by Bit Offset Operands"
+ */
+ bitmask = vie->opsize * 8 - 1;
+ bitoff = vie->immediate & bitmask;
+
+ /* Copy the bit into the Carry flag in %rflags */
+ if (val & (1UL << bitoff))
+ rflags |= PSL_C;
+ else
+ rflags &= ~PSL_C;
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
+ KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error));
+
+ return (0);
+}
+
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
@@ -1302,6 +1423,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
memwrite, memarg);
break;
+ case VIE_OP_TYPE_STOS:
+ error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_AND:
error = emulate_and(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
@@ -1314,6 +1439,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_sub(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
+ case VIE_OP_TYPE_BITTEST:
+ error = emulate_bittest(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
default:
error = EINVAL;
break;
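The BT emulation only has to mirror one architectural effect: CF is set
from the selected bit, and for the immediate form the SDM masks the bit
offset to the operand width (Vol 2, Table 3-2). A standalone sketch:

#include <stdbool.h>
#include <stdint.h>

static bool
bt_carry(uint64_t val, uint8_t imm8, int opsize)	/* opsize in bytes */
{
	int bitoff = imm8 & (opsize * 8 - 1);	/* mask to operand width */

	return (((val >> bitoff) & 1) != 0);	/* new CF value */
}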
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index fc68a61..63044e8 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -28,16 +28,10 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/queue.h>
-#include <sys/cpuset.h>
#include <sys/systm.h>
-#include <vm/vm.h>
-
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
-#include <x86/psl.h>
#include "vatpic.h"
#include "vatpit.h"
diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c
index 9ecf9af..4ae5fb9 100644
--- a/sys/amd64/vmm/vmm_stat.c
+++ b/sys/amd64/vmm/vmm_stat.c
@@ -33,7 +33,6 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
-#include <sys/smp.h>
#include <machine/vmm.h>
#include "vmm_util.h"
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index c37d21c..45e08b5 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
-#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <machine/clock.h>
@@ -289,9 +288,8 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Machine check handling is done in the host.
- * Hide MTRR capability.
*/
- regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
+ regs[3] &= ~(CPUID_MCA | CPUID_MCE);
/*
* Hide the debug store capability.
diff --git a/sys/arm/allwinner/std.a10 b/sys/arm/allwinner/std.a10
index 0084c41..da5d895 100644
--- a/sys/arm/allwinner/std.a10
+++ b/sys/arm/allwinner/std.a10
@@ -17,5 +17,7 @@ options KERNPHYSADDR=0x40200000
makeoptions KERNVIRTADDR=0xc0200000
options KERNVIRTADDR=0xc0200000
+options ARM_L2_PIPT
+
files "../allwinner/files.allwinner"
files "../allwinner/files.a10"
diff --git a/sys/arm/amlogic/aml8726/aml8726_mmc.c b/sys/arm/amlogic/aml8726/aml8726_mmc.c
index 167d3a3..7d1ef4e 100644
--- a/sys/arm/amlogic/aml8726/aml8726_mmc.c
+++ b/sys/arm/amlogic/aml8726/aml8726_mmc.c
@@ -70,6 +70,7 @@ struct aml8726_mmc_softc {
device_t dev;
struct resource *res[2];
struct mtx mtx;
+ struct callout ch;
uint32_t port;
unsigned int ref_freq;
struct aml8726_mmc_gpio pwr_en;
@@ -81,7 +82,7 @@ struct aml8726_mmc_softc {
struct mmc_host host;
int bus_busy;
struct mmc_command *cmd;
- unsigned int timeout_remaining;
+ uint32_t stop_timeout;
};
static struct resource_spec aml8726_mmc_spec[] = {
@@ -92,6 +93,7 @@ static struct resource_spec aml8726_mmc_spec[] = {
#define AML_MMC_LOCK(sc) mtx_lock(&(sc)->mtx)
#define AML_MMC_UNLOCK(sc) mtx_unlock(&(sc)->mtx)
+#define AML_MMC_LOCK_ASSERT(sc) mtx_assert(&(sc)->mtx, MA_OWNED)
#define AML_MMC_LOCK_INIT(sc) \
mtx_init(&(sc)->mtx, device_get_nameunit((sc)->dev), \
"mmc", MTX_DEF)
@@ -107,6 +109,10 @@ static struct resource_spec aml8726_mmc_spec[] = {
#define PWR_OFF_FLAG(pol) ((pol) == 0 ? GPIO_PIN_HIGH : \
GPIO_PIN_LOW)
+#define MSECS_TO_TICKS(ms) (((ms)*hz)/1000 + 1)
+
+static void aml8726_mmc_timeout(void *arg);
+
static unsigned int
aml8726_mmc_clk(phandle_t node)
{
@@ -126,6 +132,37 @@ aml8726_mmc_clk(phandle_t node)
return ((unsigned int)prop);
}
+static uint32_t
+aml8726_mmc_freq(struct aml8726_mmc_softc *sc, uint32_t divisor)
+{
+
+ return (sc->ref_freq / ((divisor + 1) * 2));
+}
+
+static uint32_t
+aml8726_mmc_div(struct aml8726_mmc_softc *sc, uint32_t desired_freq)
+{
+ uint32_t divisor;
+
+ divisor = sc->ref_freq / (desired_freq * 2);
+
+ if (divisor == 0)
+ divisor = 1;
+
+ divisor -= 1;
+
+ if (aml8726_mmc_freq(sc, divisor) > desired_freq)
+ divisor += 1;
+
+ if (divisor > (AML_MMC_CONFIG_CMD_CLK_DIV_MASK >>
+ AML_MMC_CONFIG_CMD_CLK_DIV_SHIFT)) {
+ divisor = AML_MMC_CONFIG_CMD_CLK_DIV_MASK >>
+ AML_MMC_CONFIG_CMD_CLK_DIV_SHIFT;
+ }
+
+ return (divisor);
+}
+
static void
aml8726_mmc_mapmem(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
@@ -162,25 +199,18 @@ aml8726_mmc_power_on(struct aml8726_mmc_softc *sc)
PWR_ON_FLAG(sc->pwr_en.pol)));
}
-static int
-aml8726_mmc_restart_timer(struct aml8726_mmc_softc *sc)
+static void
+aml8726_mmc_soft_reset(struct aml8726_mmc_softc *sc, boolean_t enable_irq)
{
- uint32_t count;
- uint32_t isr;
-
- if (sc->cmd == NULL || sc->timeout_remaining == 0)
- return (0);
-
- count = (sc->timeout_remaining > 0x1fff) ? 0x1fff :
- sc->timeout_remaining;
- sc->timeout_remaining -= count;
+ uint32_t icr;
- isr = (count << AML_MMC_IRQ_STATUS_TIMER_CNT_SHIFT) |
- AML_MMC_IRQ_STATUS_TIMER_EN | AML_MMC_IRQ_STATUS_TIMEOUT_IRQ;
+ icr = AML_MMC_IRQ_CONFIG_SOFT_RESET;
- CSR_WRITE_4(sc, AML_MMC_IRQ_STATUS_REG, isr);
+ if (enable_irq == true)
+ icr |= AML_MMC_IRQ_CONFIG_CMD_DONE_EN;
- return (1);
+ CSR_WRITE_4(sc, AML_MMC_IRQ_CONFIG_REG, icr);
+ CSR_BARRIER(sc, AML_MMC_IRQ_CONFIG_REG);
}
static int
@@ -191,7 +221,6 @@ aml8726_mmc_start_command(struct aml8726_mmc_softc *sc, struct mmc_command *cmd)
uint32_t block_size;
uint32_t bus_width;
uint32_t cmdr;
- uint32_t cycles_per_msec;
uint32_t extr;
uint32_t mcfgr;
uint32_t nbits_per_pkg;
@@ -203,14 +232,9 @@ aml8726_mmc_start_command(struct aml8726_mmc_softc *sc, struct mmc_command *cmd)
return (MMC_ERR_INVALID);
/*
- * Ensure the hardware state machine is in a known state,
- * the command done interrupt is enabled, and previous
- * IRQ status bits have been cleared.
+ * Ensure the hardware state machine is in a known state.
*/
- CSR_WRITE_4(sc, AML_MMC_IRQ_CONFIG_REG,
- (AML_MMC_IRQ_CONFIG_SOFT_RESET | AML_MMC_IRQ_CONFIG_CMD_DONE_EN));
- CSR_BARRIER(sc, AML_MMC_IRQ_CONFIG_REG);
- CSR_WRITE_4(sc, AML_MMC_IRQ_STATUS_REG, AML_MMC_IRQ_STATUS_CLEAR_IRQ);
+ aml8726_mmc_soft_reset(sc, true);
/*
* Start and transmission bits are per section 4.7.2 of the:
@@ -225,6 +249,13 @@ aml8726_mmc_start_command(struct aml8726_mmc_softc *sc, struct mmc_command *cmd)
mcfgr = sc->port;
timeout = AML_MMC_CMD_TIMEOUT;
+ /*
+ * If this is a linked command, then use the previous timeout.
+ */
+ if (cmd == cmd->mrq->stop && sc->stop_timeout)
+ timeout = sc->stop_timeout;
+ sc->stop_timeout = 0;
+
if ((cmd->flags & MMC_RSP_136) != 0) {
cmdr |= AML_MMC_CMD_RESP_CRC7_FROM_8;
cmdr |= (133 << AML_MMC_CMD_RESP_BITS_SHIFT);
@@ -291,32 +322,24 @@ aml8726_mmc_start_command(struct aml8726_mmc_softc *sc, struct mmc_command *cmd)
timeout = AML_MMC_WRITE_TIMEOUT *
(data->len / block_size);
}
+
+ /*
+ * Stop terminates a multiblock read / write and thus
+ * can take as long to execute as an actual read / write.
+ */
+ if (cmd->mrq->stop != NULL)
+ sc->stop_timeout = timeout;
}
sc->cmd = cmd;
cmd->error = MMC_ERR_NONE;
- /*
- * Round up while calculating the number of cycles which
- * correspond to a millisecond. Use that to determine
- * the count from the desired timeout in milliseconds.
- *
- * The counter has a limited range which is not sufficient
- * for directly implementing worst case timeouts at high clock
- * rates so a 32 bit counter is implemented in software.
- *
- * The documentation isn't clear on when the timer starts
- * so add 48 cycles for the command and 136 cycles for the
- * response (the values are from the previously mentioned
- * standard).
- */
if (timeout > AML_MMC_MAX_TIMEOUT)
timeout = AML_MMC_MAX_TIMEOUT;
- cycles_per_msec = (ios->clock + 1000 - 1) / 1000;
- sc->timeout_remaining = 48 + 136 + timeout * cycles_per_msec;
- aml8726_mmc_restart_timer(sc);
+ callout_reset(&sc->ch, MSECS_TO_TICKS(timeout),
+ aml8726_mmc_timeout, sc);
CSR_WRITE_4(sc, AML_MMC_CMD_ARGUMENT_REG, cmd->arg);
CSR_WRITE_4(sc, AML_MMC_MULT_CONFIG_REG, mcfgr);
@@ -330,20 +353,96 @@ aml8726_mmc_start_command(struct aml8726_mmc_softc *sc, struct mmc_command *cmd)
}
static void
-aml8726_mmc_intr(void *arg)
+aml8726_mmc_finish_command(struct aml8726_mmc_softc *sc, int mmc_error)
{
- struct aml8726_mmc_softc *sc = (struct aml8726_mmc_softc *)arg;
+ int mmc_stop_error;
struct mmc_command *cmd;
struct mmc_command *stop_cmd;
struct mmc_data *data;
+
+ AML_MMC_LOCK_ASSERT(sc);
+
+ /* Clear all interrupts since the request is no longer in flight. */
+ CSR_WRITE_4(sc, AML_MMC_IRQ_STATUS_REG, AML_MMC_IRQ_STATUS_CLEAR_IRQ);
+ CSR_BARRIER(sc, AML_MMC_IRQ_STATUS_REG);
+
+ /* In some cases (e.g. finish called via timeout) this is a NOP. */
+ callout_stop(&sc->ch);
+
+ cmd = sc->cmd;
+ sc->cmd = NULL;
+
+ cmd->error = mmc_error;
+
+ data = cmd->data;
+
+ if (data && data->len &&
+ (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) != 0) {
+ if ((data->flags & MMC_DATA_READ) != 0)
+ bus_dmamap_sync(sc->dmatag, sc->dmamap,
+ BUS_DMASYNC_POSTREAD);
+ else
+ bus_dmamap_sync(sc->dmatag, sc->dmamap,
+ BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(sc->dmatag, sc->dmamap);
+ }
+
+ /*
+ * If there's a linked stop command, then start the stop command.
+ * In order to establish a known state attempt the stop command
+ * even if the original request encountered an error.
+ */
+
+ stop_cmd = (cmd->mrq->stop != cmd) ? cmd->mrq->stop : NULL;
+
+ if (stop_cmd != NULL) {
+ mmc_stop_error = aml8726_mmc_start_command(sc, stop_cmd);
+ if (mmc_stop_error == MMC_ERR_NONE) {
+ AML_MMC_UNLOCK(sc);
+ return;
+ }
+ stop_cmd->error = mmc_stop_error;
+ }
+
+ AML_MMC_UNLOCK(sc);
+
+ /* Execute the callback after dropping the lock. */
+ if (cmd->mrq)
+ cmd->mrq->done(cmd->mrq);
+}
+
+static void
+aml8726_mmc_timeout(void *arg)
+{
+ struct aml8726_mmc_softc *sc = (struct aml8726_mmc_softc *)arg;
+
+ /*
+ * The command failed to complete in time so forcefully
+ * terminate it.
+ */
+ aml8726_mmc_soft_reset(sc, false);
+
+ /*
+ * Ensure the command has terminated before continuing on
+ * to things such as bus_dmamap_sync / bus_dmamap_unload.
+ */
+ while ((CSR_READ_4(sc, AML_MMC_IRQ_STATUS_REG) &
+ AML_MMC_IRQ_STATUS_CMD_BUSY) != 0)
+ cpu_spinwait();
+
+ aml8726_mmc_finish_command(sc, MMC_ERR_TIMEOUT);
+}
+
+static void
+aml8726_mmc_intr(void *arg)
+{
+ struct aml8726_mmc_softc *sc = (struct aml8726_mmc_softc *)arg;
uint32_t cmdr;
- uint32_t icr;
uint32_t isr;
uint32_t mcfgr;
uint32_t previous_byte;
uint32_t resp;
int mmc_error;
- int mmc_stop_error;
unsigned int i;
AML_MMC_LOCK(sc);
@@ -367,12 +466,6 @@ aml8726_mmc_intr(void *arg)
if ((cmdr & AML_MMC_CMD_CMD_HAS_DATA) != 0 &&
(isr & AML_MMC_IRQ_STATUS_WR_CRC16_OK) == 0)
mmc_error = MMC_ERR_BADCRC;
- } else if ((isr & AML_MMC_IRQ_STATUS_TIMEOUT_IRQ) != 0) {
- if (aml8726_mmc_restart_timer(sc) != 0) {
- AML_MMC_UNLOCK(sc);
- return;
- }
- mmc_error = MMC_ERR_TIMEOUT;
} else {
spurious:
@@ -389,49 +482,12 @@ spurious:
return;
}
- if ((isr & AML_MMC_IRQ_STATUS_CMD_BUSY) != 0 &&
- /*
- * A multiblock operation may keep the hardware
- * busy until stop transmission is executed.
- */
- (isr & AML_MMC_IRQ_STATUS_CMD_DONE_IRQ) == 0) {
- if (mmc_error == MMC_ERR_NONE)
- mmc_error = MMC_ERR_FAILED;
-
- /*
- * Issue a soft reset (while leaving the command complete
- * interrupt enabled) to terminate the command.
- *
- * Ensure the command has terminated before continuing on
- * to things such as bus_dmamap_sync / bus_dmamap_unload.
- */
-
- icr = AML_MMC_IRQ_CONFIG_SOFT_RESET |
- AML_MMC_IRQ_CONFIG_CMD_DONE_EN;
-
- CSR_WRITE_4(sc, AML_MMC_IRQ_CONFIG_REG, icr);
-
- while ((CSR_READ_4(sc, AML_MMC_IRQ_STATUS_REG) &
- AML_MMC_IRQ_STATUS_CMD_BUSY) != 0)
- cpu_spinwait();
- }
-
- /* Clear all interrupts since the request is no longer in flight. */
- CSR_WRITE_4(sc, AML_MMC_IRQ_STATUS_REG, AML_MMC_IRQ_STATUS_CLEAR_IRQ);
- CSR_BARRIER(sc, AML_MMC_IRQ_STATUS_REG);
-
- cmd = sc->cmd;
- sc->cmd = NULL;
-
- cmd->error = mmc_error;
-
- if ((cmd->flags & MMC_RSP_PRESENT) != 0 &&
- mmc_error == MMC_ERR_NONE) {
+ if ((cmdr & AML_MMC_CMD_RESP_BITS_MASK) != 0) {
mcfgr = sc->port;
mcfgr |= AML_MMC_MULT_CONFIG_RESP_READOUT_EN;
CSR_WRITE_4(sc, AML_MMC_MULT_CONFIG_REG, mcfgr);
- if ((cmd->flags & MMC_RSP_136) != 0) {
+ if ((cmdr & AML_MMC_CMD_RESP_CRC7_FROM_8) != 0) {
/*
* Controller supplies 135:8 instead of
@@ -444,48 +500,39 @@ spurious:
for (i = 0; i < 4; i++) {
resp = CSR_READ_4(sc, AML_MMC_CMD_ARGUMENT_REG);
- cmd->resp[3 - i] = (resp << 8) | previous_byte;
+ sc->cmd->resp[3 - i] = (resp << 8) |
+ previous_byte;
previous_byte = (resp >> 24) & 0xff;
}
} else
- cmd->resp[0] = CSR_READ_4(sc, AML_MMC_CMD_ARGUMENT_REG);
+ sc->cmd->resp[0] = CSR_READ_4(sc,
+ AML_MMC_CMD_ARGUMENT_REG);
}
- data = cmd->data;
-
- if (data && data->len &&
- (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) != 0) {
- if ((data->flags & MMC_DATA_READ) != 0)
- bus_dmamap_sync(sc->dmatag, sc->dmamap,
- BUS_DMASYNC_POSTREAD);
- else
- bus_dmamap_sync(sc->dmatag, sc->dmamap,
- BUS_DMASYNC_POSTWRITE);
- bus_dmamap_unload(sc->dmatag, sc->dmamap);
- }
+ if ((isr & AML_MMC_IRQ_STATUS_CMD_BUSY) != 0 &&
+ /*
+ * A multiblock operation may keep the hardware
+ * busy until stop transmission is executed.
+ */
+ (isr & AML_MMC_IRQ_STATUS_CMD_DONE_IRQ) == 0) {
+ if (mmc_error == MMC_ERR_NONE)
+ mmc_error = MMC_ERR_FAILED;
- /*
- * If there's a linked stop command, then start the stop command.
- * In order to establish a known state attempt the stop command
- * even if the original request encountered an error.
- */
+ /*
+ * Issue a soft reset to terminate the command.
+ *
+ * Ensure the command has terminated before continuing on
+ * to things such as bus_dmamap_sync / bus_dmamap_unload.
+ */
- stop_cmd = (cmd->mrq->stop != cmd) ? cmd->mrq->stop : NULL;
+ aml8726_mmc_soft_reset(sc, false);
- if (stop_cmd != NULL) {
- mmc_stop_error = aml8726_mmc_start_command(sc, stop_cmd);
- if (mmc_stop_error == MMC_ERR_NONE) {
- AML_MMC_UNLOCK(sc);
- return;
- }
- stop_cmd->error = mmc_stop_error;
+ while ((CSR_READ_4(sc, AML_MMC_IRQ_STATUS_REG) &
+ AML_MMC_IRQ_STATUS_CMD_BUSY) != 0)
+ cpu_spinwait();
}
- AML_MMC_UNLOCK(sc);
-
- /* Execute the callback after dropping the lock. */
- if (cmd->mrq)
- cmd->mrq->done(cmd->mrq);
+ aml8726_mmc_finish_command(sc, mmc_error);
}
static int
@@ -698,8 +745,10 @@ aml8726_mmc_attach(device_t dev)
goto fail;
}
- sc->host.f_min = 200000;
- sc->host.f_max = 50000000;
+ callout_init_mtx(&sc->ch, &sc->mtx, CALLOUT_RETURNUNLOCKED);
+
+ sc->host.f_min = aml8726_mmc_freq(sc, aml8726_mmc_div(sc, 200000));
+ sc->host.f_max = aml8726_mmc_freq(sc, aml8726_mmc_div(sc, 50000000));
sc->host.host_ocr = sc->voltages[0] | sc->voltages[1];
sc->host.caps = MMC_CAP_4_BIT_DATA | MMC_CAP_HSPEED;
@@ -756,10 +805,12 @@ aml8726_mmc_detach(device_t dev)
* disable the interrupts, and clear the interrupts.
*/
(void)aml8726_mmc_power_off(sc);
- CSR_WRITE_4(sc, AML_MMC_IRQ_CONFIG_REG, AML_MMC_IRQ_CONFIG_SOFT_RESET);
- CSR_BARRIER(sc, AML_MMC_IRQ_CONFIG_REG);
+ aml8726_mmc_soft_reset(sc, false);
CSR_WRITE_4(sc, AML_MMC_IRQ_STATUS_REG, AML_MMC_IRQ_STATUS_CLEAR_IRQ);
+ /* This should be a NOP since no command was in flight. */
+ callout_stop(&sc->ch);
+
AML_MMC_UNLOCK(sc);
bus_generic_detach(dev);
@@ -787,8 +838,7 @@ aml8726_mmc_shutdown(device_t dev)
* disable the interrupts, and clear the interrupts.
*/
(void)aml8726_mmc_power_off(sc);
- CSR_WRITE_4(sc, AML_MMC_IRQ_CONFIG_REG, AML_MMC_IRQ_CONFIG_SOFT_RESET);
- CSR_BARRIER(sc, AML_MMC_IRQ_CONFIG_REG);
+ aml8726_mmc_soft_reset(sc, false);
CSR_WRITE_4(sc, AML_MMC_IRQ_STATUS_REG, AML_MMC_IRQ_STATUS_CLEAR_IRQ);
return (0);
@@ -799,7 +849,6 @@ aml8726_mmc_update_ios(device_t bus, device_t child)
{
struct aml8726_mmc_softc *sc = device_get_softc(bus);
struct mmc_ios *ios = &sc->host.ios;
- unsigned int divisor;
int error;
int i;
uint32_t cfgr;
@@ -820,15 +869,8 @@ aml8726_mmc_update_ios(device_t bus, device_t child)
return (EINVAL);
}
- divisor = sc->ref_freq / (ios->clock * 2) - 1;
- if (divisor == 0 || divisor == -1)
- divisor = 1;
- if ((sc->ref_freq / ((divisor + 1) * 2)) > ios->clock)
- divisor += 1;
- if (divisor > 0x3ff)
- divisor = 0x3ff;
-
- cfgr |= divisor;
+ cfgr |= aml8726_mmc_div(sc, ios->clock) <<
+ AML_MMC_CONFIG_CMD_CLK_DIV_SHIFT;
CSR_WRITE_4(sc, AML_MMC_CONFIG_REG, cfgr);
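The two new helpers encode the hardware relationship
f = ref / ((div + 1) * 2) and its inverse, rounding the divisor so the
resulting clock never exceeds the request; deriving f_min/f_max through
the same helpers keeps the advertised limits achievable. A host-side
sketch of the rounding, with DIV_MAX standing in for the register field
mask:

#include <stdint.h>

#define DIV_MAX	0x3ff	/* stand-in for the CMD_CLK_DIV field mask */

static uint32_t
mmc_freq(uint32_t ref, uint32_t div)
{
	return (ref / ((div + 1) * 2));
}

static uint32_t
mmc_div(uint32_t ref, uint32_t want)
{
	uint32_t div = ref / (want * 2);

	if (div == 0)
		div = 1;
	div -= 1;
	if (mmc_freq(ref, div) > want)	/* round down: never overclock */
		div += 1;
	if (div > DIV_MAX)
		div = DIV_MAX;
	return (div);
}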
diff --git a/sys/arm/amlogic/aml8726/aml8726_mmc.h b/sys/arm/amlogic/aml8726/aml8726_mmc.h
index 0370943..64e3bae 100644
--- a/sys/arm/amlogic/aml8726/aml8726_mmc.h
+++ b/sys/arm/amlogic/aml8726/aml8726_mmc.h
@@ -47,20 +47,6 @@
#define AML_MMC_WRITE_TIMEOUT 500
#define AML_MMC_MAX_TIMEOUT 5000
-/*
- * Internally the timeout is implemented by counting clock cycles.
- *
- * Since the hardware implements timeouts by counting cycles
- * the minimum read / write timeout (assuming the minimum
- * conversion factor of 1 cycle per usec) is:
- *
- * (8 bits * 512 bytes per block + 16 bits CRC) = 4112 usec
- */
-#if ((AML_MMC_READ_TIMEOUT * 1000) < 4112 || \
- (AML_MMC_WRITE_TIMEOUT * 1000) < 4112)
-#error "Single block timeout is smaller than supported"
-#endif
-
#define AML_MMC_CMD_ARGUMENT_REG 0
#define AML_MMC_CMD_SEND_REG 4
diff --git a/sys/arm/amlogic/aml8726/files.aml8726 b/sys/arm/amlogic/aml8726/files.aml8726
index b32ebcd..ca058a5 100644
--- a/sys/arm/amlogic/aml8726/files.aml8726
+++ b/sys/arm/amlogic/aml8726/files.aml8726
@@ -4,12 +4,13 @@ kern/kern_clocksource.c standard
arm/arm/bus_space_base.c standard
arm/arm/bus_space_generic.c standard
-arm/arm/bus_space_asm_generic.S standard
+arm/arm/gic.c standard
arm/arm/pl310.c standard
arm/amlogic/aml8726/aml8726_l2cache.c standard
arm/amlogic/aml8726/aml8726_machdep.c standard
+arm/amlogic/aml8726/aml8726_mp.c optional smp
arm/amlogic/aml8726/aml8726_identsoc.c standard
arm/amlogic/aml8726/aml8726_ccm.c standard
arm/amlogic/aml8726/aml8726_clkmsr.c standard
diff --git a/sys/arm/amlogic/aml8726/files.smp b/sys/arm/amlogic/aml8726/files.smp
deleted file mode 100644
index 9b6c6ab..0000000
--- a/sys/arm/amlogic/aml8726/files.smp
+++ /dev/null
@@ -1,4 +0,0 @@
-#$FreeBSD$
-
-arm/arm/gic.c standard
-arm/amlogic/aml8726/aml8726_mp.c standard
diff --git a/sys/arm/amlogic/aml8726/std.aml8726 b/sys/arm/amlogic/aml8726/std.aml8726
index 073f02c..61b515f 100644
--- a/sys/arm/amlogic/aml8726/std.aml8726
+++ b/sys/arm/amlogic/aml8726/std.aml8726
@@ -4,6 +4,15 @@ cpu CPU_CORTEXA
machine arm armv6
makeoptions CONF_CFLAGS="-march=armv7a"
+# Physical memory starts at 0x80000000. We assume the kernel is loaded
+# at 0x80100000 by u-boot (which doesn't support ubldr since it's missing
+# CONFIG_API). The kernel must be supplied as a binary since u-boot is
+# also missing CONFIG_CMD_ELF.
+#
+#
+options KERNVIRTADDR=0xc0100000 # Used in ldscript.arm
+makeoptions KERNVIRTADDR=0xc0100000
+
device fdt_pinctrl
files "../amlogic/aml8726/files.aml8726"
diff --git a/sys/arm/amlogic/aml8726/std.odroidc1 b/sys/arm/amlogic/aml8726/std.odroidc1
deleted file mode 100644
index 441c135..0000000
--- a/sys/arm/amlogic/aml8726/std.odroidc1
+++ /dev/null
@@ -1,17 +0,0 @@
-# $FreeBSD$
-
-include "../amlogic/aml8726/std.aml8726"
-
-makeoptions FDT_DTS_FILE=odroidc1.dts
-
-options SMP # Enable multiple cores
-files "../amlogic/aml8726/files.smp"
-
-# Physical memory starts at 0x00000000. We assume the kernel is loaded
-# at 0x00100000 by u-boot (which doesn't support ubldr since it's missing
-# CONFIG_API). The kernel must be supplied as a binary since u-boot is
-# also missing CONFIG_CMD_ELF.
-#
-#
-options KERNVIRTADDR=0xc0100000 # Used in ldscript.arm
-makeoptions KERNVIRTADDR=0xc0100000
diff --git a/sys/arm/amlogic/aml8726/std.vsatv102-m6 b/sys/arm/amlogic/aml8726/std.vsatv102-m6
deleted file mode 100644
index e0014a4..0000000
--- a/sys/arm/amlogic/aml8726/std.vsatv102-m6
+++ /dev/null
@@ -1,17 +0,0 @@
-# $FreeBSD$
-
-include "../amlogic/aml8726/std.aml8726"
-
-makeoptions FDT_DTS_FILE=vsatv102-m6.dts
-
-options SMP # Enable multiple cores
-files "../amlogic/aml8726/files.smp"
-
-# Physical memory starts at 0x80000000. We assume the kernel is loaded
-# at 0x80100000 by u-boot (which doesn't support ubldr since it's missing
-# CONFIG_API). The kernel must be supplied as a binary since u-boot is
-# also missing CONFIG_CMD_ELF.
-#
-#
-options KERNVIRTADDR=0xc0100000 # Used in ldscript.arm
-makeoptions KERNVIRTADDR=0xc0100000
diff --git a/sys/arm/arm/busdma_machdep-v6.c b/sys/arm/arm/busdma_machdep-v6.c
index ed501c5..7236c5a 100644
--- a/sys/arm/arm/busdma_machdep-v6.c
+++ b/sys/arm/arm/busdma_machdep-v6.c
@@ -1685,8 +1685,8 @@ add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
/* Page offset needs to be preserved. */
- bpage->vaddr |= vaddr & PAGE_MASK;
- bpage->busaddr |= vaddr & PAGE_MASK;
+ bpage->vaddr |= addr & PAGE_MASK;
+ bpage->busaddr |= addr & PAGE_MASK;
}
bpage->datavaddr = vaddr;
bpage->dataaddr = addr;
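The one-word fix matters when a map is loaded by physical address: in
that case the data has no kernel virtual address, so the page offset to
preserve must come from the bus address being bounced, not from a vaddr
that may be zero. The same change is applied to the non-v6 busdma below.
A sketch of the corrected math, with an illustrative struct:

#include <stdint.h>

#define PG_MASK	0xfffUL	/* stand-in for PAGE_MASK */

struct bounce {
	uintptr_t vaddr;	/* page-aligned KVA of the bounce buffer */
	uint64_t busaddr;	/* page-aligned bus addr of the buffer */
};

static void
keep_pg_offset(struct bounce *bpage, uint64_t addr)	/* data bus addr */
{
	/* Was 'vaddr & PAGE_MASK', which is 0 for unmapped loads. */
	bpage->vaddr |= addr & PG_MASK;
	bpage->busaddr |= addr & PG_MASK;
}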
diff --git a/sys/arm/arm/busdma_machdep.c b/sys/arm/arm/busdma_machdep.c
index 265292d..acd8f81 100644
--- a/sys/arm/arm/busdma_machdep.c
+++ b/sys/arm/arm/busdma_machdep.c
@@ -1441,8 +1441,8 @@ add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
/* Page offset needs to be preserved. */
- bpage->vaddr |= vaddr & PAGE_MASK;
- bpage->busaddr |= vaddr & PAGE_MASK;
+ bpage->vaddr |= addr & PAGE_MASK;
+ bpage->busaddr |= addr & PAGE_MASK;
}
bpage->datavaddr = vaddr;
bpage->dataaddr = addr;
diff --git a/sys/arm/arm/cpufunc.c b/sys/arm/arm/cpufunc.c
index f98e91a..a3c8239 100644
--- a/sys/arm/arm/cpufunc.c
+++ b/sys/arm/arm/cpufunc.c
@@ -1186,7 +1186,8 @@ arm11x6_setup(void)
CPU_CONTROL_32BD_ENABLE |
CPU_CONTROL_LABT_ENABLE |
CPU_CONTROL_SYST_ENABLE |
- CPU_CONTROL_IC_ENABLE;
+ CPU_CONTROL_IC_ENABLE |
+ CPU_CONTROL_UNAL_ENABLE;
/*
* "write as existing" bits
diff --git a/sys/arm/arm/locore-v4.S b/sys/arm/arm/locore-v4.S
index 8ef53e9..d798e97 100644
--- a/sys/arm/arm/locore-v4.S
+++ b/sys/arm/arm/locore-v4.S
@@ -116,7 +116,7 @@ ASENTRY_NP(_start)
* If we're running with MMU disabled, test against the
* physical address instead.
*/
- mrc p15, 0, r2, c1, c0, 0
+ mrc p15, 0, r2, c1, c0, 0
ands r2, r2, #CPU_CONTROL_MMU_ENABLE
ldreq r6, =PHYSADDR
ldrne r6, =LOADERRAMADDR
@@ -125,7 +125,7 @@ ASENTRY_NP(_start)
cmp r7, pc
bhi from_ram
b do_copy
-
+
flash_lower:
cmp r6, pc
bls from_ram
@@ -148,12 +148,12 @@ from_ram:
disable_mmu:
/* Disable MMU for a while */
- mrc p15, 0, r2, c1, c0, 0
+ mrc p15, 0, r2, c1, c0, 0
bic r2, r2, #(CPU_CONTROL_MMU_ENABLE | CPU_CONTROL_DC_ENABLE |\
CPU_CONTROL_WBUF_ENABLE)
bic r2, r2, #(CPU_CONTROL_IC_ENABLE)
bic r2, r2, #(CPU_CONTROL_BPRD_ENABLE)
- mcr p15, 0, r2, c1, c0, 0
+ mcr p15, 0, r2, c1, c0, 0
nop
nop
@@ -169,36 +169,16 @@ Lunmapped:
adr r0, Lpagetable
bl translate_va_to_pa
-#ifndef _ARM_ARCH_6
/*
* Some of the older ports (the various XScale, mostly) assume
* that the memory before the kernel is mapped, and use it for
- * the various stacks, page tables, etc. For those CPUs, map the
- * 64 first MB of RAM, as it used to be.
+ * the various stacks, page tables, etc. For those CPUs, map the
+ * 64 first MB of RAM, as it used to be.
*/
/*
* Map PA == VA
- */
- ldr r5, =PHYSADDR
- mov r1, r5
- mov r2, r5
- /* Map 64MiB, preserved over calls to build_pagetables */
- mov r3, #64
- bl build_pagetables
-
- /* Create the kernel map to jump to */
- mov r1, r5
- ldr r2, =(KERNBASE)
- bl build_pagetables
- ldr r5, =(KERNPHYSADDR)
-#else
- /*
- * Map PA == VA
- */
- /* Find the start kernels load address */
- adr r5, _start
- ldr r2, =(L1_S_OFFSET)
- bic r5, r2
+ */
+ ldr r5, =PHYSADDR
mov r1, r5
mov r2, r5
/* Map 64MiB, preserved over calls to build_pagetables */
@@ -207,10 +187,10 @@ Lunmapped:
/* Create the kernel map to jump to */
mov r1, r5
- ldr r2, =(KERNVIRTADDR)
+ ldr r2, =(KERNBASE)
bl build_pagetables
-#endif
-
+ ldr r5, =(KERNPHYSADDR)
+
#if defined(SOCDEV_PA) && defined(SOCDEV_VA)
/* Create the custom map */
ldr r1, =SOCDEV_PA
@@ -221,26 +201,16 @@ Lunmapped:
mcr p15, 0, r0, c2, c0, 0 /* Set TTB */
mcr p15, 0, r0, c8, c7, 0 /* Flush TLB */
-#if defined(CPU_ARM1136) || defined(CPU_ARM1176) || defined(CPU_CORTEXA) || defined(CPU_MV_PJ4B) || defined(CPU_KRAIT)
- mov r0, #0
- mcr p15, 0, r0, c13, c0, 1 /* Set ASID to 0 */
-#endif
-
/* Set the Domain Access register. Very important! */
- mov r0, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT)
+ mov r0, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT)
mcr p15, 0, r0, c3, c0, 0
- /*
+ /*
* Enable MMU.
* On armv6 enable extended page tables, and set alignment checking
* to modulo-4 (CPU_CONTROL_UNAL_ENABLE) for the ldrd/strd
* instructions emitted by clang.
*/
mrc p15, 0, r0, c1, c0, 0
-#ifdef _ARM_ARCH_6
- orr r0, r0, #(CPU_CONTROL_V6_EXTPAGE | CPU_CONTROL_UNAL_ENABLE)
- orr r0, r0, #(CPU_CONTROL_AFLT_ENABLE)
- orr r0, r0, #(CPU_CONTROL_AF_ENABLE)
-#endif
orr r0, r0, #(CPU_CONTROL_MMU_ENABLE)
mcr p15, 0, r0, c1, c0, 0
nop
@@ -280,7 +250,7 @@ virt_done:
/* init arm will return the new stack pointer. */
mov sp, r0
- bl _C_LABEL(mi_startup) /* call mi_startup()! */
+ bl _C_LABEL(mi_startup) /* call mi_startup()! */
adr r0, .Lmainreturned
b _C_LABEL(panic)
@@ -389,11 +359,11 @@ pagetable:
.word _C_LABEL(cpufuncs)
ENTRY_NP(cpu_halt)
- mrs r2, cpsr
+ mrs r2, cpsr
bic r2, r2, #(PSR_MODE)
- orr r2, r2, #(PSR_SVC32_MODE)
+ orr r2, r2, #(PSR_SVC32_MODE)
orr r2, r2, #(PSR_I | PSR_F)
- msr cpsr_fsxc, r2
+ msr cpsr_fsxc, r2
ldr r4, .Lcpu_reset_address
ldr r4, [r4]
@@ -419,9 +389,9 @@ ENTRY_NP(cpu_halt)
* Hurl ourselves into the ROM
*/
mov r0, #(CPU_CONTROL_32BP_ENABLE | CPU_CONTROL_32BD_ENABLE)
- mcr p15, 0, r0, c1, c0, 0
- mcrne p15, 0, r2, c8, c7, 0 /* nail I+D TLB on ARMv4 and greater */
- mov pc, r4
+ mcr p15, 0, r0, c1, c0, 0
+ mcrne p15, 0, r2, c8, c7, 0 /* nail I+D TLB on ARMv4 and greater */
+ mov pc, r4
/*
* _cpu_reset_address contains the address to branch to, to complete
@@ -458,7 +428,7 @@ ENTRY(longjmp)
END(longjmp)
.data
- .global _C_LABEL(esym)
+ .global _C_LABEL(esym)
_C_LABEL(esym): .word _C_LABEL(end)
ENTRY_NP(abort)
@@ -471,7 +441,7 @@ ENTRY_NP(sigcode)
/*
* Call the sigreturn system call.
- *
+ *
* We have to load r7 manually rather than using
* "ldr r7, =SYS_sigreturn" to ensure the value of szsigcode is
* correct. Using the alternative places esigcode at the address
diff --git a/sys/arm/arm/locore-v6.S b/sys/arm/arm/locore-v6.S
index 55b4311..7d5ba97 100644
--- a/sys/arm/arm/locore-v6.S
+++ b/sys/arm/arm/locore-v6.S
@@ -39,7 +39,7 @@
__FBSDID("$FreeBSD$");
-#ifndef ARM_NEW_PMAP
+#ifndef ARM_NEW_PMAP
#define PTE1_OFFSET L1_S_OFFSET
#define PTE1_SHIFT L1_S_SHIFT
#define PTE1_SIZE L1_S_SIZE
@@ -52,13 +52,13 @@ __FBSDID("$FreeBSD$");
.align 2
/*
- * On entry for FreeBSD boot ABI:
- * r0 - metadata pointer or 0 (boothowto on AT91's boot2)
- * r1 - if (r0 == 0) then metadata pointer
- * On entry for Linux boot ABI:
+ * On entry for FreeBSD boot ABI:
+ * r0 - metadata pointer or 0 (boothowto on AT91's boot2)
+ * r1 - if (r0 == 0) then metadata pointer
+ * On entry for Linux boot ABI:
* r0 - 0
* r1 - machine type (passed as arg2 to initarm)
- * r2 - Pointer to a tagged list or dtb image (phys addr) (passed as arg1 initarm)
+ * r2 - Pointer to a tagged list or dtb image (phys addr) (passed as arg1 initarm)
*
* For both types of boot we gather up the args, put them in a struct arm_boot_params
* structure and pass that to initarm.
@@ -66,17 +66,17 @@ __FBSDID("$FreeBSD$");
.globl btext
btext:
ASENTRY_NP(_start)
- STOP_UNWINDING /* Can't unwind into the bootloader! */
+ STOP_UNWINDING /* Can't unwind into the bootloader! */
- /* Make sure interrupts are disabled. */
+ /* Make sure interrupts are disabled. */
cpsid ifa
- mov r8, r0 /* 0 or boot mode from boot2 */
- mov r9, r1 /* Save Machine type */
- mov r10, r2 /* Save meta data */
+ mov r8, r0 /* 0 or boot mode from boot2 */
+ mov r9, r1 /* Save Machine type */
+ mov r10, r2 /* Save meta data */
mov r11, r3 /* Future expansion */
- /*
+ /*
* Check whether data cache is enabled. If it is, then we know
* current tags are valid (not power-on garbage values) and there
* might be dirty lines that need cleaning. Disable cache to prevent
@@ -93,7 +93,7 @@ ASENTRY_NP(_start)
* valid. Disable all caches and the MMU, and invalidate everything
* before setting up new page tables and re-enabling the mmu.
*/
-1:
+1:
bic r7, #CPU_CONTROL_DC_ENABLE
bic r7, #CPU_CONTROL_MMU_ENABLE
bic r7, #CPU_CONTROL_IC_ENABLE
@@ -119,13 +119,13 @@ ASENTRY_NP(_start)
/*
* Map PA == VA
*/
- /* Find the start kernels load address */
+ /* Find the start kernels load address */
adr r5, _start
ldr r2, =(PTE1_OFFSET)
bic r5, r2
mov r1, r5
mov r2, r5
- /* Map 64MiB, preserved over calls to build_pagetables */
+ /* Map 64MiB, preserved over calls to build_pagetables */
mov r3, #64
bl build_pagetables
@@ -142,41 +142,41 @@ ASENTRY_NP(_start)
#endif
bl init_mmu
- /* Switch to virtual addresses. */
+ /* Switch to virtual addresses. */
ldr pc, =1f
1:
- /* Setup stack, clear BSS */
+ /* Setup stack, clear BSS */
ldr r1, =.Lstart
ldmia r1, {r1, r2, sp} /* Set initial stack and */
add sp, sp, #INIT_ARM_STACK_SIZE
- sub r2, r2, r1 /* get zero init data */
+ sub r2, r2, r1 /* get zero init data */
mov r3, #0
2:
str r3, [r1], #0x0004 /* get zero init data */
- subs r2, r2, #4
+ subs r2, r2, #4
bgt 2b
- mov r1, #28 /* loader info size is 28 bytes also second arg */
- subs sp, sp, r1 /* allocate arm_boot_params struct on stack */
- mov r0, sp /* loader info pointer is first arg */
- bic sp, sp, #7 /* align stack to 8 bytes */
- str r1, [r0] /* Store length of loader info */
+ mov r1, #28 /* loader info size is 28 bytes also second arg */
+ subs sp, sp, r1 /* allocate arm_boot_params struct on stack */
+ mov r0, sp /* loader info pointer is first arg */
+ bic sp, sp, #7 /* align stack to 8 bytes */
+ str r1, [r0] /* Store length of loader info */
str r8, [r0, #4] /* Store r0 from boot loader */
str r9, [r0, #8] /* Store r1 from boot loader */
str r10, [r0, #12] /* store r2 from boot loader */
str r11, [r0, #16] /* store r3 from boot loader */
str r5, [r0, #20] /* store the physical address */
- adr r4, Lpagetable /* load the pagetable address */
+ adr r4, Lpagetable /* load the pagetable address */
ldr r5, [r4, #4]
str r5, [r0, #24] /* store the pagetable address */
mov fp, #0 /* trace back starts here */
bl _C_LABEL(initarm) /* Off we go */
- /* init arm will return the new stack pointer. */
+ /* init arm will return the new stack pointer. */
mov sp, r0
- bl _C_LABEL(mi_startup) /* call mi_startup()! */
+ bl _C_LABEL(mi_startup) /* call mi_startup()! */
ldr r0, =.Lmainreturned
b _C_LABEL(panic)
@@ -219,8 +219,8 @@ translate_va_to_pa:
mov pc, lr
/*
- * Init MMU
- * r0 - The table base address
+ * Init MMU
+ * r0 - the table base address
*/
ASENTRY_NP(init_mmu)
@@ -267,11 +267,11 @@ END(init_mmu)
/*
- * Init SMP coherent mode, enable caching and switch to final MMU table.
- * Called with disabled caches
- * r0 - The table base address
- * r1 - clear bits for aux register
- * r2 - set bits for aux register
+ * Init SMP coherent mode, enable caching and switch to final MMU table.
+ * Called with disabled caches
+ * r0 - The table base address
+ * r1 - clear bits for aux register
+ * r2 - set bits for aux register
*/
ASENTRY_NP(reinit_mmu)
push {r4-r11, lr}
@@ -331,11 +331,11 @@ END(reinit_mmu)
/*
* Builds the page table
- * r0 - The table base address
- * r1 - The physical address (trashed)
- * r2 - The virtual address (trashed)
- * r3 - The number of 1MiB sections
- * r4 - Trashed
+ * r0 - The table base address
+ * r1 - The physical address (trashed)
+ * r2 - The virtual address (trashed)
+ * r3 - The number of 1MiB sections
+ * r4 - Trashed
*
* Addresses must be 1MiB aligned
*/
@@ -350,15 +350,15 @@ build_pagetables:
#endif
orr r1, r4
- /* Move the virtual address to the correct bit location */
+ /* Move the virtual address to the correct bit location */
lsr r2, #(PTE1_SHIFT - 2)
mov r4, r3
1:
str r1, [r0, r2]
- add r2, r2, #4
- add r1, r1, #(PTE1_SIZE)
- adds r4, r4, #-1
+ add r2, r2, #4
+ add r1, r1, #(PTE1_SIZE)
+ adds r4, r4, #-1
bhi 1b
mov pc, lr
@@ -372,7 +372,7 @@ VA_TO_PA_POINTER(Lpagetable, boot_pt1)
.word svcstk /* must remain in order together. */
.Lmainreturned:
- .asciz "main() returned"
+ .asciz "main() returned"
.align 2
.bss
@@ -380,8 +380,8 @@ svcstk:
.space INIT_ARM_STACK_SIZE * MAXCPU
/*
- * Memory for the initial pagetable. We are unable to place this in
- * the bss as this will be cleared after the table is loaded.
+ * Memory for the initial pagetable. We are unable to place this in
+ * the bss as this will be cleared after the table is loaded.
*/
.section ".init_pagetable"
.align 14 /* 16KiB aligned */
@@ -398,7 +398,7 @@ boot_pt1:
#if defined(SMP)
ASENTRY_NP(mpentry)
- /* Make sure interrupts are disabled. */
+ /* Make sure interrupts are disabled. */
cpsid ifa
/* Setup core, disable all caches. */
@@ -419,10 +419,10 @@ ASENTRY_NP(mpentry)
mcr CP15_ICIALLU
ISB
- /* Find the delta between VA and PA */
+ /* Find the delta between VA and PA */
adr r0, Lpagetable
bl translate_va_to_pa
-
+
bl init_mmu
adr r1, .Lstart+8 /* Get initstack pointer from */
@@ -433,7 +433,7 @@ ASENTRY_NP(mpentry)
mul r2, r1, r0 /* Point sp to initstack */
add sp, sp, r2 /* area for this processor. */
- /* Switch to virtual addresses. */
+ /* Switch to virtual addresses. */
ldr pc, =1f
1:
mov fp, #0 /* trace back starts here */
@@ -459,14 +459,14 @@ ENTRY_NP(cpu_halt)
ldr r4, [r4]
teq r4, #0
movne pc, r4
-1:
+1:
WFI
b 1b
/*
* _cpu_reset_address contains the address to branch to, to complete
* the cpu reset after turning the MMU off
- * This variable is provided by the hardware specific code
+ * This variable is provided by the hardware specific code
*/
.Lcpu_reset_address:
.word _C_LABEL(cpu_reset_address)
@@ -498,38 +498,37 @@ END(abort)
ENTRY_NP(sigcode)
mov r0, sp
- add r0, r0, #SIGF_UC
+ add r0, r0, #SIGF_UC
/*
- * Call the sigreturn system call.
+ * Call the sigreturn system call.
*
* We have to load r7 manually rather than using
- * "ldr r7, =SYS_sigreturn" to ensure the value of szsigcode is
+ * "ldr r7, =SYS_sigreturn" to ensure the value of szsigcode is
* correct. Using the alternative places esigcode at the address
- * of the data rather than the address one past the data.
+ * of the data rather than the address one past the data.
*/
- ldr r7, [pc, #12] /* Load SYS_sigreturn */
+ ldr r7, [pc, #12] /* Load SYS_sigreturn */
swi SYS_sigreturn
- /* Well if that failed we better exit quick ! */
+ /* Well if that failed we better exit quick ! */
- ldr r7, [pc, #8] /* Load SYS_exit */
+ ldr r7, [pc, #8] /* Load SYS_exit */
swi SYS_exit
- /* Branch back to retry SYS_sigreturn */
+ /* Branch back to retry SYS_sigreturn */
b . - 16
END(sigcode)
-
.word SYS_sigreturn
.word SYS_exit
.align 2
- .global _C_LABEL(esigcode)
+ .global _C_LABEL(esigcode)
_C_LABEL(esigcode):
.data
- .global szsigcode
+ .global szsigcode
szsigcode:
.long esigcode-sigcode
diff --git a/sys/arm/arm/pmap-v6-new.c b/sys/arm/arm/pmap-v6-new.c
index 53896d4..f404214 100644
--- a/sys/arm/arm/pmap-v6-new.c
+++ b/sys/arm/arm/pmap-v6-new.c
@@ -6094,13 +6094,13 @@ pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb)
/*
- * Clean L1 data cache range on a single page, which is not mapped yet.
+ * Clean L1 data cache range by physical address.
+ * The range must be within a single page.
*/
static void
pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
{
struct sysmaps *sysmaps;
- vm_offset_t va;
KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE,
("%s: not on single page", __func__));
@@ -6111,9 +6111,8 @@ pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
if (*sysmaps->CMAP3)
panic("%s: CMAP3 busy", __func__);
pte2_store(sysmaps->CMAP3, PTE2_KERN_NG(pa, PTE2_AP_KRW, ma));
- va = (vm_offset_t)sysmaps->CADDR3;
- tlb_flush_local(va);
- dcache_wb_pou(va, size);
+ tlb_flush_local((vm_offset_t)sysmaps->CADDR3);
+ dcache_wb_pou((vm_offset_t)sysmaps->CADDR3 + (pa & PAGE_MASK), size);
pte2_clear(sysmaps->CMAP3);
sched_unpin();
mtx_unlock(&sysmaps->lock);
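The pmap fix is an offset correction, not a behavior change: CMAP3 maps
the target page at a page-aligned scratch VA, so the clean must start at
that VA plus pa's sub-page offset; the old code cleaned from the start
of the page regardless of where the range actually began. A sketch of
the address math:

#include <stdint.h>

#define PG_MASK	0xfffUL	/* stand-in for PAGE_MASK */

static uintptr_t
wb_target(uintptr_t scratch_va, uint64_t pa)
{
	/* Start the write-back at the same in-page offset as 'pa'. */
	return (scratch_va + (pa & PG_MASK));
}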
diff --git a/sys/arm/broadcom/bcm2835/bcm2835_mbox.c b/sys/arm/broadcom/bcm2835/bcm2835_mbox.c
index 2ab7ebf..af600ca 100644
--- a/sys/arm/broadcom/bcm2835/bcm2835_mbox.c
+++ b/sys/arm/broadcom/bcm2835/bcm2835_mbox.c
@@ -306,8 +306,7 @@ bcm2835_mbox_init_dma(device_t dev, size_t len, bus_dma_tag_t *tag,
return (NULL);
}
- err = bus_dmamap_load(*tag, *map, buf,
- sizeof(struct msg_set_power_state), bcm2835_mbox_dma_cb,
+ err = bus_dmamap_load(*tag, *map, buf, len, bcm2835_mbox_dma_cb,
phys, 0);
if (err != 0) {
bus_dmamem_free(*tag, buf, *map);
diff --git a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c
index 19eeb71..1024a48 100644
--- a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c
+++ b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c
@@ -181,17 +181,16 @@ bcm_sdhci_attach(device_t dev)
if (err == 0) {
/* Convert to MHz */
default_freq /= 1000000;
- if (bootverbose)
- device_printf(dev, "default frequency: %dMHz\n",
- default_freq);
+ }
+ if (default_freq == 0) {
+ node = ofw_bus_get_node(sc->sc_dev);
+ if ((OF_getencprop(node, "clock-frequency", &cell,
+ sizeof(cell))) > 0)
+ default_freq = cell / 1000000;
}
if (default_freq == 0)
default_freq = BCM2835_DEFAULT_SDHCI_FREQ;
- node = ofw_bus_get_node(sc->sc_dev);
- if ((OF_getprop(node, "clock-frequency", &cell, sizeof(cell))) > 0)
- default_freq = fdt32_to_cpu(cell)/1000000;
-
if (bootverbose)
device_printf(dev, "SDHCI frequency: %dMHz\n", default_freq);
diff --git a/sys/arm/broadcom/bcm2835/std.bcm2836 b/sys/arm/broadcom/bcm2835/std.bcm2836
index 874083f..bb112be 100644
--- a/sys/arm/broadcom/bcm2835/std.bcm2836
+++ b/sys/arm/broadcom/bcm2835/std.bcm2836
@@ -5,6 +5,8 @@ cpu CPU_CORTEXA
makeoptions CONF_CFLAGS="-march=armv7a"
options SOC_BCM2836
+options ARM_L2_PIPT
+
files "../broadcom/bcm2835/files.bcm2836"
files "../broadcom/bcm2835/files.bcm283x"
diff --git a/sys/arm/conf/AML8726 b/sys/arm/conf/AML8726
new file mode 100644
index 0000000..7272db4
--- /dev/null
+++ b/sys/arm/conf/AML8726
@@ -0,0 +1,143 @@
+#
+# Kernel configuration for Amlogic aml8726 boards.
+#
+# For more information on this file, please read the config(5) manual page,
+# and/or the handbook section on Kernel Configuration Files:
+#
+# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
+#
+# The handbook is also available locally in /usr/share/doc/handbook
+# if you've installed the doc distribution, otherwise always see the
+# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
+# latest information.
+#
+# An exhaustive list of options and more detailed explanations of the
+# device lines is also present in the ../../conf/NOTES and NOTES files.
+# If you are in doubt as to the purpose or necessity of a line, check first
+# in NOTES.
+#
+# $FreeBSD$
+
+ident AML8726
+include "../amlogic/aml8726/std.aml8726"
+
+options HZ=100
+options SCHED_ULE # ULE scheduler
+options PREEMPTION # Enable kernel thread preemption
+options INET # InterNETworking
+options INET6 # IPv6 communications protocols
+options SCTP # Stream Control Transmission Protocol
+options FFS # Berkeley Fast Filesystem
+options SOFTUPDATES # Enable FFS soft updates support
+options UFS_ACL # Support for access control lists
+options UFS_DIRHASH # Improve performance on big directories
+options UFS_GJOURNAL # Enable gjournal-based UFS journaling
+options QUOTA # Enable disk quotas for UFS
+options NFSCL # Network Filesystem Client
+options NFSLOCKD # Network Lock Manager
+options NFS_ROOT # NFS usable as /, requires NFSCL
+options MSDOSFS # MSDOS Filesystem
+options CD9660 # ISO 9660 Filesystem
+options PROCFS # Process filesystem (requires PSEUDOFS)
+options PSEUDOFS # Pseudo-filesystem framework
+options TMPFS # Efficient memory filesystem
+options GEOM_PART_GPT # GUID Partition Tables
+options GEOM_PART_BSD # BSD partition scheme
+options GEOM_PART_MBR # MBR partition scheme
+options COMPAT_43 # Compatible with BSD 4.3 [KEEP THIS!]
+options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI
+options KTRACE # ktrace(1) support
+options SYSVSHM # SYSV-style shared memory
+options SYSVMSG # SYSV-style message queues
+options SYSVSEM # SYSV-style semaphores
+options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
+options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
+options KBD_INSTALL_CDEV # install a CDEV entry in /dev
+options FREEBSD_BOOT_LOADER # Process metadata passed from loader(8)
+options LINUX_BOOT_ABI
+options VFP # Enable floating point hardware support
+options SMP # Enable multiple cores
+
+# Debugging
+makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
+options ALT_BREAK_TO_DEBUGGER
+#options VERBOSE_SYSINIT # Enable verbose sysinit messages
+options BOOTVERBOSE=1
+options KDB # Enable kernel debugger support
+# For minimum debugger support (stable branch) use:
+#options KDB_TRACE # Print a stack trace for a panic
+# For full debugger support use this instead:
+options DDB # Enable the kernel debugger
+options INVARIANTS # Enable calls of extra sanity checking
+options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
+options WITNESS # Enable checks to detect deadlocks and cycles
+options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
+#options DIAGNOSTIC
+
+# NFS root from bootp/dhcp
+#options BOOTP
+#options BOOTP_NFSROOT
+#options BOOTP_COMPAT
+#options BOOTP_NFSV3
+#options BOOTP_WIRED_TO=axe0
+
+# MMC/SD/SDIO Card slot support
+device mmc # mmc/sd bus
+device mmcsd # mmc/sd flash cards
+
+# Boot device is 2nd slice on MMC/SD card
+options ROOTDEVNAME=\"ufs:mmcsd0s2\"
+
+# GPIO
+device gpio
+device gpioled
+
+# I2C support
+device iicbus
+device iicbb
+device iic
+
+# vt is the default console driver, resembling an SCO console
+device vt
+#device kbdmux
+
+# Serial (COM) ports
+device uart # Generic UART driver
+
+# Pseudo devices.
+device loop # Network loopback
+device random # Entropy device
+device ether # Ethernet support
+device pty # BSD-style compatibility pseudo ttys
+
+# The `bpf' device enables the Berkeley Packet Filter.
+# Be aware of the administrative consequences of enabling this!
+# Note that 'bpf' is required for DHCP.
+device bpf # Berkeley packet filter
+
+# USB support
+device usb # General USB code (mandatory for USB)
+device dwcotg # DWC OTG controller
+options USB_HOST_ALIGN=64 # Align usb buffers to cache line size.
+options USB_DEBUG
+#options USB_REQ_DEBUG
+#options USB_VERBOSE
+
+#device ukbd # USB keyboard
+#device ums # USB mouse
+
+device scbus # SCSI bus (required for ATA/SCSI)
+device da # Direct Access (disks)
+device umass # Disks/Mass storage - Requires scbus and da
+
+# Ethernet support
+device miibus # MII bus support
+
+# SoC Ethernet, requires miibus
+device dwc
+
+# USB Ethernet support, requires miibus
+device axe # ASIX Electronics USB Ethernet
+
+# Flattened Device Tree
+options FDT # Configure using FDT/DTB data
diff --git a/sys/arm/conf/EFIKA_MX b/sys/arm/conf/EFIKA_MX
index 6ea9d8a..a4690f6 100644
--- a/sys/arm/conf/EFIKA_MX
+++ b/sys/arm/conf/EFIKA_MX
@@ -24,6 +24,8 @@ include "../freescale/imx/std.imx51"
makeoptions WITHOUT_MODULES="ahc"
+options SOC_IMX51
+
options SCHED_4BSD # 4BSD scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
diff --git a/sys/arm/conf/IMX53 b/sys/arm/conf/IMX53
index d627c25..9f78f30 100644
--- a/sys/arm/conf/IMX53
+++ b/sys/arm/conf/IMX53
@@ -22,6 +22,8 @@ ident IMX53
include "../freescale/imx/std.imx53"
+options SOC_IMX53
+
options SCHED_4BSD # 4BSD scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
diff --git a/sys/arm/conf/IMX6 b/sys/arm/conf/IMX6
index ada4eea..f1baf29 100644
--- a/sys/arm/conf/IMX6
+++ b/sys/arm/conf/IMX6
@@ -21,6 +21,8 @@
ident IMX6
include "../freescale/imx/std.imx6"
+options SOC_IMX6
+
options HZ=500 # Scheduling quantum is 2 milliseconds.
options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption
diff --git a/sys/arm/conf/ODROIDC1 b/sys/arm/conf/ODROIDC1
index 7d70fa4..7bb7b56 100644
--- a/sys/arm/conf/ODROIDC1
+++ b/sys/arm/conf/ODROIDC1
@@ -1,7 +1,7 @@
# ODROIDC1 -- Custom configuration for the HardKernel ODROID-C1 SBC
#
-# For more information on this file, please read the handbook section on
-# Kernel Configuration Files:
+# For more information on this file, please read the config(5) manual page,
+# and/or the handbook section on Kernel Configuration Files:
#
# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
#
@@ -17,117 +17,10 @@
#
# $FreeBSD$
-ident ODROIDC1
-
-include "../amlogic/aml8726/std.odroidc1"
-
-options HZ=100
-options SCHED_ULE # ULE scheduler
-options PREEMPTION # Enable kernel thread preemption
-options INET # InterNETworking
-options INET6 # IPv6 communications protocols
-options FFS # Berkeley Fast Filesystem
-options SOFTUPDATES # Enable FFS soft updates support
-options UFS_ACL # Support for access control lists
-options UFS_DIRHASH # Improve performance on big directories
-options PROCFS # Process filesystem (requires PSEUDOFS)
-options PSEUDOFS # Pseudo-filesystem framework
-options GEOM_PART_BSD # BSD partition scheme
-options GEOM_PART_MBR # MBR partition scheme
-options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI
-options KTRACE # ktrace(1) support
-options SYSVSHM # SYSV-style shared memory
-options SYSVMSG # SYSV-style message queues
-options SYSVSEM # SYSV-style semaphores
-options _KPOSIX_PRIORITY_SCHEDULING # Posix P1003_1B real-time extensions
-options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options LINUX_BOOT_ABI
-options VFP # vfp/neon
-
-# Debugging
-makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
-options ALT_BREAK_TO_DEBUGGER
-#options VERBOSE_SYSINIT # Enable verbose sysinit messages
-options BOOTVERBOSE=1
-options KDB
-options DDB # Enable the kernel debugger
-options INVARIANTS # Enable calls of extra sanity checking
-options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
-options WITNESS # Enable checks to detect deadlocks and cycles
-options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
-#options DIAGNOSTIC
-
-# NFS support
-#options NFSCL # New Network Filesystem Client
-#options NFSLOCKD # Network Lock Manager
-#options NFS_ROOT # NFS usable as /, requires NFSCL
-
-# NFS root from boopt/dhcp
-#options BOOTP
-#options BOOTP_NFSROOT
-#options BOOTP_COMPAT
-#options BOOTP_NFSV3
-#options BOOTP_WIRED_TO=axe0
-
-# MMC/SD/SDIO card slot support
-device mmc # mmc/sd bus
-device mmcsd # mmc/sd flash cards
-
-# Boot device is 2nd slice on MMC/SD card
-options ROOTDEVNAME=\"ufs:mmcsd0s2\"
-
-# GPIO
-device gpio
-device gpioled
-
-# I2C support
-device iicbus
-device iicbb
-device iic
+#NO_UNIVERSE
-# vt is the default console driver, resembling an SCO console
-device vt
-#device kbdmux
-
-# Serial (COM) ports
-device uart # Generic UART driver
-
-# Pseudo devices.
-device loop # Network loopback
-device random # Entropy device
-device ether # Ethernet support
-device pty # BSD-style compatibility pseudo ttys
-
-# The `bpf' device enables the Berkeley Packet Filter.
-# Be aware of the administrative consequences of enabling this!
-# Note that 'bpf' is required for DHCP.
-device bpf # Berkeley packet filter
-
-# USB support
-device usb # General USB code (mandatory for USB)
-device dwcotg # DWC OTG controller
-options USB_HOST_ALIGN=64 # Cacheline size is 64
-options USB_DEBUG
-#options USB_REQ_DEBUG
-#options USB_VERBOSE
-
-#device ukbd # USB keyboard
-#device ums # USB mouse
-
-device scbus # SCSI bus (required for ATA/SCSI)
-device da # Direct Access (disks)
-device umass # Disks/Mass storage - Requires scbus and da
-
-# Ethernet support
-device miibus # MII bus support
-
-# SoC Ethernet, requires miibus
-device dwc
-
-# USB Ethernet, requires miibus
-device axe # ASIX Electronics USB Ethernet
+include "AML8726"
+ident ODROIDC1
-# Flattened Device Tree
-options FDT
-options FDT_DTB_STATIC
+options FDT_DTB_STATIC
+makeoptions FDT_DTS_FILE=odroidc1.dts
diff --git a/sys/arm/conf/RK3188 b/sys/arm/conf/RK3188
index 2aba68e..64065d3 100644
--- a/sys/arm/conf/RK3188
+++ b/sys/arm/conf/RK3188
@@ -72,8 +72,8 @@ options WITNESS # Enable checks to detect deadlocks and cycles
options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
options DIAGNOSTIC
-# Boot device is 2nd slice on USB
-options ROOTDEVNAME=\"ufs:/dev/da0s2\"
+# Root mount from MMC/SD card
+options ROOTDEVNAME=\"ufs:/dev/mmcsd0\"
# MMC/SD/SDIO Card slot support
device mmc # mmc/sd bus
diff --git a/sys/arm/conf/RPI2 b/sys/arm/conf/RPI2
index 02f3869..18d97d7 100644
--- a/sys/arm/conf/RPI2
+++ b/sys/arm/conf/RPI2
@@ -137,6 +137,6 @@ device bcm2835_spi
options FDT # Configure using FDT/DTB data
# Note: DTB is normally loaded and modified by RPi boot loader, then
# handed to kernel via U-Boot and ubldr.
-options FDT_DTB_STATIC
-makeoptions FDT_DTS_FILE=rpi2.dts
+#options FDT_DTB_STATIC
+#makeoptions FDT_DTS_FILE=rpi2.dts
makeoptions MODULES_EXTRA=dtb/rpi
diff --git a/sys/arm/conf/VSATV102 b/sys/arm/conf/VSATV102
index a3df9fa..b5de5f9 100644
--- a/sys/arm/conf/VSATV102
+++ b/sys/arm/conf/VSATV102
@@ -1,7 +1,7 @@
# VSATV102 -- Custom configuration for the Visson ATV-102 media player
#
-# For more information on this file, please read the handbook section on
-# Kernel Configuration Files:
+# For more information on this file, please read the config(5) manual page,
+# and/or the handbook section on Kernel Configuration Files:
#
# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
#
@@ -17,117 +17,10 @@
#
# $FreeBSD$
-ident VSATV102
-
-include "../amlogic/aml8726/std.vsatv102-m6"
-
-options HZ=100
-options SCHED_ULE # ULE scheduler
-options PREEMPTION # Enable kernel thread preemption
-options INET # InterNETworking
-options INET6 # IPv6 communications protocols
-options FFS # Berkeley Fast Filesystem
-options SOFTUPDATES # Enable FFS soft updates support
-options UFS_ACL # Support for access control lists
-options UFS_DIRHASH # Improve performance on big directories
-options PROCFS # Process filesystem (requires PSEUDOFS)
-options PSEUDOFS # Pseudo-filesystem framework
-options GEOM_PART_BSD # BSD partition scheme
-options GEOM_PART_MBR # MBR partition scheme
-options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI
-options KTRACE # ktrace(1) support
-options SYSVSHM # SYSV-style shared memory
-options SYSVMSG # SYSV-style message queues
-options SYSVSEM # SYSV-style semaphores
-options _KPOSIX_PRIORITY_SCHEDULING # Posix P1003_1B real-time extensions
-options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options LINUX_BOOT_ABI
-options VFP # vfp/neon
-
-# Debugging
-makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
-options ALT_BREAK_TO_DEBUGGER
-#options VERBOSE_SYSINIT # Enable verbose sysinit messages
-options BOOTVERBOSE=1
-options KDB
-options DDB # Enable the kernel debugger
-options INVARIANTS # Enable calls of extra sanity checking
-options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
-options WITNESS # Enable checks to detect deadlocks and cycles
-options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
-#options DIAGNOSTIC
-
-# NFS support
-#options NFSCL # New Network Filesystem Client
-#options NFSLOCKD # Network Lock Manager
-#options NFS_ROOT # NFS usable as /, requires NFSCL
-
-# NFS root from boopt/dhcp
-#options BOOTP
-#options BOOTP_NFSROOT
-#options BOOTP_COMPAT
-#options BOOTP_NFSV3
-#options BOOTP_WIRED_TO=axe0
-
-# MMC/SD/SDIO card slot support
-device mmc # mmc/sd bus
-device mmcsd # mmc/sd flash cards
-
-# Boot device is 2nd slice on MMC/SD card
-options ROOTDEVNAME=\"ufs:mmcsd0s2\"
-
-# GPIO
-device gpio
-device gpioled
-
-# I2C support
-device iicbus
-device iicbb
-device iic
+#NO_UNIVERSE
-# vt is the default console driver, resembling an SCO console
-device vt
-#device kbdmux
-
-# Serial (COM) ports
-device uart # Generic UART driver
-
-# Pseudo devices.
-device loop # Network loopback
-device random # Entropy device
-device ether # Ethernet support
-device pty # BSD-style compatibility pseudo ttys
-
-# The `bpf' device enables the Berkeley Packet Filter.
-# Be aware of the administrative consequences of enabling this!
-# Note that 'bpf' is required for DHCP.
-device bpf # Berkeley packet filter
-
-# USB support
-device usb # General USB code (mandatory for USB)
-device dwcotg # DWC OTG controller
-options USB_HOST_ALIGN=64 # Cacheline size is 64
-options USB_DEBUG
-#options USB_REQ_DEBUG
-#options USB_VERBOSE
-
-#device ukbd # USB keyboard
-#device ums # USB mouse
-
-device scbus # SCSI bus (required for ATA/SCSI)
-device da # Direct Access (disks)
-device umass # Disks/Mass storage - Requires scbus and da
-
-# Ethernet support
-device miibus # MII bus support
-
-# SoC Ethernet, requires miibus
-device dwc
-
-# USB Ethernet, requires miibus
-device axe # ASIX Electronics USB Ethernet
+include "AML8726"
+ident VSATV102
-# Flattened Device Tree
-options FDT
-options FDT_DTB_STATIC
+options FDT_DTB_STATIC
+makeoptions FDT_DTS_FILE=vsatv102-m6.dts
diff --git a/sys/arm/freescale/imx/files.imx53 b/sys/arm/freescale/imx/files.imx5
index 6ca4ffd..58e925b 100644
--- a/sys/arm/freescale/imx/files.imx53
+++ b/sys/arm/freescale/imx/files.imx5
@@ -6,14 +6,15 @@ kern/kern_clocksource.c standard
# Init
arm/freescale/imx/imx_common.c standard
arm/freescale/imx/imx_machdep.c standard
-arm/freescale/imx/imx53_machdep.c standard
+arm/freescale/imx/imx51_machdep.c optional soc_imx51
+arm/freescale/imx/imx53_machdep.c optional soc_imx53
arm/arm/bus_space_base.c standard
# Special serial console for debuging early boot code
#arm/freescale/imx/console.c standard
# UART driver (includes serial console support)
-dev/uart/uart_dev_imx.c optional uart
+dev/uart/uart_dev_imx.c optional uart
# TrustZone Interrupt Controller
arm/freescale/imx/tzic.c standard
diff --git a/sys/arm/freescale/imx/files.imx51 b/sys/arm/freescale/imx/files.imx51
deleted file mode 100644
index b779ee2..0000000
--- a/sys/arm/freescale/imx/files.imx51
+++ /dev/null
@@ -1,49 +0,0 @@
-# $FreeBSD$
-arm/arm/bus_space_asm_generic.S standard
-arm/arm/bus_space_generic.c standard
-kern/kern_clocksource.c standard
-
-# Init
-arm/freescale/imx/imx_common.c standard
-arm/freescale/imx/imx_machdep.c standard
-arm/freescale/imx/imx51_machdep.c standard
-arm/arm/bus_space_base.c standard
-
-# Dummy serial console
-#arm/freescale/imx/console.c standard
-
-# TrustZone Interrupt Controller
-arm/freescale/imx/tzic.c standard
-
-# IOMUX - external pins multiplexor
-arm/freescale/imx/imx_iomux.c standard
-
-# GPIO
-arm/freescale/imx/imx_gpio.c optional gpio
-
-# Generic Periodic Timer
-arm/freescale/imx/imx_gpt.c standard
-
-# Clock Configuration Manager
-arm/freescale/imx/imx51_ccm.c standard
-
-# i.MX5xx PATA controller
-dev/ata/chipsets/ata-fsl.c optional imxata
-
-# UART driver
-dev/uart/uart_dev_imx.c optional uart
-
-# USB OH3 controller (1 OTG, 3 EHCI)
-arm/freescale/imx/imx_nop_usbphy.c optional ehci
-dev/usb/controller/ehci_imx.c optional ehci
-
-# Watchdog
-arm/freescale/imx/imx_wdog.c optional imxwdt
-
-# i2c
-arm/freescale/imx/imx_i2c.c optional fsliic
-
-# IPU - Image Processing Unit (frame buffer also)
-arm/freescale/imx/imx51_ipuv3.c optional sc
-arm/freescale/imx/imx51_ipuv3_fbd.c optional vt
-dev/vt/hw/fb/vt_early_fb.c optional vt
diff --git a/sys/arm/freescale/imx/std.imx51 b/sys/arm/freescale/imx/std.imx51
index fbc1349..eca33c2 100644
--- a/sys/arm/freescale/imx/std.imx51
+++ b/sys/arm/freescale/imx/std.imx51
@@ -13,5 +13,4 @@ options PHYSADDR=0x90000000
device fdt_pinctrl
-files "../freescale/imx/files.imx51"
-
+files "../freescale/imx/files.imx5"
diff --git a/sys/arm/freescale/imx/std.imx53 b/sys/arm/freescale/imx/std.imx53
index cbef21a..1da484c 100644
--- a/sys/arm/freescale/imx/std.imx53
+++ b/sys/arm/freescale/imx/std.imx53
@@ -13,5 +13,4 @@ options PHYSADDR=0x70000000
device fdt_pinctrl
-files "../freescale/imx/files.imx53"
-
+files "../freescale/imx/files.imx5"
diff --git a/sys/arm64/arm64/bus_machdep.c b/sys/arm64/arm64/bus_machdep.c
index d8e6646..25a675e 100644
--- a/sys/arm64/arm64/bus_machdep.c
+++ b/sys/arm64/arm64/bus_machdep.c
@@ -169,10 +169,10 @@ struct bus_space memmap_bus = {
.bs_r_8_s = NULL,
/* read multiple stream */
- .bs_rm_1_s = NULL,
- .bs_rm_2_s = NULL,
- .bs_rm_4_s = NULL,
- .bs_rm_8_s = NULL,
+ .bs_rm_1_s = generic_bs_rm_1,
+ .bs_rm_2_s = generic_bs_rm_2,
+ .bs_rm_4_s = generic_bs_rm_4,
+ .bs_rm_8_s = generic_bs_rm_8,
/* read region stream */
.bs_rr_1_s = NULL,
@@ -187,10 +187,10 @@ struct bus_space memmap_bus = {
.bs_w_8_s = NULL,
/* write multiple stream */
- .bs_wm_1_s = NULL,
- .bs_wm_2_s = NULL,
- .bs_wm_4_s = NULL,
- .bs_wm_8_s = NULL,
+ .bs_wm_1_s = generic_bs_wm_1,
+ .bs_wm_2_s = generic_bs_wm_2,
+ .bs_wm_4_s = generic_bs_wm_4,
+ .bs_wm_8_s = generic_bs_wm_8,
/* write region stream */
.bs_wr_1_s = NULL,
diff --git a/sys/arm64/arm64/bus_space_asm.S b/sys/arm64/arm64/bus_space_asm.S
index 52ad5dd..20d4128 100644
--- a/sys/arm64/arm64/bus_space_asm.S
+++ b/sys/arm64/arm64/bus_space_asm.S
@@ -63,7 +63,7 @@ ENTRY(generic_bs_rm_1)
/* Read the data. */
1: ldrb w1, [x0]
- strb w1, [x3], #2
+ strb w1, [x3], #1
subs x4, x4, #1
b.ne 1b
@@ -105,7 +105,7 @@ ENTRY(generic_bs_rm_4)
/* Read the data. */
1: ldr w1, [x0]
- str w1, [x3], #2
+ str w1, [x3], #4
subs x4, x4, #1
b.ne 1b
@@ -126,7 +126,7 @@ ENTRY(generic_bs_rm_8)
/* Read the data. */
1: ldr x1, [x0]
- str x1, [x3], #2
+ str x1, [x3], #8
subs x4, x4, #1
b.ne 1b
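[Note: in these AArch64 copy loops the post-index immediate advances the destination pointer, so it must equal the element size; #2 was correct only for the 16-bit variant and made the byte, word, and doubleword variants store at the wrong stride. A C rendering of the corrected 4-byte loop; rm_4_sketch is an illustrative name.]

	#include <stddef.h>
	#include <stdint.h>

	/* Read 'count' 32-bit values from a single device register into buf;
	 * buf advances 4 bytes per iteration, matching "str w1, [x3], #4". */
	static void
	rm_4_sketch(volatile uint32_t *reg, uint32_t *buf, size_t count)
	{

		while (count-- > 0)
			*buf++ = *reg;
	}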
diff --git a/sys/arm64/arm64/db_disasm.c b/sys/arm64/arm64/db_disasm.c
new file mode 100644
index 0000000..ec943a4
--- /dev/null
+++ b/sys/arm64/arm64/db_disasm.c
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <ddb/ddb.h>
+
+vm_offset_t
+db_disasm(vm_offset_t loc, boolean_t altfmt)
+{
+	return (0);
+}
+
+/* End of db_disasm.c */
diff --git a/sys/arm64/arm64/db_interface.c b/sys/arm64/arm64/db_interface.c
new file mode 100644
index 0000000..38834af
--- /dev/null
+++ b/sys/arm64/arm64/db_interface.c
@@ -0,0 +1,168 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#ifdef KDB
+#include <sys/kdb.h>
+#endif
+
+#include <ddb/ddb.h>
+#include <ddb/db_variables.h>
+
+#include <machine/cpu.h>
+#include <machine/pcb.h>
+#include <machine/vmparam.h>
+
+static int
+db_frame(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+ long *reg;
+
+ if (kdb_frame == NULL)
+ return (0);
+
+ reg = (long *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
+ if (op == DB_VAR_GET)
+ *valuep = *reg;
+ else
+ *reg = *valuep;
+ return (1);
+}
+
+#define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x)
+struct db_variable db_regs[] = {
+ { "spsr", DB_OFFSET(tf_spsr), db_frame },
+ { "x0", DB_OFFSET(tf_x[0]), db_frame },
+ { "x1", DB_OFFSET(tf_x[1]), db_frame },
+ { "x2", DB_OFFSET(tf_x[2]), db_frame },
+ { "x3", DB_OFFSET(tf_x[3]), db_frame },
+ { "x4", DB_OFFSET(tf_x[4]), db_frame },
+ { "x5", DB_OFFSET(tf_x[5]), db_frame },
+ { "x6", DB_OFFSET(tf_x[6]), db_frame },
+ { "x7", DB_OFFSET(tf_x[7]), db_frame },
+ { "x8", DB_OFFSET(tf_x[8]), db_frame },
+ { "x9", DB_OFFSET(tf_x[9]), db_frame },
+ { "x10", DB_OFFSET(tf_x[10]), db_frame },
+ { "x11", DB_OFFSET(tf_x[11]), db_frame },
+ { "x12", DB_OFFSET(tf_x[12]), db_frame },
+ { "x13", DB_OFFSET(tf_x[13]), db_frame },
+ { "x14", DB_OFFSET(tf_x[14]), db_frame },
+ { "x15", DB_OFFSET(tf_x[15]), db_frame },
+ { "x16", DB_OFFSET(tf_x[16]), db_frame },
+ { "x17", DB_OFFSET(tf_x[17]), db_frame },
+ { "x18", DB_OFFSET(tf_x[18]), db_frame },
+ { "x19", DB_OFFSET(tf_x[19]), db_frame },
+ { "x20", DB_OFFSET(tf_x[20]), db_frame },
+ { "x21", DB_OFFSET(tf_x[21]), db_frame },
+ { "x22", DB_OFFSET(tf_x[22]), db_frame },
+ { "x23", DB_OFFSET(tf_x[23]), db_frame },
+ { "x24", DB_OFFSET(tf_x[24]), db_frame },
+ { "x25", DB_OFFSET(tf_x[25]), db_frame },
+ { "x26", DB_OFFSET(tf_x[26]), db_frame },
+ { "x27", DB_OFFSET(tf_x[27]), db_frame },
+ { "x28", DB_OFFSET(tf_x[28]), db_frame },
+ { "x29", DB_OFFSET(tf_x[29]), db_frame },
+ { "lr", DB_OFFSET(tf_lr), db_frame },
+ { "elr", DB_OFFSET(tf_elr), db_frame },
+ { "sp", DB_OFFSET(tf_sp), db_frame },
+};
+
+struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
+
+void
+db_show_mdpcpu(struct pcpu *pc)
+{
+}
+
+static int
+db_validate_address(vm_offset_t addr)
+{
+ struct proc *p = curproc;
+ struct pmap *pmap;
+
+ if (!p || !p->p_vmspace || !p->p_vmspace->vm_map.pmap ||
+ addr >= VM_MAXUSER_ADDRESS)
+ pmap = pmap_kernel();
+ else
+ pmap = p->p_vmspace->vm_map.pmap;
+
+ return (pmap_extract(pmap, addr) == FALSE);
+}
+
+/*
+ * Read bytes from kernel address space for debugger.
+ */
+int
+db_read_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ const char *src = (const char *)addr;
+
+ while (size-- > 0) {
+ if (db_validate_address((vm_offset_t)src)) {
+ db_printf("address %p is invalid\n", src);
+ return (-1);
+ }
+ *data++ = *src++;
+ }
+ return (0);
+}
+
+/*
+ * Write bytes to kernel address space for debugger.
+ */
+int
+db_write_bytes(vm_offset_t addr, size_t size, char *data)
+{
+ char *dst;
+
+ dst = (char *)addr;
+ while (size-- > 0) {
+ if (db_validate_address((vm_offset_t)dst)) {
+ db_printf("address %p is invalid\n", dst);
+ return (-1);
+ }
+ *dst++ = *data++;
+ }
+
+ dsb();
+	/* Clean D-cache and invalidate I-cache; 'size' was consumed above. */
+	cpu_dcache_wb_range(addr, (vm_size_t)((vm_offset_t)dst - addr));
+	cpu_icache_sync_range(addr, (vm_size_t)((vm_offset_t)dst - addr));
+ dsb();
+ isb();
+
+ return (0);
+}
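[Note: the db_regs[] table above stores no pointers into a live frame; DB_OFFSET() records each field's offset within struct trapframe, and db_frame() rebases that offset onto kdb_frame at access time, so the same table works for whatever frame the debugger stopped in. A standalone sketch of the pattern; struct tf, cur_frame, and reg_ptr are stand-ins, not kernel names.]

	#include <stddef.h>
	#include <stdint.h>

	struct tf { long spsr; long x[31]; };	/* stand-in for trapframe */
	static struct tf *cur_frame;		/* stand-in for kdb_frame */

	/* Offset captured at table-build time, frame bound at access time. */
	static long *
	reg_ptr(size_t off)
	{

		return ((long *)((uintptr_t)cur_frame + off));
	}
	/* Usage: *reg_ptr(offsetof(struct tf, x[0])) accesses x0. */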
diff --git a/sys/arm64/arm64/db_trace.c b/sys/arm64/arm64/db_trace.c
new file mode 100644
index 0000000..1e89bac
--- /dev/null
+++ b/sys/arm64/arm64/db_trace.c
@@ -0,0 +1,152 @@
+/*-
+ * Copyright (c) 2015 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kdb.h>
+#include <machine/pcb.h>
+#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
+
+#include <machine/armreg.h>
+#include <machine/debug_monitor.h>
+
+struct unwind_state {
+ uint64_t fp;
+ uint64_t sp;
+ uint64_t pc;
+};
+
+void
+db_md_list_watchpoints()
+{
+
+ dbg_show_watchpoint();
+}
+
+int
+db_md_clr_watchpoint(db_expr_t addr, db_expr_t size)
+{
+
+ return (dbg_remove_watchpoint(addr, size, DBG_FROM_EL1));
+}
+
+int
+db_md_set_watchpoint(db_expr_t addr, db_expr_t size)
+{
+
+ return (dbg_setup_watchpoint(addr, size, DBG_FROM_EL1,
+ HW_BREAKPOINT_RW));
+}
+
+static int
+db_unwind_frame(struct unwind_state *frame)
+{
+ uint64_t fp = frame->fp;
+
+ if (fp == 0)
+		return (-1);
+
+ frame->sp = fp + 0x10;
+ /* FP to previous frame (X29) */
+ frame->fp = *(uint64_t *)(fp);
+ /* LR (X30) */
+ frame->pc = *(uint64_t *)(fp + 8) - 4;
+ return (0);
+}
+
+static void
+db_stack_trace_cmd(struct unwind_state *frame)
+{
+ c_db_sym_t sym;
+ const char *name;
+ db_expr_t value;
+ db_expr_t offset;
+
+ while (1) {
+ uint64_t pc = frame->pc;
+ int ret;
+
+ ret = db_unwind_frame(frame);
+ if (ret < 0)
+ break;
+
+ sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
+ if (sym == C_DB_SYM_NULL) {
+ value = 0;
+ name = "(null)";
+ } else
+ db_symbol_values(sym, &name, &value);
+
+ db_printf("%s() at ", name);
+ db_printsym(frame->pc, DB_STGY_PROC);
+ db_printf("\n");
+
+ db_printf("\t pc = 0x%016lx lr = 0x%016lx\n", pc,
+ frame->pc);
+ db_printf("\t sp = 0x%016lx fp = 0x%016lx\n", frame->sp,
+ frame->fp);
+ /* TODO: Show some more registers */
+ db_printf("\n");
+ }
+}
+
+int
+db_trace_thread(struct thread *thr, int count)
+{
+ struct unwind_state frame;
+ struct pcb *ctx;
+
+ if (thr != curthread) {
+ ctx = kdb_thr_ctx(thr);
+
+ frame.sp = (uint64_t)ctx->pcb_sp;
+ frame.fp = (uint64_t)ctx->pcb_x[29];
+ frame.pc = (uint64_t)ctx->pcb_x[30];
+ db_stack_trace_cmd(&frame);
+ } else
+ db_trace_self();
+ return (0);
+}
+
+void
+db_trace_self(void)
+{
+ struct unwind_state frame;
+ uint64_t sp;
+
+ __asm __volatile("mov %0, sp" : "=&r" (sp));
+
+ frame.sp = sp;
+ frame.fp = (uint64_t)__builtin_frame_address(0);
+ frame.pc = (uint64_t)db_trace_self;
+ db_stack_trace_cmd(&frame);
+}
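[Note: the unwinder above relies on the AAPCS64 frame record: x29 points at a two-slot record {previous x29, saved x30}, so the caller's frame pointer is at [fp] and the return address at [fp + 8]; subtracting 4 backs the return address up to the call site. A sketch of one unwind step under those assumptions; frame_record and unwind_once are illustrative names.]

	#include <stdint.h>

	struct frame_record {		/* AAPCS64 record at the address in x29 */
		uint64_t prev_fp;	/* caller's x29 */
		uint64_t saved_lr;	/* saved x30: address after the bl */
	};

	static int
	unwind_once(uint64_t *fp, uint64_t *pc)
	{
		const struct frame_record *fr;

		if (*fp == 0)
			return (-1);
		fr = (const struct frame_record *)(uintptr_t)*fp;
		*pc = fr->saved_lr - 4;	/* back up to the call instruction */
		*fp = fr->prev_fp;
		return (0);
	}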
diff --git a/sys/arm64/arm64/debug_monitor.c b/sys/arm64/arm64/debug_monitor.c
new file mode 100644
index 0000000..50d663d
--- /dev/null
+++ b/sys/arm64/arm64/debug_monitor.c
@@ -0,0 +1,487 @@
+/*-
+ * Copyright (c) 2014 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf under
+ * the sponsorship of the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/kdb.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <machine/armreg.h>
+#include <machine/cpu.h>
+#include <machine/debug_monitor.h>
+#include <machine/kdb.h>
+#include <machine/param.h>
+
+#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
+
+enum dbg_t {
+ DBG_TYPE_BREAKPOINT = 0,
+ DBG_TYPE_WATCHPOINT = 1,
+};
+
+static int dbg_watchpoint_num;
+static int dbg_breakpoint_num;
+static int dbg_ref_count_mde[MAXCPU];
+static int dbg_ref_count_kde[MAXCPU];
+
+/* Watchpoints/breakpoints control register bitfields */
+#define DBG_WATCH_CTRL_LEN_1 (0x1 << 5)
+#define DBG_WATCH_CTRL_LEN_2 (0x3 << 5)
+#define DBG_WATCH_CTRL_LEN_4 (0xf << 5)
+#define DBG_WATCH_CTRL_LEN_8 (0xff << 5)
+#define DBG_WATCH_CTRL_LEN_MASK(x) ((x) & (0xff << 5))
+#define DBG_WATCH_CTRL_EXEC (0x0 << 3)
+#define DBG_WATCH_CTRL_LOAD (0x1 << 3)
+#define DBG_WATCH_CTRL_STORE (0x2 << 3)
+#define DBG_WATCH_CTRL_ACCESS_MASK(x) ((x) & (0x3 << 3))
+
+/* Common for breakpoint and watchpoint */
+#define DBG_WB_CTRL_EL1 (0x1 << 1)
+#define DBG_WB_CTRL_EL0 (0x2 << 1)
+#define DBG_WB_CTRL_ELX_MASK(x) ((x) & (0x3 << 1))
+#define DBG_WB_CTRL_E (0x1 << 0)
+
+#define DBG_REG_BASE_BVR 0
+#define DBG_REG_BASE_BCR (DBG_REG_BASE_BVR + 16)
+#define DBG_REG_BASE_WVR (DBG_REG_BASE_BCR + 16)
+#define DBG_REG_BASE_WCR (DBG_REG_BASE_WVR + 16)
+
+/* Watchpoint/breakpoint helpers */
+#define DBG_WB_WVR "wvr"
+#define DBG_WB_WCR "wcr"
+#define DBG_WB_BVR "bvr"
+#define DBG_WB_BCR "bcr"
+
+#define DBG_WB_READ(reg, num, val) do { \
+ __asm __volatile("mrs %0, dbg" reg #num "_el1" : "=r" (val)); \
+} while (0)
+
+#define DBG_WB_WRITE(reg, num, val) do { \
+ __asm __volatile("msr dbg" reg #num "_el1, %0" :: "r" (val)); \
+} while (0)
+
+#define READ_WB_REG_CASE(reg, num, offset, val) \
+ case (num + offset): \
+ DBG_WB_READ(reg, num, val); \
+ break
+
+#define WRITE_WB_REG_CASE(reg, num, offset, val) \
+ case (num + offset): \
+ DBG_WB_WRITE(reg, num, val); \
+ break
+
+#define SWITCH_CASES_READ_WB_REG(reg, offset, val) \
+ READ_WB_REG_CASE(reg, 0, offset, val); \
+ READ_WB_REG_CASE(reg, 1, offset, val); \
+ READ_WB_REG_CASE(reg, 2, offset, val); \
+ READ_WB_REG_CASE(reg, 3, offset, val); \
+ READ_WB_REG_CASE(reg, 4, offset, val); \
+ READ_WB_REG_CASE(reg, 5, offset, val); \
+ READ_WB_REG_CASE(reg, 6, offset, val); \
+ READ_WB_REG_CASE(reg, 7, offset, val); \
+ READ_WB_REG_CASE(reg, 8, offset, val); \
+ READ_WB_REG_CASE(reg, 9, offset, val); \
+ READ_WB_REG_CASE(reg, 10, offset, val); \
+ READ_WB_REG_CASE(reg, 11, offset, val); \
+ READ_WB_REG_CASE(reg, 12, offset, val); \
+ READ_WB_REG_CASE(reg, 13, offset, val); \
+ READ_WB_REG_CASE(reg, 14, offset, val); \
+ READ_WB_REG_CASE(reg, 15, offset, val)
+
+#define SWITCH_CASES_WRITE_WB_REG(reg, offset, val) \
+ WRITE_WB_REG_CASE(reg, 0, offset, val); \
+ WRITE_WB_REG_CASE(reg, 1, offset, val); \
+ WRITE_WB_REG_CASE(reg, 2, offset, val); \
+ WRITE_WB_REG_CASE(reg, 3, offset, val); \
+ WRITE_WB_REG_CASE(reg, 4, offset, val); \
+ WRITE_WB_REG_CASE(reg, 5, offset, val); \
+ WRITE_WB_REG_CASE(reg, 6, offset, val); \
+ WRITE_WB_REG_CASE(reg, 7, offset, val); \
+ WRITE_WB_REG_CASE(reg, 8, offset, val); \
+ WRITE_WB_REG_CASE(reg, 9, offset, val); \
+ WRITE_WB_REG_CASE(reg, 10, offset, val); \
+ WRITE_WB_REG_CASE(reg, 11, offset, val); \
+ WRITE_WB_REG_CASE(reg, 12, offset, val); \
+ WRITE_WB_REG_CASE(reg, 13, offset, val); \
+ WRITE_WB_REG_CASE(reg, 14, offset, val); \
+ WRITE_WB_REG_CASE(reg, 15, offset, val)
+
+static uint64_t
+dbg_wb_read_reg(int reg, int n)
+{
+ uint64_t val = 0;
+
+ switch (reg + n) {
+ SWITCH_CASES_READ_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
+ SWITCH_CASES_READ_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
+ SWITCH_CASES_READ_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
+ SWITCH_CASES_READ_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
+ default:
+ db_printf("trying to read from wrong debug register %d\n", n);
+ }
+
+	return (val);
+}
+
+static void
+dbg_wb_write_reg(int reg, int n, uint64_t val)
+{
+ switch (reg + n) {
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_WVR, DBG_REG_BASE_WVR, val);
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_WCR, DBG_REG_BASE_WCR, val);
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_BVR, DBG_REG_BASE_BVR, val);
+ SWITCH_CASES_WRITE_WB_REG(DBG_WB_BCR, DBG_REG_BASE_BCR, val);
+ default:
+ db_printf("trying to write to wrong debug register %d\n", n);
+ }
+ isb();
+}
+
+void
+kdb_cpu_set_singlestep(void)
+{
+
+ kdb_frame->tf_spsr |= DBG_SPSR_SS;
+ WRITE_SPECIALREG(MDSCR_EL1, READ_SPECIALREG(MDSCR_EL1) |
+ DBG_MDSCR_SS | DBG_MDSCR_KDE);
+
+ /*
+	 * Disable breakpoints and watchpoints; otherwise, stepping over a
+	 * watched instruction triggers a break exception instead of a
+	 * single-step exception and locks the CPU on that instruction forever.
+ */
+ if (dbg_ref_count_mde[PCPU_GET(cpuid)] > 0) {
+ WRITE_SPECIALREG(MDSCR_EL1,
+ READ_SPECIALREG(MDSCR_EL1) & ~DBG_MDSCR_MDE);
+ }
+}
+
+void
+kdb_cpu_clear_singlestep(void)
+{
+
+ WRITE_SPECIALREG(MDSCR_EL1, READ_SPECIALREG(MDSCR_EL1) &
+ ~(DBG_MDSCR_SS | DBG_MDSCR_KDE));
+
+ /* Restore breakpoints and watchpoints */
+ if (dbg_ref_count_mde[PCPU_GET(cpuid)] > 0) {
+ WRITE_SPECIALREG(MDSCR_EL1,
+ READ_SPECIALREG(MDSCR_EL1) | DBG_MDSCR_MDE);
+ }
+
+ if (dbg_ref_count_kde[PCPU_GET(cpuid)] > 0) {
+ WRITE_SPECIALREG(MDSCR_EL1,
+ READ_SPECIALREG(MDSCR_EL1) | DBG_MDSCR_KDE);
+ }
+}
+
+static const char *
+dbg_watchtype_str(uint32_t type)
+{
+ switch (type) {
+ case DBG_WATCH_CTRL_EXEC:
+ return ("execute");
+ case DBG_WATCH_CTRL_STORE:
+ return ("write");
+ case DBG_WATCH_CTRL_LOAD:
+ return ("read");
+ case DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE:
+ return ("read/write");
+ default:
+ return ("invalid");
+ }
+}
+
+static int
+dbg_watchtype_len(uint32_t len)
+{
+ switch (len) {
+ case DBG_WATCH_CTRL_LEN_1:
+ return (1);
+ case DBG_WATCH_CTRL_LEN_2:
+ return (2);
+ case DBG_WATCH_CTRL_LEN_4:
+ return (4);
+ case DBG_WATCH_CTRL_LEN_8:
+ return (8);
+ default:
+ return (0);
+ }
+}
+
+void
+dbg_show_watchpoint(void)
+{
+ uint32_t wcr, len, type;
+ uint64_t addr;
+ int i;
+
+ db_printf("\nhardware watchpoints:\n");
+ db_printf(" watch status type len address symbol\n");
+ db_printf(" ----- -------- ---------- --- ------------------ ------------------\n");
+ for (i = 0; i < dbg_watchpoint_num; i++) {
+ wcr = dbg_wb_read_reg(DBG_REG_BASE_WCR, i);
+ if ((wcr & DBG_WB_CTRL_E) != 0) {
+ type = DBG_WATCH_CTRL_ACCESS_MASK(wcr);
+ len = DBG_WATCH_CTRL_LEN_MASK(wcr);
+ addr = dbg_wb_read_reg(DBG_REG_BASE_WVR, i);
+ db_printf(" %-5d %-8s %10s %3d 0x%16lx ",
+ i, "enabled", dbg_watchtype_str(type),
+ dbg_watchtype_len(len), addr);
+ db_printsym((db_addr_t)addr, DB_STGY_ANY);
+ db_printf("\n");
+ } else {
+ db_printf(" %-5d disabled\n", i);
+ }
+ }
+}
+
+
+static int
+dbg_find_free_slot(enum dbg_t type)
+{
+ u_int max, reg, i;
+
+ switch(type) {
+ case DBG_TYPE_BREAKPOINT:
+ max = dbg_breakpoint_num;
+ reg = DBG_REG_BASE_BCR;
+
+ break;
+ case DBG_TYPE_WATCHPOINT:
+ max = dbg_watchpoint_num;
+ reg = DBG_REG_BASE_WCR;
+ break;
+ default:
+ db_printf("Unsupported debug type\n");
+		return (-1);
+ }
+
+ for (i = 0; i < max; i++) {
+ if ((dbg_wb_read_reg(reg, i) & DBG_WB_CTRL_E) == 0)
+ return (i);
+ }
+
+ return (-1);
+}
+
+static int
+dbg_find_slot(enum dbg_t type, db_expr_t addr)
+{
+ u_int max, reg_addr, reg_ctrl, i;
+
+ switch(type) {
+ case DBG_TYPE_BREAKPOINT:
+ max = dbg_breakpoint_num;
+ reg_addr = DBG_REG_BASE_BVR;
+ reg_ctrl = DBG_REG_BASE_BCR;
+ break;
+ case DBG_TYPE_WATCHPOINT:
+ max = dbg_watchpoint_num;
+ reg_addr = DBG_REG_BASE_WVR;
+ reg_ctrl = DBG_REG_BASE_WCR;
+ break;
+ default:
+ db_printf("Unsupported debug type\n");
+		return (-1);
+ }
+
+ for (i = 0; i < max; i++) {
+ if ((dbg_wb_read_reg(reg_addr, i) == addr) &&
+ ((dbg_wb_read_reg(reg_ctrl, i) & DBG_WB_CTRL_E) != 0))
+ return (i);
+ }
+
+ return (-1);
+}
+
+static void
+dbg_enable_monitor(enum dbg_el_t el)
+{
+ uint64_t reg_mdcr = 0;
+
+ /*
+	 * There is no need to keep the debug monitor enabled permanently,
+	 * so it is refcounted and enabled only while some CPU is using it.
+ */
+ if (atomic_fetchadd_int(&dbg_ref_count_mde[PCPU_GET(cpuid)], 1) == 0)
+ reg_mdcr = DBG_MDSCR_MDE;
+
+ if ((el == DBG_FROM_EL1) &&
+ atomic_fetchadd_int(&dbg_ref_count_kde[PCPU_GET(cpuid)], 1) == 0)
+ reg_mdcr |= DBG_MDSCR_KDE;
+
+ if (reg_mdcr)
+ WRITE_SPECIALREG(MDSCR_EL1, READ_SPECIALREG(MDSCR_EL1) | reg_mdcr);
+}
+
+static void
+dbg_disable_monitor(enum dbg_el_t el)
+{
+ uint64_t reg_mdcr = 0;
+
+ if (atomic_fetchadd_int(&dbg_ref_count_mde[PCPU_GET(cpuid)], -1) == 1)
+ reg_mdcr = DBG_MDSCR_MDE;
+
+ if ((el == DBG_FROM_EL1) &&
+ atomic_fetchadd_int(&dbg_ref_count_kde[PCPU_GET(cpuid)], -1) == 1)
+ reg_mdcr |= DBG_MDSCR_KDE;
+
+ if (reg_mdcr)
+ WRITE_SPECIALREG(MDSCR_EL1, READ_SPECIALREG(MDSCR_EL1) & ~reg_mdcr);
+}
+
+int
+dbg_setup_watchpoint(db_expr_t addr, db_expr_t size, enum dbg_el_t el,
+ enum dbg_access_t access)
+{
+ uint64_t wcr_size, wcr_priv, wcr_access;
+	int i;
+
+ i = dbg_find_free_slot(DBG_TYPE_WATCHPOINT);
+ if (i == -1) {
+ db_printf("Can not find slot for watchpoint, max %d"
+ " watchpoints supported\n", dbg_watchpoint_num);
+ return (i);
+ }
+
+ switch(size) {
+ case 1:
+ wcr_size = DBG_WATCH_CTRL_LEN_1;
+ break;
+ case 2:
+ wcr_size = DBG_WATCH_CTRL_LEN_2;
+ break;
+ case 4:
+ wcr_size = DBG_WATCH_CTRL_LEN_4;
+ break;
+ case 8:
+ wcr_size = DBG_WATCH_CTRL_LEN_8;
+ break;
+ default:
+ db_printf("Unsupported address size for watchpoint\n");
+ return (-1);
+ }
+
+ switch(el) {
+ case DBG_FROM_EL0:
+ wcr_priv = DBG_WB_CTRL_EL0;
+ break;
+ case DBG_FROM_EL1:
+ wcr_priv = DBG_WB_CTRL_EL1;
+ break;
+ default:
+ db_printf("Unsupported exception level for watchpoint\n");
+ return (-1);
+ }
+
+ switch(access) {
+ case HW_BREAKPOINT_X:
+ wcr_access = DBG_WATCH_CTRL_EXEC;
+ break;
+ case HW_BREAKPOINT_R:
+ wcr_access = DBG_WATCH_CTRL_LOAD;
+ break;
+ case HW_BREAKPOINT_W:
+ wcr_access = DBG_WATCH_CTRL_STORE;
+ break;
+ case HW_BREAKPOINT_RW:
+ wcr_access = DBG_WATCH_CTRL_LOAD | DBG_WATCH_CTRL_STORE;
+ break;
+ default:
+ db_printf("Unsupported exception level for watchpoint\n");
+ return (-1);
+ }
+
+ dbg_wb_write_reg(DBG_REG_BASE_WVR, i, addr);
+ dbg_wb_write_reg(DBG_REG_BASE_WCR, i, wcr_size | wcr_access | wcr_priv |
+ DBG_WB_CTRL_E);
+ dbg_enable_monitor(el);
+ return (0);
+}
+
+int
+dbg_remove_watchpoint(db_expr_t addr, db_expr_t size, enum dbg_el_t el)
+{
+	int i;
+
+ i = dbg_find_slot(DBG_TYPE_WATCHPOINT, addr);
+ if (i == -1) {
+ db_printf("Can not find watchpoint for address 0%lx\n", addr);
+ return (i);
+ }
+
+ dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+ dbg_disable_monitor(el);
+ return (0);
+}
+
+void
+dbg_monitor_init(void)
+{
+ u_int i;
+
+ /* Clear OS lock */
+ WRITE_SPECIALREG(OSLAR_EL1, 0);
+
+	/* Find out how many breakpoints and watchpoints we can use */
+ dbg_watchpoint_num = ((READ_SPECIALREG(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+ dbg_breakpoint_num = ((READ_SPECIALREG(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+
+ if (bootverbose && PCPU_GET(cpuid) == 0) {
+ db_printf("%d watchpoints and %d breakpoints supported\n",
+ dbg_watchpoint_num, dbg_breakpoint_num);
+ }
+
+ /*
+	 * We have a limited number of {watch,break}points, each consisting of
+	 * two registers:
+	 * - the wcr/bcr register configures the corresponding
+	 *   {watch,break}point's behaviour
+	 * - the wvr/bvr register holds the address being watched for
+ *
+ * Reset all breakpoints and watchpoints.
+ */
+ for (i = 0; i < dbg_watchpoint_num; ++i) {
+ dbg_wb_write_reg(DBG_REG_BASE_WCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_WVR, i, 0);
+ }
+
+ for (i = 0; i < dbg_breakpoint_num; ++i) {
+ dbg_wb_write_reg(DBG_REG_BASE_BCR, i, 0);
+ dbg_wb_write_reg(DBG_REG_BASE_BVR, i, 0);
+ }
+
+ dbg_enable();
+}
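[Note: the DBG_WATCH_CTRL_LEN_* encodings above come from the byte-address-select field of DBGWCR<n>_EL1 (bits 12:5): one bit per byte watched, so lengths 1, 2, 4, and 8 map to masks 0x1, 0x3, 0xf, and 0xff. A sketch of that mapping for the power-of-two sizes dbg_setup_watchpoint() accepts; wcr_len_bits is an illustrative name.]

	#include <stdint.h>

	/* size in {1, 2, 4, 8} -> byte-address-select value at bits 12:5 */
	static uint64_t
	wcr_len_bits(unsigned int size)
	{

		return (((1u << size) - 1) << 5);
	}
	/* wcr_len_bits(4) == 0xf << 5 == DBG_WATCH_CTRL_LEN_4 */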
diff --git a/sys/arm64/arm64/nexus.c b/sys/arm64/arm64/nexus.c
index 15b18b5..bc38330 100644
--- a/sys/arm64/arm64/nexus.c
+++ b/sys/arm64/arm64/nexus.c
@@ -208,12 +208,12 @@ nexus_alloc_resource(device_t bus, device_t child, int type, int *rid,
break;
default:
- return (0);
+ return (NULL);
}
rv = rman_reserve_resource(rm, start, end, count, flags, child);
if (rv == 0)
- return (0);
+ return (NULL);
rman_set_rid(rv, *rid);
rman_set_bushandle(rv, rman_get_start(rv));
@@ -221,7 +221,7 @@ nexus_alloc_resource(device_t bus, device_t child, int type, int *rid,
if (needactivate) {
if (bus_activate_resource(child, type, *rid, rv)) {
rman_release_resource(rv);
- return (0);
+ return (NULL);
}
}
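[Note: this fix matters because nexus_alloc_resource() returns struct resource *, and callers test the result against NULL; the bare integer 0 happened to work but obscured the contract. A sketch of a typical consumer via the bus interface; dev and rid are illustrative.]

	struct resource *res;
	int rid = 0;

	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return (ENXIO);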
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 174b023..c192fa6 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2936,6 +2936,18 @@ pmap_clear_modify(vm_page_t m)
/* TODO: We lack support for tracking if a page is modified */
}
+void *
+pmap_mapbios(vm_paddr_t pa, vm_size_t size)
+{
+
+ return ((void *)PHYS_TO_DMAP(pa));
+}
+
+void
+pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
+{
+}
+
/*
* Sets the memory attribute for the specified page.
*/
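[Note: on arm64 all physical memory is reachable through the direct map, so pmap_mapbios() can simply translate with PHYS_TO_DMAP() and pmap_unmapbios() has nothing to tear down. A hedged usage sketch; pa, len, and buf are illustrative, and other ports may build real mappings here.]

	/* Map a firmware table, copy it out, unmap; effectively free on
	 * arm64 since the direct map already covers the address. */
	void *p;

	p = pmap_mapbios(pa, len);
	memcpy(buf, p, len);
	pmap_unmapbios(pa, len);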
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
index e5401ef..63ae34c 100644
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -142,7 +142,9 @@ void pmap_kremove(vm_offset_t);
void pmap_kremove_device(vm_offset_t, vm_size_t);
void *pmap_mapdev(vm_offset_t, vm_size_t);
+void *pmap_mapbios(vm_paddr_t, vm_size_t);
void pmap_unmapdev(vm_offset_t, vm_size_t);
+void pmap_unmapbios(vm_offset_t, vm_size_t);
boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
diff --git a/sys/boot/common/md.c b/sys/boot/common/md.c
index 6d2d2b4..a8f092b9 100644
--- a/sys/boot/common/md.c
+++ b/sys/boot/common/md.c
@@ -27,11 +27,11 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <stand.h>
#include <sys/param.h>
#include <sys/endian.h>
#include <sys/queue.h>
#include <machine/stdarg.h>
-#include <stand.h>
#include "bootstrap.h"
diff --git a/sys/boot/fdt/dts/arm/bcm2836.dtsi b/sys/boot/fdt/dts/arm/bcm2836.dtsi
index 7fdaecf..bd75c84 100644
--- a/sys/boot/fdt/dts/arm/bcm2836.dtsi
+++ b/sys/boot/fdt/dts/arm/bcm2836.dtsi
@@ -101,6 +101,12 @@
*/
};
+ watchdog0 {
+ compatible = "broadcom,bcm2835-wdt",
+ "broadcom,bcm2708-wdt";
+ reg = <0x10001c 0x0c>; /* 0x1c, 0x20, 0x24 */
+ };
+
gpio: gpio {
compatible = "broadcom,bcm2835-gpio",
"broadcom,bcm2708-gpio";
@@ -125,11 +131,11 @@
pinctrl-names = "default";
pinctrl-0 = <&pins_reserved>;
- /* Pins that can short 3.3V to GND in output mode: 46-47
+ /* Pins that can short 3.3V to GND in output mode: 46
* Pins used by VideoCore: 48-53
*/
- broadcom,read-only = <46>, <47>, <48>, <49>, <50>,
- <51>, <52>, <53>;
+ broadcom,read-only = <46>, <48>, <49>, <50>,
+ <51>, <52>, <53>;
/* BSC0 */
pins_bsc0_a: bsc0_a {
@@ -431,7 +437,7 @@
interrupts = <70>;
interrupt-parent = <&intc>;
- clock-frequency = <2500000000>; /* Set by VideoCore */
+ clock-frequency = <250000000>; /* Set by VideoCore */
};
uart0: uart0 {
diff --git a/sys/boot/fdt/dts/arm/rpi2.dts b/sys/boot/fdt/dts/arm/rpi2.dts
index 59974f1..1c8c559 100644
--- a/sys/boot/fdt/dts/arm/rpi2.dts
+++ b/sys/boot/fdt/dts/arm/rpi2.dts
@@ -322,18 +322,14 @@
leds {
compatible = "gpio-leds";
- ok {
- label = "ok";
- gpios = <&gpio 16 1>;
-
- /* Don't change this - it configures
- * how the led driver determines if
- * the led is on or off when it loads.
- */
- default-state = "keep";
-
- /* This is the real default state. */
- linux,default-trigger = "default-on";
+ pwr {
+ label = "pwr";
+ gpios = <&gpio 35 0>;
+ };
+
+ act {
+ label = "act";
+ gpios = <&gpio 47 0>;
};
};
diff --git a/sys/boot/forth/loader.conf b/sys/boot/forth/loader.conf
index 275a58b..240e403 100644
--- a/sys/boot/forth/loader.conf
+++ b/sys/boot/forth/loader.conf
@@ -48,6 +48,16 @@ entropy_cache_type="/boot/entropy"
#kern.random.sys.seeded="0" # Set this to 1 to start /dev/random
# without waiting for a (re)seed.
+##############################################################
+### RAM Blacklist configuration #############################
+##############################################################
+
+ram_blacklist_load="NO" # Set this to YES to load a file
+ # containing a list of addresses to
+ # exclude from the running system.
+ram_blacklist_name="/boot/blacklist.txt" # Set this to the name of the file
+ram_blacklist_type="ram_blacklist" # Required for the kernel to find
+ # the blacklist module
##############################################################
### Loader settings ########################################
diff --git a/sys/boot/i386/common/edd.h b/sys/boot/i386/common/edd.h
index 4a204f7..7d1f450 100644
--- a/sys/boot/i386/common/edd.h
+++ b/sys/boot/i386/common/edd.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2011 Advanced Computing Technologies LLC
+ * Copyright (c) 2011 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/boot/libstand32/Makefile b/sys/boot/libstand32/Makefile
index a08a8e9..c41dc21 100644
--- a/sys/boot/libstand32/Makefile
+++ b/sys/boot/libstand32/Makefile
@@ -11,9 +11,10 @@ MAN=
.include <src.opts.mk>
MK_SSP= no
-S= ${.CURDIR}/../../../lib/libstand
+LIBSTAND_SRC= ${.CURDIR}/../../../lib/libstand
+LIBC_SRC= ${LIBSTAND_SRC}/../libc
-.PATH: ${S}
+.PATH: ${LIBSTAND_SRC}
LIB= stand
INTERNALLIB=
MK_PROFILE= no
@@ -22,7 +23,7 @@ NO_PIC=
WARNS?= 0
CFLAGS+= -ffreestanding -Wformat
-CFLAGS+= -I${S}
+CFLAGS+= -I${LIBSTAND_SRC}
.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
CFLAGS.gcc+= -mpreferred-stack-boundary=2
@@ -52,56 +53,54 @@ SRCS+= gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c g
# private (pruned) versions of libc string functions
SRCS+= strcasecmp.c
-LIBC= ${S}/../libc
-
-.PATH: ${LIBC}/net
+.PATH: ${LIBC_SRC}/net
SRCS+= ntoh.c
# string functions from libc
-.PATH: ${LIBC}/string
+.PATH: ${LIBC_SRC}/string
SRCS+= bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \
memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \
strcspn.c strlen.c strncat.c strncmp.c strncpy.c strpbrk.c \
strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c
.if ${MACHINE_CPUARCH} == "arm"
-.PATH: ${LIBC}/arm/gen
+.PATH: ${LIBC_SRC}/arm/gen
# Compiler support functions
-.PATH: ${.CURDIR}/../../../contrib/compiler-rt/lib/builtins/
+.PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/
# __clzsi2 and ctzsi2 for various builtin functions
SRCS+= clzsi2.c ctzsi2.c
# Divide and modulus functions called by the compiler
SRCS+= divmoddi4.c divmodsi4.c divdi3.c divsi3.c moddi3.c modsi3.c
SRCS+= udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c
-.PATH: ${.CURDIR}/../../../contrib/compiler-rt/lib/builtins/arm/
+.PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/arm/
SRCS+= aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S
SRCS+= aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S
.endif
.if ${MACHINE_CPUARCH} == "powerpc"
-.PATH: ${LIBC}/quad
+.PATH: ${LIBC_SRC}/quad
SRCS+= ashldi3.c ashrdi3.c
SRCS+= syncicache.c
.endif
# uuid functions from libc
-.PATH: ${LIBC}/uuid
+.PATH: ${LIBC_SRC}/uuid
SRCS+= uuid_equal.c uuid_is_nil.c
# _setjmp/_longjmp
.if ${MACHINE_CPUARCH} == "amd64"
-.PATH: ${S}/i386
+.PATH: ${LIBSTAND_SRC}/i386
.else
-.PATH: ${S}/${MACHINE_CPUARCH}
+.PATH: ${LIBSTAND_SRC}/${MACHINE_CPUARCH}
.endif
SRCS+= _setjmp.S
# decompression functionality from libbz2
# NOTE: to actually test this functionality after libbz2 upgrade compile
# loader(8) with LOADER_BZIP2_SUPPORT defined
-.PATH: ${.CURDIR}/../../../contrib/bzip2
+.PATH: ${LIBSTAND_SRC}/../../contrib/bzip2
CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS
SRCS+= libstand_bzlib_private.h
@@ -110,7 +109,8 @@ SRCS+= _${file}
CLEANFILES+= _${file}
_${file}: ${file}
- sed "s|bzlib_private\.h|libstand_bzlib_private.h|" ${.ALLSRC} > ${.TARGET}
+ sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \
+ ${.ALLSRC} > ${.TARGET}
.endfor
CLEANFILES+= libstand_bzlib_private.h
@@ -119,8 +119,8 @@ libstand_bzlib_private.h: bzlib_private.h
${.ALLSRC} > ${.TARGET}
# decompression functionality from libz
-.PATH: ${S}/../libz
-CFLAGS+=-DHAVE_MEMCPY -I${S}/../libz
+.PATH: ${LIBSTAND_SRC}/../libz
+CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz
SRCS+= adler32.c crc32.c libstand_zutil.h libstand_gzguts.h
.for file in infback.c inffast.c inflate.c inftrees.c zutil.c
diff --git a/sys/boot/userboot/libstand/Makefile b/sys/boot/userboot/libstand/Makefile
index 35903fc..88eb98f 100644
--- a/sys/boot/userboot/libstand/Makefile
+++ b/sys/boot/userboot/libstand/Makefile
@@ -11,9 +11,10 @@ MAN=
.include <bsd.own.mk>
MK_SSP= no
-S= ${.CURDIR}/../../../../lib/libstand
+LIBSTAND_SRC= ${.CURDIR}/../../../../lib/libstand
+LIBC_SRC= ${LIBSTAND_SRC}/../libc
-.PATH: ${S}
+.PATH: ${LIBSTAND_SRC}
LIB= stand
INTERNALLIB=
MK_PROFILE= no
@@ -22,7 +23,7 @@ NO_PIC=
WARNS?= 0
CFLAGS+= -ffreestanding -Wformat -fPIC
-CFLAGS+= -I${.CURDIR}/../../../../lib/libstand
+CFLAGS+= -I${LIBSTAND_SRC}
.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
CFLAGS+= -mno-mmx -mno-3dnow -mno-sse -mno-sse2
@@ -49,14 +50,12 @@ SRCS+= gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c g
# private (pruned) versions of libc string functions
SRCS+= strcasecmp.c
-LIBC= ${.CURDIR}/../../../../lib/libc
-
-.PATH: ${LIBC}/net
+.PATH: ${LIBC_SRC}/net
SRCS+= ntoh.c
# string functions from libc
-.PATH: ${LIBC}/string
+.PATH: ${LIBC_SRC}/string
.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "powerpc" || \
${MACHINE_CPUARCH} == "sparc64" || ${MACHINE_CPUARCH} == "amd64" || \
${MACHINE_CPUARCH} == "arm"
@@ -66,34 +65,34 @@ SRCS+= bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \
strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c
.endif
.if ${MACHINE_CPUARCH} == "arm"
-.PATH: ${LIBC}/arm/gen
+.PATH: ${LIBC_SRC}/arm/gen
SRCS+= divsi3.S
.endif
.if ${MACHINE_CPUARCH} == "powerpc"
-.PATH: ${LIBC}/libc/quad
+.PATH: ${LIBC_SRC}/quad
SRCS+= ashldi3.c ashrdi3.c
-.PATH: ${LIBC}/powerpc/gen
+.PATH: ${LIBC_SRC}/powerpc/gen
SRCS+= syncicache.c
.endif
# uuid functions from libc
-.PATH: ${LIBC}/uuid
+.PATH: ${LIBC_SRC}/uuid
SRCS+= uuid_equal.c uuid_is_nil.c
# _setjmp/_longjmp
.if ${MACHINE_CPUARCH} == "amd64"
-.PATH: ${S}/amd64
+.PATH: ${LIBSTAND_SRC}/amd64
.elif ${MACHINE_ARCH} == "powerpc64"
-.PATH: ${S}/powerpc
+.PATH: ${LIBSTAND_SRC}/powerpc
.else
-.PATH: ${S}/${MACHINE_CPUARCH}
+.PATH: ${LIBSTAND_SRC}/${MACHINE_CPUARCH}
.endif
SRCS+= _setjmp.S
# decompression functionality from libbz2
# NOTE: to actually test this functionality after libbz2 upgrade compile
# loader(8) with LOADER_BZIP2_SUPPORT defined
-.PATH: ${.CURDIR}/../../../../contrib/bzip2
+.PATH: ${LIBSTAND_SRC}/../../contrib/bzip2
CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS
SRCS+= libstand_bzlib_private.h
@@ -102,7 +101,8 @@ SRCS+= _${file}
CLEANFILES+= _${file}
_${file}: ${file}
- sed "s|bzlib_private\.h|libstand_bzlib_private.h|" ${.ALLSRC} > ${.TARGET}
+ sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \
+ ${.ALLSRC} > ${.TARGET}
.endfor
CLEANFILES+= libstand_bzlib_private.h
@@ -111,8 +111,8 @@ libstand_bzlib_private.h: bzlib_private.h
${.ALLSRC} > ${.TARGET}
# decompression functionality from libz
-.PATH: ${.CURDIR}/../../../../lib/libz
-CFLAGS+=-DHAVE_MEMCPY -I${.CURDIR}/../../../../lib/libz
+.PATH: ${LIBSTAND_SRC}/../libz
+CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz
SRCS+= adler32.c crc32.c libstand_zutil.h libstand_gzguts.h
.for file in infback.c inffast.c inflate.c inftrees.c zutil.c
@@ -121,7 +121,8 @@ CLEANFILES+= _${file}
_${file}: ${file}
sed -e "s|zutil\.h|libstand_zutil.h|" \
- -e "s|gzguts\.h|libstand_gzguts.h|" ${.ALLSRC} > ${.TARGET}
+ -e "s|gzguts\.h|libstand_gzguts.h|" \
+ ${.ALLSRC} > ${.TARGET}
.endfor
# depend on stand.h being able to be included multiple times
diff --git a/sys/cddl/contrib/opensolaris/common/nvpair/nvpair.c b/sys/cddl/contrib/opensolaris/common/nvpair/nvpair.c
index 53ed5f2..cd6af4e 100644
--- a/sys/cddl/contrib/opensolaris/common/nvpair/nvpair.c
+++ b/sys/cddl/contrib/opensolaris/common/nvpair/nvpair.c
@@ -1227,6 +1227,7 @@ nvpair_type_is_array(nvpair_t *nvp)
data_type_t type = NVP_TYPE(nvp);
if ((type == DATA_TYPE_BYTE_ARRAY) ||
+ (type == DATA_TYPE_INT8_ARRAY) ||
(type == DATA_TYPE_UINT8_ARRAY) ||
(type == DATA_TYPE_INT16_ARRAY) ||
(type == DATA_TYPE_UINT16_ARRAY) ||
diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
index dc7c283..b31e8bb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
@@ -611,7 +611,11 @@ dtrace_panic(const char *format, ...)
va_list alist;
va_start(alist, format);
+#ifdef __FreeBSD__
+ vpanic(format, alist);
+#else
dtrace_vpanic(format, alist);
+#endif
va_end(alist);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
index 36ac27a..fb9c8a1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
@@ -23,6 +23,7 @@
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -879,11 +880,7 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EEXIST));
}
- /* You can't clone across pools. */
- if (pdd->dd_pool != dp) {
- dsl_dir_rele(pdd, FTAG);
- return (SET_ERROR(EXDEV));
- }
+
error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
doca->doca_cred);
if (error != 0) {
@@ -896,12 +893,6 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
if (error != 0)
return (error);
- /* You can't clone across pools. */
- if (origin->ds_dir->dd_pool != dp) {
- dsl_dataset_rele(origin, FTAG);
- return (SET_ERROR(EXDEV));
- }
-
/* You can only clone snapshots, not the head datasets. */
if (!dsl_dataset_is_snapshot(origin)) {
dsl_dataset_rele(origin, FTAG);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
index 7b1d6be..faa6d0c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
@@ -306,21 +306,22 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_flags_t flags = ARC_FLAG_WAIT;
int i;
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
+ dnode_phys_t *cdnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0)
goto post;
- dnp = buf->b_data;
+ cdnp = buf->b_data;
for (i = 0; i < epb; i++) {
- prefetch_dnode_metadata(td, &dnp[i], zb->zb_objset,
+ prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset,
zb->zb_blkid * epb + i);
}
/* recursively visitbp() blocks below this */
for (i = 0; i < epb; i++) {
- err = traverse_dnode(td, &dnp[i], zb->zb_objset,
+ err = traverse_dnode(td, &cdnp[i], zb->zb_objset,
zb->zb_blkid * epb + i);
if (err != 0)
break;
@@ -328,7 +329,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
- dnode_phys_t *dnp;
+ dnode_phys_t *mdnp, *gdnp, *udnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
@@ -336,26 +337,27 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
goto post;
osp = buf->b_data;
- dnp = &osp->os_meta_dnode;
- prefetch_dnode_metadata(td, dnp, zb->zb_objset,
+ mdnp = &osp->os_meta_dnode;
+ gdnp = &osp->os_groupused_dnode;
+ udnp = &osp->os_userused_dnode;
+
+ prefetch_dnode_metadata(td, mdnp, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
- zb->zb_objset, DMU_GROUPUSED_OBJECT);
- prefetch_dnode_metadata(td, &osp->os_userused_dnode,
- zb->zb_objset, DMU_USERUSED_OBJECT);
+ prefetch_dnode_metadata(td, gdnp, zb->zb_objset,
+ DMU_GROUPUSED_OBJECT);
+ prefetch_dnode_metadata(td, udnp, zb->zb_objset,
+ DMU_USERUSED_OBJECT);
}
- err = traverse_dnode(td, dnp, zb->zb_objset,
+ err = traverse_dnode(td, mdnp, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- dnp = &osp->os_groupused_dnode;
- err = traverse_dnode(td, dnp, zb->zb_objset,
+ err = traverse_dnode(td, gdnp, zb->zb_objset,
DMU_GROUPUSED_OBJECT);
}
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- dnp = &osp->os_userused_dnode;
- err = traverse_dnode(td, dnp, zb->zb_objset,
+ err = traverse_dnode(td, udnp, zb->zb_objset,
DMU_USERUSED_OBJECT);
}
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
index ce22f4e..58db4dd 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
@@ -413,7 +413,8 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
offsetof(dmu_sendarg_t, dsa_link));
if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
- int zaperr = zap_contains(mos, dsobj, DS_FIELD_LARGE_BLOCKS);
+ int zaperr = zap_contains(mos, dsobj,
+ DS_FIELD_LARGE_BLOCKS);
if (zaperr != ENOENT) {
VERIFY0(zaperr);
ds->ds_large_blocks = B_TRUE;
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
index 1862f8c..26f5c2d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
@@ -24,6 +24,7 @@
* All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/dmu.h>
@@ -408,7 +409,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
/* Make sure the name is in the specified pool. */
spaname = spa_name(dp->dp_spa);
if (strcmp(buf, spaname) != 0)
- return (SET_ERROR(EINVAL));
+ return (SET_ERROR(EXDEV));
ASSERT(dsl_pool_config_held(dp));
@@ -1706,7 +1707,7 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
if (dd->dd_pool != newparent->dd_pool) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
- return (SET_ERROR(ENXIO));
+ return (SET_ERROR(EXDEV));
}
/* new name should not already exist */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
index 6a90b9c..fe02399 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
@@ -137,7 +137,7 @@ zfs_onexit_fd_hold(int fd, minor_t *minorp)
*minorp = (minor_t)(uintptr_t)data;
curthread->td_fpop = tmpfp;
if (error != 0)
- return (error);
+ return (SET_ERROR(EBADF));
return (zfs_onexit_minor_to_state(*minorp, &zo));
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
index d925712..e3b314f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
@@ -1320,15 +1320,25 @@ zfs_rezget(znode_t *zp)
}
/*
- * XXXPJD: Not sure how is that possible, but under heavy
- * zfs recv -F load it happens that z_gen is the same, but
- * vnode type is different than znode type. This would mean
- * that for example regular file was replaced with directory
- * which has the same object number.
+ * It is highly improbable but still quite possible that two
+ * objects in different datasets are created with the same
+ * object numbers and in transaction groups with the same
+ * numbers. znodes corresponding to those objects would
+ * have the same z_id and z_gen, but their other attributes
+ * may be different.
+ * zfs recv -F may replace one such object with the other.
+ * As a result, file properties recorded in the replaced
+ * object's vnode may no longer match the received object's
+ * properties. At present the only cached property is the
+ * file's type recorded in v_type.
+ * So, handle this case by leaving the old vnode and znode
+ * disassociated from the actual object. A new vnode and a
+ * znode will be created if the object is accessed
+ * (e.g. via a look-up). The old vnode and znode will be
+ * recycled when the last vnode reference is dropped.
*/
vp = ZTOV(zp);
- if (vp != NULL &&
- vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
+ if (vp != NULL && vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
return (EIO);
diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_asm.S b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S
index cf8314c..a6c079e 100644
--- a/sys/cddl/dev/dtrace/amd64/dtrace_asm.S
+++ b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S
@@ -363,211 +363,3 @@ dtrace_interrupt_enable(dtrace_icookie_t cookie)
popfq
ret
END(dtrace_interrupt_enable)
-
-/*
- * The panic() and cmn_err() functions invoke vpanic() as a common entry point
- * into the panic code implemented in panicsys(). vpanic() is responsible
- * for passing through the format string and arguments, and constructing a
- * regs structure on the stack into which it saves the current register
- * values. If we are not dying due to a fatal trap, these registers will
- * then be preserved in panicbuf as the current processor state. Before
- * invoking panicsys(), vpanic() activates the first panic trigger (see
- * common/os/panic.c) and switches to the panic_stack if successful. Note that
- * DTrace takes a slightly different panic path if it must panic from probe
- * context. Instead of calling panic, it calls into dtrace_vpanic(), which
- * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
- * branches back into vpanic().
- */
-
-/*
-void
-vpanic(const char *format, va_list alist)
-*/
- ENTRY(vpanic) /* Initial stack layout: */
-
- pushq %rbp /* | %rip | 0x60 */
- movq %rsp, %rbp /* | %rbp | 0x58 */
- pushfq /* | rfl | 0x50 */
- pushq %r11 /* | %r11 | 0x48 */
- pushq %r10 /* | %r10 | 0x40 */
- pushq %rbx /* | %rbx | 0x38 */
- pushq %rax /* | %rax | 0x30 */
- pushq %r9 /* | %r9 | 0x28 */
- pushq %r8 /* | %r8 | 0x20 */
- pushq %rcx /* | %rcx | 0x18 */
- pushq %rdx /* | %rdx | 0x10 */
- pushq %rsi /* | %rsi | 0x8 alist */
- pushq %rdi /* | %rdi | 0x0 format */
-
- movq %rsp, %rbx /* %rbx = current %rsp */
-
- leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */
- call panic_trigger /* %eax = panic_trigger() */
-
-vpanic_common:
- /*
- * The panic_trigger result is in %eax from the call above, and
- * dtrace_panic places it in %eax before branching here.
- * The rdmsr instructions that follow below will clobber %eax so
- * we stash the panic_trigger result in %r11d.
- */
- movl %eax, %r11d
- cmpl $0, %r11d
- je 0f
-
- /*
- * If panic_trigger() was successful, we are the first to initiate a
- * panic: we now switch to the reserved panic_stack before continuing.
- */
- leaq panic_stack(%rip), %rsp
- addq $PANICSTKSIZE, %rsp
-0: subq $REGSIZE, %rsp
- /*
- * Now that we've got everything set up, store the register values as
- * they were when we entered vpanic() to the designated location in
- * the regs structure we allocated on the stack.
- */
-#ifdef notyet
- movq 0x0(%rbx), %rcx
- movq %rcx, REGOFF_RDI(%rsp)
- movq 0x8(%rbx), %rcx
- movq %rcx, REGOFF_RSI(%rsp)
- movq 0x10(%rbx), %rcx
- movq %rcx, REGOFF_RDX(%rsp)
- movq 0x18(%rbx), %rcx
- movq %rcx, REGOFF_RCX(%rsp)
- movq 0x20(%rbx), %rcx
-
- movq %rcx, REGOFF_R8(%rsp)
- movq 0x28(%rbx), %rcx
- movq %rcx, REGOFF_R9(%rsp)
- movq 0x30(%rbx), %rcx
- movq %rcx, REGOFF_RAX(%rsp)
- movq 0x38(%rbx), %rcx
- movq %rcx, REGOFF_RBX(%rsp)
- movq 0x58(%rbx), %rcx
-
- movq %rcx, REGOFF_RBP(%rsp)
- movq 0x40(%rbx), %rcx
- movq %rcx, REGOFF_R10(%rsp)
- movq 0x48(%rbx), %rcx
- movq %rcx, REGOFF_R11(%rsp)
- movq %r12, REGOFF_R12(%rsp)
-
- movq %r13, REGOFF_R13(%rsp)
- movq %r14, REGOFF_R14(%rsp)
- movq %r15, REGOFF_R15(%rsp)
-
- xorl %ecx, %ecx
- movw %ds, %cx
- movq %rcx, REGOFF_DS(%rsp)
- movw %es, %cx
- movq %rcx, REGOFF_ES(%rsp)
- movw %fs, %cx
- movq %rcx, REGOFF_FS(%rsp)
- movw %gs, %cx
- movq %rcx, REGOFF_GS(%rsp)
-
- movq $0, REGOFF_TRAPNO(%rsp)
-
- movq $0, REGOFF_ERR(%rsp)
- leaq vpanic(%rip), %rcx
- movq %rcx, REGOFF_RIP(%rsp)
- movw %cs, %cx
- movzwq %cx, %rcx
- movq %rcx, REGOFF_CS(%rsp)
- movq 0x50(%rbx), %rcx
- movq %rcx, REGOFF_RFL(%rsp)
- movq %rbx, %rcx
- addq $0x60, %rcx
- movq %rcx, REGOFF_RSP(%rsp)
- movw %ss, %cx
- movzwq %cx, %rcx
- movq %rcx, REGOFF_SS(%rsp)
-
- /*
- * panicsys(format, alist, rp, on_panic_stack)
- */
- movq REGOFF_RDI(%rsp), %rdi /* format */
- movq REGOFF_RSI(%rsp), %rsi /* alist */
- movq %rsp, %rdx /* struct regs */
- movl %r11d, %ecx /* on_panic_stack */
- call panicsys
- addq $REGSIZE, %rsp
-#endif
- popq %rdi
- popq %rsi
- popq %rdx
- popq %rcx
- popq %r8
- popq %r9
- popq %rax
- popq %rbx
- popq %r10
- popq %r11
- popfq
- leave
- ret
- END(vpanic)
-
-/*
-void
-dtrace_vpanic(const char *format, va_list alist)
-*/
- ENTRY(dtrace_vpanic) /* Initial stack layout: */
-
- pushq %rbp /* | %rip | 0x60 */
- movq %rsp, %rbp /* | %rbp | 0x58 */
- pushfq /* | rfl | 0x50 */
- pushq %r11 /* | %r11 | 0x48 */
- pushq %r10 /* | %r10 | 0x40 */
- pushq %rbx /* | %rbx | 0x38 */
- pushq %rax /* | %rax | 0x30 */
- pushq %r9 /* | %r9 | 0x28 */
- pushq %r8 /* | %r8 | 0x20 */
- pushq %rcx /* | %rcx | 0x18 */
- pushq %rdx /* | %rdx | 0x10 */
- pushq %rsi /* | %rsi | 0x8 alist */
- pushq %rdi /* | %rdi | 0x0 format */
-
- movq %rsp, %rbx /* %rbx = current %rsp */
-
- leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */
- call dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */
- jmp vpanic_common
-
- END(dtrace_vpanic)
-
-/*
-int
-panic_trigger(int *tp)
-*/
- ENTRY(panic_trigger)
- xorl %eax, %eax
- movl $0xdefacedd, %edx
- lock
- xchgl %edx, (%rdi)
- cmpl $0, %edx
- je 0f
- movl $0, %eax
- ret
-0: movl $1, %eax
- ret
- END(panic_trigger)
-
-/*
-int
-dtrace_panic_trigger(int *tp)
-*/
- ENTRY(dtrace_panic_trigger)
- xorl %eax, %eax
- movl $0xdefacedd, %edx
- lock
- xchgl %edx, (%rdi)
- cmpl $0, %edx
- je 0f
- movl $0, %eax
- ret
-0: movl $1, %eax
- ret
- END(dtrace_panic_trigger)
diff --git a/sys/cddl/dev/dtrace/arm/dtrace_asm.S b/sys/cddl/dev/dtrace/arm/dtrace_asm.S
index ce27b14..06e91d2 100644
--- a/sys/cddl/dev/dtrace/arm/dtrace_asm.S
+++ b/sys/cddl/dev/dtrace/arm/dtrace_asm.S
@@ -170,24 +170,6 @@ ENTRY(dtrace_copystr)
END(dtrace_copystr)
/*
-void
-vpanic(const char *format, va_list alist)
-*/
-ENTRY(vpanic) /* Initial stack layout: */
-vpanic_common:
- RET
-END(vpanic)
-
-/*
-void
-dtrace_vpanic(const char *format, va_list alist)
-*/
-ENTRY(dtrace_vpanic) /* Initial stack layout: */
- b vpanic
- RET
-END(dtrace_vpanic) /* Initial stack layout: */
-
-/*
uintptr_t
dtrace_caller(int aframes)
*/
diff --git a/sys/cddl/dev/dtrace/dtrace_hacks.c b/sys/cddl/dev/dtrace/dtrace_hacks.c
index 21da9f8..3f89973 100644
--- a/sys/cddl/dev/dtrace/dtrace_hacks.c
+++ b/sys/cddl/dev/dtrace/dtrace_hacks.c
@@ -3,9 +3,6 @@
dtrace_cacheid_t dtrace_predcache_id;
-int panic_quiesce;
-char panic_stack[PANICSTKSIZE];
-
boolean_t
priv_policy_only(const cred_t *a, int b, boolean_t c)
{
diff --git a/sys/cddl/dev/dtrace/i386/dtrace_asm.S b/sys/cddl/dev/dtrace/i386/dtrace_asm.S
index 6338719..d44f6c3 100644
--- a/sys/cddl/dev/dtrace/i386/dtrace_asm.S
+++ b/sys/cddl/dev/dtrace/i386/dtrace_asm.S
@@ -355,167 +355,3 @@ void dtrace_interrupt_enable(dtrace_icookie_t cookie)
popfl
ret
END(dtrace_interrupt_enable)
-
-/*
- * The panic() and cmn_err() functions invoke vpanic() as a common entry point
- * into the panic code implemented in panicsys(). vpanic() is responsible
- * for passing through the format string and arguments, and constructing a
- * regs structure on the stack into which it saves the current register
- * values. If we are not dying due to a fatal trap, these registers will
- * then be preserved in panicbuf as the current processor state. Before
- * invoking panicsys(), vpanic() activates the first panic trigger (see
- * common/os/panic.c) and switches to the panic_stack if successful. Note that
- * DTrace takes a slightly different panic path if it must panic from probe
- * context. Instead of calling panic, it calls into dtrace_vpanic(), which
- * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
- * branches back into vpanic().
- */
-/*
-void vpanic(const char *format, va_list alist)
-*/
- ENTRY(vpanic) /* Initial stack layout: */
-
- pushl %ebp /* | %eip | 20 */
- movl %esp, %ebp /* | %ebp | 16 */
- pushl %eax /* | %eax | 12 */
- pushl %ebx /* | %ebx | 8 */
- pushl %ecx /* | %ecx | 4 */
- pushl %edx /* | %edx | 0 */
-
- movl %esp, %ebx /* %ebx = current stack pointer */
-
- lea panic_quiesce, %eax /* %eax = &panic_quiesce */
- pushl %eax /* push &panic_quiesce */
- call panic_trigger /* %eax = panic_trigger() */
- addl $4, %esp /* reset stack pointer */
-
-vpanic_common:
- cmpl $0, %eax /* if (%eax == 0) */
- je 0f /* goto 0f; */
-
- /*
- * If panic_trigger() was successful, we are the first to initiate a
- * panic: we now switch to the reserved panic_stack before continuing.
- */
- lea panic_stack, %esp /* %esp = panic_stack */
- addl $PANICSTKSIZE, %esp /* %esp += PANICSTKSIZE */
-
-0: subl $REGSIZE, %esp /* allocate struct regs */
-
- /*
- * Now that we've got everything set up, store the register values as
- * they were when we entered vpanic() to the designated location in
- * the regs structure we allocated on the stack.
- */
-#ifdef notyet
- mov %gs, %edx
- mov %edx, REGOFF_GS(%esp)
- mov %fs, %edx
- mov %edx, REGOFF_FS(%esp)
- mov %es, %edx
- mov %edx, REGOFF_ES(%esp)
- mov %ds, %edx
- mov %edx, REGOFF_DS(%esp)
- movl %edi, REGOFF_EDI(%esp)
- movl %esi, REGOFF_ESI(%esp)
- movl 16(%ebx), %ecx
- movl %ecx, REGOFF_EBP(%esp)
- movl %ebx, %ecx
- addl $20, %ecx
- movl %ecx, REGOFF_ESP(%esp)
- movl 8(%ebx), %ecx
- movl %ecx, REGOFF_EBX(%esp)
- movl 0(%ebx), %ecx
- movl %ecx, REGOFF_EDX(%esp)
- movl 4(%ebx), %ecx
- movl %ecx, REGOFF_ECX(%esp)
- movl 12(%ebx), %ecx
- movl %ecx, REGOFF_EAX(%esp)
- movl $0, REGOFF_TRAPNO(%esp)
- movl $0, REGOFF_ERR(%esp)
- lea vpanic, %ecx
- movl %ecx, REGOFF_EIP(%esp)
- mov %cs, %edx
- movl %edx, REGOFF_CS(%esp)
- pushfl
- popl %ecx
- movl %ecx, REGOFF_EFL(%esp)
- movl $0, REGOFF_UESP(%esp)
- mov %ss, %edx
- movl %edx, REGOFF_SS(%esp)
-
- movl %esp, %ecx /* %ecx = &regs */
- pushl %eax /* push on_panic_stack */
- pushl %ecx /* push &regs */
- movl 12(%ebp), %ecx /* %ecx = alist */
- pushl %ecx /* push alist */
- movl 8(%ebp), %ecx /* %ecx = format */
- pushl %ecx /* push format */
- call panicsys /* panicsys(); */
- addl $16, %esp /* pop arguments */
-
- addl $REGSIZE, %esp
-#endif
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
- leave
- ret
- END(vpanic)
-
-/*
-void dtrace_vpanic(const char *format, va_list alist)
-*/
- ENTRY(dtrace_vpanic) /* Initial stack layout: */
-
- pushl %ebp /* | %eip | 20 */
- movl %esp, %ebp /* | %ebp | 16 */
- pushl %eax /* | %eax | 12 */
- pushl %ebx /* | %ebx | 8 */
- pushl %ecx /* | %ecx | 4 */
- pushl %edx /* | %edx | 0 */
-
- movl %esp, %ebx /* %ebx = current stack pointer */
-
- lea panic_quiesce, %eax /* %eax = &panic_quiesce */
- pushl %eax /* push &panic_quiesce */
- call dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */
- addl $4, %esp /* reset stack pointer */
- jmp vpanic_common /* jump back to common code */
-
- END(dtrace_vpanic)
-
-/*
-int
-panic_trigger(int *tp)
-*/
- ENTRY(panic_trigger)
- xorl %eax, %eax
- movl $0xdefacedd, %edx
- lock
- xchgl %edx, (%edi)
- cmpl $0, %edx
- je 0f
- movl $0, %eax
- ret
-0: movl $1, %eax
- ret
- END(panic_trigger)
-
-/*
-int
-dtrace_panic_trigger(int *tp)
-*/
- ENTRY(dtrace_panic_trigger)
- xorl %eax, %eax
- movl $0xdefacedd, %edx
- lock
- xchgl %edx, (%edi)
- cmpl $0, %edx
- je 0f
- movl $0, %eax
- ret
-0: movl $1, %eax
- ret
- END(dtrace_panic_trigger)
diff --git a/sys/cddl/dev/dtrace/mips/dtrace_asm.S b/sys/cddl/dev/dtrace/mips/dtrace_asm.S
index 50f6c54..199d16be 100644
--- a/sys/cddl/dev/dtrace/mips/dtrace_asm.S
+++ b/sys/cddl/dev/dtrace/mips/dtrace_asm.S
@@ -249,49 +249,6 @@ LEAF(dtrace_invop_uninit)
END(dtrace_invop_uninit)
/*
- * The panic() and cmn_err() functions invoke vpanic() as a common entry point
- * into the panic code implemented in panicsys(). vpanic() is responsible
- * for passing through the format string and arguments, and constructing a
- * regs structure on the stack into which it saves the current register
- * values. If we are not dying due to a fatal trap, these registers will
- * then be preserved in panicbuf as the current processor state. Before
- * invoking panicsys(), vpanic() activates the first panic trigger (see
- * common/os/panic.c) and switches to the panic_stack if successful. Note that
- * DTrace takes a slightly different panic path if it must panic from probe
- * context. Instead of calling panic, it calls into dtrace_vpanic(), which
- * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
- * branches back into vpanic().
- */
-
-/*
-void
-vpanic(const char *format, va_list alist)
-*/
-LEAF(vpanic) /* Initial stack layout: */
-
-vpanic_common:
- j ra
- nop
-END(vpanic)
-
-
-
-/*
-void
-dtrace_vpanic(const char *format, va_list alist)
-*/
-LEAF(dtrace_vpanic) /* Initial stack layout: */
-
-#if 0
- jal dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */
- nop
-#endif
- j vpanic_common
- nop
-
-END(dtrace_vpanic)
-
-/*
uintptr_t
dtrace_caller(int aframes)
*/
@@ -300,4 +257,3 @@ LEAF(dtrace_caller)
j ra
nop
END(dtrace_caller)
-
diff --git a/sys/cddl/dev/dtrace/powerpc/dtrace_asm.S b/sys/cddl/dev/dtrace/powerpc/dtrace_asm.S
index 5676360..3adfbe3 100644
--- a/sys/cddl/dev/dtrace/powerpc/dtrace_asm.S
+++ b/sys/cddl/dev/dtrace/powerpc/dtrace_asm.S
@@ -161,45 +161,6 @@ ASENTRY_NOPROF(dtrace_copystr)
END(dtrace_copystr)
/*
- * The panic() and cmn_err() functions invoke vpanic() as a common entry point
- * into the panic code implemented in panicsys(). vpanic() is responsible
- * for passing through the format string and arguments, and constructing a
- * regs structure on the stack into which it saves the current register
- * values. If we are not dying due to a fatal trap, these registers will
- * then be preserved in panicbuf as the current processor state. Before
- * invoking panicsys(), vpanic() activates the first panic trigger (see
- * common/os/panic.c) and switches to the panic_stack if successful. Note that
- * DTrace takes a slightly different panic path if it must panic from probe
- * context. Instead of calling panic, it calls into dtrace_vpanic(), which
- * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
- * branches back into vpanic().
- */
-
-/*
-void
-vpanic(const char *format, va_list alist)
-*/
-ASENTRY_NOPROF(vpanic) /* Initial stack layout: */
-
-vpanic_common:
- blr
-END(vpanic)
-
-
-
-/*
-void
-dtrace_vpanic(const char *format, va_list alist)
-*/
-ASENTRY_NOPROF(dtrace_vpanic) /* Initial stack layout: */
-
-#if 0
- bl dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */
-#endif
- b vpanic_common
-END(dtrace_vpanic)
-
-/*
uintptr_t
dtrace_caller(int aframes)
*/
@@ -207,4 +168,3 @@ ASENTRY_NOPROF(dtrace_caller)
li %r3, -1
blr
END(dtrace_caller)
-
diff --git a/sys/cddl/dev/profile/profile.c b/sys/cddl/dev/profile/profile.c
index 338a958..d31d5c8 100644
--- a/sys/cddl/dev/profile/profile.c
+++ b/sys/cddl/dev/profile/profile.c
@@ -134,8 +134,10 @@ struct profile_probe_percpu;
#endif
#ifdef __arm__
-/* bogus */
-#define PROF_ARTIFICIAL_FRAMES 9
+/*
+ * At least on ARMv7, this appears to work quite well.
+ */
+#define PROF_ARTIFICIAL_FRAMES 10
#endif
typedef struct profile_probe {
diff --git a/sys/compat/linux/linux_getcwd.c b/sys/compat/linux/linux_getcwd.c
index 7c0b9ba..ebcf970 100644
--- a/sys/compat/linux/linux_getcwd.c
+++ b/sys/compat/linux/linux_getcwd.c
@@ -2,11 +2,15 @@
/* $NetBSD: vfs_getcwd.c,v 1.3.2.3 1999/07/11 10:24:09 sommerfeld Exp $ */
/*-
* Copyright (c) 1999 The NetBSD Foundation, Inc.
+ * Copyright (c) 2015 The FreeBSD Foundation
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Bill Sommerfeld.
*
+ * Portions of this software were developed by Edward Tomasz Napierala
+ * under sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -36,19 +40,9 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/namei.h>
-#include <sys/filedesc.h>
-#include <sys/kernel.h>
-#include <sys/file.h>
-#include <sys/stat.h>
#include <sys/syscallsubr.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
#include <sys/proc.h>
-#include <sys/uio.h>
#include <sys/malloc.h>
-#include <sys/dirent.h>
-#include <ufs/ufs/dir.h> /* XXX only for DIRBLKSIZ */
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
@@ -60,408 +54,37 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_util.h>
-#include <security/mac/mac_framework.h>
-
-static int
-linux_getcwd_scandir(struct vnode **, struct vnode **,
- char **, char *, struct thread *);
-static int
-linux_getcwd_common(struct vnode *, struct vnode *,
- char **, char *, int, int, struct thread *);
-
-#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
-
-/*
- * Vnode variable naming conventions in this file:
- *
- * rvp: the current root we're aiming towards.
- * lvp, *lvpp: the "lower" vnode
- * uvp, *uvpp: the "upper" vnode.
- *
- * Since all the vnodes we're dealing with are directories, and the
- * lookups are going *up* in the filesystem rather than *down*, the
- * usual "pvp" (parent) or "dvp" (directory) naming conventions are
- * too confusing.
- */
-
-/*
- * XXX Will infinite loop in certain cases if a directory read reliably
- * returns EINVAL on last block.
- * XXX is EINVAL the right thing to return if a directory is malformed?
- */
-
-/*
- * XXX Untested vs. mount -o union; probably does the wrong thing.
- */
-
-/*
- * Find parent vnode of *lvpp, return in *uvpp
- *
- * If we care about the name, scan it looking for name of directory
- * entry pointing at lvp.
- *
- * Place the name in the buffer which starts at bufp, immediately
- * before *bpp, and move bpp backwards to point at the start of it.
- *
- * On entry, *lvpp is a locked vnode reference; on exit, it is vput and NULL'ed
- * On exit, *uvpp is either NULL or is a locked vnode reference.
- */
-static int
-linux_getcwd_scandir(lvpp, uvpp, bpp, bufp, td)
- struct vnode **lvpp;
- struct vnode **uvpp;
- char **bpp;
- char *bufp;
- struct thread *td;
-{
- int error = 0;
- int eofflag;
- off_t off;
- int tries;
- struct uio uio;
- struct iovec iov;
- char *dirbuf = NULL;
- int dirbuflen;
- ino_t fileno;
- struct vattr va;
- struct vnode *uvp = NULL;
- struct vnode *lvp = *lvpp;
- struct componentname cn;
- int len, reclen;
- tries = 0;
-
- /*
- * If we want the filename, get some info we need while the
- * current directory is still locked.
- */
- if (bufp != NULL) {
- error = VOP_GETATTR(lvp, &va, td->td_ucred);
- if (error) {
- vput(lvp);
- *lvpp = NULL;
- *uvpp = NULL;
- return error;
- }
- }
-
- /*
- * Ok, we have to do it the hard way..
- * Next, get parent vnode using lookup of ..
- */
- cn.cn_nameiop = LOOKUP;
- cn.cn_flags = ISLASTCN | ISDOTDOT | RDONLY;
- cn.cn_thread = td;
- cn.cn_cred = td->td_ucred;
- cn.cn_pnbuf = NULL;
- cn.cn_nameptr = "..";
- cn.cn_namelen = 2;
- cn.cn_consume = 0;
- cn.cn_lkflags = LK_SHARED;
-
- /*
- * At this point, lvp is locked and will be unlocked by the lookup.
- * On successful return, *uvpp will be locked
- */
-#ifdef MAC
- error = mac_vnode_check_lookup(td->td_ucred, lvp, &cn);
- if (error == 0)
-#endif
- error = VOP_LOOKUP(lvp, uvpp, &cn);
- if (error) {
- vput(lvp);
- *lvpp = NULL;
- *uvpp = NULL;
- return error;
- }
- uvp = *uvpp;
-
- /* If we don't care about the pathname, we're done */
- if (bufp == NULL) {
- vput(lvp);
- *lvpp = NULL;
- return 0;
- }
-
- fileno = va.va_fileid;
-
- dirbuflen = DIRBLKSIZ;
- if (dirbuflen < va.va_blocksize)
- dirbuflen = va.va_blocksize;
- dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
-
-#if 0
-unionread:
-#endif
- off = 0;
- do {
- /* call VOP_READDIR of parent */
- iov.iov_base = dirbuf;
- iov.iov_len = dirbuflen;
-
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = off;
- uio.uio_resid = dirbuflen;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_rw = UIO_READ;
- uio.uio_td = td;
-
- eofflag = 0;
-
-#ifdef MAC
- error = mac_vnode_check_readdir(td->td_ucred, uvp);
- if (error == 0)
-#endif /* MAC */
- error = VOP_READDIR(uvp, &uio, td->td_ucred, &eofflag,
- 0, 0);
-
- off = uio.uio_offset;
-
- /*
- * Try again if NFS tosses its cookies.
- * XXX this can still loop forever if the directory is busted
- * such that the second or subsequent page of it always
- * returns EINVAL
- */
- if ((error == EINVAL) && (tries < 3)) {
- off = 0;
- tries++;
- continue; /* once more, with feeling */
- }
-
- if (!error) {
- char *cpos;
- struct dirent *dp;
-
- cpos = dirbuf;
- tries = 0;
-
- /* scan directory page looking for matching vnode */
- for (len = (dirbuflen - uio.uio_resid); len > 0; len -= reclen) {
- dp = (struct dirent *) cpos;
- reclen = dp->d_reclen;
-
- /* check for malformed directory.. */
- if (reclen < DIRENT_MINSIZE) {
- error = EINVAL;
- goto out;
- }
- /*
- * XXX should perhaps do VOP_LOOKUP to
- * check that we got back to the right place,
- * but getting the locking games for that
- * right would be heinous.
- */
- if ((dp->d_type != DT_WHT) &&
- (dp->d_fileno == fileno)) {
- char *bp = *bpp;
- bp -= dp->d_namlen;
-
- if (bp <= bufp) {
- error = ERANGE;
- goto out;
- }
- bcopy(dp->d_name, bp, dp->d_namlen);
- error = 0;
- *bpp = bp;
- goto out;
- }
- cpos += reclen;
- }
- }
- } while (!eofflag);
- error = ENOENT;
-
-out:
- vput(lvp);
- *lvpp = NULL;
- free(dirbuf, M_TEMP);
- return error;
-}
-
-
-/*
- * common routine shared by sys___getcwd() and linux_vn_isunder()
- */
-
-#define GETCWD_CHECK_ACCESS 0x0001
-
-static int
-linux_getcwd_common (lvp, rvp, bpp, bufp, limit, flags, td)
- struct vnode *lvp;
- struct vnode *rvp;
- char **bpp;
- char *bufp;
- int limit;
- int flags;
- struct thread *td;
-{
- struct filedesc *fdp = td->td_proc->p_fd;
- struct vnode *uvp = NULL;
- char *bp = NULL;
- int error;
- accmode_t accmode = VEXEC;
-
- if (rvp == NULL) {
- rvp = fdp->fd_rdir;
- if (rvp == NULL)
- rvp = rootvnode;
- }
-
- VREF(rvp);
- VREF(lvp);
-
- /*
- * Error handling invariant:
- * Before a `goto out':
- * lvp is either NULL, or locked and held.
- * uvp is either NULL, or locked and held.
- */
-
- error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
- if (error != 0)
- panic("vn_lock LK_RETRY returned error %d", error);
- if (bufp)
- bp = *bpp;
- /*
- * this loop will terminate when one of the following happens:
- * - we hit the root
- * - getdirentries or lookup fails
- * - we run out of space in the buffer.
- */
- if (lvp == rvp) {
- if (bp)
- *(--bp) = '/';
- goto out;
- }
- do {
- if (lvp->v_type != VDIR) {
- error = ENOTDIR;
- goto out;
- }
-
- /*
- * access check here is optional, depending on
- * whether or not caller cares.
- */
- if (flags & GETCWD_CHECK_ACCESS) {
- error = VOP_ACCESS(lvp, accmode, td->td_ucred, td);
- if (error)
- goto out;
- accmode = VEXEC|VREAD;
- }
-
- /*
- * step up if we're a covered vnode..
- */
- while (lvp->v_vflag & VV_ROOT) {
- struct vnode *tvp;
-
- if (lvp == rvp)
- goto out;
-
- tvp = lvp;
- lvp = lvp->v_mount->mnt_vnodecovered;
- vput(tvp);
- /*
- * hodie natus est radici frater
- */
- if (lvp == NULL) {
- error = ENOENT;
- goto out;
- }
- VREF(lvp);
- error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
- if (error != 0)
- panic("vn_lock LK_RETRY returned %d", error);
- }
- error = linux_getcwd_scandir(&lvp, &uvp, &bp, bufp, td);
- if (error)
- goto out;
-#ifdef DIAGNOSTIC
- if (lvp != NULL)
- panic("getcwd: oops, forgot to null lvp");
- if (bufp && (bp <= bufp)) {
- panic("getcwd: oops, went back too far");
- }
-#endif
- if (bp)
- *(--bp) = '/';
- lvp = uvp;
- uvp = NULL;
- limit--;
- } while ((lvp != rvp) && (limit > 0));
-
-out:
- if (bpp)
- *bpp = bp;
- if (uvp)
- vput(uvp);
- if (lvp)
- vput(lvp);
- vrele(rvp);
- return error;
-}
-
-
/*
* Find pathname of process's current directory.
- *
- * Use vfs vnode-to-name reverse cache; if that fails, fall back
- * to reading directory contents.
*/
-
int
linux_getcwd(struct thread *td, struct linux_getcwd_args *args)
{
- char *bp, *bend, *path;
- int error, len, lenused;
+ char *path;
+ int error, lenused;
#ifdef DEBUG
if (ldebug(getcwd))
printf(ARGS(getcwd, "%p, %ld"), args->buf, (long)args->bufsize);
#endif
- len = args->bufsize;
-
- if (len > LINUX_PATH_MAX)
- len = LINUX_PATH_MAX;
- else if (len < 2)
- return ERANGE;
+ /*
+ * Linux returns ERANGE instead of EINVAL.
+ */
+ if (args->bufsize < 2)
+ return (ERANGE);
- path = malloc(len, M_TEMP, M_WAITOK);
+ path = malloc(LINUX_PATH_MAX, M_TEMP, M_WAITOK);
- error = kern___getcwd(td, path, UIO_SYSSPACE, len, LINUX_PATH_MAX);
- if (!error) {
+ error = kern___getcwd(td, path, UIO_SYSSPACE, args->bufsize,
+ LINUX_PATH_MAX);
+ if (error == 0) {
lenused = strlen(path) + 1;
- if (lenused <= args->bufsize) {
+ error = copyout(path, args->buf, lenused);
+ if (error == 0)
td->td_retval[0] = lenused;
- error = copyout(path, args->buf, lenused);
- }
- else
- error = ERANGE;
- } else {
- bp = &path[len];
- bend = bp;
- *(--bp) = '\0';
-
- /*
- * 5th argument here is "max number of vnodes to traverse".
- * Since each entry takes up at least 2 bytes in the output buffer,
- * limit it to N/2 vnodes for an N byte buffer.
- */
-
- error = linux_getcwd_common (td->td_proc->p_fd->fd_cdir, NULL,
- &bp, path, len/2, GETCWD_CHECK_ACCESS, td);
- if (error)
- goto out;
- lenused = bend - bp;
- td->td_retval[0] = lenused;
- /* put the result into user buffer */
- error = copyout(bp, args->buf, lenused);
}
-out:
+
free(path, M_TEMP);
return (error);
}
-
diff --git a/sys/conf/files b/sys/conf/files
index ce28fa1..caaed94 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -584,7 +584,6 @@ dev/acpica/acpi_button.c optional acpi
dev/acpica/acpi_cmbat.c optional acpi
dev/acpica/acpi_cpu.c optional acpi
dev/acpica/acpi_ec.c optional acpi
-dev/acpica/acpi_hpet.c optional acpi
dev/acpica/acpi_isab.c optional acpi isa
dev/acpica/acpi_lid.c optional acpi
dev/acpica/acpi_package.c optional acpi
@@ -1780,6 +1779,8 @@ dev/ixgbe/ixgbe_82599.c optional ix ixv inet \
compile-with "${NORMAL_C} -I$S/dev/ixgbe"
dev/ixgbe/ixgbe_x540.c optional ix ixv inet \
compile-with "${NORMAL_C} -I$S/dev/ixgbe"
+dev/ixgbe/ixgbe_x550.c optional ix ixv inet \
+ compile-with "${NORMAL_C} -I$S/dev/ixgbe"
dev/ixgbe/ixgbe_dcb.c optional ix ixv inet \
compile-with "${NORMAL_C} -I$S/dev/ixgbe"
dev/ixgbe/ixgbe_dcb_82598.c optional ix ixv inet \
@@ -2677,24 +2678,24 @@ wpi.fw optional wpifw \
clean "wpi.fw"
dev/xe/if_xe.c optional xe
dev/xe/if_xe_pccard.c optional xe pccard
-dev/xen/balloon/balloon.c optional xen | xenhvm
-dev/xen/blkfront/blkfront.c optional xen | xenhvm
-dev/xen/blkback/blkback.c optional xen | xenhvm
-dev/xen/console/console.c optional xen | xenhvm
-dev/xen/console/xencons_ring.c optional xen | xenhvm
-dev/xen/control/control.c optional xen | xenhvm
-dev/xen/grant_table/grant_table.c optional xen | xenhvm
-dev/xen/netback/netback.c optional xen | xenhvm
-dev/xen/netfront/netfront.c optional xen | xenhvm
+dev/xen/balloon/balloon.c optional xenhvm
+dev/xen/blkfront/blkfront.c optional xenhvm
+dev/xen/blkback/blkback.c optional xenhvm
+dev/xen/console/console.c optional xenhvm
+dev/xen/console/xencons_ring.c optional xenhvm
+dev/xen/control/control.c optional xenhvm
+dev/xen/grant_table/grant_table.c optional xenhvm
+dev/xen/netback/netback.c optional xenhvm
+dev/xen/netfront/netfront.c optional xenhvm
dev/xen/xenpci/xenpci.c optional xenpci
-dev/xen/timer/timer.c optional xen | xenhvm
-dev/xen/pvcpu/pvcpu.c optional xen | xenhvm
-dev/xen/xenstore/xenstore.c optional xen | xenhvm
-dev/xen/xenstore/xenstore_dev.c optional xen | xenhvm
-dev/xen/xenstore/xenstored_dev.c optional xen | xenhvm
-dev/xen/evtchn/evtchn_dev.c optional xen | xenhvm
-dev/xen/privcmd/privcmd.c optional xen | xenhvm
-dev/xen/debug/debug.c optional xen | xenhvm
+dev/xen/timer/timer.c optional xenhvm
+dev/xen/pvcpu/pvcpu.c optional xenhvm
+dev/xen/xenstore/xenstore.c optional xenhvm
+dev/xen/xenstore/xenstore_dev.c optional xenhvm
+dev/xen/xenstore/xenstored_dev.c optional xenhvm
+dev/xen/evtchn/evtchn_dev.c optional xenhvm
+dev/xen/privcmd/privcmd.c optional xenhvm
+dev/xen/debug/debug.c optional xenhvm
dev/xl/if_xl.c optional xl pci
dev/xl/xlphy.c optional xl pci
fs/autofs/autofs.c optional autofs
@@ -3262,6 +3263,9 @@ libkern/strtoul.c standard
libkern/strtouq.c standard
libkern/strvalid.c standard
libkern/timingsafe_bcmp.c standard
+libkern/zlib.c optional crypto | geom_uzip | ipsec | \
+ mxge | netgraph_deflate | \
+ ddb_ctf | gzio | geom_uncompress
net/altq/altq_cbq.c optional altq
net/altq/altq_cdnr.c optional altq
net/altq/altq_hfsc.c optional altq
@@ -3324,9 +3328,6 @@ net/slcompress.c optional netgraph_vjc | sppp | \
netgraph_sppp
net/toeplitz.c optional inet rss | inet6 rss
net/vnet.c optional vimage
-net/zlib.c optional crypto | geom_uzip | ipsec | \
- mxge | netgraph_deflate | \
- ddb_ctf | gzio | geom_uncompress
net80211/ieee80211.c optional wlan
net80211/ieee80211_acl.c optional wlan wlan_acl
net80211/ieee80211_action.c optional wlan
@@ -3503,6 +3504,7 @@ netinet/sctp_sysctl.c optional inet sctp | inet6 sctp
netinet/sctp_timer.c optional inet sctp | inet6 sctp
netinet/sctp_usrreq.c optional inet sctp | inet6 sctp
netinet/sctputil.c optional inet sctp | inet6 sctp
+netinet/siftr.c optional inet siftr alq | inet6 siftr alq
netinet/tcp_debug.c optional tcpdebug
netinet/tcp_hostcache.c optional inet | inet6
netinet/tcp_input.c optional inet | inet6
@@ -4043,13 +4045,13 @@ vm/vm_reserv.c standard
vm/vm_unix.c standard
vm/vm_zeroidle.c standard
vm/vnode_pager.c standard
-xen/features.c optional xen | xenhvm
-xen/xenbus/xenbus_if.m optional xen | xenhvm
-xen/xenbus/xenbus.c optional xen | xenhvm
-xen/xenbus/xenbusb_if.m optional xen | xenhvm
-xen/xenbus/xenbusb.c optional xen | xenhvm
-xen/xenbus/xenbusb_front.c optional xen | xenhvm
-xen/xenbus/xenbusb_back.c optional xen | xenhvm
+xen/features.c optional xenhvm
+xen/xenbus/xenbus_if.m optional xenhvm
+xen/xenbus/xenbus.c optional xenhvm
+xen/xenbus/xenbusb_if.m optional xenhvm
+xen/xenbus/xenbusb.c optional xenhvm
+xen/xenbus/xenbusb_front.c optional xenhvm
+xen/xenbus/xenbusb_back.c optional xenhvm
xdr/xdr.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_array.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_mbuf.c optional krpc | nfslockd | nfscl | nfsd
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index ae71c39..8aadcf5 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -145,6 +145,7 @@ crypto/via/padlock.c optional padlock
crypto/via/padlock_cipher.c optional padlock
crypto/via/padlock_hash.c optional padlock
dev/acpica/acpi_if.m standard
+dev/acpica/acpi_hpet.c optional acpi
dev/acpi_support/acpi_wmi_if.m standard
dev/agp/agp_amd64.c optional agp
dev/agp/agp_i810.c optional agp
@@ -513,10 +514,10 @@ compat/ndis/winx64_wrap.S optional ndisapi pci
libkern/memmove.c standard
libkern/memset.c standard
#
-# x86 real mode BIOS emulator, required by atkbdc/dpms/pci/vesa
+# x86 real mode BIOS emulator, required by dpms/pci/vesa
#
-compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | pci | vesa
-contrib/x86emu/x86emu.c optional x86bios | atkbd | dpms | pci | vesa
+compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa
+contrib/x86emu/x86emu.c optional x86bios | dpms | pci | vesa
#
# bvm console
#
@@ -569,13 +570,14 @@ x86/x86/local_apic.c standard
x86/x86/mca.c standard
x86/x86/mptable.c optional mptable
x86/x86/mptable_pci.c optional mptable pci
+x86/x86/mp_x86.c optional smp
x86/x86/msi.c optional pci
x86/x86/nexus.c standard
x86/x86/pvclock.c standard
x86/x86/tsc.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
-x86/xen/xen_intr.c optional xen | xenhvm
+x86/xen/xen_intr.c optional xenhvm
x86/xen/pv.c optional xenhvm
x86/xen/pvcpu_enum.c optional xenhvm
x86/xen/xen_apic.c optional xenhvm
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 758c9a6..f80fecd 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -10,6 +10,10 @@ arm64/arm64/clock.c standard
arm64/arm64/copyinout.S standard
arm64/arm64/copystr.c standard
arm64/arm64/cpufunc_asm.S standard
+arm64/arm64/db_disasm.c optional ddb
+arm64/arm64/db_interface.c optional ddb
+arm64/arm64/db_trace.c optional ddb
+arm64/arm64/debug_monitor.c optional kdb
arm64/arm64/dump_machdep.c standard
arm64/arm64/elf_machdep.c standard
arm64/arm64/exception.S standard
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index f072247..68dd6a9 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -285,6 +285,7 @@ dev/uart/uart_cpu_x86.c optional uart
dev/viawd/viawd.c optional viawd
dev/vmware/vmxnet3/if_vmx.c optional vmx
dev/acpica/acpi_if.m standard
+dev/acpica/acpi_hpet.c optional acpi
dev/acpi_support/acpi_wmi_if.m standard
dev/wbwd/wbwd.c optional wbwd
dev/wpi/if_wpi.c optional wpi
@@ -427,16 +428,15 @@ i386/bios/smapi_bios.S optional smapi
i386/i386/atomic.c standard \
compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
i386/i386/autoconf.c standard
-i386/i386/bios.c optional native
-i386/i386/bioscall.s optional native
+i386/i386/bios.c standard
+i386/i386/bioscall.s standard
i386/i386/bpf_jit_machdep.c optional bpf_jitter
i386/i386/db_disasm.c optional ddb
i386/i386/db_interface.c optional ddb
i386/i386/db_trace.c optional ddb
i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris
i386/i386/elf_machdep.c standard
-i386/i386/exception.s optional native
-i386/xen/exception.s optional xen
+i386/i386/exception.s standard
i386/i386/gdb_machdep.c optional gdb
i386/i386/geode.c optional cpu_geode
i386/i386/i686_mem.c optional mem
@@ -444,22 +444,17 @@ i386/i386/in_cksum.c optional inet | inet6
i386/i386/initcpu.c standard
i386/i386/io.c optional io
i386/i386/k6_mem.c optional mem
-i386/i386/locore.s optional native no-obj
-i386/xen/locore.s optional xen no-obj
+i386/i386/locore.s standard no-obj
i386/i386/longrun.c optional cpu_enable_longrun
i386/i386/machdep.c standard
-i386/xen/xen_machdep.c optional xen
i386/i386/mem.c optional mem
i386/i386/minidump_machdep.c standard
i386/i386/mp_clock.c optional smp
-i386/i386/mp_machdep.c optional native smp
-i386/xen/mp_machdep.c optional xen smp
+i386/i386/mp_machdep.c optional smp
i386/i386/mp_watchdog.c optional mp_watchdog smp
-i386/i386/mpboot.s optional smp native
-i386/xen/mptable.c optional apic xen
+i386/i386/mpboot.s optional smp
i386/i386/perfmon.c optional perfmon
-i386/i386/pmap.c optional native
-i386/xen/pmap.c optional xen
+i386/i386/pmap.c standard
i386/i386/ptrace_machdep.c standard
i386/i386/stack_machdep.c optional ddb | stack
i386/i386/support.s standard
@@ -488,7 +483,6 @@ i386/ibcs2/ibcs2_util.c optional ibcs2
i386/ibcs2/ibcs2_xenix.c optional ibcs2
i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2
i386/ibcs2/imgact_coff.c optional ibcs2
-i386/xen/clock.c optional xen
i386/isa/elink.c optional ep | ie
i386/isa/npx.c optional npx
i386/isa/pmtimer.c optional pmtimer
@@ -531,9 +525,9 @@ i386/xbox/xboxfb.c optional xboxfb
dev/fb/boot_font.c optional xboxfb
i386/xbox/pic16l.s optional xbox
#
-# x86 real mode BIOS support, required by atkbdc/dpms/pci/vesa
+# x86 real mode BIOS support, required by dpms/pci/vesa
#
-compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | pci | vesa
+compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa
#
# bvm console
#
@@ -565,9 +559,9 @@ x86/iommu/intel_qi.c optional acpi acpi_dmar pci
x86/iommu/intel_quirks.c optional acpi acpi_dmar pci
x86/iommu/intel_utils.c optional acpi acpi_dmar pci
x86/isa/atpic.c optional atpic
-x86/isa/atrtc.c optional native
-x86/isa/clock.c optional native
-x86/isa/elcr.c optional atpic | apic native
+x86/isa/atrtc.c standard
+x86/isa/clock.c standard
+x86/isa/elcr.c optional atpic | apic
x86/isa/isa.c optional isa
x86/isa/isa_dma.c optional isa
x86/isa/nmi.c standard
@@ -582,19 +576,20 @@ x86/x86/fdt_machdep.c optional fdt
x86/x86/identcpu.c standard
x86/x86/intr_machdep.c standard
x86/x86/io_apic.c optional apic
-x86/x86/legacy.c optional native
+x86/x86/legacy.c standard
x86/x86/local_apic.c optional apic
x86/x86/mca.c standard
-x86/x86/mptable.c optional apic native
-x86/x86/mptable_pci.c optional apic native pci
+x86/x86/mptable.c optional apic
+x86/x86/mptable_pci.c optional apic pci
+x86/x86/mp_x86.c optional smp
x86/x86/msi.c optional apic pci
x86/x86/nexus.c standard
x86/x86/tsc.c standard
x86/x86/pvclock.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
-x86/xen/xen_intr.c optional xen | xenhvm
+x86/xen/xen_intr.c optional xenhvm
x86/xen/xen_apic.c optional xenhvm
-x86/xen/xenpv.c optional xen | xenhvm
-x86/xen/xen_nexus.c optional xen | xenhvm
-x86/xen/xen_msi.c optional xen | xenhvm
+x86/xen/xenpv.c optional xenhvm
+x86/xen/xen_nexus.c optional xenhvm
+x86/xen/xen_msi.c optional xenhvm
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index f95d0bb..ae165fc 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -256,6 +256,7 @@ x86/x86/io_apic.c optional apic
x86/x86/legacy.c standard
x86/x86/local_apic.c optional apic
x86/x86/mca.c standard
+x86/x86/mp_x86.c optional smp
x86/x86/mptable.c optional apic
x86/x86/mptable_pci.c optional apic pci
x86/x86/msi.c optional apic pci
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index ab585b0..92f4101 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -97,17 +97,16 @@ libkern/udivdi3.c optional powerpc
libkern/umoddi3.c optional powerpc
powerpc/aim/interrupt.c optional aim
powerpc/aim/locore.S optional aim no-obj
-powerpc/aim/machdep.c optional aim
+powerpc/aim/aim_machdep.c optional aim
powerpc/aim/mmu_oea.c optional aim powerpc
powerpc/aim/mmu_oea64.c optional aim
powerpc/aim/moea64_if.m optional aim
powerpc/aim/moea64_native.c optional aim
powerpc/aim/mp_cpudep.c optional aim
powerpc/aim/slb.c optional aim powerpc64
-powerpc/aim/uma_machdep.c optional aim
powerpc/booke/interrupt.c optional booke
powerpc/booke/locore.S optional booke no-obj
-powerpc/booke/machdep.c optional booke
+powerpc/booke/booke_machdep.c optional booke
powerpc/booke/machdep_e500.c optional booke_e500
powerpc/booke/mp_cpudep.c optional booke smp
powerpc/booke/platform_bare.c optional booke
@@ -195,6 +194,7 @@ powerpc/powerpc/gdb_machdep.c optional gdb
powerpc/powerpc/in_cksum.c optional inet | inet6
powerpc/powerpc/intr_machdep.c standard
powerpc/powerpc/iommu_if.m standard
+powerpc/powerpc/machdep.c standard
powerpc/powerpc/mem.c optional mem
powerpc/powerpc/mmu_if.m standard
powerpc/powerpc/mp_machdep.c optional smp
@@ -217,6 +217,7 @@ powerpc/powerpc/syncicache.c standard
powerpc/powerpc/sys_machdep.c standard
powerpc/powerpc/trap.c standard
powerpc/powerpc/uio_machdep.c standard
+powerpc/powerpc/uma_machdep.c standard
powerpc/powerpc/vm_machdep.c standard
powerpc/ps3/ehci_ps3.c optional ps3 ehci
powerpc/ps3/ohci_ps3.c optional ps3 ohci
diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk
index 9a7ad61..8e76268 100644
--- a/sys/conf/kern.mk
+++ b/sys/conf/kern.mk
@@ -187,7 +187,7 @@ CFLAGS+= -fstack-protector
CFLAGS+= -gdwarf-2
.endif
-CFLAGS+= ${CWARNEXTRA} ${CWARNFLAGS} ${CWARNFLAGS.${.IMPSRC:T}}
+CFLAGS+= ${CWARNFLAGS} ${CWARNFLAGS.${.IMPSRC:T}}
CFLAGS+= ${CFLAGS.${COMPILER_TYPE}} ${CFLAGS.${.IMPSRC:T}}
# Tell bmake not to mistake standard targets for things to be searched for
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk
index 99ada05..8c3b9c6 100644
--- a/sys/conf/kern.pre.mk
+++ b/sys/conf/kern.pre.mk
@@ -87,7 +87,7 @@ INCLUDES+= -I$S/dev/cxgb -I$S/dev/cxgbe
.endif
-CFLAGS= ${COPTFLAGS} ${DEBUG} ${CWARNFLAGS}
+CFLAGS= ${COPTFLAGS} ${DEBUG}
CFLAGS+= ${INCLUDES} -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h
CFLAGS_PARAM_INLINE_UNIT_GROWTH?=100
CFLAGS_PARAM_LARGE_FUNCTION_GROWTH?=1000
diff --git a/sys/conf/options b/sys/conf/options
index 2405442..b698717 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -432,6 +432,7 @@ ROUTETABLES opt_route.h
RSS opt_rss.h
SLIP_IFF_OPTS opt_slip.h
TCPDEBUG
+SIFTR
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
TCP_SIGNATURE opt_inet.h
VLAN_ARRAY opt_vlan.h
@@ -928,6 +929,7 @@ IPOIB_CM opt_ofed.h
# Resource Accounting
RACCT opt_global.h
+RACCT_DISABLED opt_global.h
# Resource Limits
RCTL opt_global.h
diff --git a/sys/conf/options.amd64 b/sys/conf/options.amd64
index f1d4b4a..0e59187 100644
--- a/sys/conf/options.amd64
+++ b/sys/conf/options.amd64
@@ -63,5 +63,7 @@ BPF_JITTER opt_bpf.h
XENHVM opt_global.h
+HYPERV opt_global.h
+
# options for the Intel C600 SAS driver (isci)
ISCI_LOGGING opt_isci.h
diff --git a/sys/conf/options.arm b/sys/conf/options.arm
index 239be2a..b712b02 100644
--- a/sys/conf/options.arm
+++ b/sys/conf/options.arm
@@ -40,6 +40,9 @@ PV_STATS opt_pmap.h
QEMU_WORKAROUNDS opt_global.h
SOC_BCM2835 opt_global.h
SOC_BCM2836 opt_global.h
+SOC_IMX51 opt_global.h
+SOC_IMX53 opt_global.h
+SOC_IMX6 opt_global.h
SOC_MV_ARMADAXP opt_global.h
SOC_MV_DISCOVERY opt_global.h
SOC_MV_DOVE opt_global.h
diff --git a/sys/conf/options.arm64 b/sys/conf/options.arm64
index fff47ab..1dfcbec 100644
--- a/sys/conf/options.arm64
+++ b/sys/conf/options.arm64
@@ -1,4 +1,6 @@
# $FreeBSD$
ARM64 opt_global.h
+SOCDEV_PA opt_global.h
+SOCDEV_VA opt_global.h
VFP opt_global.h
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index 0a1a52f..69eb7e3 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -121,9 +121,9 @@ NPX_DEBUG opt_npx.h
# BPF just-in-time compiler
BPF_JITTER opt_bpf.h
-NATIVE opt_global.h
-XEN opt_global.h
XENHVM opt_global.h
+HYPERV opt_global.h
+
# options for the Intel C600 SAS driver (isci)
ISCI_LOGGING opt_isci.h
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index 47bc791..7796fbe 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -606,7 +606,7 @@ acpi_attach(device_t dev)
sc->acpi_handle_reboot = 1;
/* Only enable S4BIOS by default if the FACS says it is available. */
- if (AcpiGbl_FACS->Flags & ACPI_FACS_S4_BIOS_PRESENT)
+ if (AcpiGbl_FACS != NULL && AcpiGbl_FACS->Flags & ACPI_FACS_S4_BIOS_PRESENT)
sc->acpi_s4bios = 1;
/* Probe all supported sleep states. */
diff --git a/sys/dev/atkbdc/atkbd.c b/sys/dev/atkbdc/atkbd.c
index d93c1c6..a90f5d2 100644
--- a/sys/dev/atkbdc/atkbd.c
+++ b/sys/dev/atkbdc/atkbd.c
@@ -44,19 +44,6 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
#include <machine/resource.h>
-#if defined(__i386__) || defined(__amd64__)
-#include <machine/md_var.h>
-#include <machine/psl.h>
-#include <compat/x86bios/x86bios.h>
-#include <machine/pc/bios.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <vm/vm_param.h>
-
-#include <isa/isareg.h>
-#endif /* __i386__ || __amd64__ */
-
#include <sys/kbio.h>
#include <dev/kbd/kbdreg.h>
#include <dev/atkbdc/atkbdreg.h>
@@ -82,6 +69,9 @@ static int atkbd_reset(KBDC kbdc, int flags, int c);
#define HAS_QUIRK(p, q) (((atkbdc_softc_t *)(p))->quirks & q)
#define ALLOW_DISABLE_KBD(kbdc) !HAS_QUIRK(kbdc, KBDC_QUIRK_KEEP_ACTIVATED)
+#define DEFAULT_DELAY 0x1 /* 500ms */
+#define DEFAULT_RATE 0x10 /* 14Hz */
+
int
atkbd_probe_unit(device_t dev, int irq, int flags)
{
@@ -249,7 +239,7 @@ static keyboard_switch_t atkbdsw = {
KEYBOARD_DRIVER(atkbd, atkbdsw, atkbd_configure);
/* local functions */
-static int get_typematic(keyboard_t *kbd);
+static int set_typematic(keyboard_t *kbd);
static int setup_kbd_port(KBDC kbdc, int port, int intr);
static int get_kbd_echo(KBDC kbdc);
static int probe_keyboard(KBDC kbdc, int flags);
@@ -443,7 +433,7 @@ atkbd_init(int unit, keyboard_t **kbdp, void *arg, int flags)
goto bad;
}
atkbd_ioctl(kbd, KDSETLED, (caddr_t)&state->ks_state);
- get_typematic(kbd);
+ set_typematic(kbd);
delay[0] = kbd->kb_delay1;
delay[1] = kbd->kb_delay2;
atkbd_ioctl(kbd, KDSETREPEAT, (caddr_t)delay);
@@ -503,7 +493,7 @@ atkbd_intr(keyboard_t *kbd, void *arg)
init_keyboard(state->kbdc, &kbd->kb_type, kbd->kb_config);
KBD_FOUND_DEVICE(kbd);
atkbd_ioctl(kbd, KDSETLED, (caddr_t)&state->ks_state);
- get_typematic(kbd);
+ set_typematic(kbd);
delay[0] = kbd->kb_delay1;
delay[1] = kbd->kb_delay2;
atkbd_ioctl(kbd, KDSETREPEAT, (caddr_t)delay);
@@ -1135,57 +1125,19 @@ atkbd_reset(KBDC kbdc, int flags, int c)
/* local functions */
static int
-get_typematic(keyboard_t *kbd)
+set_typematic(keyboard_t *kbd)
{
-#if defined(__i386__) || defined(__amd64__)
- /*
- * Only some systems allow us to retrieve the keyboard repeat
- * rate previously set via the BIOS...
- */
- x86regs_t regs;
- uint8_t *p;
+ int val, error;
+ atkbd_state_t *state = kbd->kb_data;
- /*
- * Traditional entry points of int 0x15 and 0x16 are fixed
- * and later BIOSes follow them. (U)EFI CSM specification
- * also mandates these fixed entry points.
- *
- * Validate the entry points here before we proceed further.
- * It's known that some recent laptops does not have the
- * same entry point and hang on boot if we call it.
- */
- if (x86bios_get_intr(0x15) != 0xf000f859 ||
- x86bios_get_intr(0x16) != 0xf000e82e)
- return (ENODEV);
-
- /* Is BIOS system configuration table supported? */
- x86bios_init_regs(&regs);
- regs.R_AH = 0xc0;
- x86bios_intr(&regs, 0x15);
- if ((regs.R_FLG & PSL_C) != 0 || regs.R_AH != 0)
- return (ENODEV);
-
- /* Is int 0x16, function 0x09 supported? */
- p = x86bios_offset((regs.R_ES << 4) + regs.R_BX);
- if (readw(p) < 5 || (readb(p + 6) & 0x40) == 0)
- return (ENODEV);
-
- /* Is int 0x16, function 0x0306 supported? */
- x86bios_init_regs(&regs);
- regs.R_AH = 0x09;
- x86bios_intr(&regs, 0x16);
- if ((regs.R_AL & 0x08) == 0)
- return (ENODEV);
-
- x86bios_init_regs(&regs);
- regs.R_AX = 0x0306;
- x86bios_intr(&regs, 0x16);
- kbd->kb_delay1 = typematic_delay(regs.R_BH << 5);
- kbd->kb_delay2 = typematic_rate(regs.R_BL);
- return (0);
-#else
- return (ENODEV);
-#endif /* __i386__ || __amd64__ */
+ val = typematic(DEFAULT_DELAY, DEFAULT_RATE);
+ error = write_kbd(state->kbdc, KBDC_SET_TYPEMATIC, val);
+ if (error == 0) {
+ kbd->kb_delay1 = typematic_delay(val);
+ kbd->kb_delay2 = typematic_rate(val);
+ }
+
+ return (error);
}
static int
diff --git a/sys/dev/bxe/bxe.h b/sys/dev/bxe/bxe.h
index f37b93f..eb695d0 100644
--- a/sys/dev/bxe/bxe.h
+++ b/sys/dev/bxe/bxe.h
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <sys/limits.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
+#include <sys/zlib.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -60,7 +61,6 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>
-#include <net/zlib.h>
#include <net/bpf.h>
#include <netinet/in.h>
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 40098b8..4bbb55e 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -1571,9 +1571,6 @@ cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
struct ifmedia *media = NULL;
struct ifmedia_entry *cur;
int speed = pi->link_cfg.speed;
-#ifdef INVARIANTS
- int data = (pi->port_type << 8) | pi->mod_type;
-#endif
if (ifp == pi->ifp)
media = &pi->media;
@@ -1584,7 +1581,6 @@ cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
MPASS(media != NULL);
cur = media->ifm_cur;
- MPASS(cur->ifm_data == data);
ifmr->ifm_status = IFM_AVALID;
if (!pi->link_cfg.link_ok)
@@ -2884,30 +2880,29 @@ t4_set_desc(struct adapter *sc)
static void
build_medialist(struct port_info *pi, struct ifmedia *media)
{
- int data, m;
+ int m;
PORT_LOCK(pi);
ifmedia_removeall(media);
m = IFM_ETHER | IFM_FDX;
- data = (pi->port_type << 8) | pi->mod_type;
switch(pi->port_type) {
case FW_PORT_TYPE_BT_XFI:
case FW_PORT_TYPE_BT_XAUI:
- ifmedia_add(media, m | IFM_10G_T, data, NULL);
+ ifmedia_add(media, m | IFM_10G_T, 0, NULL);
/* fall through */
case FW_PORT_TYPE_BT_SGMII:
- ifmedia_add(media, m | IFM_1000_T, data, NULL);
- ifmedia_add(media, m | IFM_100_TX, data, NULL);
- ifmedia_add(media, IFM_ETHER | IFM_AUTO, data, NULL);
+ ifmedia_add(media, m | IFM_1000_T, 0, NULL);
+ ifmedia_add(media, m | IFM_100_TX, 0, NULL);
+ ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_set(media, IFM_ETHER | IFM_AUTO);
break;
case FW_PORT_TYPE_CX4:
- ifmedia_add(media, m | IFM_10G_CX4, data, NULL);
+ ifmedia_add(media, m | IFM_10G_CX4, 0, NULL);
ifmedia_set(media, m | IFM_10G_CX4);
break;
@@ -2918,29 +2913,29 @@ build_medialist(struct port_info *pi, struct ifmedia *media)
switch (pi->mod_type) {
case FW_PORT_MOD_TYPE_LR:
- ifmedia_add(media, m | IFM_10G_LR, data, NULL);
+ ifmedia_add(media, m | IFM_10G_LR, 0, NULL);
ifmedia_set(media, m | IFM_10G_LR);
break;
case FW_PORT_MOD_TYPE_SR:
- ifmedia_add(media, m | IFM_10G_SR, data, NULL);
+ ifmedia_add(media, m | IFM_10G_SR, 0, NULL);
ifmedia_set(media, m | IFM_10G_SR);
break;
case FW_PORT_MOD_TYPE_LRM:
- ifmedia_add(media, m | IFM_10G_LRM, data, NULL);
+ ifmedia_add(media, m | IFM_10G_LRM, 0, NULL);
ifmedia_set(media, m | IFM_10G_LRM);
break;
case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
- ifmedia_add(media, m | IFM_10G_TWINAX, data, NULL);
+ ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL);
ifmedia_set(media, m | IFM_10G_TWINAX);
break;
case FW_PORT_MOD_TYPE_NONE:
m &= ~IFM_FDX;
- ifmedia_add(media, m | IFM_NONE, data, NULL);
+ ifmedia_add(media, m | IFM_NONE, 0, NULL);
ifmedia_set(media, m | IFM_NONE);
break;
@@ -2950,7 +2945,7 @@ build_medialist(struct port_info *pi, struct ifmedia *media)
device_printf(pi->dev,
"unknown port_type (%d), mod_type (%d)\n",
pi->port_type, pi->mod_type);
- ifmedia_add(media, m | IFM_UNKNOWN, data, NULL);
+ ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
ifmedia_set(media, m | IFM_UNKNOWN);
break;
}
@@ -2960,24 +2955,24 @@ build_medialist(struct port_info *pi, struct ifmedia *media)
switch (pi->mod_type) {
case FW_PORT_MOD_TYPE_LR:
- ifmedia_add(media, m | IFM_40G_LR4, data, NULL);
+ ifmedia_add(media, m | IFM_40G_LR4, 0, NULL);
ifmedia_set(media, m | IFM_40G_LR4);
break;
case FW_PORT_MOD_TYPE_SR:
- ifmedia_add(media, m | IFM_40G_SR4, data, NULL);
+ ifmedia_add(media, m | IFM_40G_SR4, 0, NULL);
ifmedia_set(media, m | IFM_40G_SR4);
break;
case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
- ifmedia_add(media, m | IFM_40G_CR4, data, NULL);
+ ifmedia_add(media, m | IFM_40G_CR4, 0, NULL);
ifmedia_set(media, m | IFM_40G_CR4);
break;
case FW_PORT_MOD_TYPE_NONE:
m &= ~IFM_FDX;
- ifmedia_add(media, m | IFM_NONE, data, NULL);
+ ifmedia_add(media, m | IFM_NONE, 0, NULL);
ifmedia_set(media, m | IFM_NONE);
break;
@@ -2985,7 +2980,7 @@ build_medialist(struct port_info *pi, struct ifmedia *media)
device_printf(pi->dev,
"unknown port_type (%d), mod_type (%d)\n",
pi->port_type, pi->mod_type);
- ifmedia_add(media, m | IFM_UNKNOWN, data, NULL);
+ ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
ifmedia_set(media, m | IFM_UNKNOWN);
break;
}
@@ -2995,7 +2990,7 @@ build_medialist(struct port_info *pi, struct ifmedia *media)
device_printf(pi->dev,
"unknown port_type (%d), mod_type (%d)\n", pi->port_type,
pi->mod_type);
- ifmedia_add(media, m | IFM_UNKNOWN, data, NULL);
+ ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
ifmedia_set(media, m | IFM_UNKNOWN);
break;
}
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index 49cc16a..6ac6eb6 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -1046,8 +1046,7 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
}
drbr_advance(ifp, txr->br);
enq++;
- if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
- if (next->m_flags & M_MCAST)
+		if ((next->m_flags & M_MCAST) && adapter->vf_ifp)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
@@ -4055,7 +4054,9 @@ static bool
igb_txeof(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
+#ifdef DEV_NETMAP
struct ifnet *ifp = adapter->ifp;
+#endif /* DEV_NETMAP */
u32 work, processed = 0;
u16 limit = txr->process_limit;
struct igb_tx_buf *buf;
@@ -4130,7 +4131,6 @@ igb_txeof(struct tx_ring *txr)
}
++txr->packets;
++processed;
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
txr->watchdog_time = ticks;
/* Try the next packet */
@@ -5127,7 +5127,6 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
if (eop) {
rxr->fmp->m_pkthdr.rcvif = ifp;
- if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
rxr->rx_packets++;
/* capture data for AIM */
rxr->packets++;
@@ -5560,24 +5559,94 @@ igb_led_func(void *arg, int onoff)
}
static uint64_t
+igb_get_vf_counter(if_t ifp, ift_counter cnt)
+{
+ struct adapter *adapter;
+ struct e1000_vf_stats *stats;
+#ifndef IGB_LEGACY_TX
+ struct tx_ring *txr;
+ uint64_t rv;
+#endif
+
+ adapter = if_getsoftc(ifp);
+ stats = (struct e1000_vf_stats *)adapter->stats;
+
+ switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ return (stats->gprc);
+ case IFCOUNTER_OPACKETS:
+ return (stats->gptc);
+ case IFCOUNTER_IBYTES:
+ return (stats->gorc);
+ case IFCOUNTER_OBYTES:
+ return (stats->gotc);
+ case IFCOUNTER_IMCASTS:
+ return (stats->mprc);
+ case IFCOUNTER_IERRORS:
+ return (adapter->dropped_pkts);
+ case IFCOUNTER_OERRORS:
+ return (adapter->watchdog_events);
+#ifndef IGB_LEGACY_TX
+ case IFCOUNTER_OQDROPS:
+ rv = 0;
+ txr = adapter->tx_rings;
+ for (int i = 0; i < adapter->num_queues; i++, txr++)
+ rv += txr->br->br_drops;
+ return (rv);
+#endif
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
+}
+
+static uint64_t
igb_get_counter(if_t ifp, ift_counter cnt)
{
struct adapter *adapter;
struct e1000_hw_stats *stats;
+#ifndef IGB_LEGACY_TX
+ struct tx_ring *txr;
+ uint64_t rv;
+#endif
adapter = if_getsoftc(ifp);
+ if (adapter->vf_ifp)
+ return (igb_get_vf_counter(ifp, cnt));
+
stats = (struct e1000_hw_stats *)adapter->stats;
switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ return (stats->gprc);
+ case IFCOUNTER_OPACKETS:
+ return (stats->gptc);
+ case IFCOUNTER_IBYTES:
+ return (stats->gorc);
+ case IFCOUNTER_OBYTES:
+ return (stats->gotc);
+ case IFCOUNTER_IMCASTS:
+ return (stats->mprc);
+ case IFCOUNTER_OMCASTS:
+ return (stats->mptc);
case IFCOUNTER_IERRORS:
return (adapter->dropped_pkts + stats->rxerrc +
stats->crcerrs + stats->algnerrc +
- stats->ruc + stats->roc + stats->mpc + stats->cexterr);
+ stats->ruc + stats->roc + stats->cexterr);
case IFCOUNTER_OERRORS:
return (stats->ecol + stats->latecol +
adapter->watchdog_events);
case IFCOUNTER_COLLISIONS:
return (stats->colc);
+ case IFCOUNTER_IQDROPS:
+ return (stats->mpc);
+#ifndef IGB_LEGACY_TX
+ case IFCOUNTER_OQDROPS:
+ rv = 0;
+ txr = adapter->tx_rings;
+ for (int i = 0; i < adapter->num_queues; i++, txr++)
+ rv += txr->br->br_drops;
+ return (rv);
+#endif
default:
return (if_get_counter_default(ifp, cnt));
}
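The igb hunks above replace per-packet if_inc_counter() calls in the data path with a single on-demand if_get_counter method, adding a VF-specific variant that is dispatched first. A hedged sketch of how such a method is typically registered at attach time, per the if_get_counter(9) pattern (the actual call site in if_igb.c is outside this diff):

	/* assumed wiring; done once while setting up the ifnet */
	if_setgetcounterfn(ifp, igb_get_counter);

After that, readers obtain statistics via if_get_counter(ifp, IFCOUNTER_IPACKETS) and friends instead of relying on counters bumped in the hot path.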
diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h
index 8a45d89..5360b7c 100644
--- a/sys/dev/hyperv/include/hyperv.h
+++ b/sys/dev/hyperv/include/hyperv.h
@@ -46,6 +46,7 @@
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/sema.h>
+#include <sys/smp.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <vm/vm.h>
@@ -63,11 +64,22 @@ typedef uint8_t hv_bool_uint8_t;
#define HV_ERROR_MACHINE_LOCKED 0x800704F7
/*
- * A revision number of vmbus that is used for ensuring both ends on a
- * partition are using compatible versions.
+ * VMBUS version is 32 bit, upper 16 bit for major_number and lower
+ * 16 bit for minor_number.
+ *
+ * 0.13 -- Windows Server 2008
+ * 1.1 -- Windows 7
+ * 2.4 -- Windows 8
+ * 3.0 -- Windows 8.1
*/
+#define HV_VMBUS_VERSION_WS2008 ((0 << 16) | (13))
+#define HV_VMBUS_VERSION_WIN7 ((1 << 16) | (1))
+#define HV_VMBUS_VERSION_WIN8 ((2 << 16) | (4))
+#define HV_VMBUS_VERSION_WIN8_1 ((3 << 16) | (0))
+
+#define HV_VMBUS_VERSION_INVALID -1
-#define HV_VMBUS_REVISION_NUMBER 13
+#define HV_VMBUS_VERSION_CURRENT HV_VMBUS_VERSION_WIN8_1
/*
* Make maximum size of pipe payload of 16K
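Given that layout, splitting a negotiated version back into its halves is a pair of shifts; a small self-contained sketch (the helper names are illustrative, the code itself only compares the composite constants):

	static inline uint16_t hv_ver_major(uint32_t ver) { return (ver >> 16); }
	static inline uint16_t hv_ver_minor(uint32_t ver) { return (ver & 0xffff); }
	/* hv_ver_major(HV_VMBUS_VERSION_WIN8) == 2, hv_ver_minor(...) == 4 */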
@@ -112,6 +124,18 @@ typedef struct hv_guid {
unsigned char data[16];
} __packed hv_guid;
+#define HV_NIC_GUID \
+ .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, \
+ 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
+
+#define HV_IDE_GUID \
+ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \
+ 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
+
+#define HV_SCSI_GUID \
+ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \
+ 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
+
/*
* At the center of the Channel Management library is
* the Channel Offer. This struct contains the
@@ -147,7 +171,11 @@ typedef struct hv_vmbus_channel_offer {
} __packed pipe;
} u;
- uint32_t padding;
+ /*
+	 * The sub-channel index, newly added in Win8.
+ */
+ uint16_t sub_channel_index;
+ uint16_t padding;
} __packed hv_vmbus_channel_offer;
@@ -344,7 +372,25 @@ typedef struct {
hv_vmbus_channel_offer offer;
uint32_t child_rel_id;
uint8_t monitor_id;
- hv_bool_uint8_t monitor_allocated;
+ /*
+ * This field has been split into a bit field on Win7
+ * and higher.
+ */
+ uint8_t monitor_allocated:1;
+ uint8_t reserved:7;
+ /*
+ * Following fields were added in win7 and higher.
+ * Make sure to check the version before accessing these fields.
+ *
+ * If "is_dedicated_interrupt" is set, we must not set the
+ * associated bit in the channel bitmap while sending the
+ * interrupt to the host.
+ *
+ * connection_id is used in signaling the host.
+ */
+ uint16_t is_dedicated_interrupt:1;
+ uint16_t reserved1:15;
+ uint32_t connection_id;
} __packed hv_vmbus_channel_offer_channel;
/*
@@ -394,9 +440,11 @@ typedef struct
hv_gpadl_handle ring_buffer_gpadl_handle;
/*
- * GPADL for the channel's server context save area.
+ * Before win8, all incoming channel interrupts are only
+ * delivered on cpu 0. Setting this value to 0 would
+ * preserve the earlier behavior.
*/
- hv_gpadl_handle server_context_area_gpadl_handle;
+ uint32_t target_vcpu;
/*
* The upstream ring buffer begins at offset zero in the memory described
@@ -646,14 +694,42 @@ typedef struct {
} hv_vmbus_ring_buffer_info;
typedef void (*hv_vmbus_pfn_channel_callback)(void *context);
+typedef void (*hv_vmbus_sc_creation_callback)(void *context);
typedef enum {
HV_CHANNEL_OFFER_STATE,
HV_CHANNEL_OPENING_STATE,
HV_CHANNEL_OPEN_STATE,
+ HV_CHANNEL_OPENED_STATE,
HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE,
} hv_vmbus_channel_state;
+/*
+ * Connection identifier type
+ */
+typedef union {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t id:24;
+ uint32_t reserved:8;
+ } u;
+
+} __packed hv_vmbus_connection_id;
+
+/*
+ * Definition of the hv_vmbus_signal_event hypercall input structure
+ */
+typedef struct {
+ hv_vmbus_connection_id connection_id;
+ uint16_t flag_number;
+ uint16_t rsvd_z;
+} __packed hv_vmbus_input_signal_event;
+
+typedef struct {
+ uint64_t align8;
+ hv_vmbus_input_signal_event event;
+} __packed hv_vmbus_input_signal_event_buffer;
+
typedef struct hv_vmbus_channel {
TAILQ_ENTRY(hv_vmbus_channel) list_entry;
struct hv_device* device;
@@ -688,8 +764,82 @@ typedef struct hv_vmbus_channel {
hv_vmbus_pfn_channel_callback on_channel_callback;
void* channel_callback_context;
+ /*
+ * If batched_reading is set to "true", mask the interrupt
+ * and read until the channel is empty.
+ * If batched_reading is set to "false", the channel is not
+ * going to perform batched reading.
+ *
+ * Batched reading is enabled by default; specific
+ * drivers that don't want this behavior can turn it off.
+ */
+ boolean_t batched_reading;
+
+ boolean_t is_dedicated_interrupt;
+
+ /*
+ * Used as an input param for HV_CALL_SIGNAL_EVENT hypercall.
+ */
+ hv_vmbus_input_signal_event_buffer signal_event_buffer;
+ /*
+	 * Pointer to the 8-byte aligned event within the buffer above.
+ */
+ hv_vmbus_input_signal_event *signal_event_param;
+
+ /*
+	 * From Win8 on, this field specifies the target virtual
+	 * processor on which to deliver the interrupt from the
+	 * host to the guest.
+ * Before Win8, all channel interrupts would only be
+ * delivered on cpu 0. Setting this value to 0 would preserve
+ * the earlier behavior.
+ */
+ uint32_t target_vcpu;
+ /* The corresponding CPUID in the guest */
+ uint32_t target_cpu;
+
+ /*
+ * Support for multi-channels.
+ * The initial offer is considered the primary channel and this
+ * offer message will indicate if the host supports multi-channels.
+	 * The guest is free to ask for multi-channels to be offered and can
+ * open these multi-channels as a normal "primary" channel. However,
+ * all multi-channels will have the same type and instance guids as the
+ * primary channel. Requests sent on a given channel will result in a
+ * response on the same channel.
+ */
+
+ /*
+ * Multi-channel creation callback. This callback will be called in
+ * process context when a Multi-channel offer is received from the host.
+ * The guest can open the Multi-channel in the context of this callback.
+ */
+ hv_vmbus_sc_creation_callback sc_creation_callback;
+
+ struct mtx sc_lock;
+
+ /*
+ * Link list of all the multi-channels if this is a primary channel
+ */
+ TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor;
+ TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry;
+
+ /*
+	 * The primary channel this sub-channel belongs to.
+ * This will be NULL for the primary channel.
+ */
+ struct hv_vmbus_channel *primary_channel;
+ /*
+ * Support per channel state for use by vmbus drivers.
+ */
+ void *per_channel_state;
} hv_vmbus_channel;
+static inline void
+hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state)
+{
+ channel->batched_reading = state;
+}
+
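A driver that must take host messages one at a time disables batched reading before opening its channel; the hv_util.c hunk later in this diff does exactly that:

	hv_set_channel_read_state(hv_dev->channel, FALSE);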
typedef struct hv_device {
hv_guid class_id;
hv_guid device_id;
@@ -760,6 +910,8 @@ int hv_vmbus_channel_teardown_gpdal(
hv_vmbus_channel* channel,
uint32_t gpadl_handle);
+struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary);
+
/*
* Work abstraction defines
*/
@@ -819,6 +971,7 @@ typedef struct hv_vmbus_service {
extern uint8_t* receive_buffer[];
extern hv_vmbus_service service_table[];
+extern uint32_t hv_vmbus_protocal_version;
void hv_kvp_callback(void *context);
int hv_kvp_init(hv_vmbus_service *serv);
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index d00d279..f8a871b 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/condvar.h>
+#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
@@ -53,8 +54,12 @@ __FBSDID("$FreeBSD$");
#include <sys/callout.h>
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <vm/uma.h>
#include <sys/lock.h>
#include <sys/sema.h>
+#include <sys/sglist.h>
+#include <machine/bus.h>
+#include <sys/bus_dma.h>
#include <cam/cam.h>
#include <cam/cam_ccb.h>
@@ -66,7 +71,6 @@ __FBSDID("$FreeBSD$");
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_message.h>
-
#include <dev/hyperv/include/hyperv.h>
#include "hv_vstorage.h"
@@ -77,8 +81,29 @@ __FBSDID("$FreeBSD$");
#define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS
#define STORVSC_MAX_TARGETS (2)
+#define STORVSC_WIN7_MAJOR 4
+#define STORVSC_WIN7_MINOR 2
+
+#define STORVSC_WIN8_MAJOR 5
+#define STORVSC_WIN8_MINOR 1
+
+#define HV_ALIGN(x, a) roundup2(x, a)
+
struct storvsc_softc;
+struct hv_sgl_node {
+ LIST_ENTRY(hv_sgl_node) link;
+ struct sglist *sgl_data;
+};
+
+struct hv_sgl_page_pool{
+ LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
+ LIST_HEAD(, hv_sgl_node) free_sgl_list;
+ boolean_t is_init;
+} g_hv_sgl_page_pool;
+
+#define STORVSC_MAX_SG_PAGE_CNT (STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT)
+
enum storvsc_request_type {
WRITE_TYPE,
READ_TYPE,
@@ -96,20 +121,24 @@ struct hv_storvsc_request {
struct storvsc_softc *softc;
struct callout callout;
struct sema synch_sema; /*Synchronize the request/response if needed */
+ struct sglist *bounce_sgl;
+ unsigned int bounce_sgl_count;
+ uint64_t not_aligned_seg_bits;
};
struct storvsc_softc {
struct hv_device *hs_dev;
- LIST_HEAD(, hv_storvsc_request) hs_free_list;
- struct mtx hs_lock;
- struct storvsc_driver_props *hs_drv_props;
- int hs_unit;
- uint32_t hs_frozen;
- struct cam_sim *hs_sim;
- struct cam_path *hs_path;
+ LIST_HEAD(, hv_storvsc_request) hs_free_list;
+ struct mtx hs_lock;
+ struct storvsc_driver_props *hs_drv_props;
+ int hs_unit;
+ uint32_t hs_frozen;
+ struct cam_sim *hs_sim;
+ struct cam_path *hs_path;
uint32_t hs_num_out_reqs;
boolean_t hs_destroy;
boolean_t hs_drain_notify;
+ boolean_t hs_open_multi_channel;
struct sema hs_drain_sema;
struct hv_storvsc_request hs_init_req;
struct hv_storvsc_request hs_reset_req;
@@ -124,7 +153,7 @@ struct storvsc_softc {
* The first can be tested by "sg_senddiag -vv /dev/daX",
* and the second and third can be done by
* "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
- */
+ */
#define HVS_TIMEOUT_TEST 0
/*
@@ -138,7 +167,7 @@ struct storvsc_driver_props {
char *drv_name;
char *drv_desc;
uint8_t drv_max_luns_per_target;
- uint8_t drv_max_ios_per_target;
+ uint8_t drv_max_ios_per_target;
uint32_t drv_ringbuffer_size;
};
@@ -150,6 +179,8 @@ enum hv_storage_type {
#define HS_MAX_ADAPTERS 10
+#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
+
/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
static const hv_guid gStorVscDeviceType={
.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
@@ -171,13 +202,16 @@ static struct storvsc_driver_props g_drv_props_table[] = {
STORVSC_RINGBUFFER_SIZE}
};
+static int storvsc_current_major;
+static int storvsc_current_minor;
+
/* static functions */
static int storvsc_probe(device_t dev);
static int storvsc_attach(device_t dev);
static int storvsc_detach(device_t dev);
static void storvsc_poll(struct cam_sim * sim);
static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
-static void create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
+static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
static enum hv_storage_type storvsc_get_storage_type(device_t dev);
static void hv_storvsc_on_channel_callback(void *context);
@@ -186,6 +220,14 @@ static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
struct hv_storvsc_request *request);
static int hv_storvsc_connect_vsp(struct hv_device *device);
static void storvsc_io_done(struct hv_storvsc_request *reqp);
+static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+ bus_dma_segment_t *orig_sgl,
+ unsigned int orig_sgl_count,
+ uint64_t seg_bits);
+void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+ unsigned int dest_sgl_count,
+ struct sglist* src_sgl,
+ uint64_t seg_bits);
static device_method_t storvsc_methods[] = {
/* Device interface */
@@ -207,7 +249,7 @@ MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
/**
- * The host is capable of sending messages to us that are
+ * The host is capable of sending messages to us that are
* completely unsolicited. So, we need to address the race
* condition where we may be in the process of unloading the
* driver when the host may send us an unsolicited message.
@@ -223,7 +265,7 @@ MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
* destroyed.
*
* 3. Once the device is marked as being destroyed, we only
- * permit incoming traffic to properly account for
+ * permit incoming traffic to properly account for
* packets already sent out.
*/
static inline struct storvsc_softc *
@@ -260,6 +302,113 @@ get_stor_device(struct hv_device *device,
}
/**
+ * @brief Callback handler, invoked when a multi-channel offer is received
+ *
+ * @param context the new multi-channel
+ */
+static void
+storvsc_handle_sc_creation(void *context)
+{
+ hv_vmbus_channel *new_channel;
+ struct hv_device *device;
+ struct storvsc_softc *sc;
+ struct vmstor_chan_props props;
+ int ret = 0;
+
+ new_channel = (hv_vmbus_channel *)context;
+ device = new_channel->primary_channel->device;
+ sc = get_stor_device(device, TRUE);
+ if (sc == NULL)
+ return;
+
+ if (FALSE == sc->hs_open_multi_channel)
+ return;
+
+ memset(&props, 0, sizeof(props));
+
+ ret = hv_vmbus_channel_open(new_channel,
+ sc->hs_drv_props->drv_ringbuffer_size,
+ sc->hs_drv_props->drv_ringbuffer_size,
+ (void *)&props,
+ sizeof(struct vmstor_chan_props),
+ hv_storvsc_on_channel_callback,
+ new_channel);
+
+ return;
+}
+
+/**
+ * @brief Send multi-channel creation request to host
+ *
+ * @param device a Hyper-V device pointer
+ * @param max_chans the max channels supported by vmbus
+ */
+static void
+storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
+{
+ struct storvsc_softc *sc;
+ struct hv_storvsc_request *request;
+ struct vstor_packet *vstor_packet;
+ int request_channels_cnt = 0;
+ int ret;
+
+	/* compute how many multi-channels to request */
+ request_channels_cnt = MIN(max_chans, mp_ncpus);
+
+ sc = get_stor_device(dev, TRUE);
+ if (sc == NULL) {
+		printf("Storvsc_error: get sc failed while sending "
+		    "multichannel request\n");
+ return;
+ }
+
+ request = &sc->hs_init_req;
+
+ /* Establish a handler for multi-channel */
+ dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
+
+ /* request the host to create multi-channel */
+ memset(request, 0, sizeof(struct hv_storvsc_request));
+
+ sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
+
+ vstor_packet = &request->vstor_packet;
+
+ vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
+ vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+ vstor_packet->u.multi_channels_cnt = request_channels_cnt;
+
+ ret = hv_vmbus_channel_send_packet(
+ dev->channel,
+ vstor_packet,
+ sizeof(struct vstor_packet),
+ (uint64_t)(uintptr_t)request,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+ HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+
+ /* wait for 5 seconds */
+ ret = sema_timedwait(&request->synch_sema, 5 * hz);
+ if (ret != 0) {
+ printf("Storvsc_error: create multi-channel timeout, %d\n",
+ ret);
+ return;
+ }
+
+ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+ vstor_packet->status != 0) {
+		printf("Storvsc_error: create multi-channel invalid operation "
+		    "(%d) or status (%u)\n",
+ vstor_packet->operation, vstor_packet->status);
+ return;
+ }
+
+ sc->hs_open_multi_channel = TRUE;
+
+ if (bootverbose)
+ printf("Storvsc create multi-channel success!\n");
+}
+
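The function above also illustrates the synchronous request idiom used throughout this file: initialize the semaphore to 0, send the packet with completion requested, then block up to five seconds for the channel callback to sema_post() the same semaphore. Stripped to its core (error handling trimmed; names as in the diff):

	sema_init(&request->synch_sema, 0, "stor_synch_sema");
	ret = hv_vmbus_channel_send_packet(dev->channel, vstor_packet,
	    sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret == 0)
		ret = sema_timedwait(&request->synch_sema, 5 * hz);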
+/**
* @brief initialize channel connection to parent partition
*
* @param dev a Hyper-V device pointer
@@ -272,11 +421,15 @@ hv_storvsc_channel_init(struct hv_device *dev)
struct hv_storvsc_request *request;
struct vstor_packet *vstor_packet;
struct storvsc_softc *sc;
+ uint16_t max_chans = 0;
+ boolean_t support_multichannel = FALSE;
+
sc = get_stor_device(dev, TRUE);
- if (sc == NULL) {
- return ENODEV;
- }
+ if (sc == NULL)
+ return (ENODEV);
request = &sc->hs_init_req;
memset(request, 0, sizeof(struct hv_storvsc_request));
@@ -300,15 +453,13 @@ hv_storvsc_channel_init(struct hv_device *dev)
HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- if (ret != 0) {
+ if (ret != 0)
goto cleanup;
- }
-
- ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
- if (ret != 0) {
+ /* wait 5 seconds */
+ ret = sema_timedwait(&request->synch_sema, 5 * hz);
+ if (ret != 0)
goto cleanup;
- }
if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
vstor_packet->status != 0) {
@@ -321,7 +472,8 @@ hv_storvsc_channel_init(struct hv_device *dev)
vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
vstor_packet->flags = REQUEST_COMPLETION_FLAG;
- vstor_packet->u.version.major_minor = VMSTOR_PROTOCOL_VERSION_CURRENT;
+ vstor_packet->u.version.major_minor =
+ VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor);
/* revision is only significant for Windows guests */
vstor_packet->u.version.revision = 0;
@@ -334,21 +486,19 @@ hv_storvsc_channel_init(struct hv_device *dev)
HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- if (ret != 0) {
+ if (ret != 0)
goto cleanup;
- }
- ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+ /* wait 5 seconds */
+ ret = sema_timedwait(&request->synch_sema, 5 * hz);
- if (ret) {
+ if (ret)
goto cleanup;
- }
/* TODO: Check returned version */
if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
- vstor_packet->status != 0) {
+ vstor_packet->status != 0)
goto cleanup;
- }
/**
* Query channel properties
@@ -365,22 +515,30 @@ hv_storvsc_channel_init(struct hv_device *dev)
HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
- if ( ret != 0) {
+ if ( ret != 0)
goto cleanup;
- }
- ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+ /* wait 5 seconds */
+ ret = sema_timedwait(&request->synch_sema, 5 * hz);
- if (ret != 0) {
+ if (ret != 0)
goto cleanup;
- }
/* TODO: Check returned version */
if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
- vstor_packet->status != 0) {
+ vstor_packet->status != 0) {
goto cleanup;
}
+	/* the multi-channel feature is supported by Win8 and newer hosts */
+ max_chans = vstor_packet->u.chan_props.max_channel_cnt;
+ if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
+ (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
+ (vstor_packet->u.chan_props.flags &
+ HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
+ support_multichannel = TRUE;
+ }
+
memset(vstor_packet, 0, sizeof(struct vstor_packet));
vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
vstor_packet->flags = REQUEST_COMPLETION_FLAG;
@@ -397,16 +555,22 @@ hv_storvsc_channel_init(struct hv_device *dev)
goto cleanup;
}
- ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+ /* wait 5 seconds */
+ ret = sema_timedwait(&request->synch_sema, 5 * hz);
- if (ret != 0) {
+ if (ret != 0)
goto cleanup;
- }
if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
- vstor_packet->status != 0) {
+ vstor_packet->status != 0)
goto cleanup;
- }
+
+ /*
+ * If multi-channel is supported, send multichannel create
+ * request to host.
+ */
+ if (support_multichannel)
+ storvsc_send_multichannel_request(dev, max_chans);
cleanup:
sema_destroy(&request->synch_sema);
@@ -443,8 +607,7 @@ hv_storvsc_connect_vsp(struct hv_device *dev)
(void *)&props,
sizeof(struct vmstor_chan_props),
hv_storvsc_on_channel_callback,
- dev);
-
+ dev->channel);
if (ret != 0) {
return ret;
@@ -490,7 +653,7 @@ hv_storvsc_host_reset(struct hv_device *dev)
goto cleanup;
}
- ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+ ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */
if (ret) {
goto cleanup;
@@ -498,7 +661,7 @@ hv_storvsc_host_reset(struct hv_device *dev)
/*
- * At this point, all outstanding requests in the adapter
+ * At this point, all outstanding requests in the adapter
* should have been flushed out and return to us
*/
@@ -521,6 +684,7 @@ hv_storvsc_io_request(struct hv_device *device,
{
struct storvsc_softc *sc;
struct vstor_packet *vstor_packet = &request->vstor_packet;
+ struct hv_vmbus_channel* outgoing_channel = NULL;
int ret = 0;
sc = get_stor_device(device, TRUE);
@@ -539,19 +703,20 @@ hv_storvsc_io_request(struct hv_device *device,
vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
+ outgoing_channel = vmbus_select_outgoing_channel(device->channel);
mtx_unlock(&request->softc->hs_lock);
if (request->data_buf.length) {
ret = hv_vmbus_channel_send_packet_multipagebuffer(
- device->channel,
+ outgoing_channel,
&request->data_buf,
- vstor_packet,
- sizeof(struct vstor_packet),
+ vstor_packet,
+ sizeof(struct vstor_packet),
(uint64_t)(uintptr_t)request);
} else {
ret = hv_vmbus_channel_send_packet(
- device->channel,
+ outgoing_channel,
vstor_packet,
sizeof(struct vstor_packet),
(uint64_t)(uintptr_t)request,
@@ -610,7 +775,8 @@ static void
hv_storvsc_on_channel_callback(void *context)
{
int ret = 0;
- struct hv_device *device = (struct hv_device *)context;
+ hv_vmbus_channel *channel = (hv_vmbus_channel *)context;
+ struct hv_device *device = NULL;
struct storvsc_softc *sc;
uint32_t bytes_recvd;
uint64_t request_id;
@@ -618,15 +784,22 @@ hv_storvsc_on_channel_callback(void *context)
struct hv_storvsc_request *request;
struct vstor_packet *vstor_packet;
+ if (channel->primary_channel != NULL){
+ device = channel->primary_channel->device;
+ } else {
+ device = channel->device;
+ }
+
+ KASSERT(device, ("device is NULL"));
+
sc = get_stor_device(device, FALSE);
if (sc == NULL) {
+ printf("Storvsc_error: get stor device failed.\n");
return;
}
- KASSERT(device, ("device"));
-
ret = hv_vmbus_channel_recv_packet(
- device->channel,
+ channel,
packet,
roundup2(sizeof(struct vstor_packet), 8),
&bytes_recvd,
@@ -634,21 +807,28 @@ hv_storvsc_on_channel_callback(void *context)
while ((ret == 0) && (bytes_recvd > 0)) {
request = (struct hv_storvsc_request *)(uintptr_t)request_id;
- KASSERT(request, ("request"));
if ((request == &sc->hs_init_req) ||
(request == &sc->hs_reset_req)) {
memcpy(&request->vstor_packet, packet,
sizeof(struct vstor_packet));
- sema_post(&request->synch_sema);
+ sema_post(&request->synch_sema);
} else {
vstor_packet = (struct vstor_packet *)packet;
switch(vstor_packet->operation) {
case VSTOR_OPERATION_COMPLETEIO:
+ if (request == NULL)
+ panic("VMBUS: storvsc received a "
+ "packet with NULL request id in "
+ "COMPLETEIO operation.");
+
hv_storvsc_on_iocompletion(sc,
vstor_packet, request);
break;
case VSTOR_OPERATION_REMOVEDEVICE:
+ case VSTOR_OPERATION_ENUMERATE_BUS:
+ printf("VMBUS: storvsc operation %d not "
+ "implemented.\n", vstor_packet->operation);
/* TODO: implement */
break;
default:
@@ -656,7 +836,7 @@ hv_storvsc_on_channel_callback(void *context)
}
}
ret = hv_vmbus_channel_recv_packet(
- device->channel,
+ channel,
packet,
roundup2(sizeof(struct vstor_packet), 8),
&bytes_recvd,
@@ -680,7 +860,16 @@ storvsc_probe(device_t dev)
{
int ata_disk_enable = 0;
int ret = ENXIO;
-
+
+ if ((HV_VMBUS_VERSION_WIN8 == hv_vmbus_protocal_version) ||
+ (HV_VMBUS_VERSION_WIN8_1 == hv_vmbus_protocal_version)){
+ storvsc_current_major = STORVSC_WIN8_MAJOR;
+ storvsc_current_minor = STORVSC_WIN8_MINOR;
+ } else {
+ storvsc_current_major = STORVSC_WIN7_MAJOR;
+ storvsc_current_minor = STORVSC_WIN7_MINOR;
+ }
+
switch (storvsc_get_storage_type(dev)) {
case DRIVER_BLKVSC:
if(bootverbose)
@@ -721,9 +910,11 @@ storvsc_attach(device_t dev)
enum hv_storage_type stor_type;
struct storvsc_softc *sc;
struct cam_devq *devq;
- int ret, i;
+ int ret, i, j;
struct hv_storvsc_request *reqp;
struct root_hold_token *root_mount_token = NULL;
+ struct hv_sgl_node *sgl_node = NULL;
+ void *tmp_buff = NULL;
/*
* We need to serialize storvsc attach calls.
@@ -764,8 +955,41 @@ storvsc_attach(device_t dev)
LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
}
+ /* create sg-list page pool */
+ if (FALSE == g_hv_sgl_page_pool.is_init) {
+ g_hv_sgl_page_pool.is_init = TRUE;
+ LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
+ LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
+
+ /*
+		 * Pre-create the SG lists: each list has
+		 * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, and each
+		 * segment is backed by one page buffer.
+ */
+ for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
+ sgl_node = malloc(sizeof(struct hv_sgl_node),
+ M_DEVBUF, M_WAITOK|M_ZERO);
+
+ sgl_node->sgl_data =
+ sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
+ M_WAITOK|M_ZERO);
+
+ for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
+ tmp_buff = malloc(PAGE_SIZE,
+ M_DEVBUF, M_WAITOK|M_ZERO);
+
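+				/*
+				 * Note: ss_paddr holds the buffer's kernel VA
+				 * at this point; it is translated with
+				 * vtophys() at I/O time and free()d on the
+				 * teardown paths below.
+				 */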
+ sgl_node->sgl_data->sg_segs[j].ss_paddr =
+ (vm_paddr_t)tmp_buff;
+ }
+
+ LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
+ sgl_node, link);
+ }
+ }
+
sc->hs_destroy = FALSE;
sc->hs_drain_notify = FALSE;
+ sc->hs_open_multi_channel = FALSE;
sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
ret = hv_storvsc_connect_vsp(hv_dev);
@@ -834,6 +1058,20 @@ cleanup:
LIST_REMOVE(reqp, link);
free(reqp, M_DEVBUF);
}
+
+ while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+ sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+ LIST_REMOVE(sgl_node, link);
+ for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
+ if (NULL !=
+ (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+ free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+ }
+ }
+ sglist_free(sgl_node->sgl_data);
+ free(sgl_node, M_DEVBUF);
+ }
+
return (ret);
}
@@ -853,6 +1091,8 @@ storvsc_detach(device_t dev)
struct storvsc_softc *sc = device_get_softc(dev);
struct hv_storvsc_request *reqp = NULL;
struct hv_device *hv_device = vmbus_get_devctx(dev);
+ struct hv_sgl_node *sgl_node = NULL;
+ int j = 0;
mtx_lock(&hv_device->channel->inbound_lock);
sc->hs_destroy = TRUE;
@@ -884,6 +1124,20 @@ storvsc_detach(device_t dev)
free(reqp, M_DEVBUF);
}
mtx_unlock(&sc->hs_lock);
+
+ while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+ sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+ LIST_REMOVE(sgl_node, link);
+ for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
+ if (NULL !=
+ (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+ free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+ }
+ }
+ sglist_free(sgl_node->sgl_data);
+ free(sgl_node, M_DEVBUF);
+ }
+
return (0);
}
@@ -939,7 +1193,7 @@ storvsc_timeout_test(struct hv_storvsc_request *reqp,
ticks, __func__, (ret == 0)?
"IO return detected" :
"IO return not detected");
- /*
+ /*
* Now both the timer handler and io done are running
* simultaneously. We want to confirm the io done always
* finishes after the timer handler exits. So reqp used by
@@ -1023,7 +1277,7 @@ storvsc_poll(struct cam_sim *sim)
mtx_assert(&sc->hs_lock, MA_OWNED);
mtx_unlock(&sc->hs_lock);
- hv_storvsc_on_channel_callback(sc->hs_dev);
+ hv_storvsc_on_channel_callback(sc->hs_dev->channel);
mtx_lock(&sc->hs_lock);
}
@@ -1151,9 +1405,13 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
bzero(reqp, sizeof(struct hv_storvsc_request));
reqp->softc = sc;
-
- ccb->ccb_h.status |= CAM_SIM_QUEUED;
- create_storvsc_request(ccb, reqp);
+
+ ccb->ccb_h.status |= CAM_SIM_QUEUED;
+ if ((res = create_storvsc_request(ccb, reqp)) != 0) {
+ ccb->ccb_h.status = CAM_REQ_INVALID;
+ xpt_done(ccb);
+ return;
+ }
if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
callout_init(&reqp->callout, CALLOUT_MPSAFE);
@@ -1194,6 +1452,212 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
}
/**
+ * @brief destroy bounce buffer
+ *
+ * This function destroys a scatter/gather list that was created by
+ * storvsc_create_bounce_buffer().
+ *
+ * @param sgl - the scatter/gather list to return to the pool
+ *
+ */
+static void
+storvsc_destroy_bounce_buffer(struct sglist *sgl)
+{
+ struct hv_sgl_node *sgl_node = NULL;
+
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
+	/* check for NULL before LIST_REMOVE() dereferences the node */
+	if (NULL == sgl_node) {
+		printf("storvsc error: not enough in use sgl\n");
+		return;
+	}
+	LIST_REMOVE(sgl_node, link);
+ sgl_node->sgl_data = sgl;
+ LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
+}
+
+/**
+ * @brief create bounce buffer
+ *
+ * This function creates a scatter/gather list whose segments are
+ * backed by page-sized buffers.
+ *
+ * @param seg_count - number of SG-list segments
+ * @param write - if WRITE_TYPE, set each segment's used size to 0,
+ *                otherwise set it to the page size.
+ *
+ * return NULL if creation failed
+ */
+static struct sglist *
+storvsc_create_bounce_buffer(uint16_t seg_count, int write)
+{
+ int i = 0;
+ struct sglist *bounce_sgl = NULL;
+ unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
+ struct hv_sgl_node *sgl_node = NULL;
+
+ /* get struct sglist from free_sgl_list */
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+	/* check for NULL before LIST_REMOVE() dereferences the node */
+	if (NULL == sgl_node) {
+		printf("storvsc error: not enough free sgl\n");
+		return NULL;
+	}
+	LIST_REMOVE(sgl_node, link);
+ bounce_sgl = sgl_node->sgl_data;
+ LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
+
+ bounce_sgl->sg_maxseg = seg_count;
+
+ if (write == WRITE_TYPE)
+ bounce_sgl->sg_nseg = 0;
+ else
+ bounce_sgl->sg_nseg = seg_count;
+
+ for (i = 0; i < seg_count; i++)
+ bounce_sgl->sg_segs[i].ss_len = buf_len;
+
+ return bounce_sgl;
+}
+
+/**
+ * @brief copy data from SG list to bounce buffer
+ *
+ * This function copies data from one SG list's segments to another
+ * SG list that is used as a bounce buffer.
+ *
+ * @param bounce_sgl - the destination SG list
+ * @param orig_sgl - the segments of the source SG list.
+ * @param orig_sgl_count - the count of segments.
+ * @param seg_bits - bitmask of segments that need the bounce buffer;
+ *                   a set bit means the segment is copied.
+ *
+ */
+static void
+storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+ bus_dma_segment_t *orig_sgl,
+ unsigned int orig_sgl_count,
+ uint64_t seg_bits)
+{
+ int src_sgl_idx = 0;
+
+ for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
+		if (seg_bits & (1ULL << src_sgl_idx)) {
+ memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
+ (void*)orig_sgl[src_sgl_idx].ds_addr,
+ orig_sgl[src_sgl_idx].ds_len);
+
+ bounce_sgl->sg_segs[src_sgl_idx].ss_len =
+ orig_sgl[src_sgl_idx].ds_len;
+ }
+ }
+}
+
+/**
+ * @brief copy data from a bounce-buffer SG list back to another SG list
+ *
+ * This function copies data from an SG list used as a bounce buffer
+ * back into another SG list's segments.
+ *
+ * @param dest_sgl - the destination SG list's segments
+ * @param dest_sgl_count - the count of destination SG list's segment.
+ * @param src_sgl - the source SG list.
+ * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
+ *
+ */
+void
+storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+ unsigned int dest_sgl_count,
+ struct sglist* src_sgl,
+ uint64_t seg_bits)
+{
+ int sgl_idx = 0;
+
+ for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
+		if (seg_bits & (1ULL << sgl_idx)) {
+ memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
+ (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
+ src_sgl->sg_segs[sgl_idx].ss_len);
+ }
+ }
+}
+
+/**
+ * @brief check whether an SG list needs a bounce buffer
+ *
+ * This function checks whether a bounce buffer is needed for the SG list.
+ *
+ * @param sgl - the SG list's segments
+ * @param sg_count - the count of SG list segments.
+ * @param bits - bitmask of segments that need a bounce buffer
+ *
+ * return -1 if the SG list does not need a bounce buffer
+ */
+static int
+storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
+ unsigned int sg_count,
+ uint64_t *bits)
+{
+ int i = 0;
+ int offset = 0;
+ uint64_t phys_addr = 0;
+ uint64_t tmp_bits = 0;
+ boolean_t found_hole = FALSE;
+ boolean_t pre_aligned = TRUE;
+
+	if (sg_count < 2) {
+		return (-1);
+ }
+
+ *bits = 0;
+
+ phys_addr = vtophys(sgl[0].ds_addr);
+ offset = phys_addr - trunc_page(phys_addr);
+
+ if (offset != 0) {
+ pre_aligned = FALSE;
+ tmp_bits |= 1;
+ }
+
+ for (i = 1; i < sg_count; i++) {
+ phys_addr = vtophys(sgl[i].ds_addr);
+ offset = phys_addr - trunc_page(phys_addr);
+
+ if (offset == 0) {
+			if (FALSE == pre_aligned) {
+				/*
+				 * This segment is aligned; if the previous
+				 * one was not, we have found a hole.
+				 */
+ found_hole = TRUE;
+ }
+ pre_aligned = TRUE;
+ } else {
+			tmp_bits |= 1ULL << i;
+ if (!pre_aligned) {
+ if (phys_addr != vtophys(sgl[i-1].ds_addr +
+ sgl[i-1].ds_len)) {
+ /*
+					 * Check whether this segment is
+					 * contiguous with the previous one;
+					 * if not, we have found a hole.
+ */
+ found_hole = TRUE;
+ }
+ } else {
+ found_hole = TRUE;
+ }
+ pre_aligned = FALSE;
+ }
+ }
+
+ if (!found_hole) {
+ return (-1);
+ } else {
+ *bits = tmp_bits;
+ return 0;
+ }
+}
+
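A worked example of the hole test above (addresses invented, and treated as if ds_addr were already the translated address for simplicity): with seg0 at 0x1000 for 0x800 bytes and seg1 at 0x2800 for 0x800 bytes, seg0 is page aligned (bit 0 stays clear) while seg1 starts mid-page, so bit 1 is set; because the previous segment was aligned, found_hole becomes TRUE and a bounce buffer is required:

	bus_dma_segment_t sgl[2];
	uint64_t bits;

	sgl[0].ds_addr = 0x1000; sgl[0].ds_len = 0x800;	/* page aligned    */
	sgl[1].ds_addr = 0x2800; sgl[1].ds_len = 0x800;	/* starts mid-page */
	if (storvsc_check_bounce_buffer_sgl(sgl, 2, &bits) == 0) {
		/* bits == 0x2: only segment 1 goes through the bounce copy */
	}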
+/**
* @brief Fill in a request structure based on a CAM control block
*
* Fills in a request structure based on the contents of a CAM control
@@ -1203,7 +1667,7 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
 * @param ccb pointer to a CAM control block
* @param reqp pointer to a request structure
*/
-static void
+static int
create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
{
struct ccb_scsiio *csio = &ccb->csio;
@@ -1211,6 +1675,7 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
uint32_t bytes_to_copy = 0;
uint32_t pfn_num = 0;
uint32_t pfn;
+ uint64_t not_aligned_seg_bits = 0;
/* refer to struct vmscsi_req for meanings of these two fields */
reqp->vstor_packet.u.vm_srb.port =
@@ -1231,48 +1696,172 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
}
switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
- case CAM_DIR_OUT:
- reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
- break;
- case CAM_DIR_IN:
- reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
- break;
- case CAM_DIR_NONE:
- reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
- break;
- default:
- reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
- break;
+ case CAM_DIR_OUT:
+ reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
+ break;
+ case CAM_DIR_IN:
+ reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
+ break;
+ case CAM_DIR_NONE:
+ reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+ break;
+ default:
+ reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+ break;
}
reqp->sense_data = &csio->sense_data;
reqp->sense_info_len = csio->sense_len;
reqp->ccb = ccb;
- /*
- KASSERT((ccb->ccb_h.flags & CAM_SCATTER_VALID) == 0,
- ("ccb is scatter gather valid\n"));
- */
- if (csio->dxfer_len != 0) {
- reqp->data_buf.length = csio->dxfer_len;
+
+ if (0 == csio->dxfer_len) {
+ return (0);
+ }
+
+ reqp->data_buf.length = csio->dxfer_len;
+
+ switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
+ case CAM_DATA_VADDR:
+ {
bytes_to_copy = csio->dxfer_len;
phys_addr = vtophys(csio->data_ptr);
- reqp->data_buf.offset = phys_addr - trunc_page(phys_addr);
+ reqp->data_buf.offset = phys_addr & PAGE_MASK;
+
+ while (bytes_to_copy != 0) {
+ int bytes, page_offset;
+ phys_addr =
+ vtophys(&csio->data_ptr[reqp->data_buf.length -
+ bytes_to_copy]);
+ pfn = phys_addr >> PAGE_SHIFT;
+ reqp->data_buf.pfn_array[pfn_num] = pfn;
+ page_offset = phys_addr & PAGE_MASK;
+
+ bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
+
+ bytes_to_copy -= bytes;
+ pfn_num++;
+ }
+ break;
}
- while (bytes_to_copy != 0) {
- int bytes, page_offset;
- phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length -
- bytes_to_copy]);
- pfn = phys_addr >> PAGE_SHIFT;
- reqp->data_buf.pfn_array[pfn_num] = pfn;
- page_offset = phys_addr - trunc_page(phys_addr);
+ case CAM_DATA_SG:
+ {
+ int i = 0;
+ int offset = 0;
+ int ret;
+
+ bus_dma_segment_t *storvsc_sglist =
+ (bus_dma_segment_t *)ccb->csio.data_ptr;
+ u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
+
+		printf("Storvsc: got SG I/O operation, %d\n",
+ reqp->vstor_packet.u.vm_srb.data_in);
+
+		if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT) {
+			printf("Storvsc: %d segments is too many; "
+			    "only %d segments are supported\n",
+ storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
+ return (EINVAL);
+ }
+
+ /*
+		 * We currently roll our own bounce buffer code. Ideally
+		 * we should use the BUS_DMA(9) framework, but the current
+		 * BUS_DMA code has no callback API for checking the page
+		 * alignment of intermediate segments before busdma decides
+		 * whether a bounce buffer is needed for a particular
+		 * segment. There is a callback, "bus_dma_filter_t *filter",
+		 * but its parameters are not sufficient for this driver.
+ * TODO:
+ * Add page alignment check in BUS_DMA(9) callback. Once
+ * this is complete, switch the following code to use
+ * BUS_DMA(9) for storvsc bounce buffer support.
+ */
+ /* check if we need to create bounce buffer */
+ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
+ storvsc_sg_count, &not_aligned_seg_bits);
+ if (ret != -1) {
+ reqp->bounce_sgl =
+ storvsc_create_bounce_buffer(storvsc_sg_count,
+ reqp->vstor_packet.u.vm_srb.data_in);
+ if (NULL == reqp->bounce_sgl) {
+ printf("Storvsc_error: "
+ "create bounce buffer failed.\n");
+ return (ENOMEM);
+ }
+
+ reqp->bounce_sgl_count = storvsc_sg_count;
+ reqp->not_aligned_seg_bits = not_aligned_seg_bits;
+
+ /*
+			 * If this is a write, copy the original data
+			 * to the bounce buffer.
+ */
+ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+ storvsc_copy_sgl_to_bounce_buf(
+ reqp->bounce_sgl,
+ storvsc_sglist,
+ storvsc_sg_count,
+ reqp->not_aligned_seg_bits);
+ }
+
+			/* translate virtual addresses to physical frame numbers */
+			if (reqp->not_aligned_seg_bits & 0x1) {
+				phys_addr =
+				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
+			} else {
+ phys_addr =
+ vtophys(storvsc_sglist[0].ds_addr);
+ }
+ reqp->data_buf.offset = phys_addr & PAGE_MASK;
+
+ pfn = phys_addr >> PAGE_SHIFT;
+ reqp->data_buf.pfn_array[0] = pfn;
+
+ for (i = 1; i < storvsc_sg_count; i++) {
+				if (reqp->not_aligned_seg_bits & (1ULL << i)) {
+ phys_addr =
+ vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
+ } else {
+ phys_addr =
+ vtophys(storvsc_sglist[i].ds_addr);
+ }
+
+ pfn = phys_addr >> PAGE_SHIFT;
+ reqp->data_buf.pfn_array[i] = pfn;
+ }
+ } else {
+ phys_addr = vtophys(storvsc_sglist[0].ds_addr);
+
+ reqp->data_buf.offset = phys_addr & PAGE_MASK;
- bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
+ for (i = 0; i < storvsc_sg_count; i++) {
+ phys_addr = vtophys(storvsc_sglist[i].ds_addr);
+ pfn = phys_addr >> PAGE_SHIFT;
+ reqp->data_buf.pfn_array[i] = pfn;
+ }
- bytes_to_copy -= bytes;
- pfn_num++;
+			/* check whether the last segment crosses a page boundary */
+ offset = phys_addr & PAGE_MASK;
+ if (offset) {
+ phys_addr =
+ vtophys(storvsc_sglist[i-1].ds_addr +
+ PAGE_SIZE - offset);
+ pfn = phys_addr >> PAGE_SHIFT;
+ reqp->data_buf.pfn_array[i] = pfn;
+ }
+
+ reqp->bounce_sgl_count = 0;
+ }
+ break;
+ }
+ default:
+		printf("Unknown flags: %d\n", ccb->ccb_h.flags);
+		return (EINVAL);
}
+
+	return (0);
}
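The PFN-array fill in both branches above is the same page arithmetic each time; as a quick sanity check (4 KB pages assumed):

	/* phys_addr = 0x12345 -> pfn 0x12, page offset 0x345 */
	pfn = phys_addr >> PAGE_SHIFT;	/* 0x12345 >> 12  == 0x12  */
	offset = phys_addr & PAGE_MASK;	/* 0x12345 & 0xfff == 0x345 */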
/**
@@ -1291,7 +1880,29 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
struct ccb_scsiio *csio = &ccb->csio;
struct storvsc_softc *sc = reqp->softc;
struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
-
+ bus_dma_segment_t *ori_sglist = NULL;
+ int ori_sg_count = 0;
+
+ /* destroy bounce buffer if it is used */
+ if (reqp->bounce_sgl_count) {
+ ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
+ ori_sg_count = ccb->csio.sglist_cnt;
+
+ /*
+ * If it is READ operation, we should copy back the data
+ * to original SG list.
+ */
+ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+ storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
+ ori_sg_count,
+ reqp->bounce_sgl,
+ reqp->not_aligned_seg_bits);
+ }
+
+ storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
+ reqp->bounce_sgl_count = 0;
+ }
+
if (reqp->retries > 0) {
mtx_lock(&sc->hs_lock);
#if HVS_TIMEOUT_TEST
@@ -1309,7 +1920,7 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
mtx_unlock(&sc->hs_lock);
}
- /*
+ /*
* callout_drain() will wait for the timer handler to finish
* if it is running. So we don't need any lock to synchronize
* between this routine and the timer handler.
diff --git a/sys/dev/hyperv/storvsc/hv_vstorage.h b/sys/dev/hyperv/storvsc/hv_vstorage.h
index 2632676..deb9183 100644
--- a/sys/dev/hyperv/storvsc/hv_vstorage.h
+++ b/sys/dev/hyperv/storvsc/hv_vstorage.h
@@ -53,7 +53,7 @@
* V1 RC > 2008/1/31 2.0
*/
-#define VMSTOR_PROTOCOL_VERSION_CURRENT VMSTOR_PROTOCOL_VERSION(2, 0)
+#define VMSTOR_PROTOCOL_VERSION_CURRENT VMSTOR_PROTOCOL_VERSION(5, 1)
/**
* Packet structure ops describing virtual storage requests.
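The VMSTOR_PROTOCOL_VERSION() macro itself is defined earlier in this header and is not shown in the hunk; assuming it packs the major revision into the high byte and the minor into the low byte, the bump reads as 2.0 -> 5.1 (the Win8-era protocol). An illustrative stand-in (the name and packing are assumptions, not the header's macro):

	#define EXAMPLE_VMSTOR_VERSION(maj, min) \
		((((maj) & 0xff) << 8) | ((min) & 0xff))
	/* EXAMPLE_VMSTOR_VERSION(5, 1) == 0x0501 */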
@@ -69,7 +69,10 @@ enum vstor_packet_ops {
VSTOR_OPERATION_ENDINITIALIZATION = 8,
VSTOR_OPERATION_QUERYPROTOCOLVERSION = 9,
VSTOR_OPERATION_QUERYPROPERTIES = 10,
- VSTOR_OPERATION_MAXIMUM = 10
+ VSTOR_OPERATION_ENUMERATE_BUS = 11,
+ VSTOR_OPERATION_FCHBA_DATA = 12,
+ VSTOR_OPERATION_CREATE_MULTI_CHANNELS = 13,
+ VSTOR_OPERATION_MAXIMUM = 13
};
@@ -123,10 +126,12 @@ struct vmstor_chan_props {
uint8_t path_id;
uint8_t target_id;
+ uint16_t max_channel_cnt;
+
/**
* Note: port number is only really known on the client side
*/
- uint32_t port;
+ uint16_t port;
uint32_t flags;
uint32_t max_transfer_bytes;
@@ -193,6 +198,11 @@ struct vstor_packet {
* Used during version negotiations.
*/
struct vmstor_proto_ver version;
+
+ /**
+ * Number of multichannels to create
+ */
+ uint16_t multi_channels_cnt;
} u;
} __packed;
diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c
index 848d364..4598510 100644
--- a/sys/dev/hyperv/utilities/hv_kvp.c
+++ b/sys/dev/hyperv/utilities/hv_kvp.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/_null.h>
#include <sys/signal.h>
#include <sys/syslog.h>
+#include <sys/systm.h>
#include <sys/mutex.h>
#include <net/if_arp.h>
@@ -232,7 +233,7 @@ hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp,
*/
if ((icframe_vercnt >= 2) && (negop->icversion_data[1].major == 3)) {
icframe_vercnt = 3;
- if (icmsg_vercnt >= 2)
+ if (icmsg_vercnt > 2)
icmsg_vercnt = 4;
else
icmsg_vercnt = 3;
@@ -734,8 +735,8 @@ hv_kvp_process_request(void *context)
recvlen = 0;
ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE,
&recvlen, &requestid);
- hv_kvp_log_info("%s: read: context %p, pending_cnt %ju ret =%d, recvlen=%d\n",
- __func__, context, pending_cnt, ret, recvlen);
+ hv_kvp_log_info("%s: read: context %p, pending_cnt %llu ret =%d, recvlen=%d\n",
+ __func__, context, (unsigned long long)pending_cnt, ret, recvlen);
}
}
@@ -813,9 +814,9 @@ static void
hv_kvp_dev_destroy(void)
{
- if (daemon_task != NULL) {
+ if (daemon_task != NULL) {
PROC_LOCK(daemon_task);
- kern_psignal(daemon_task, SIGKILL);
+ kern_psignal(daemon_task, SIGKILL);
PROC_UNLOCK(daemon_task);
}
diff --git a/sys/dev/hyperv/utilities/hv_util.c b/sys/dev/hyperv/utilities/hv_util.c
index 3e545cf..dc4b1e2 100644
--- a/sys/dev/hyperv/utilities/hv_util.c
+++ b/sys/dev/hyperv/utilities/hv_util.c
@@ -408,6 +408,15 @@ hv_util_attach(device_t dev)
}
}
+ /*
+ * These services are not performance critical and do not need
+ * batched reading. Furthermore, some services such as KVP can
+ * only handle one message from the host at a time.
+ * Turn off batched reading for all util drivers before we open the
+ * channel.
+ */
+ hv_set_channel_read_state(hv_dev->channel, FALSE);
+
ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE,
4 * PAGE_SIZE, NULL, 0,
service->callback, hv_dev->channel);
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c
index 103260a..94137fb 100644
--- a/sys/dev/hyperv/vmbus/hv_channel.c
+++ b/sys/dev/hyperv/vmbus/hv_channel.c
@@ -75,7 +75,7 @@ vmbus_channel_set_event(hv_vmbus_channel *channel)
(uint32_t *)&monitor_page->
trigger_group[channel->monitor_group].u.pending);
} else {
- hv_vmbus_set_event(channel->offer_msg.child_rel_id);
+ hv_vmbus_set_event(channel);
}
}
@@ -99,6 +99,18 @@ hv_vmbus_channel_open(
hv_vmbus_channel_open_channel* open_msg;
hv_vmbus_channel_msg_info* open_info;
+ mtx_lock(&new_channel->sc_lock);
+ if (new_channel->state == HV_CHANNEL_OPEN_STATE) {
+ new_channel->state = HV_CHANNEL_OPENING_STATE;
+ } else {
+ mtx_unlock(&new_channel->sc_lock);
+ if(bootverbose)
+			printf("VMBUS: Trying to open channel <%p> which is "
+			    "in state %d.\n", new_channel, new_channel->state);
+ return (EINVAL);
+ }
+ mtx_unlock(&new_channel->sc_lock);
+
new_channel->on_channel_callback = pfn_on_channel_callback;
new_channel->channel_callback_context = context;
@@ -162,7 +174,7 @@ hv_vmbus_channel_open(
new_channel->ring_buffer_gpadl_handle;
open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size
>> PAGE_SHIFT;
- open_msg->server_context_area_gpadl_handle = 0;
+ open_msg->target_vcpu = new_channel->target_vcpu;
if (user_data_len)
memcpy(open_msg->user_data, user_data, user_data_len);
@@ -182,10 +194,14 @@ hv_vmbus_channel_open(
ret = sema_timedwait(&open_info->wait_sema, 500); /* KYS 5 seconds */
- if (ret)
+ if (ret) {
+ if(bootverbose)
+ printf("VMBUS: channel <%p> open timeout.\n", new_channel);
goto cleanup;
+ }
if (open_info->response.open_result.status == 0) {
+ new_channel->state = HV_CHANNEL_OPENED_STATE;
if(bootverbose)
printf("VMBUS: channel <%p> open success.\n", new_channel);
} else {
@@ -497,16 +513,20 @@ cleanup:
return (ret);
}
-/**
- * @brief Close the specified channel
- */
-void
-hv_vmbus_channel_close(hv_vmbus_channel *channel)
+static void
+hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
{
int ret = 0;
hv_vmbus_channel_close_channel* msg;
hv_vmbus_channel_msg_info* info;
+ channel->state = HV_CHANNEL_OPEN_STATE;
+ channel->sc_creation_callback = NULL;
+
+ /*
+	 * Grab the lock to prevent a race between receiving a packet
+	 * and unloading the driver.
+ */
mtx_lock(&channel->inbound_lock);
channel->on_channel_callback = NULL;
mtx_unlock(&channel->inbound_lock);
@@ -545,23 +565,37 @@ hv_vmbus_channel_close(hv_vmbus_channel *channel)
M_DEVBUF);
free(info, M_DEVBUF);
+}
+
+/**
+ * @brief Close the specified channel
+ */
+void
+hv_vmbus_channel_close(hv_vmbus_channel *channel)
+{
+ hv_vmbus_channel* sub_channel;
+
+ if (channel->primary_channel != NULL) {
+ /*
+ * We only close multi-channels when the primary is
+ * closed.
+ */
+ return;
+ }
/*
- * If we are closing the channel during an error path in
- * opening the channel, don't free the channel
- * since the caller will free the channel
+ * Close all multi-channels first.
*/
- if (channel->state == HV_CHANNEL_OPEN_STATE) {
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
- TAILQ_REMOVE(
- &hv_vmbus_g_connection.channel_anchor,
- channel,
- list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
-
- hv_vmbus_free_vmbus_channel(channel);
+ TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor,
+ sc_list_entry) {
+ if (sub_channel->state != HV_CHANNEL_OPENED_STATE)
+ continue;
+ hv_vmbus_channel_close_internal(sub_channel);
}
-
+ /*
+ * Then close the primary channel.
+ */
+ hv_vmbus_channel_close_internal(channel);
}
/**
@@ -581,6 +615,7 @@ hv_vmbus_channel_send_packet(
uint32_t packet_len;
uint64_t aligned_data;
uint32_t packet_len_aligned;
+ boolean_t need_sig;
hv_vmbus_sg_buffer_list buffer_list[3];
packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
@@ -604,12 +639,11 @@ hv_vmbus_channel_send_packet(
buffer_list[2].data = &aligned_data;
buffer_list[2].length = packet_len_aligned - packet_len;
- ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+ ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
+ &need_sig);
/* TODO: We should determine if this is optional */
- if (ret == 0
- && !hv_vmbus_get_ring_buffer_interrupt_mask(
- &channel->outbound)) {
+ if (ret == 0 && need_sig) {
vmbus_channel_set_event(channel);
}
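All three send paths in hv_channel.c now share the same signal-on-demand shape: hv_ring_buffer_write() reports through its new out-parameter whether the host actually needs to be signalled, instead of the caller re-reading the ring's interrupt mask, and the event is raised only in that case:

	boolean_t need_sig;

	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
	    &need_sig);
	if (ret == 0 && need_sig)
		vmbus_channel_set_event(channel);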
@@ -632,6 +666,7 @@ hv_vmbus_channel_send_packet_pagebuffer(
int ret = 0;
int i = 0;
+ boolean_t need_sig;
uint32_t packet_len;
uint32_t packetLen_aligned;
hv_vmbus_sg_buffer_list buffer_list[3];
@@ -675,11 +710,11 @@ hv_vmbus_channel_send_packet_pagebuffer(
buffer_list[2].data = &alignedData;
buffer_list[2].length = packetLen_aligned - packet_len;
- ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+ ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
+ &need_sig);
/* TODO: We should determine if this is optional */
- if (ret == 0 &&
- !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) {
+ if (ret == 0 && need_sig) {
vmbus_channel_set_event(channel);
}
@@ -700,6 +735,7 @@ hv_vmbus_channel_send_packet_multipagebuffer(
int ret = 0;
uint32_t desc_size;
+ boolean_t need_sig;
uint32_t packet_len;
uint32_t packet_len_aligned;
uint32_t pfn_count;
@@ -750,11 +786,11 @@ hv_vmbus_channel_send_packet_multipagebuffer(
buffer_list[2].data = &aligned_data;
buffer_list[2].length = packet_len_aligned - packet_len;
- ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+ ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
+ &need_sig);
/* TODO: We should determine if this is optional */
- if (ret == 0 &&
- !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) {
+ if (ret == 0 && need_sig) {
vmbus_channel_set_event(channel);
}
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
index 011e305..783f6bc 100644
--- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -50,6 +50,8 @@ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_process_offer(void *context);
+struct hv_vmbus_channel*
+	vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary);
/**
* Channel message dispatch table
@@ -233,6 +235,9 @@ hv_vmbus_allocate_channel(void)
return (NULL);
mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
+ mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
+
+ TAILQ_INIT(&channel->sc_list_anchor);
channel->control_work_queue = hv_work_queue_create("control");
@@ -262,6 +267,7 @@ ReleaseVmbusChannel(void *context)
void
hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
{
+ mtx_destroy(&channel->sc_lock);
mtx_destroy(&channel->inbound_lock);
/*
* We have to release the channel's workqueue/thread in
@@ -279,10 +285,10 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
static void
vmbus_channel_process_offer(void *context)
{
- int ret;
hv_vmbus_channel* new_channel;
boolean_t f_new;
hv_vmbus_channel* channel;
+ int ret;
new_channel = (hv_vmbus_channel*) context;
f_new = TRUE;
@@ -291,38 +297,76 @@ vmbus_channel_process_offer(void *context)
/*
* Make sure this is a new offer
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
list_entry)
{
- if (!memcmp(
- &channel->offer_msg.offer.interface_type,
- &new_channel->offer_msg.offer.interface_type,
- sizeof(hv_guid))
- && !memcmp(
- &channel->offer_msg.offer.interface_instance,
+ if (memcmp(&channel->offer_msg.offer.interface_type,
+ &new_channel->offer_msg.offer.interface_type,
+ sizeof(hv_guid)) == 0 &&
+ memcmp(&channel->offer_msg.offer.interface_instance,
&new_channel->offer_msg.offer.interface_instance,
- sizeof(hv_guid))) {
- f_new = FALSE;
- break;
- }
+ sizeof(hv_guid)) == 0) {
+ f_new = FALSE;
+ break;
+ }
}
if (f_new) {
- /* Insert at tail */
- TAILQ_INSERT_TAIL(
- &hv_vmbus_g_connection.channel_anchor,
- new_channel,
- list_entry);
+ /* Insert at tail */
+ TAILQ_INSERT_TAIL(
+ &hv_vmbus_g_connection.channel_anchor,
+ new_channel,
+ list_entry);
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+ /*XXX add new channel to percpu_list */
if (!f_new) {
+ /*
+ * Check if this is a sub channel.
+ */
+ if (new_channel->offer_msg.offer.sub_channel_index != 0) {
+ /*
+ * It is a sub channel offer, process it.
+ */
+ new_channel->primary_channel = channel;
+ mtx_lock(&channel->sc_lock);
+ TAILQ_INSERT_TAIL(
+ &channel->sc_list_anchor,
+ new_channel,
+ sc_list_entry);
+ mtx_unlock(&channel->sc_lock);
+
+ /* Insert new channel into channel_anchor. */
+			printf("Storvsc got multi-channel offer, rel=%u.\n",
+ new_channel->offer_msg.child_rel_id);
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
+ TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
+ new_channel, list_entry);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+			if (bootverbose)
+ printf("VMBUS: new multi-channel offer <%p>.\n",
+ new_channel);
+
+ /*XXX add it to percpu_list */
+
+ new_channel->state = HV_CHANNEL_OPEN_STATE;
+ if (channel->sc_creation_callback != NULL) {
+ channel->sc_creation_callback(new_channel);
+ }
+ return;
+ }
+
hv_vmbus_free_vmbus_channel(new_channel);
return;
}
+ new_channel->state = HV_CHANNEL_OPEN_STATE;
+
/*
* Start the process of binding this offer to the driver
* (We need to set the device field before calling
@@ -333,35 +377,86 @@ vmbus_channel_process_offer(void *context)
new_channel->offer_msg.offer.interface_instance, new_channel);
/*
- * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below
- * but in the "open" channel request. The ret != 0 logic below
- * doesn't take into account that a channel
- * may have been opened successfully
- */
-
- /*
* Add the new device to the bus. This will kick off device-driver
* binding which eventually invokes the device driver's AddDevice()
* method.
*/
ret = hv_vmbus_child_device_register(new_channel->device);
if (ret != 0) {
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
- TAILQ_REMOVE(
- &hv_vmbus_g_connection.channel_anchor,
- new_channel,
- list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
- hv_vmbus_free_vmbus_channel(new_channel);
- } else {
- /*
- * This state is used to indicate a successful open
- * so that when we do close the channel normally,
- * we can clean up properly
- */
- new_channel->state = HV_CHANNEL_OPEN_STATE;
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
+ TAILQ_REMOVE(
+ &hv_vmbus_g_connection.channel_anchor,
+ new_channel,
+ list_entry);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+ hv_vmbus_free_vmbus_channel(new_channel);
+ }
+}
+
+/**
+ * Array of device guids that are performance critical. We try to distribute
+ * the interrupt load for these devices across all online cpus.
+ */
+static const hv_guid high_perf_devices[] = {
+ {HV_NIC_GUID, },
+ {HV_IDE_GUID, },
+ {HV_SCSI_GUID, },
+};
+
+enum {
+ PERF_CHN_NIC = 0,
+ PERF_CHN_IDE,
+ PERF_CHN_SCSI,
+ MAX_PERF_CHN,
+};
+/*
+ * We use this static number to distribute the channel interrupt load.
+ */
+static uint32_t next_vcpu;
+
+/**
+ * Starting with Win8, we can statically distribute the incoming
+ * channel interrupt load by binding a channel to a VCPU. Here we
+ * implement a simple round-robin scheme for distributing the
+ * interrupt load.
+ * Channels that are not performance critical are bound to cpu 0,
+ * while performance critical channels (IDE, SCSI and Network) are
+ * distributed uniformly across all available CPUs.
+ */
+static void
+vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
+{
+ uint32_t current_cpu;
+ int i;
+ boolean_t is_perf_channel = FALSE;
+
+ for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
+ if (memcmp(guid->data, high_perf_devices[i].data,
+ sizeof(hv_guid)) == 0) {
+ is_perf_channel = TRUE;
+ break;
+ }
+ }
+
+ if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+ (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
+ (!is_perf_channel)) {
+ /* Host's view of guest cpu */
+ channel->target_vcpu = 0;
+ /* Guest's own view of cpu */
+ channel->target_cpu = 0;
+ return;
}
+	/* mp_ncpus should have the number of cpus currently online */
+ current_cpu = (++next_vcpu % mp_ncpus);
+ channel->target_cpu = current_cpu;
+ channel->target_vcpu =
+ hv_vmbus_g_context.hv_vcpu_index[current_cpu];
+ if (bootverbose)
+ printf("VMBUS: Total online cpus %d, assign perf channel %d "
+ "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu,
+ current_cpu);
}
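
As a standalone illustration of the round-robin assignment above (not driver code): because the counter is pre-incremented before the modulo, with mp_ncpus == 4 successive performance-critical channels land on cpus 1, 2, 3, 0, 1, and so on:

static uint32_t next_vcpu_demo;

static uint32_t
pick_cpu_demo(uint32_t ncpus)
{
	/* Mirrors: current_cpu = (++next_vcpu % mp_ncpus); */
	return (++next_vcpu_demo % ncpus);
}
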
/**
@@ -391,6 +486,38 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
if (new_channel == NULL)
return;
+ /*
+ * By default we setup state to enable batched
+ * reading. A specific service can choose to
+ * disable this prior to opening the channel.
+ */
+ new_channel->batched_reading = TRUE;
+
+ new_channel->signal_event_param =
+ (hv_vmbus_input_signal_event *)
+ (HV_ALIGN_UP((unsigned long)
+ &new_channel->signal_event_buffer,
+ HV_HYPERCALL_PARAM_ALIGN));
+
+ new_channel->signal_event_param->connection_id.as_uint32_t = 0;
+ new_channel->signal_event_param->connection_id.u.id =
+ HV_VMBUS_EVENT_CONNECTION_ID;
+ new_channel->signal_event_param->flag_number = 0;
+ new_channel->signal_event_param->rsvd_z = 0;
+
+ if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
+ new_channel->is_dedicated_interrupt =
+ (offer->is_dedicated_interrupt != 0);
+ new_channel->signal_event_param->connection_id.u.id =
+ offer->connection_id;
+ }
+
+ /*
+ * Bind the channel to a chosen cpu.
+ */
+ vmbus_channel_select_cpu(new_channel,
+ &offer->offer.interface_type);
+
memcpy(&new_channel->offer_msg, offer,
sizeof(hv_vmbus_channel_offer_channel));
new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
@@ -666,7 +793,7 @@ hv_vmbus_release_unattached_channels(void)
{
hv_vmbus_channel *channel;
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
@@ -676,5 +803,61 @@ hv_vmbus_release_unattached_channels(void)
hv_vmbus_child_device_unregister(channel->device);
hv_vmbus_free_vmbus_channel(channel);
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+}
+
+/**
+ * @brief Select the best outgoing channel
+ *
+ * The channel whose vcpu binding is closest to the current vcpu will
+ * be selected.
+ * If there are no sub-channels, the primary channel is always selected.
+ *
+ * @param primary - primary channel
+ */
+struct hv_vmbus_channel *
+vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
+{
+ hv_vmbus_channel *new_channel = NULL;
+ hv_vmbus_channel *outgoing_channel = primary;
+ int old_cpu_distance = 0;
+ int new_cpu_distance = 0;
+ int cur_vcpu = 0;
+ int smp_pro_id = PCPU_GET(cpuid);
+
+ if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
+ return outgoing_channel;
+ }
+
+ if (smp_pro_id >= MAXCPU) {
+ return outgoing_channel;
+ }
+
+ cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id];
+
+ TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
+		if (new_channel->state != HV_CHANNEL_OPENED_STATE) {
+ continue;
+ }
+
+		if (new_channel->target_vcpu == cur_vcpu) {
+ return new_channel;
+ }
+
+ old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
+ (outgoing_channel->target_vcpu - cur_vcpu) :
+ (cur_vcpu - outgoing_channel->target_vcpu));
+
+ new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
+ (new_channel->target_vcpu - cur_vcpu) :
+ (cur_vcpu - new_channel->target_vcpu));
+
+ if (old_cpu_distance < new_cpu_distance) {
+ continue;
+ }
+
+ outgoing_channel = new_channel;
+ }
+
+ return(outgoing_channel);
}
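
A hypothetical caller sketch: a storage or network driver would pick the channel bound closest to the submitting cpu before writing a packet. The helper name is illustrative, and the packet type and flag constants are assumed from the existing vmbus API; only vmbus_select_outgoing_channel() is introduced by this patch:

static int
xmit_on_best_channel(struct hv_vmbus_channel *primary, void *pkt,
    uint32_t len, uint64_t req_id)
{
	struct hv_vmbus_channel *outgoing;

	/* Falls back to the primary channel when no sub-channel fits. */
	outgoing = vmbus_select_outgoing_channel(primary);
	return (hv_vmbus_channel_send_packet(outgoing, pkt, len, req_id,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED));
}
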
diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c
index c8e0b48..0300828 100644
--- a/sys/dev/hyperv/vmbus/hv_connection.c
+++ b/sys/dev/hyperv/vmbus/hv_connection.c
@@ -45,14 +45,113 @@ hv_vmbus_connection hv_vmbus_g_connection =
{ .connect_state = HV_DISCONNECTED,
.next_gpadl_handle = 0xE1E10, };
+uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
+
+static uint32_t
+hv_vmbus_get_next_version(uint32_t current_ver)
+{
+ switch (current_ver) {
+ case (HV_VMBUS_VERSION_WIN7):
+ return(HV_VMBUS_VERSION_WS2008);
+
+ case (HV_VMBUS_VERSION_WIN8):
+ return(HV_VMBUS_VERSION_WIN7);
+
+ case (HV_VMBUS_VERSION_WIN8_1):
+ return(HV_VMBUS_VERSION_WIN8);
+
+ case (HV_VMBUS_VERSION_WS2008):
+ default:
+ return(HV_VMBUS_VERSION_INVALID);
+ }
+}
+
+/**
+ * Negotiate the highest supported hypervisor version.
+ */
+static int
+hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
+ uint32_t version)
+{
+ int ret = 0;
+ hv_vmbus_channel_initiate_contact *msg;
+
+ sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
+ msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
+
+ msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
+ msg->vmbus_version_requested = version;
+
+ msg->interrupt_page = hv_get_phys_addr(
+ hv_vmbus_g_connection.interrupt_page);
+
+ msg->monitor_page_1 = hv_get_phys_addr(
+ hv_vmbus_g_connection.monitor_pages);
+
+ msg->monitor_page_2 =
+ hv_get_phys_addr(
+ ((uint8_t *) hv_vmbus_g_connection.monitor_pages
+ + PAGE_SIZE));
+
+ /**
+ * Add to list before we send the request since we may receive the
+ * response before returning from this routine
+ */
+ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+ TAILQ_INSERT_TAIL(
+ &hv_vmbus_g_connection.channel_msg_anchor,
+ msg_info,
+ msg_list_entry);
+
+ mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+ ret = hv_vmbus_post_message(
+ msg,
+ sizeof(hv_vmbus_channel_initiate_contact));
+
+ if (ret != 0) {
+ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ TAILQ_REMOVE(
+ &hv_vmbus_g_connection.channel_msg_anchor,
+ msg_info,
+ msg_list_entry);
+ mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ return (ret);
+ }
+
+ /**
+ * Wait for the connection response
+ */
+ ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */
+
+ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ TAILQ_REMOVE(
+ &hv_vmbus_g_connection.channel_msg_anchor,
+ msg_info,
+ msg_list_entry);
+ mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+ /**
+ * Check if successful
+ */
+ if (msg_info->response.version_response.version_supported) {
+ hv_vmbus_g_connection.connect_state = HV_CONNECTED;
+ } else {
+ ret = ECONNREFUSED;
+ }
+
+ return (ret);
+}
+
/**
* Send a connect request on the partition service connection
*/
int
hv_vmbus_connect(void) {
int ret = 0;
+ uint32_t version;
hv_vmbus_channel_msg_info* msg_info = NULL;
- hv_vmbus_channel_initiate_contact* msg;
/**
* Make sure we are not connecting or connected
@@ -74,7 +173,7 @@ hv_vmbus_connect(void) {
TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
- NULL, MTX_SPIN);
+ NULL, MTX_DEF);
/**
* Setup the vmbus event connection for channel interrupt abstraction
@@ -130,71 +229,30 @@ hv_vmbus_connect(void) {
goto cleanup;
}
- sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
- msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
-
- msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
- msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER;
-
- msg->interrupt_page = hv_get_phys_addr(
- hv_vmbus_g_connection.interrupt_page);
-
- msg->monitor_page_1 = hv_get_phys_addr(
- hv_vmbus_g_connection.monitor_pages);
-
- msg->monitor_page_2 =
- hv_get_phys_addr(
- ((uint8_t *) hv_vmbus_g_connection.monitor_pages
- + PAGE_SIZE));
-
- /**
- * Add to list before we send the request since we may receive the
- * response before returning from this routine
+ /*
+ * Find the highest vmbus version number we can support.
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
-
- TAILQ_INSERT_TAIL(
- &hv_vmbus_g_connection.channel_msg_anchor,
- msg_info,
- msg_list_entry);
-
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
-
- ret = hv_vmbus_post_message(
- msg,
- sizeof(hv_vmbus_channel_initiate_contact));
-
- if (ret != 0) {
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
- TAILQ_REMOVE(
- &hv_vmbus_g_connection.channel_msg_anchor,
- msg_info,
- msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
- goto cleanup;
- }
+ version = HV_VMBUS_VERSION_CURRENT;
+
+ do {
+ ret = hv_vmbus_negotiate_version(msg_info, version);
+ if (ret == EWOULDBLOCK) {
+ /*
+ * We timed out.
+ */
+ goto cleanup;
+ }
- /**
- * Wait for the connection response
- */
- ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */
+ if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
+ break;
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
- TAILQ_REMOVE(
- &hv_vmbus_g_connection.channel_msg_anchor,
- msg_info,
- msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ version = hv_vmbus_get_next_version(version);
+ } while (version != HV_VMBUS_VERSION_INVALID);
- /**
- * Check if successful
- */
- if (msg_info->response.version_response.version_supported) {
- hv_vmbus_g_connection.connect_state = HV_CONNECTED;
- } else {
- ret = ECONNREFUSED;
- goto cleanup;
- }
+ hv_vmbus_protocal_version = version;
+ if (bootverbose)
+		printf("VMBUS: Protocol Version: %d.%d\n",
+ version >> 16, version & 0xFFFF);
sema_destroy(&msg_info->wait_sema);
free(msg_info, M_DEVBUF);
@@ -286,7 +344,7 @@ hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
* and channels are accessed without the need to take this lock or search
* the list.
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_lock);
TAILQ_FOREACH(channel,
&hv_vmbus_g_connection.channel_anchor, list_entry) {
@@ -295,7 +353,7 @@ hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
break;
}
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_lock);
return (foundChannel);
}
@@ -306,7 +364,10 @@ hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
static void
VmbusProcessChannelEvent(uint32_t relid)
{
+ void* arg;
+ uint32_t bytes_to_read;
hv_vmbus_channel* channel;
+ boolean_t is_batched_reading;
/**
* Find the channel based on this relid and invokes
@@ -327,31 +388,98 @@ VmbusProcessChannelEvent(uint32_t relid)
* callback to NULL. This closes the window.
*/
- mtx_lock(&channel->inbound_lock);
+ /*
+ * Disable the lock due to newly added WITNESS check in r277723.
+	 * Will look for another way to avoid the race condition.
+ * -- whu
+ */
+ // mtx_lock(&channel->inbound_lock);
if (channel->on_channel_callback != NULL) {
- channel->on_channel_callback(channel->channel_callback_context);
+ arg = channel->channel_callback_context;
+ is_batched_reading = channel->batched_reading;
+ /*
+ * Optimize host to guest signaling by ensuring:
+ * 1. While reading the channel, we disable interrupts from
+ * host.
+ * 2. Ensure that we process all posted messages from the host
+ * before returning from this callback.
+ * 3. Once we return, enable signaling from the host. Once this
+ * state is set we check to see if additional packets are
+ * available to read. In this case we repeat the process.
+ */
+ do {
+ if (is_batched_reading)
+ hv_ring_buffer_read_begin(&channel->inbound);
+
+ channel->on_channel_callback(arg);
+
+ if (is_batched_reading)
+ bytes_to_read =
+ hv_ring_buffer_read_end(&channel->inbound);
+ else
+ bytes_to_read = 0;
+ } while (is_batched_reading && (bytes_to_read != 0));
}
- mtx_unlock(&channel->inbound_lock);
+ // mtx_unlock(&channel->inbound_lock);
}
+#ifdef HV_DEBUG_INTR
+extern uint32_t hv_intr_count;
+extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
+extern uint32_t hv_vmbus_intr_cpu[MAXCPU];
+#endif
+
/**
* Handler for events
*/
void
hv_vmbus_on_events(void *arg)
{
- int dword;
int bit;
+ int cpu;
+ int dword;
+ void *page_addr;
+ uint32_t* recv_interrupt_page = NULL;
int rel_id;
- int maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
+ int maxdword;
+ hv_vmbus_synic_event_flags *event;
/* int maxdword = PAGE_SIZE >> 3; */
- /*
- * receive size is 1/2 page and divide that by 4 bytes
- */
-
- uint32_t* recv_interrupt_page =
- hv_vmbus_g_connection.recv_interrupt_page;
+ cpu = (int)(long)arg;
+ KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
+ "cpu out of range!"));
+
+#ifdef HV_DEBUG_INTR
+ int i;
+ hv_vmbus_swintr_event_cpu[cpu]++;
+ if (hv_intr_count % 10000 == 0) {
+ printf("VMBUS: Total interrupt %d\n", hv_intr_count);
+ for (i = 0; i < mp_ncpus; i++)
+ printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n",
+ i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]);
+ }
+#endif
+
+ if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+ (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
+ maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
+ /*
+ * receive size is 1/2 page and divide that by 4 bytes
+ */
+ recv_interrupt_page =
+ hv_vmbus_g_connection.recv_interrupt_page;
+ } else {
+ /*
+ * On Host with Win8 or above, the event page can be
+ * checked directly to get the id of the channel
+ * that has the pending interrupt.
+ */
+ maxdword = HV_EVENT_FLAGS_DWORD_COUNT;
+ page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
+ event = (hv_vmbus_synic_event_flags *)
+ page_addr + HV_VMBUS_MESSAGE_SINT;
+ recv_interrupt_page = event->flags32;
+ }
/*
* Check events
@@ -416,16 +544,16 @@ int hv_vmbus_post_message(void *buffer, size_t bufferLen) {
* Send an event notification to the parent
*/
int
-hv_vmbus_set_event(uint32_t child_rel_id) {
+hv_vmbus_set_event(hv_vmbus_channel *channel) {
int ret = 0;
+ uint32_t child_rel_id = channel->offer_msg.child_rel_id;
/* Each uint32_t represents 32 channels */
synch_set_bit(child_rel_id & 31,
(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
+ (child_rel_id >> 5))));
- ret = hv_vmbus_signal_event();
+ ret = hv_vmbus_signal_event(channel->signal_event_param);
return (ret);
}
-
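
For reference, the relid-to-bit mapping used above: each 32-bit word of the send interrupt page covers 32 channels, so relid 70 lands in word 70 >> 5 == 2, bit 70 & 31 == 6. A non-atomic sketch of the same arithmetic (the driver itself uses synch_set_bit() for atomicity):

static inline void
mark_pending_demo(uint32_t *interrupt_page, uint32_t relid)
{
	interrupt_page[relid >> 5] |= (1u << (relid & 31));
}
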
diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c
index 80a1f42..84e2a5e 100644
--- a/sys/dev/hyperv/vmbus/hv_hv.c
+++ b/sys/dev/hyperv/vmbus/hv_hv.c
@@ -67,8 +67,6 @@ static inline void do_cpuid_inline(unsigned int op, unsigned int *eax,
hv_vmbus_context hv_vmbus_g_context = {
.syn_ic_initialized = FALSE,
.hypercall_page = NULL,
- .signal_event_param = NULL,
- .signal_event_buffer = NULL,
};
static struct timecounter hv_timecounter = {
@@ -256,28 +254,6 @@ hv_vmbus_init(void)
hv_vmbus_g_context.hypercall_page = virt_addr;
- /*
- * Setup the global signal event param for the signal event hypercall
- */
- hv_vmbus_g_context.signal_event_buffer =
- malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF,
- M_ZERO | M_NOWAIT);
- KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL,
- ("Error VMBUS: Failed to allocate signal_event_buffer\n"));
- if (hv_vmbus_g_context.signal_event_buffer == NULL)
- goto cleanup;
-
- hv_vmbus_g_context.signal_event_param =
- (hv_vmbus_input_signal_event*)
- (HV_ALIGN_UP((unsigned long)
- hv_vmbus_g_context.signal_event_buffer,
- HV_HYPERCALL_PARAM_ALIGN));
- hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0;
- hv_vmbus_g_context.signal_event_param->connection_id.u.id =
- HV_VMBUS_EVENT_CONNECTION_ID;
- hv_vmbus_g_context.signal_event_param->flag_number = 0;
- hv_vmbus_g_context.signal_event_param->rsvd_z = 0;
-
tc_init(&hv_timecounter); /* register virtual timecount */
return (0);
@@ -303,12 +279,6 @@ hv_vmbus_cleanup(void)
{
hv_vmbus_x64_msr_hypercall_contents hypercall_msr;
- if (hv_vmbus_g_context.signal_event_buffer != NULL) {
- free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF);
- hv_vmbus_g_context.signal_event_buffer = NULL;
- hv_vmbus_g_context.signal_event_param = NULL;
- }
-
if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) {
if (hv_vmbus_g_context.hypercall_page != NULL) {
hypercall_msr.as_uint64_t = 0;
@@ -370,13 +340,13 @@ hv_vmbus_post_msg_via_msg_ipc(
* event IPC. (This involves a hypercall.)
*/
hv_vmbus_status
-hv_vmbus_signal_event()
+hv_vmbus_signal_event(void *con_id)
{
hv_vmbus_status status;
status = hv_vmbus_do_hypercall(
HV_CALL_SIGNAL_EVENT,
- hv_vmbus_g_context.signal_event_param,
+ con_id,
0) & 0xFFFF;
return (status);
@@ -390,6 +360,7 @@ hv_vmbus_synic_init(void *arg)
{
int cpu;
+ uint64_t hv_vcpu_index;
hv_vmbus_synic_simp simp;
hv_vmbus_synic_siefp siefp;
hv_vmbus_synic_scontrol sctrl;
@@ -403,23 +374,14 @@ hv_vmbus_synic_init(void *arg)
return;
/*
- * KYS: Looks like we can only initialize on cpu0; don't we support
- * SMP guests?
- *
- * TODO: Need to add SMP support for FreeBSD V9
- */
-
- if (cpu != 0)
- return;
-
- /*
* TODO: Check the version
*/
version = rdmsr(HV_X64_MSR_SVERSION);
-
- hv_vmbus_g_context.syn_ic_msg_page[cpu] = setup_args->page_buffers[0];
- hv_vmbus_g_context.syn_ic_event_page[cpu] = setup_args->page_buffers[1];
+ hv_vmbus_g_context.syn_ic_msg_page[cpu] =
+ setup_args->page_buffers[2 * cpu];
+ hv_vmbus_g_context.syn_ic_event_page[cpu] =
+ setup_args->page_buffers[2 * cpu + 1];
/*
* Setup the Synic's message page
@@ -443,9 +405,10 @@ hv_vmbus_synic_init(void *arg)
wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
/*HV_SHARED_SINT_IDT_VECTOR + 0x20; */
+ shared_sint.as_uint64_t = 0;
shared_sint.u.vector = setup_args->vector;
shared_sint.u.masked = FALSE;
- shared_sint.u.auto_eoi = FALSE;
+ shared_sint.u.auto_eoi = TRUE;
wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
shared_sint.as_uint64_t);
@@ -458,6 +421,13 @@ hv_vmbus_synic_init(void *arg)
hv_vmbus_g_context.syn_ic_initialized = TRUE;
+ /*
+	 * Set up the mapping from FreeBSD cpuid to Hyper-V vcpuid.
+	 * The array is indexed by FreeBSD cpuid.
+ */
+ hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX);
+ hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index;
+
return;
}
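
The per-cpu page layout assumed by the indexing above: page_buffers[] interleaves the SynIC message and event pages, two slots per cpu. A small sketch of accessors over the hv_setup_args type from this patch (the helper names are illustrative):

static inline void *
synic_msg_page(hv_setup_args *args, int cpu)
{
	return (args->page_buffers[2 * cpu]);		/* even slot */
}

static inline void *
synic_event_page(hv_setup_args *args, int cpu)
{
	return (args->page_buffers[2 * cpu + 1]);	/* odd slot */
}
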
@@ -469,14 +439,10 @@ void hv_vmbus_synic_cleanup(void *arg)
hv_vmbus_synic_sint shared_sint;
hv_vmbus_synic_simp simp;
hv_vmbus_synic_siefp siefp;
- int cpu = PCPU_GET(cpuid);
if (!hv_vmbus_g_context.syn_ic_initialized)
return;
- if (cpu != 0)
- return; /* TODO: XXXKYS: SMP? */
-
shared_sint.as_uint64_t = rdmsr(
HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT);
diff --git a/sys/dev/hyperv/vmbus/hv_ring_buffer.c b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
index f7c1965..5e4f52a 100644
--- a/sys/dev/hyperv/vmbus/hv_ring_buffer.c
+++ b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
@@ -144,6 +144,69 @@ get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
return (uint64_t) ring_info->ring_buffer->write_index << 32;
}
+void
+hv_ring_buffer_read_begin(
+ hv_vmbus_ring_buffer_info* ring_info)
+{
+ ring_info->ring_buffer->interrupt_mask = 1;
+ mb();
+}
+
+uint32_t
+hv_ring_buffer_read_end(
+ hv_vmbus_ring_buffer_info* ring_info)
+{
+ uint32_t read, write;
+
+ ring_info->ring_buffer->interrupt_mask = 0;
+ mb();
+
+ /*
+ * Now check to see if the ring buffer is still empty.
+ * If it is not, we raced and we need to process new
+ * incoming messages.
+ */
+ get_ring_buffer_avail_bytes(ring_info, &read, &write);
+
+ return (read);
+}
+
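
A minimal consumer sketch of how read_begin()/read_end() pair up; it mirrors the batched-reading loop in VmbusProcessChannelEvent() above. process_inbound() stands in for the channel callback and is a placeholder, not a real function:

static void process_inbound(hv_vmbus_channel *);	/* placeholder */

static void
drain_channel_demo(hv_vmbus_channel *channel)
{
	uint32_t pending;

	do {
		/* Mask host->guest signaling while we drain. */
		hv_ring_buffer_read_begin(&channel->inbound);
		process_inbound(channel);
		/*
		 * Unmask and re-check; a non-zero count means new data
		 * raced in after unmasking, so drain again.
		 */
		pending = hv_ring_buffer_read_end(&channel->inbound);
	} while (pending != 0);
}
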
+/*
+ * When we write to the ring buffer, check if the host needs to
+ * be signaled. Here are the details of this protocol:
+ *
+ * 1. The host guarantees that while it is draining the
+ * ring buffer, it will set the interrupt_mask to
+ * indicate it does not need to be interrupted when
+ * new data is placed.
+ *
+ * 2. The host guarantees that it will completely drain
+ * the ring buffer before exiting the read loop. Further,
+ * once the ring buffer is empty, it will clear the
+ * interrupt_mask and re-check to see if new data has
+ * arrived.
+ */
+static boolean_t
+hv_ring_buffer_needsig_on_write(
+ uint32_t old_write_location,
+ hv_vmbus_ring_buffer_info* rbi)
+{
+ mb();
+ if (rbi->ring_buffer->interrupt_mask)
+ return (FALSE);
+
+ /* Read memory barrier */
+ rmb();
+ /*
+ * This is the only case we need to signal when the
+ * ring transitions from being empty to non-empty.
+ */
+ if (old_write_location == rbi->ring_buffer->read_index)
+ return (TRUE);
+
+ return (FALSE);
+}
+
static uint32_t copy_to_ring_buffer(
hv_vmbus_ring_buffer_info* ring_info,
uint32_t start_write_offset,
@@ -204,11 +267,13 @@ int
hv_ring_buffer_write(
hv_vmbus_ring_buffer_info* out_ring_info,
hv_vmbus_sg_buffer_list sg_buffers[],
- uint32_t sg_buffer_count)
+ uint32_t sg_buffer_count,
+ boolean_t *need_sig)
{
int i = 0;
uint32_t byte_avail_to_write;
uint32_t byte_avail_to_read;
+ uint32_t old_write_location;
uint32_t total_bytes_to_write = 0;
volatile uint32_t next_write_location;
@@ -242,6 +307,8 @@ hv_ring_buffer_write(
*/
next_write_location = get_next_write_location(out_ring_info);
+ old_write_location = next_write_location;
+
for (i = 0; i < sg_buffer_count; i++) {
next_write_location = copy_to_ring_buffer(out_ring_info,
next_write_location, (char *) sg_buffers[i].data,
@@ -258,9 +325,9 @@ hv_ring_buffer_write(
(char *) &prev_indices, sizeof(uint64_t));
/*
- * Make sure we flush all writes before updating the writeIndex
+	 * Full memory barrier before updating the write index.
*/
- wmb();
+ mb();
/*
* Now, update the write location
@@ -269,6 +336,9 @@ hv_ring_buffer_write(
mtx_unlock_spin(&out_ring_info->ring_lock);
+ *need_sig = hv_ring_buffer_needsig_on_write(old_write_location,
+ out_ring_info);
+
return (0);
}
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
index ca28fd5..f9432c8 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
@@ -53,22 +53,17 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
#include <sys/pcpu.h>
+#include <x86/apicvar.h>
#include "hv_vmbus_priv.h"
#define VMBUS_IRQ 0x5
-static struct intr_event *hv_msg_intr_event;
-static struct intr_event *hv_event_intr_event;
-static void *msg_swintr;
-static void *event_swintr;
static device_t vmbus_devp;
-static void *vmbus_cookiep;
-static int vmbus_rid;
-struct resource *intr_res;
-static int vmbus_irq = VMBUS_IRQ;
static int vmbus_inited;
static hv_setup_args setup_args; /* only CPU 0 supported at this time */
@@ -77,14 +72,17 @@ static hv_setup_args setup_args; /* only CPU 0 supported at this time */
* the hypervisor.
*/
static void
-vmbus_msg_swintr(void *dummy)
+vmbus_msg_swintr(void *arg)
{
int cpu;
void* page_addr;
hv_vmbus_message* msg;
hv_vmbus_message* copied;
- cpu = PCPU_GET(cpuid);
+ cpu = (int)(long)arg;
+ KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
+ "cpu out of range!"));
+
page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
@@ -130,17 +128,8 @@ vmbus_msg_swintr(void *dummy)
*
* The purpose of this routine is to determine the type of VMBUS protocol
* message to process - an event or a channel message.
- * As this is an interrupt filter routine, the function runs in a very
- * restricted envinronment. From the manpage for bus_setup_intr(9)
- *
- * In this restricted environment, care must be taken to account for all
- * races. A careful analysis of races should be done as well. It is gener-
- * ally cheaper to take an extra interrupt, for example, than to protect
- * variables with spinlocks. Read, modify, write cycles of hardware regis-
- * ters need to be carefully analyzed if other threads are accessing the
- * same registers.
*/
-static int
+static inline int
hv_vmbus_isr(void *unused)
{
int cpu;
@@ -149,8 +138,6 @@ hv_vmbus_isr(void *unused)
void* page_addr;
cpu = PCPU_GET(cpuid);
- /* (Temporary limit) */
- KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
/*
* The Windows team has advised that we check for events
@@ -162,9 +149,21 @@ hv_vmbus_isr(void *unused)
event = (hv_vmbus_synic_event_flags*)
page_addr + HV_VMBUS_MESSAGE_SINT;
- /* Since we are a child, we only need to check bit 0 */
- if (synch_test_and_clear_bit(0, &event->flags32[0])) {
- swi_sched(event_swintr, 0);
+ if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+ (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
+ /* Since we are a child, we only need to check bit 0 */
+ if (synch_test_and_clear_bit(0, &event->flags32[0])) {
+ swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
+ }
+ } else {
+ /*
+ * On host with Win8 or above, we can directly look at
+ * the event page. If bit n is set, we have an interrupt
+ * on the channel with id n.
+ * Directly schedule the event software interrupt on
+ * current cpu.
+ */
+ swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
}
/* Check if there are actual msgs to be process */
@@ -172,12 +171,47 @@ hv_vmbus_isr(void *unused)
msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
- swi_sched(msg_swintr, 0);
+ swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
}
return FILTER_HANDLED;
}
+#ifdef HV_DEBUG_INTR
+uint32_t hv_intr_count = 0;
+#endif
+uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
+uint32_t hv_vmbus_intr_cpu[MAXCPU];
+
+void
+hv_vector_handler(struct trapframe *trap_frame)
+{
+#ifdef HV_DEBUG_INTR
+ int cpu;
+#endif
+
+ /*
+ * Disable preemption.
+ */
+ critical_enter();
+
+#ifdef HV_DEBUG_INTR
+ /*
+ * Do a little interrupt counting.
+ */
+ cpu = PCPU_GET(cpuid);
+ hv_vmbus_intr_cpu[cpu]++;
+ hv_intr_count++;
+#endif
+
+ hv_vmbus_isr(NULL);
+
+ /*
+ * Enable preemption.
+ */
+ critical_exit();
+}
+
static int
vmbus_read_ivar(
device_t dev,
@@ -316,6 +350,81 @@ vmbus_probe(device_t dev) {
return (BUS_PROBE_NOWILDCARD);
}
+#ifdef HYPERV
+extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
+
+/**
+ * @brief Find a free IDT slot and setup the interrupt handler.
+ */
+static int
+vmbus_vector_alloc(void)
+{
+ int vector;
+ uintptr_t func;
+ struct gate_descriptor *ip;
+
+ /*
+	 * Search backwards from the highest IDT vector available for use
+	 * as the vmbus channel callback vector. We install the
+	 * 'hv_vmbus_callback' handler at that vector and use it to
+	 * interrupt vcpus.
+ */
+ vector = APIC_SPURIOUS_INT;
+ while (--vector >= APIC_IPI_INTS) {
+ ip = &idt[vector];
+ func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+ if (func == (uintptr_t)&IDTVEC(rsvd)) {
+#ifdef __i386__
+			setidt(vector, IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
+ SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+			setidt(vector, IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
+ SEL_KPL, 0);
+#endif
+
+ return (vector);
+ }
+ }
+ return (0);
+}
+
+/**
+ * @brief Restore the IDT slot to rsvd.
+ */
+static void
+vmbus_vector_free(int vector)
+{
+ uintptr_t func;
+ struct gate_descriptor *ip;
+
+ if (vector == 0)
+ return;
+
+ KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
+ ("invalid vector %d", vector));
+
+ ip = &idt[vector];
+ func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+ KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
+ ("invalid vector %d", vector));
+
+ setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+}
+
+#else /* HYPERV */
+
+static int
+vmbus_vector_alloc(void)
+{
+ return(0);
+}
+
+static void
+vmbus_vector_free(int vector)
+{
+}
+
+#endif /* HYPERV */
+
/**
* @brief Main vmbus driver initialization routine.
*
@@ -331,22 +440,7 @@ vmbus_probe(device_t dev) {
static int
vmbus_bus_init(void)
{
- struct ioapic_intsrc {
- struct intsrc io_intsrc;
- u_int io_irq;
- u_int io_intpin:8;
- u_int io_vector:8;
- u_int io_cpu:8;
- u_int io_activehi:1;
- u_int io_edgetrigger:1;
- u_int io_masked:1;
- int io_bus:4;
- uint32_t io_lowreg;
- };
- int i, ret;
- unsigned int vector = 0;
- struct intsrc *isrc;
- struct ioapic_intsrc *intpin;
+ int i, j, n, ret;
if (vmbus_inited)
return (0);
@@ -361,80 +455,100 @@ vmbus_bus_init(void)
return (ret);
}
- ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
- NULL, SWI_CLOCK, 0, &msg_swintr);
-
- if (ret)
- goto cleanup;
-
/*
- * Message SW interrupt handler checks a per-CPU page and
- * thus the thread needs to be bound to CPU-0 - which is where
- * all interrupts are processed.
+ * Find a free IDT slot for vmbus callback.
*/
- ret = intr_event_bind(hv_msg_intr_event, 0);
-
- if (ret)
- goto cleanup1;
+ hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
- ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
- NULL, SWI_CLOCK, 0, &event_swintr);
-
- if (ret)
- goto cleanup1;
+ if (hv_vmbus_g_context.hv_cb_vector == 0) {
+		if (bootverbose)
+ printf("Error VMBUS: Cannot find free IDT slot for "
+ "vmbus callback!\n");
+ goto cleanup;
+ }
- intr_res = bus_alloc_resource(vmbus_devp,
- SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
+	if (bootverbose)
+ printf("VMBUS: vmbus callback vector %d\n",
+ hv_vmbus_g_context.hv_cb_vector);
- if (intr_res == NULL) {
- ret = ENOMEM; /* XXXKYS: Need a better errno */
- goto cleanup2;
+ /*
+ * Notify the hypervisor of our vector.
+ */
+ setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
+
+ CPU_FOREACH(j) {
+ hv_vmbus_intr_cpu[j] = 0;
+ hv_vmbus_swintr_event_cpu[j] = 0;
+ hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
+ hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
+ hv_vmbus_g_context.event_swintr[j] = NULL;
+ hv_vmbus_g_context.msg_swintr[j] = NULL;
+
+ for (i = 0; i < 2; i++)
+ setup_args.page_buffers[2 * j + i] = NULL;
}
/*
- * Setup interrupt filter handler
+ * Per cpu setup.
*/
- ret = bus_setup_intr(vmbus_devp, intr_res,
- INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
- NULL, &vmbus_cookiep);
-
- if (ret != 0)
- goto cleanup3;
-
- ret = bus_bind_intr(vmbus_devp, intr_res, 0);
- if (ret != 0)
- goto cleanup4;
-
- isrc = intr_lookup_source(vmbus_irq);
- if ((isrc == NULL) || (isrc->is_event == NULL)) {
- ret = EINVAL;
- goto cleanup4;
- }
+ CPU_FOREACH(j) {
+ /*
+ * Setup software interrupt thread and handler for msg handling.
+ */
+ ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
+ "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
+ &hv_vmbus_g_context.msg_swintr[j]);
+ if (ret) {
+			if (bootverbose)
+ printf("VMBUS: failed to setup msg swi for "
+ "cpu %d\n", j);
+ goto cleanup1;
+ }
- /* vector = isrc->is_event->ie_vector; */
- intpin = (struct ioapic_intsrc *)isrc;
- vector = intpin->io_vector;
+ /*
+ * Bind the swi thread to the cpu.
+ */
+ ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
+ j);
+ if (ret) {
+			if (bootverbose)
+ printf("VMBUS: failed to bind msg swi thread "
+ "to cpu %d\n", j);
+ goto cleanup1;
+ }
- if(bootverbose)
- printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
+ /*
+ * Setup software interrupt thread and handler for
+ * event handling.
+ */
+ ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j],
+ "hv_event", hv_vmbus_on_events, (void *)(long)j,
+ SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]);
+ if (ret) {
+			if (bootverbose)
+ printf("VMBUS: failed to setup event swi for "
+ "cpu %d\n", j);
+ goto cleanup1;
+ }
- /**
- * Notify the hypervisor of our irq.
- */
- setup_args.vector = vector;
- for(i = 0; i < 2; i++) {
- setup_args.page_buffers[i] =
+ /*
+		 * Prepare the per-cpu msg and event pages that
+		 * hv_vmbus_synic_init() consumes on each cpu.
+		 */
+		for (i = 0; i < 2; i++) {
+ setup_args.page_buffers[2 * j + i] =
malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (setup_args.page_buffers[i] == NULL) {
- KASSERT(setup_args.page_buffers[i] != NULL,
+ if (setup_args.page_buffers[2 * j + i] == NULL) {
+ KASSERT(setup_args.page_buffers[2 * j + i] != NULL,
("Error VMBUS: malloc failed!"));
- if (i > 0)
- free(setup_args.page_buffers[0], M_DEVBUF);
- goto cleanup4;
+ goto cleanup1;
+ }
}
}
- /* only CPU #0 supported at this time */
+ if (bootverbose)
+ printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
+ smp_started);
+
smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
/*
@@ -443,26 +557,32 @@ vmbus_bus_init(void)
ret = hv_vmbus_connect();
if (ret != 0)
- goto cleanup4;
+ goto cleanup1;
hv_vmbus_request_channel_offers();
return (ret);
- cleanup4:
-
+ cleanup1:
/*
- * remove swi, bus and intr resource
+	 * Free the pages allocated above.
*/
- bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
+ for (n = 0; n < 2 * MAXCPU; n++)
+ if (setup_args.page_buffers[n] != NULL)
+ free(setup_args.page_buffers[n], M_DEVBUF);
- cleanup3:
- bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
-
- cleanup2:
- swi_remove(event_swintr);
+ /*
+	 * Remove the swis and the vmbus callback vector.
+ */
+ CPU_FOREACH(j) {
+ if (hv_vmbus_g_context.msg_swintr[j] != NULL)
+ swi_remove(hv_vmbus_g_context.msg_swintr[j]);
+ if (hv_vmbus_g_context.event_swintr[j] != NULL)
+ swi_remove(hv_vmbus_g_context.event_swintr[j]);
+ hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
+ hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
+ }
- cleanup1:
- swi_remove(msg_swintr);
+ vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
cleanup:
hv_vmbus_cleanup();
@@ -515,20 +635,24 @@ vmbus_bus_exit(void)
smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
- for(i = 0; i < 2; i++) {
+	for (i = 0; i < 2 * MAXCPU; i++) {
if (setup_args.page_buffers[i] != 0)
free(setup_args.page_buffers[i], M_DEVBUF);
}
hv_vmbus_cleanup();
- /* remove swi, bus and intr resource */
- bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
-
- bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
+ /* remove swi */
+ CPU_FOREACH(i) {
+ if (hv_vmbus_g_context.msg_swintr[i] != NULL)
+ swi_remove(hv_vmbus_g_context.msg_swintr[i]);
+ if (hv_vmbus_g_context.event_swintr[i] != NULL)
+ swi_remove(hv_vmbus_g_context.event_swintr[i]);
+ hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
+ hv_vmbus_g_context.hv_event_intr_event[i] = NULL;
+ }
- swi_remove(msg_swintr);
- swi_remove(event_swintr);
+ vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
return;
}
@@ -603,6 +727,6 @@ devclass_t vmbus_devclass;
DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
MODULE_VERSION(vmbus,1);
-/* TODO: We want to be earlier than SI_SUB_VFS */
-SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
+/* We want to be started after SMP is initialized */
+SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
index 6bc875d..faa6dec 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
@@ -181,49 +181,30 @@ enum {
#define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t)
-/*
- * Connection identifier type
- */
-typedef union {
- uint32_t as_uint32_t;
- struct {
- uint32_t id:24;
- uint32_t reserved:8;
- } u;
-
-} __packed hv_vmbus_connection_id;
-
-/*
- * Definition of the hv_vmbus_signal_event hypercall input structure
- */
-typedef struct {
- hv_vmbus_connection_id connection_id;
- uint16_t flag_number;
- uint16_t rsvd_z;
-} __packed hv_vmbus_input_signal_event;
-
-typedef struct {
- uint64_t align8;
- hv_vmbus_input_signal_event event;
-} __packed hv_vmbus_input_signal_event_buffer;
-
typedef struct {
uint64_t guest_id;
void* hypercall_page;
hv_bool_uint8_t syn_ic_initialized;
+
+ hv_vmbus_handle syn_ic_msg_page[MAXCPU];
+ hv_vmbus_handle syn_ic_event_page[MAXCPU];
/*
- * This is used as an input param to HV_CALL_SIGNAL_EVENT hypercall.
- * The input param is immutable in our usage and
- * must be dynamic mem (vs stack or global).
+	 * Maps FreeBSD cpuid to Hyper-V vcpuid.
*/
- hv_vmbus_input_signal_event_buffer *signal_event_buffer;
+ uint32_t hv_vcpu_index[MAXCPU];
/*
- * 8-bytes aligned of the buffer above
+ * Each cpu has its own software interrupt handler for channel
+ * event and msg handling.
*/
- hv_vmbus_input_signal_event *signal_event_param;
-
- hv_vmbus_handle syn_ic_msg_page[MAXCPU];
- hv_vmbus_handle syn_ic_event_page[MAXCPU];
+ struct intr_event *hv_event_intr_event[MAXCPU];
+ struct intr_event *hv_msg_intr_event[MAXCPU];
+ void *event_swintr[MAXCPU];
+ void *msg_swintr[MAXCPU];
+ /*
+	 * The host uses this vector to interrupt the guest for vmbus
+	 * channel events and msgs.
+ */
+ unsigned int hv_cb_vector;
} hv_vmbus_context;
/*
@@ -368,7 +349,8 @@ typedef struct {
TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor;
struct mtx channel_msg_lock;
/**
- * List of channels
+ * List of primary channels. Sub channels will be linked
+ * under their primary channel.
*/
TAILQ_HEAD(, hv_vmbus_channel) channel_anchor;
struct mtx channel_lock;
@@ -560,6 +542,8 @@ typedef union {
uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT];
} hv_vmbus_synic_event_flags;
+/* MSR used to provide vcpu index */
+#define HV_X64_MSR_VP_INDEX (0x40000002)
/*
* Define synthetic interrupt controller model specific registers
@@ -618,7 +602,8 @@ void hv_ring_buffer_cleanup(
int hv_ring_buffer_write(
hv_vmbus_ring_buffer_info *ring_info,
hv_vmbus_sg_buffer_list sg_buffers[],
- uint32_t sg_buff_count);
+ uint32_t sg_buff_count,
+ boolean_t *need_sig);
int hv_ring_buffer_peek(
hv_vmbus_ring_buffer_info *ring_info,
@@ -638,6 +623,12 @@ void hv_vmbus_dump_ring_info(
hv_vmbus_ring_buffer_info *ring_info,
char *prefix);
+void hv_ring_buffer_read_begin(
+ hv_vmbus_ring_buffer_info *ring_info);
+
+uint32_t hv_ring_buffer_read_end(
+ hv_vmbus_ring_buffer_info *ring_info);
+
hv_vmbus_channel* hv_vmbus_allocate_channel(void);
void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel);
void hv_vmbus_on_channel_message(void *context);
@@ -652,7 +643,7 @@ uint16_t hv_vmbus_post_msg_via_msg_ipc(
void *payload,
size_t payload_size);
-uint16_t hv_vmbus_signal_event(void);
+uint16_t hv_vmbus_signal_event(void *con_id);
void hv_vmbus_synic_init(void *irq_arg);
void hv_vmbus_synic_cleanup(void *arg);
int hv_vmbus_query_hypervisor_presence(void);
@@ -674,7 +665,7 @@ hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id);
int hv_vmbus_connect(void);
int hv_vmbus_disconnect(void);
int hv_vmbus_post_message(void *buffer, size_t buf_size);
-int hv_vmbus_set_event(uint32_t child_rel_id);
+int hv_vmbus_set_event(hv_vmbus_channel *channel);
void hv_vmbus_on_events(void *);
@@ -718,7 +709,7 @@ static inline uint64_t hv_generate_guest_id(
typedef struct {
unsigned int vector;
- void *page_buffers[2];
+ void *page_buffers[2 * MAXCPU];
} hv_setup_args;
#endif /* __HYPERV_PRIV_H__ */
diff --git a/sys/dev/ichsmb/ichsmb_pci.c b/sys/dev/ichsmb/ichsmb_pci.c
index 5c1ef54..ae8b179 100644
--- a/sys/dev/ichsmb/ichsmb_pci.c
+++ b/sys/dev/ichsmb/ichsmb_pci.c
@@ -88,6 +88,7 @@ __FBSDID("$FreeBSD$");
#define ID_AVOTON 0x1f3c8086
#define ID_COLETOCRK 0x23B08086
#define ID_LPT 0x8c228086
+#define ID_LPTLP 0x9c228086
#define ID_WCPT 0x8ca28086
#define ID_WCPTLP 0x9ca28086
@@ -199,6 +200,9 @@ ichsmb_pci_probe(device_t dev)
case ID_LPT:
device_set_desc(dev, "Intel Lynx Point SMBus controller");
break;
+ case ID_LPTLP:
+ device_set_desc(dev, "Intel Lynx Point-LP SMBus controller");
+ break;
case ID_WCPT:
device_set_desc(dev, "Intel Wildcat Point SMBus controller");
break;
diff --git a/sys/dev/iicbus/pcf8563.c b/sys/dev/iicbus/pcf8563.c
index 88307a5..556943a 100644
--- a/sys/dev/iicbus/pcf8563.c
+++ b/sys/dev/iicbus/pcf8563.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2012 Marius Strobl <marius@FreeBSD.org>
+ * Copyright (c) 2015 Juraj Lutter
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +32,8 @@ __FBSDID("$FreeBSD$");
* Driver for NXP PCF8563 real-time clock/calendar
*/
+#include "opt_platform.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
@@ -41,6 +44,11 @@ __FBSDID("$FreeBSD$");
#include <dev/iicbus/iicbus.h>
#include <dev/iicbus/iiconf.h>
#include <dev/iicbus/pcf8563reg.h>
+#ifdef FDT
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_bus.h>
+#include <dev/ofw/ofw_bus_subr.h>
+#endif
#include "clock_if.h"
#include "iicbus_if.h"
@@ -48,6 +56,7 @@ __FBSDID("$FreeBSD$");
#define PCF8563_NCLOCKREGS (PCF8563_R_YEAR - PCF8563_R_CS1 + 1)
struct pcf8563_softc {
+ struct intr_config_hook enum_hook;
uint32_t sc_flags;
#define PCF8563_CPOL (1 << 0) /* PCF8563_R_MONTH_C means 19xx */
uint16_t sc_addr; /* PCF8563 slave address */
@@ -58,30 +67,62 @@ static device_attach_t pcf8563_attach;
static device_probe_t pcf8563_probe;
static clock_gettime_t pcf8563_gettime;
static clock_settime_t pcf8563_settime;
+static void pcf8563_start(void *);
static int
pcf8563_probe(device_t dev)
{
+#ifdef FDT
+ if (!ofw_bus_status_okay(dev))
+ return (ENXIO);
+ if (!ofw_bus_is_compatible(dev, "nxp,pcf8563") &&
+ !ofw_bus_is_compatible(dev, "philips,pcf8563") &&
+ !ofw_bus_is_compatible(dev, "pcf8563"))
+ return (ENXIO);
+#endif
device_set_desc(dev, "NXP PCF8563 RTC");
- return (BUS_PROBE_NOWILDCARD);
+
+ return (BUS_PROBE_DEFAULT);
}
static int
pcf8563_attach(device_t dev)
{
+ struct pcf8563_softc *sc;
+
+ sc = device_get_softc(dev);
+ sc->sc_addr = iicbus_get_addr(dev);
+ if (sc->sc_addr == 0)
+ sc->sc_addr = PCF8563_ADDR;
+ sc->sc_year0 = 1900;
+ sc->enum_hook.ich_func = pcf8563_start;
+ sc->enum_hook.ich_arg = dev;
+
+ /*
+	 * We have to wait until interrupts are enabled. Sometimes I2C
+	 * reads and writes only work once interrupts are available.
+ */
+ if (config_intrhook_establish(&sc->enum_hook) != 0)
+ return (ENOMEM);
+
+ return (0);
+}
+
+static void
+pcf8563_start(void *xdev)
+{
+ device_t dev;
uint8_t reg = PCF8563_R_SECOND, val;
struct iic_msg msgs[] = {
{ 0, IIC_M_WR, sizeof(reg), &reg },
{ 0, IIC_M_RD, sizeof(val), &val }
};
struct pcf8563_softc *sc;
- int error;
+ dev = (device_t)xdev;
sc = device_get_softc(dev);
- sc->sc_addr = iicbus_get_addr(dev);
- if (sc->sc_addr == 0)
- sc->sc_addr = PCF8563_ADDR;
+ config_intrhook_disestablish(&sc->enum_hook);
/*
* NB: PCF8563_R_SECOND_VL doesn't automatically clear when VDD
@@ -94,18 +135,14 @@ pcf8563_attach(device_t dev)
* as a side-effect.
*/
msgs[0].slave = msgs[1].slave = sc->sc_addr;
- error = iicbus_transfer(device_get_parent(dev), msgs, sizeof(msgs) /
- sizeof(*msgs));
- if (error != 0) {
+ if (iicbus_transfer(dev, msgs, nitems(msgs)) != 0) {
device_printf(dev, "%s: cannot read RTC\n", __func__);
- return (error);
+ return;
}
if ((val & PCF8563_R_SECOND_VL) != 0)
device_printf(dev, "%s: battery low\n", __func__);
- sc->sc_year0 = 1900;
clock_register(dev, 1000000); /* 1 second resolution */
- return (0);
}
static int
@@ -122,8 +159,7 @@ pcf8563_gettime(device_t dev, struct timespec *ts)
sc = device_get_softc(dev);
msgs[0].slave = msgs[1].slave = sc->sc_addr;
- error = iicbus_transfer(device_get_parent(dev), msgs, sizeof(msgs) /
- sizeof(*msgs));
+ error = iicbus_transfer(dev, msgs, nitems(msgs));
if (error != 0) {
device_printf(dev, "%s: cannot read RTC\n", __func__);
return (error);
@@ -145,6 +181,7 @@ pcf8563_gettime(device_t dev, struct timespec *ts)
sc->sc_flags |= PCF8563_CPOL;
} else if (ct.year < 100 + sc->sc_year0)
sc->sc_flags |= PCF8563_CPOL;
+
return (clock_ct_to_ts(&ct, ts));
}
@@ -180,10 +217,10 @@ pcf8563_settime(device_t dev, struct timespec *ts)
val[PCF8563_R_MONTH] |= PCF8563_R_MONTH_C;
msgs[0].slave = sc->sc_addr;
- error = iicbus_transfer(device_get_parent(dev), msgs, sizeof(msgs) /
- sizeof(*msgs));
+ error = iicbus_transfer(dev, msgs, nitems(msgs));
if (error != 0)
device_printf(dev, "%s: cannot write RTC\n", __func__);
+
return (error);
}
diff --git a/sys/dev/ipmi/ipmi.c b/sys/dev/ipmi/ipmi.c
index a1edbf6..8101717 100644
--- a/sys/dev/ipmi/ipmi.c
+++ b/sys/dev/ipmi/ipmi.c
@@ -756,17 +756,22 @@ ipmi_startup(void *arg)
}
device_printf(dev, "Number of channels %d\n", i);
- /* probe for watchdog */
- IPMI_INIT_DRIVER_REQUEST(req, IPMI_ADDR(IPMI_APP_REQUEST, 0),
- IPMI_GET_WDOG, 0, 0);
+ /*
+ * Probe for watchdog, but only for backends which support
+ * polled driver requests.
+ */
+ if (sc->ipmi_driver_requests_polled) {
+ IPMI_INIT_DRIVER_REQUEST(req, IPMI_ADDR(IPMI_APP_REQUEST, 0),
+ IPMI_GET_WDOG, 0, 0);
- ipmi_submit_driver_request(sc, req, 0);
+ ipmi_submit_driver_request(sc, req, 0);
- if (req->ir_compcode == 0x00) {
- device_printf(dev, "Attached watchdog\n");
- /* register the watchdog event handler */
- sc->ipmi_watchdog_tag = EVENTHANDLER_REGISTER(watchdog_list,
- ipmi_wd_event, sc, 0);
+ if (req->ir_compcode == 0x00) {
+ device_printf(dev, "Attached watchdog\n");
+ /* register the watchdog event handler */
+ sc->ipmi_watchdog_tag = EVENTHANDLER_REGISTER(
+ watchdog_list, ipmi_wd_event, sc, 0);
+ }
}
sc->ipmi_cdev = make_dev(&ipmi_cdevsw, device_get_unit(dev),
diff --git a/sys/dev/ipmi/ipmi_kcs.c b/sys/dev/ipmi/ipmi_kcs.c
index 1c58646..864e9a0 100644
--- a/sys/dev/ipmi/ipmi_kcs.c
+++ b/sys/dev/ipmi/ipmi_kcs.c
@@ -520,6 +520,7 @@ ipmi_kcs_attach(struct ipmi_softc *sc)
sc->ipmi_startup = kcs_startup;
sc->ipmi_enqueue_request = ipmi_polled_enqueue_request;
sc->ipmi_driver_request = kcs_driver_request;
+ sc->ipmi_driver_requests_polled = 1;
/* See if we can talk to the controller. */
status = INB(sc, KCS_CTL_STS);
diff --git a/sys/dev/ipmi/ipmi_smic.c b/sys/dev/ipmi/ipmi_smic.c
index 4e26553..92cf14e 100644
--- a/sys/dev/ipmi/ipmi_smic.c
+++ b/sys/dev/ipmi/ipmi_smic.c
@@ -415,6 +415,7 @@ ipmi_smic_attach(struct ipmi_softc *sc)
sc->ipmi_startup = smic_startup;
sc->ipmi_enqueue_request = ipmi_polled_enqueue_request;
sc->ipmi_driver_request = smic_driver_request;
+ sc->ipmi_driver_requests_polled = 1;
/* See if we can talk to the controller. */
flags = INB(sc, SMIC_FLAGS);
diff --git a/sys/dev/ipmi/ipmivars.h b/sys/dev/ipmi/ipmivars.h
index 9d7dc32..9a0b435 100644
--- a/sys/dev/ipmi/ipmivars.h
+++ b/sys/dev/ipmi/ipmivars.h
@@ -105,6 +105,7 @@ struct ipmi_softc {
int ipmi_opened;
struct cdev *ipmi_cdev;
TAILQ_HEAD(,ipmi_request) ipmi_pending_requests;
+ int ipmi_driver_requests_polled;
eventhandler_tag ipmi_watchdog_tag;
int ipmi_watchdog_active;
struct intr_config_hook ipmi_ich;
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c
index 48ff9d1..b9bba31 100644
--- a/sys/dev/ixgbe/if_ix.c
+++ b/sys/dev/ixgbe/if_ix.c
@@ -54,7 +54,7 @@ int ixgbe_display_debug_stats = 0;
/*********************************************************************
* Driver version
*********************************************************************/
-char ixgbe_driver_version[] = "2.7.4";
+char ixgbe_driver_version[] = "2.8.3";
/*********************************************************************
* PCI Device ID Table
@@ -117,6 +117,8 @@ static int ixgbe_probe(device_t);
static int ixgbe_attach(device_t);
static int ixgbe_detach(device_t);
static int ixgbe_shutdown(device_t);
+static int ixgbe_suspend(device_t);
+static int ixgbe_resume(device_t);
static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
static void ixgbe_init(void *);
static void ixgbe_init_locked(struct adapter *);
@@ -136,7 +138,12 @@ static int ixgbe_setup_msix(struct adapter *);
static void ixgbe_free_pci_resources(struct adapter *);
static void ixgbe_local_timer(void *);
static int ixgbe_setup_interface(device_t, struct adapter *);
+static void ixgbe_config_dmac(struct adapter *);
+static void ixgbe_config_delay_values(struct adapter *);
static void ixgbe_config_link(struct adapter *);
+static void ixgbe_check_eee_support(struct adapter *);
+static void ixgbe_check_wol_support(struct adapter *);
+static int ixgbe_setup_low_power_mode(struct adapter *);
static void ixgbe_rearm_queues(struct adapter *, u64);
static void ixgbe_initialize_transmit_units(struct adapter *);
@@ -150,9 +157,6 @@ static void ixgbe_update_stats_counters(struct adapter *);
static void ixgbe_set_promisc(struct adapter *);
static void ixgbe_set_multi(struct adapter *);
static void ixgbe_update_link_status(struct adapter *);
-static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
-static int ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
-static int ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void ixgbe_configure_ivars(struct adapter *);
static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
@@ -161,7 +165,22 @@ static void ixgbe_setup_vlan_hw_support(struct adapter *);
static void ixgbe_register_vlan(void *, struct ifnet *, u16);
static void ixgbe_unregister_vlan(void *, struct ifnet *, u16);
-static void ixgbe_add_hw_stats(struct adapter *adapter);
+static void ixgbe_add_device_sysctls(struct adapter *);
+static void ixgbe_add_hw_stats(struct adapter *);
+
+/* Sysctl handlers */
+static int ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
+static int ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS);
+static int ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS);
/* Support for pluggable optic modules */
static bool ixgbe_sfp_probe(struct adapter *);
@@ -179,15 +198,12 @@ static void ixgbe_handle_que(void *, int);
static void ixgbe_handle_link(void *, int);
static void ixgbe_handle_msf(void *, int);
static void ixgbe_handle_mod(void *, int);
+static void ixgbe_handle_phy(void *, int);
#ifdef IXGBE_FDIR
static void ixgbe_reinit_fdir(void *, int);
#endif
-
-/* Missing shared code prototype */
-extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
-
/*********************************************************************
* FreeBSD Device Interface Entry Points
*********************************************************************/
@@ -198,6 +214,8 @@ static device_method_t ix_methods[] = {
DEVMETHOD(device_attach, ixgbe_attach),
DEVMETHOD(device_detach, ixgbe_detach),
DEVMETHOD(device_shutdown, ixgbe_shutdown),
+ DEVMETHOD(device_suspend, ixgbe_suspend),
+ DEVMETHOD(device_resume, ixgbe_resume),
DEVMETHOD_END
};
@@ -404,32 +422,6 @@ ixgbe_attach(device_t dev)
/* Core Lock Init*/
IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
- /* SYSCTL APIs */
- SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
- SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
- adapter, 0, ixgbe_set_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC);
-
- SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
- SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "enable_aim", CTLFLAG_RW,
- &ixgbe_enable_aim, 1, "Interrupt Moderation");
-
- /*
- ** Allow a kind of speed control by forcing the autoneg
- ** advertised speed list to only a certain value, this
- ** supports 1G on 82599 devices, and 100Mb on x540.
- */
- SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
- SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
- adapter, 0, ixgbe_set_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED);
-
- SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
- SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
- 0, ixgbe_set_thermal_test, "I", "Thermal Test");
-
/* Set up the timer callout */
callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
@@ -559,22 +551,26 @@ ixgbe_attach(device_t dev)
adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
- /*
- ** Check PCIE slot type/speed/width
- */
+ /* Check PCIE slot type/speed/width */
ixgbe_get_slot_info(hw);
/* Set an initial default flow control value */
adapter->fc = ixgbe_fc_full;
+ /* Check for certain supported features */
+ ixgbe_check_wol_support(adapter);
+ ixgbe_check_eee_support(adapter);
+
+ /* Add sysctls */
+ ixgbe_add_device_sysctls(adapter);
+ ixgbe_add_hw_stats(adapter);
+
/* let hardware know driver is loaded */
ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
- ixgbe_add_hw_stats(adapter);
-
#ifdef DEV_NETMAP
ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
@@ -618,8 +614,9 @@ ixgbe_detach(device_t dev)
return (EBUSY);
}
+ /* Stop the adapter */
IXGBE_CORE_LOCK(adapter);
- ixgbe_stop(adapter);
+ ixgbe_setup_low_power_mode(adapter);
IXGBE_CORE_UNLOCK(adapter);
for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
@@ -637,6 +634,7 @@ ixgbe_detach(device_t dev)
taskqueue_drain(adapter->tq, &adapter->link_task);
taskqueue_drain(adapter->tq, &adapter->mod_task);
taskqueue_drain(adapter->tq, &adapter->msf_task);
+ taskqueue_drain(adapter->tq, &adapter->phy_task);
#ifdef IXGBE_FDIR
taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
@@ -681,9 +679,77 @@ static int
ixgbe_shutdown(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
+ int error = 0;
+
+ INIT_DEBUGOUT("ixgbe_shutdown: begin");
+
IXGBE_CORE_LOCK(adapter);
- ixgbe_stop(adapter);
+ error = ixgbe_setup_low_power_mode(adapter);
+ IXGBE_CORE_UNLOCK(adapter);
+
+ return (error);
+}
+
+/**
+ * Methods for going from:
+ * D0 -> D3: ixgbe_suspend
+ * D3 -> D0: ixgbe_resume
+ */
+static int
+ixgbe_suspend(device_t dev)
+{
+ struct adapter *adapter = device_get_softc(dev);
+ int error = 0;
+
+ INIT_DEBUGOUT("ixgbe_suspend: begin");
+
+ IXGBE_CORE_LOCK(adapter);
+
+ error = ixgbe_setup_low_power_mode(adapter);
+
+ /* Save state and power down */
+ pci_save_state(dev);
+ pci_set_powerstate(dev, PCI_POWERSTATE_D3);
+
+ IXGBE_CORE_UNLOCK(adapter);
+
+ return (error);
+}
+
+static int
+ixgbe_resume(device_t dev)
+{
+ struct adapter *adapter = device_get_softc(dev);
+ struct ifnet *ifp = adapter->ifp;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 wus;
+
+ INIT_DEBUGOUT("ixgbe_resume: begin");
+
+ IXGBE_CORE_LOCK(adapter);
+
+ pci_set_powerstate(dev, PCI_POWERSTATE_D0);
+ pci_restore_state(dev);
+
+ /* Read & clear WUS register */
+ wus = IXGBE_READ_REG(hw, IXGBE_WUS);
+ if (wus)
+		device_printf(dev, "Woken up by (WUS): %#010x\n", wus);
+ IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
+ /* And clear WUFC until next low-power transition */
+ IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0);
+
+ /*
+ * Required after D3->D0 transition;
+ * will re-advertise all previous advertised speeds
+ */
+ if (ifp->if_flags & IFF_UP)
+ ixgbe_init_locked(adapter);
+
IXGBE_CORE_UNLOCK(adapter);
+
+ INIT_DEBUGOUT("ixgbe_resume: end");
return (0);
}
@@ -736,13 +802,13 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
break;
case SIOCSIFMTU:
IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
- if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
+ if (ifr->ifr_mtu > IXGBE_MAX_MTU) {
error = EINVAL;
} else {
IXGBE_CORE_LOCK(adapter);
ifp->if_mtu = ifr->ifr_mtu;
adapter->max_frame_size =
- ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+ ifp->if_mtu + IXGBE_MTU_HDR;
ixgbe_init_locked(adapter);
IXGBE_CORE_UNLOCK(adapter);
}
@@ -891,7 +957,7 @@ ixgbe_init_locked(struct adapter *adapter)
/* Prepare transmit descriptors and buffers */
if (ixgbe_setup_transmit_structures(adapter)) {
- device_printf(dev,"Could not setup transmit structures\n");
+ device_printf(dev, "Could not setup transmit structures\n");
ixgbe_stop(adapter);
return;
}
@@ -917,7 +983,7 @@ ixgbe_init_locked(struct adapter *adapter)
/* Prepare receive descriptors and buffers */
if (ixgbe_setup_receive_structures(adapter)) {
- device_printf(dev,"Could not setup receive structures\n");
+ device_printf(dev, "Could not setup receive structures\n");
ixgbe_stop(adapter);
return;
}
@@ -932,11 +998,16 @@ ixgbe_init_locked(struct adapter *adapter)
/* Add for Module detection */
if (hw->mac.type == ixgbe_mac_82599EB)
- gpie |= IXGBE_SDP2_GPIEN_BY_MAC(hw);
+ gpie |= IXGBE_SDP2_GPIEN;
- /* Thermal Failure Detection */
- if (hw->mac.type == ixgbe_mac_X540)
- gpie |= IXGBE_SDP0_GPIEN_BY_MAC(hw);
+ /*
+ * Thermal Failure Detection (X540)
+ * Link Detection (X552)
+ */
+ if (hw->mac.type == ixgbe_mac_X540 ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
+ gpie |= IXGBE_SDP0_GPIEN_X540;
if (adapter->msix > 1) {
/* Enable Enhanced MSIX mode */
@@ -948,6 +1019,7 @@ ixgbe_init_locked(struct adapter *adapter)
/* Set MTU size */
if (ifp->if_mtu > ETHERMTU) {
+ /* aka IXGBE_MAXFRS on 82599 and newer */
mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
mhadd &= ~IXGBE_MHADD_MFS_MASK;
mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
@@ -955,7 +1027,6 @@ ixgbe_init_locked(struct adapter *adapter)
}
/* Now enable all the queues */
-
for (int i = 0; i < adapter->num_queues; i++) {
txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
txdctl |= IXGBE_TXDCTL_ENABLE;
@@ -1072,55 +1143,25 @@ ixgbe_init_locked(struct adapter *adapter)
/* Set moderation on the Link interrupt */
IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR);
+ /* Configure Energy Efficient Ethernet for supported devices */
+ if (adapter->eee_support)
+ ixgbe_setup_eee(hw, adapter->eee_enabled);
+
/* Config/Enable Link */
ixgbe_config_link(adapter);
/* Hardware Packet Buffer & Flow Control setup */
- {
- u32 rxpb, frame, size, tmp;
-
- frame = adapter->max_frame_size;
+ ixgbe_config_delay_values(adapter);
- /* Calculate High Water */
- switch (hw->mac.type) {
- case ixgbe_mac_X540:
- case ixgbe_mac_X550:
- case ixgbe_mac_X550EM_a:
- case ixgbe_mac_X550EM_x:
- tmp = IXGBE_DV_X540(frame, frame);
- break;
- default:
- tmp = IXGBE_DV(frame, frame);
- break;
- }
- size = IXGBE_BT2KB(tmp);
- rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
- hw->fc.high_water[0] = rxpb - size;
-
- /* Now calculate Low Water */
- switch (hw->mac.type) {
- case ixgbe_mac_X540:
- case ixgbe_mac_X550:
- case ixgbe_mac_X550EM_a:
- case ixgbe_mac_X550EM_x:
- tmp = IXGBE_LOW_DV_X540(frame);
- break;
- default:
- tmp = IXGBE_LOW_DV(frame);
- break;
- }
- hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
-
- hw->fc.requested_mode = adapter->fc;
- hw->fc.pause_time = IXGBE_FC_PAUSE;
- hw->fc.send_xon = TRUE;
- }
/* Initialize the FC settings */
ixgbe_start_hw(hw);
/* Set up VLAN support and filter */
ixgbe_setup_vlan_hw_support(adapter);
+ /* Setup DMA Coalescing */
+ ixgbe_config_dmac(adapter);
+
/* And now turn on interrupts */
ixgbe_enable_intr(adapter);
@@ -1141,6 +1182,46 @@ ixgbe_init(void *arg)
return;
}
+static void
+ixgbe_config_delay_values(struct adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 rxpb, frame, size, tmp;
+
+ frame = adapter->max_frame_size;
+
+ /* Calculate High Water */
+ switch (hw->mac.type) {
+ case ixgbe_mac_X540:
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ tmp = IXGBE_DV_X540(frame, frame);
+ break;
+ default:
+ tmp = IXGBE_DV(frame, frame);
+ break;
+ }
+ size = IXGBE_BT2KB(tmp);
+ rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
+ hw->fc.high_water[0] = rxpb - size;
+
+ /* Now calculate Low Water */
+ switch (hw->mac.type) {
+ case ixgbe_mac_X540:
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ tmp = IXGBE_LOW_DV_X540(frame);
+ break;
+ default:
+ tmp = IXGBE_LOW_DV(frame);
+ break;
+ }
+ hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
+
+ hw->fc.requested_mode = adapter->fc;
+ hw->fc.pause_time = IXGBE_FC_PAUSE;
+ hw->fc.send_xon = TRUE;
+}
/*
**
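[Editorial note on the watermark math factored out above: RXPBSIZE(0) shifted right by 10 yields the RX packet buffer size in kilobytes, and IXGBE_BT2KB() is assumed to round a delay value given in bits up to kilobytes, so the calculation reduces to the following sketch (not driver code; ceil_kb() is a hypothetical stand-in for IXGBE_BT2KB):

	/* high_water: buffer left after reserving worst-case in-flight data */
	hw->fc.high_water[0] = rxpb_kb - ceil_kb(dv_bits);   /* XOFF threshold */
	/* low_water: point at which XON is sent and traffic resumes */
	hw->fc.low_water[0]  = ceil_kb(low_dv_bits);
]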
@@ -1195,9 +1276,10 @@ ixgbe_handle_que(void *context, int pending)
struct adapter *adapter = que->adapter;
struct tx_ring *txr = que->txr;
struct ifnet *ifp = adapter->ifp;
+ bool more;
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- ixgbe_rxeof(que);
+ more = ixgbe_rxeof(que);
IXGBE_TX_LOCK(txr);
ixgbe_txeof(txr);
#ifndef IXGBE_LEGACY_TX
@@ -1270,6 +1352,11 @@ ixgbe_legacy_irq(void *arg)
if (reg_eicr & IXGBE_EICR_LSC)
taskqueue_enqueue(adapter->tq, &adapter->link_task);
+ /* External PHY interrupt */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
+ (reg_eicr & IXGBE_EICR_GPI_SDP0_X540))
+ taskqueue_enqueue(adapter->tq, &adapter->phy_task);
+
if (more)
taskqueue_enqueue(que->tq, &que->que_task);
else
@@ -1378,9 +1465,9 @@ ixgbe_msix_link(void *arg)
{
struct adapter *adapter = arg;
struct ixgbe_hw *hw = &adapter->hw;
- u32 reg_eicr;
+ u32 reg_eicr, mod_mask;
- ++adapter->vector_irq;
+ ++adapter->link_irq;
/* First get the cause */
reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
@@ -1408,42 +1495,46 @@ ixgbe_msix_link(void *arg)
device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
"Please Reboot!!\n");
IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
- } else
+ }
- if (ixgbe_is_sfp(hw)) {
- if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
- IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
- taskqueue_enqueue(adapter->tq, &adapter->msf_task);
- } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
- IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2_BY_MAC(hw));
- taskqueue_enqueue(adapter->tq, &adapter->mod_task);
- }
+ /* Check for over temp condition */
+ if (reg_eicr & IXGBE_EICR_TS) {
+ device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
+ "PHY IS SHUT DOWN!!\n");
+ device_printf(adapter->dev, "System shutdown required!\n");
+ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
}
- }
+ }
+
+ /* Pluggable optics-related interrupt */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
+ mod_mask = IXGBE_EICR_GPI_SDP0_X540;
+ else
+ mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
+
+ if (ixgbe_is_sfp(hw)) {
+ if (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) {
+ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
+ taskqueue_enqueue(adapter->tq, &adapter->msf_task);
+ } else if (reg_eicr & mod_mask) {
+ IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask);
+ taskqueue_enqueue(adapter->tq, &adapter->mod_task);
+ }
+ }
/* Check for fan failure */
if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
- (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) {
+ (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
+ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
"REPLACE IMMEDIATELY!!\n");
- IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
}
- /* Check for over temp condition */
- switch (hw->mac.type) {
- case ixgbe_mac_X540:
- case ixgbe_mac_X550:
- case ixgbe_mac_X550EM_a:
- if (reg_eicr & IXGBE_EICR_TS) {
- device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
- "PHY IS SHUT DOWN!!\n");
- device_printf(adapter->dev, "System shutdown required\n");
- IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
- }
- break;
- default:
- /* Other MACs have no thermal sensor interrupt */
- break;
+ /* External PHY interrupt */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
+ (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) {
+ IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540);
+ taskqueue_enqueue(adapter->tq, &adapter->phy_task);
}
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
@@ -1542,20 +1633,26 @@ ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR)
switch (adapter->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
- ifmr->ifm_active |= IFM_10_T | IFM_FDX;
+ ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
+ break;
+ case IXGBE_LINK_SPEED_2_5GB_FULL:
+ ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
break;
case IXGBE_LINK_SPEED_1GB_FULL:
- ifmr->ifm_active |= IFM_10_5 | IFM_FDX;
+ ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
break;
}
- if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4
+ else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4
|| layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)
switch (adapter->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
- ifmr->ifm_active |= IFM_10_2 | IFM_FDX;
+ ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
+ break;
+ case IXGBE_LINK_SPEED_2_5GB_FULL:
+ ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
break;
case IXGBE_LINK_SPEED_1GB_FULL:
- ifmr->ifm_active |= IFM_10_5 | IFM_FDX;
+ ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
break;
}
@@ -1564,10 +1661,12 @@ ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
ifmr->ifm_active |= IFM_UNKNOWN;
#if __FreeBSD_version >= 900025
- /* Flow control setting */
- if (adapter->fc == ixgbe_fc_rx_pause || adapter->fc == ixgbe_fc_full)
+ /* Display current flow control setting used on link */
+ if (hw->fc.current_mode == ixgbe_fc_rx_pause ||
+ hw->fc.current_mode == ixgbe_fc_full)
ifmr->ifm_active |= IFM_ETH_RXPAUSE;
- if (adapter->fc == ixgbe_fc_tx_pause || adapter->fc == ixgbe_fc_full)
+ if (hw->fc.current_mode == ixgbe_fc_tx_pause ||
+ hw->fc.current_mode == ixgbe_fc_full)
ifmr->ifm_active |= IFM_ETH_TXPAUSE;
#endif
@@ -1597,21 +1696,22 @@ ixgbe_media_change(struct ifnet * ifp)
if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
+ if (hw->phy.media_type == ixgbe_media_type_backplane)
+ return (EPERM);
+
/*
** We don't actually need to check against the supported
** media types of the adapter; ifmedia will take care of
** that for us.
- ** NOTE: this relies on falling thru the switch
- ** to get all the values set, it can be confusing.
*/
switch (IFM_SUBTYPE(ifm->ifm_media)) {
case IFM_AUTO:
case IFM_10G_T:
speed |= IXGBE_LINK_SPEED_100_FULL;
case IFM_10G_LRM:
- case IFM_10G_SR: /* KR, too */
+ case IFM_10G_SR: /* KR, too */
case IFM_10G_LR:
- case IFM_10G_CX4: /* KX4 for now */
+ case IFM_10G_CX4: /* KX4 */
speed |= IXGBE_LINK_SPEED_1GB_FULL;
case IFM_10G_TWINAX:
speed |= IXGBE_LINK_SPEED_10GB_FULL;
@@ -1620,7 +1720,7 @@ ixgbe_media_change(struct ifnet * ifp)
speed |= IXGBE_LINK_SPEED_100_FULL;
case IFM_1000_LX:
case IFM_1000_SX:
- case IFM_1000_CX: /* KX until there's real support */
+ case IFM_1000_CX: /* KX */
speed |= IXGBE_LINK_SPEED_1GB_FULL;
break;
case IFM_100_TX:
@@ -1640,7 +1740,7 @@ ixgbe_media_change(struct ifnet * ifp)
return (0);
invalid:
- device_printf(adapter->dev, "Invalid media type\n");
+ device_printf(adapter->dev, "Invalid media type!\n");
return (EINVAL);
}
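[Editorial note: the speed accumulation above still relies on deliberate case fallthrough even though the old NOTE comment was dropped; each case ORs in its speed bit and falls into the next. A sketch of the net effect for IFM_10G_T:

	/* Sketch: after falling through the 10G, 1G and 100M cases */
	speed = IXGBE_LINK_SPEED_100_FULL
	      | IXGBE_LINK_SPEED_1GB_FULL
	      | IXGBE_LINK_SPEED_10GB_FULL;	/* all three stay advertised */
]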
@@ -1865,7 +1965,6 @@ ixgbe_update_link_status(struct adapter *adapter)
struct ifnet *ifp = adapter->ifp;
device_t dev = adapter->dev;
-
if (adapter->link_up){
if (adapter->link_active == FALSE) {
if (bootverbose)
@@ -1875,6 +1974,8 @@ ixgbe_update_link_status(struct adapter *adapter)
adapter->link_active = TRUE;
/* Update any Flow Control changes */
ixgbe_fc_enable(&adapter->hw);
+ /* Update DMA coalescing config */
+ ixgbe_config_dmac(adapter);
if_link_state_change(ifp, LINK_STATE_UP);
}
} else { /* Link down */
@@ -1961,7 +2062,7 @@ ixgbe_identify_hardware(struct adapter *adapter)
/* We need this here to set the num_segs below */
ixgbe_set_mac_type(hw);
- /* Pick up the 82599 and VF settings */
+ /* Pick up the 82599 settings */
if (hw->mac.type != ixgbe_mac_82598EB) {
hw->phy.smart_speed = ixgbe_smart_speed;
adapter->num_segs = IXGBE_82599_SCATTER;
@@ -2071,6 +2172,7 @@ ixgbe_allocate_legacy(struct adapter *adapter)
TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
+ TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter);
#ifdef IXGBE_FDIR
TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif
@@ -2136,8 +2238,6 @@ ixgbe_allocate_msix(struct adapter *adapter)
}
#endif
-
-
for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
rid = vector + 1;
que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
@@ -2187,14 +2287,11 @@ ixgbe_allocate_msix(struct adapter *adapter)
"Bound RSS bucket %d to CPU %d\n",
i, cpu_id);
#else
-#if 0 // This is too noisy
- device_printf(dev,
- "Bound queue %d to cpu %d\n",
- i, cpu_id);
-#endif
+ if (bootverbose)
+ device_printf(dev,
+ "Bound queue %d to cpu %d\n",
+ i, cpu_id);
#endif
-
-
#ifndef IXGBE_LEGACY_TX
TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
#endif
@@ -2240,6 +2337,7 @@ ixgbe_allocate_msix(struct adapter *adapter)
TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
+ TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter);
#ifdef IXGBE_FDIR
TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif
@@ -2465,6 +2563,12 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter)
#if __FreeBSD_version >= 1100036
if_setgetcounterfn(ifp, ixgbe_get_counter);
#endif
+#if __FreeBSD_version >= 1100045
+ /* TSO parameters */
+ ifp->if_hw_tsomax = 65518;
+ ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER;
+ ifp->if_hw_tsomaxsegsize = 2048;
+#endif
#ifndef IXGBE_LEGACY_TX
ifp->if_transmit = ixgbe_mq_start;
ifp->if_qflush = ixgbe_qflush;
@@ -2548,10 +2652,6 @@ ixgbe_add_media_types(struct adapter *adapter)
ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL);
if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)
ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
-#if 0
- if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_LX)
- ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL);
-#endif
/*
** Other (no matching FreeBSD media type):
@@ -2560,25 +2660,24 @@ ixgbe_add_media_types(struct adapter *adapter)
*/
if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
device_printf(dev, "Media supported: 10GbaseKR\n");
- device_printf(dev, "10GbaseKR mapped to 10baseT\n");
- ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
+ device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n");
+ ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
}
if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
device_printf(dev, "Media supported: 10GbaseKX4\n");
- device_printf(dev, "10GbaseKX4 mapped to 10base2\n");
- ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_2, 0, NULL);
+ device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n");
+ ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL);
}
if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
device_printf(dev, "Media supported: 1000baseKX\n");
- device_printf(dev, "1000baseKX mapped to 10base5\n");
- ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_5, 0, NULL);
+ device_printf(dev, "1000baseKX mapped to 1000baseCX\n");
+ ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL);
}
if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
/* Someday, someone will care about you... */
device_printf(dev, "Media supported: 1000baseBX\n");
}
- /* Very old */
if (hw->device_id == IXGBE_DEV_ID_82598AT) {
ifmedia_add(&adapter->media,
IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
@@ -2706,7 +2805,8 @@ ixgbe_initialise_rss_mapping(struct adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
uint32_t reta;
- int i, j, queue_id;
+ int i, j, queue_id, table_size;
+ int index_mult;
uint32_t rss_key[10];
uint32_t mrqc;
#ifdef RSS
@@ -2724,8 +2824,23 @@ ixgbe_initialise_rss_mapping(struct adapter *adapter)
arc4rand(&rss_key, sizeof(rss_key), 0);
#endif
+ /* Set multiplier for RETA setup and table size based on MAC */
+ index_mult = 0x1;
+ table_size = 128;
+ switch (adapter->hw.mac.type) {
+ case ixgbe_mac_82598EB:
+ index_mult = 0x11;
+ break;
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ table_size = 512;
+ break;
+ default:
+ break;
+ }
+
/* Set up the redirection table */
- for (i = 0, j = 0; i < 128; i++, j++) {
+ for (i = 0, j = 0; i < table_size; i++, j++) {
if (j == adapter->num_queues) j = 0;
#ifdef RSS
/*
@@ -2736,7 +2851,7 @@ ixgbe_initialise_rss_mapping(struct adapter *adapter)
queue_id = rss_get_indirection_to_bucket(i);
queue_id = queue_id % adapter->num_queues;
#else
- queue_id = (j * 0x11);
+ queue_id = (j * index_mult);
#endif
/*
* The low 8 bits are for hash value (n+0);
@@ -2745,7 +2860,10 @@ ixgbe_initialise_rss_mapping(struct adapter *adapter)
reta = reta >> 8;
reta = reta | ( ((uint32_t) queue_id) << 24);
if ((i & 3) == 3) {
- IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
+ if (i < 128)
+ IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
+ else
+ IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta);
reta = 0;
}
}
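[Editorial note: for reference, a standalone sketch of the packing used above. Each 32-bit RETA/ERETA register holds four 8-bit queue indices, filled via the repeated right shift, and entries 128 and up spill into the ERETA registers on the 512-entry X550-class tables (queue_for_bucket() is a hypothetical helper):

	uint32_t reta = 0;
	for (int i = 0; i < table_size; i++) {
		uint8_t q = queue_for_bucket(i);
		reta = (reta >> 8) | ((uint32_t)q << 24);
		if ((i & 3) == 3) {	/* four entries packed, flush */
			if (i < 128)
				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
			else
				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta);
			reta = 0;
		}
	}
]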
@@ -2834,8 +2952,10 @@ ixgbe_initialize_receive_units(struct adapter *adapter)
/* Enable broadcasts */
fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
fctrl |= IXGBE_FCTRL_BAM;
- fctrl |= IXGBE_FCTRL_DPF;
- fctrl |= IXGBE_FCTRL_PMCF;
+ if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+ fctrl |= IXGBE_FCTRL_DPF;
+ fctrl |= IXGBE_FCTRL_PMCF;
+ }
IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
/* Set for Jumbo Frames? */
@@ -3045,30 +3165,37 @@ ixgbe_enable_intr(struct adapter *adapter)
mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
/* Enable Fan Failure detection */
if (hw->device_id == IXGBE_DEV_ID_82598AT)
- mask |= IXGBE_EIMS_GPI_SDP1_BY_MAC(hw);
+ mask |= IXGBE_EIMS_GPI_SDP1;
switch (adapter->hw.mac.type) {
case ixgbe_mac_82599EB:
mask |= IXGBE_EIMS_ECC;
/* Temperature sensor on some adapters */
- mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
+ mask |= IXGBE_EIMS_GPI_SDP0;
/* SFP+ (RX_LOS_N & MOD_ABS_N) */
- mask |= IXGBE_EIMS_GPI_SDP1_BY_MAC(hw);
- mask |= IXGBE_EIMS_GPI_SDP2_BY_MAC(hw);
+ mask |= IXGBE_EIMS_GPI_SDP1;
+ mask |= IXGBE_EIMS_GPI_SDP2;
#ifdef IXGBE_FDIR
mask |= IXGBE_EIMS_FLOW_DIR;
#endif
break;
case ixgbe_mac_X540:
- case ixgbe_mac_X550:
- case ixgbe_mac_X550EM_a:
- case ixgbe_mac_X550EM_x:
/* Detect if Thermal Sensor is enabled */
fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
if (fwsm & IXGBE_FWSM_TS_ENABLED)
mask |= IXGBE_EIMS_TS;
- /* XXX: Which SFP mode line does this look at? */
- if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
+ mask |= IXGBE_EIMS_ECC;
+#ifdef IXGBE_FDIR
+ mask |= IXGBE_EIMS_FLOW_DIR;
+#endif
+ break;
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ /* MAC thermal sensor is automatically enabled */
+ mask |= IXGBE_EIMS_TS;
+ /* Some devices use SDP0 for important information */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
mask |= IXGBE_EIMS_ECC;
#ifdef IXGBE_FDIR
@@ -3081,7 +3208,7 @@ ixgbe_enable_intr(struct adapter *adapter)
IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
- /* With RSS we use auto clear */
+ /* With MSI-X we use auto clear */
if (adapter->msix_mem) {
mask = IXGBE_EIMS_ENABLE_MASK;
/* Don't autoclear Link */
@@ -3135,10 +3262,12 @@ ixgbe_get_slot_info(struct ixgbe_hw *hw)
if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
ixgbe_get_bus_info(hw);
/* These devices don't use PCI-E */
- if (hw->mac.type == ixgbe_mac_X550EM_x
- || hw->mac.type == ixgbe_mac_X550EM_a)
+ switch (hw->mac.type) {
+ case ixgbe_mac_X550EM_x:
return;
- goto display;
+ default:
+ goto display;
+ }
}
/*
@@ -3260,7 +3389,6 @@ ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
case ixgbe_mac_X550:
- case ixgbe_mac_X550EM_a:
case ixgbe_mac_X550EM_x:
if (type == -1) { /* MISC IVAR */
index = (entry & 1) * 8;
@@ -3289,8 +3417,14 @@ ixgbe_configure_ivars(struct adapter *adapter)
if (ixgbe_max_interrupt_rate > 0)
newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
- else
+ else {
+ /*
+ ** Disable DMA coalescing if interrupt moderation is
+ ** disabled.
+ */
+ adapter->dmac = 0;
newitr = 0;
+ }
for (int i = 0; i < adapter->num_queues; i++, que++) {
/* First the RX queue entry */
@@ -3350,7 +3484,7 @@ ixgbe_handle_link(void *context, int pending)
ixgbe_check_link(&adapter->hw,
&adapter->link_speed, &adapter->link_up, 0);
- ixgbe_update_link_status(adapter);
+ ixgbe_update_link_status(adapter);
}
/*
@@ -3410,6 +3544,28 @@ ixgbe_handle_msf(void *context, int pending)
return;
}
+/*
+** Tasklet for handling interrupts from an external PHY
+*/
+static void
+ixgbe_handle_phy(void *context, int pending)
+{
+ struct adapter *adapter = context;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int error;
+
+ error = hw->phy.ops.handle_lasi(hw);
+ if (error == IXGBE_ERR_OVERTEMP)
+ device_printf(adapter->dev,
+ "CRITICAL: EXTERNAL PHY OVER TEMP!! "
+		    "PHY will downshift to lower power state!\n");
+ else if (error)
+ device_printf(adapter->dev,
+ "Error handling LASI interrupt: %d\n",
+ error);
+ return;
+}
+
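[Editorial note: this tasklet completes the usual FreeBSD deferral pattern; the interrupt handler only clears the cause bit and enqueues work, while the potentially slow MDIO access in handle_lasi runs later in taskqueue context. Both halves appear elsewhere in this patch:

	/* at attach time */
	TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter);
	/* in the interrupt handler, after writing EICR */
	taskqueue_enqueue(adapter->tq, &adapter->phy_task);
]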
#ifdef IXGBE_FDIR
/*
** Tasklet for reinitializing the Flow Director filter table
@@ -3432,6 +3588,127 @@ ixgbe_reinit_fdir(void *context, int pending)
}
#endif
+/*********************************************************************
+ *
+ * Configure DMA Coalescing
+ *
+ **********************************************************************/
+static void
+ixgbe_config_dmac(struct adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
+
+ if (hw->mac.type < ixgbe_mac_X550 ||
+ !hw->mac.ops.dmac_config)
+ return;
+
+ if (dcfg->watchdog_timer ^ adapter->dmac ||
+ dcfg->link_speed ^ adapter->link_speed) {
+ dcfg->watchdog_timer = adapter->dmac;
+ dcfg->fcoe_en = false;
+ dcfg->link_speed = adapter->link_speed;
+ dcfg->num_tcs = 1;
+
+ INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n",
+ dcfg->watchdog_timer, dcfg->link_speed);
+
+ hw->mac.ops.dmac_config(hw);
+ }
+}
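[Editorial note: the XOR tests above are a compact change check; x ^ y is nonzero exactly when the operands differ, so dmac_config() is only invoked when the watchdog timer or link speed actually changed. An equivalent, more explicit form:

	if (dcfg->watchdog_timer != adapter->dmac ||
	    dcfg->link_speed != adapter->link_speed) {
		/* ... reprogram DMA coalescing ... */
	}
]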
+
+/*
+ * Checks whether the adapter supports Energy Efficient Ethernet
+ * or not, based on device ID.
+ */
+static void
+ixgbe_check_eee_support(struct adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ adapter->eee_support = adapter->eee_enabled =
+ (hw->device_id == IXGBE_DEV_ID_X550T ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_KR);
+}
+
+/*
+ * Checks whether the adapter's ports are capable of
+ * Wake On LAN by reading the adapter's NVM.
+ *
+ * Sets each port's hw->wol_enabled value depending
+ * on the value read here.
+ */
+static void
+ixgbe_check_wol_support(struct adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u16 dev_caps = 0;
+
+ /* Find out WoL support for port */
+ adapter->wol_support = hw->wol_enabled = 0;
+ ixgbe_get_device_caps(hw, &dev_caps);
+ if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
+ ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) &&
+ hw->bus.func == 0))
+ adapter->wol_support = hw->wol_enabled = 1;
+
+ /* Save initial wake up filter configuration */
+ adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
+
+ return;
+}
+
+/*
+ * Prepare the adapter/port for LPLU and/or WoL
+ */
+static int
+ixgbe_setup_low_power_mode(struct adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ device_t dev = adapter->dev;
+ s32 error = 0;
+
+ mtx_assert(&adapter->core_mtx, MA_OWNED);
+
+ /* Limit power management flow to X550EM baseT */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T
+ && hw->phy.ops.enter_lplu) {
+ /* Turn off support for APM wakeup. (Using ACPI instead) */
+ IXGBE_WRITE_REG(hw, IXGBE_GRC,
+ IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2);
+
+ /*
+ * Clear Wake Up Status register to prevent any previous wakeup
+ * events from waking us up immediately after we suspend.
+ */
+ IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
+
+ /*
+ * Program the Wakeup Filter Control register with user filter
+ * settings
+ */
+ IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc);
+
+ /* Enable wakeups and power management in Wakeup Control */
+ IXGBE_WRITE_REG(hw, IXGBE_WUC,
+ IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
+
+ /* X550EM baseT adapters need a special LPLU flow */
+ hw->phy.reset_disable = true;
+ ixgbe_stop(adapter);
+ error = hw->phy.ops.enter_lplu(hw);
+ if (error)
+ device_printf(dev,
+ "Error entering LPLU: %d\n", error);
+ hw->phy.reset_disable = false;
+ } else {
+ /* Just stop for other adapters */
+ ixgbe_stop(adapter);
+ }
+
+ return error;
+}
+
/**********************************************************************
*
* Update the board statistics counters.
@@ -3449,42 +3726,6 @@ ixgbe_update_stats_counters(struct adapter *adapter)
adapter->stats.pf.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
adapter->stats.pf.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
- /*
- ** Note: these are for the 8 possible traffic classes,
- ** which in current implementation is unused,
- ** therefore only 0 should read real data.
- */
- for (int i = 0; i < 8; i++) {
- u32 mp;
- mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
- /* missed_rx tallies misses for the gprc workaround */
- missed_rx += mp;
- /* global total per queue */
- adapter->stats.pf.mpc[i] += mp;
- /* total for stats display */
- total_missed_rx += adapter->stats.pf.mpc[i];
- if (hw->mac.type == ixgbe_mac_82598EB) {
- adapter->stats.pf.rnbc[i] +=
- IXGBE_READ_REG(hw, IXGBE_RNBC(i));
- adapter->stats.pf.qbtc[i] +=
- IXGBE_READ_REG(hw, IXGBE_QBTC(i));
- adapter->stats.pf.qbrc[i] +=
- IXGBE_READ_REG(hw, IXGBE_QBRC(i));
- adapter->stats.pf.pxonrxc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
- } else
- adapter->stats.pf.pxonrxc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
- adapter->stats.pf.pxontxc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
- adapter->stats.pf.pxofftxc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
- if (hw->mac.type != ixgbe_mac_X550EM_x)
- adapter->stats.pf.pxoffrxc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
- adapter->stats.pf.pxon2offc[i] +=
- IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
- }
for (int i = 0; i < 16; i++) {
adapter->stats.pf.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
adapter->stats.pf.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
@@ -3592,6 +3833,8 @@ static uint64_t
ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt)
{
struct adapter *adapter;
+ struct tx_ring *txr;
+ uint64_t rv;
adapter = if_getsoftc(ifp);
@@ -3612,6 +3855,12 @@ ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt)
return (0);
case IFCOUNTER_IQDROPS:
return (adapter->iqdrops);
+ case IFCOUNTER_OQDROPS:
+ rv = 0;
+ txr = adapter->tx_rings;
+ for (int i = 0; i < adapter->num_queues; i++, txr++)
+ rv += txr->br->br_drops;
+ return (rv);
case IFCOUNTER_IERRORS:
return (adapter->ierrors);
default:
@@ -3720,6 +3969,108 @@ ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
return 0;
}
+static void
+ixgbe_add_device_sysctls(struct adapter *adapter)
+{
+ device_t dev = adapter->dev;
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct sysctl_oid_list *child;
+ struct sysctl_ctx_list *ctx;
+
+ ctx = device_get_sysctl_ctx(dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+ /* Sysctls for all devices */
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc",
+ CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+ ixgbe_set_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC);
+
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim",
+ CTLFLAG_RW,
+ &ixgbe_enable_aim, 1, "Interrupt Moderation");
+
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed",
+ CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+ ixgbe_set_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED);
+
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "thermal_test",
+ CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+ ixgbe_sysctl_thermal_test, "I", "Thermal Test");
+
+ /* for X550 devices */
+ if (hw->mac.type >= ixgbe_mac_X550)
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "dmac",
+ CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+ ixgbe_sysctl_dmac, "I", "DMA Coalesce");
+
+ /* for X550T and X550EM backplane devices */
+ if (hw->device_id == IXGBE_DEV_ID_X550T ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) {
+ struct sysctl_oid *eee_node;
+ struct sysctl_oid_list *eee_list;
+
+ eee_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eee",
+ CTLFLAG_RD, NULL,
+ "Energy Efficient Ethernet sysctls");
+ eee_list = SYSCTL_CHILDREN(eee_node);
+
+ SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "enable",
+ CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+ ixgbe_sysctl_eee_enable, "I",
+ "Enable or Disable EEE");
+
+ SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "negotiated",
+ CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+ ixgbe_sysctl_eee_negotiated, "I",
+ "EEE negotiated on link");
+
+ SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_status",
+ CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+ ixgbe_sysctl_eee_tx_lpi_status, "I",
+ "Whether or not TX link is in LPI state");
+
+ SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "rx_lpi_status",
+ CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+ ixgbe_sysctl_eee_rx_lpi_status, "I",
+ "Whether or not RX link is in LPI state");
+ }
+
+ /* for certain 10GBaseT devices */
+ if (hw->device_id == IXGBE_DEV_ID_X550T ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable",
+ CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+ ixgbe_sysctl_wol_enable, "I",
+ "Enable/Disable Wake on LAN");
+
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wufc",
+ CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+ ixgbe_sysctl_wufc, "I",
+ "Enable/Disable Wake Up Filters");
+ }
+
+ /* for X550EM 10GBaseT devices */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
+ struct sysctl_oid *phy_node;
+ struct sysctl_oid_list *phy_list;
+
+ phy_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "phy",
+ CTLFLAG_RD, NULL,
+ "External PHY sysctls");
+ phy_list = SYSCTL_CHILDREN(phy_node);
+
+ SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "temp",
+ CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+ ixgbe_sysctl_phy_temp, "I",
+ "Current External PHY Temperature (Celsius)");
+
+ SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "overtemp_occurred",
+ CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+ ixgbe_sysctl_phy_overtemp_occurred, "I",
+ "External PHY High Temperature Event Occurred");
+ }
+}
+
/*
* Add sysctl variables, one per statistic, to the system.
*/
@@ -3753,7 +4104,7 @@ ixgbe_add_hw_stats(struct adapter *adapter)
CTLFLAG_RD, &adapter->watchdog_events,
"Watchdog timeouts");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
- CTLFLAG_RD, &adapter->vector_irq,
+ CTLFLAG_RD, &adapter->link_irq,
"Link MSIX IRQ Handled");
for (int i = 0; i < adapter->num_queues; i++, txr++) {
@@ -3790,6 +4141,9 @@ ixgbe_add_hw_stats(struct adapter *adapter)
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
CTLFLAG_RD, &txr->total_packets,
"Queue Packets Transmitted");
+ SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops",
+ CTLFLAG_RD, &txr->br->br_drops,
+ "Packets dropped in buf_ring");
}
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
@@ -4082,20 +4436,77 @@ ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
}
/*
-** Thermal Shutdown Trigger
-** - cause a Thermal Overtemp IRQ
-** - this now requires firmware enabling
-*/
+ * The following two sysctls are for X550 BaseT devices;
+ * they deal with the external PHY used in them.
+ */
static int
-ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
+ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS)
{
- int error, fire = 0;
struct adapter *adapter = (struct adapter *) arg1;
struct ixgbe_hw *hw = &adapter->hw;
+ u16 reg;
+ if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) {
+ device_printf(adapter->dev,
+ "Device has no supported external thermal sensor.\n");
+ return (ENODEV);
+ }
- if (hw->mac.type < ixgbe_mac_X540)
- return (0);
+ if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg)) {
+ device_printf(adapter->dev,
+ "Error reading from PHY's current temperature register\n");
+ return (EAGAIN);
+ }
+
+ /* Shift temp for output */
+ reg = reg >> 8;
+
+ return (sysctl_handle_int(oidp, NULL, reg, req));
+}
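[Editorial note: the reg >> 8 above keeps only the high byte of the vendor register; assuming the usual encoding for this PHY (whole degrees Celsius in the upper byte, which the shift implies), a raw read converts directly:

	u16 raw = 0x1a00;	/* hypothetical raw register value */
	int temp_c = raw >> 8;	/* 0x1a = 26 degrees Celsius */
]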
+
+/*
+ * Reports whether an overtemp event has occurred on the
+ * external PHY.
+ * - This is read directly from the PHY
+ */
+static int
+ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u16 reg;
+
+ if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) {
+ device_printf(adapter->dev,
+ "Device has no supported external thermal sensor.\n");
+ return (ENODEV);
+ }
+
+ if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg)) {
+ device_printf(adapter->dev,
+ "Error reading from PHY's temperature status register\n");
+ return (EAGAIN);
+ }
+
+ /* Get occurrence bit */
+ reg = !!(reg & 0x4000);
+ return (sysctl_handle_int(oidp, 0, reg, req));
+}
+
+/*
+** Thermal Shutdown Trigger (internal MAC)
+** - Set this to 1 to cause an overtemp event to occur
+*/
+static int
+ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int error, fire = 0;
error = sysctl_handle_int(oidp, &fire, 0, req);
if ((error) || (req->newptr == NULL))
@@ -4111,6 +4522,223 @@ ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
}
/*
+** Manage DMA Coalescing.
+** Control values:
+** 0/1 - off / on (use default value of 1000)
+**
+** Legal timer values are:
+** 50,100,250,500,1000,2000,5000,10000
+**
+** Turning off interrupt moderation will also turn this off.
+*/
+static int
+ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ifnet *ifp = adapter->ifp;
+ int error;
+ u16 oldval;
+
+ oldval = adapter->dmac;
+ error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
+ if ((error) || (req->newptr == NULL))
+ return (error);
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
+ break;
+ default:
+ device_printf(adapter->dev,
+ "DMA Coalescing is only supported on X550 devices\n");
+ return (ENODEV);
+ }
+
+ switch (adapter->dmac) {
+ case 0:
+ /* Disabled */
+ break;
+ case 1: /* Enable and use default */
+ adapter->dmac = 1000;
+ break;
+ case 50:
+ case 100:
+ case 250:
+ case 500:
+ case 1000:
+ case 2000:
+ case 5000:
+ case 10000:
+ /* Legal values - allow */
+ break;
+ default:
+		/* Illegal value, restore the old setting */
+ adapter->dmac = oldval;
+ return (EINVAL);
+ }
+
+ /* Re-initialize hardware if it's already running */
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ ixgbe_init(adapter);
+
+ return (0);
+}
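[Editorial note: a hypothetical userland sketch of driving this handler through sysctlbyname(3); the "dev.ix.0.dmac" OID name assumes device unit 0:

	#include <sys/types.h>
	#include <sys/sysctl.h>

	int
	set_dmac(int usec)	/* 0 = off, 1 = default (1000), or a legal timer */
	{
		return (sysctlbyname("dev.ix.0.dmac", NULL, NULL,
		    &usec, sizeof(usec)));
	}
]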
+
+/*
+ * Sysctl to enable/disable the WoL capability, if supported by the adapter.
+ * Values:
+ * 0 - disabled
+ * 1 - enabled
+ */
+static int
+ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int new_wol_enabled;
+ int error = 0;
+
+ new_wol_enabled = hw->wol_enabled;
+ error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req);
+ if ((error) || (req->newptr == NULL))
+ return (error);
+ if (new_wol_enabled == hw->wol_enabled)
+ return (0);
+
+ if (new_wol_enabled > 0 && !adapter->wol_support)
+ return (ENODEV);
+ else
+ hw->wol_enabled = !!(new_wol_enabled);
+
+ return (0);
+}
+
+/*
+ * Sysctl to enable/disable the Energy Efficient Ethernet capability,
+ * if supported by the adapter.
+ * Values:
+ * 0 - disabled
+ * 1 - enabled
+ */
+static int
+ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ifnet *ifp = adapter->ifp;
+ int new_eee_enabled, error = 0;
+
+ new_eee_enabled = adapter->eee_enabled;
+ error = sysctl_handle_int(oidp, &new_eee_enabled, 0, req);
+ if ((error) || (req->newptr == NULL))
+ return (error);
+ if (new_eee_enabled == adapter->eee_enabled)
+ return (0);
+
+ if (new_eee_enabled > 0 && !adapter->eee_support)
+ return (ENODEV);
+ else
+ adapter->eee_enabled = !!(new_eee_enabled);
+
+ /* Re-initialize hardware if it's already running */
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ ixgbe_init(adapter);
+
+ return (0);
+}
+
+/*
+ * Read-only sysctl indicating whether EEE support was negotiated
+ * on the link.
+ */
+static int
+ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
+ bool status;
+
+ status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG);
+
+ return (sysctl_handle_int(oidp, 0, status, req));
+}
+
+/*
+ * Read-only sysctl indicating whether RX Link is in LPI state.
+ */
+static int
+ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
+ bool status;
+
+ status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) &
+ IXGBE_EEE_RX_LPI_STATUS);
+
+ return (sysctl_handle_int(oidp, 0, status, req));
+}
+
+/*
+ * Read-only sysctl indicating whether TX Link is in LPI state.
+ */
+static int
+ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
+ bool status;
+
+ status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) &
+ IXGBE_EEE_TX_LPI_STATUS);
+
+ return (sysctl_handle_int(oidp, 0, status, req));
+}
+
+/*
+ * Sysctl to enable/disable the packet types that will wake
+ * the adapter upon receipt.
+ * WUFC - Wake Up Filter Control
+ * Flags:
+ * 0x1 - Link Status Change
+ * 0x2 - Magic Packet
+ * 0x4 - Direct Exact
+ * 0x8 - Directed Multicast
+ * 0x10 - Broadcast
+ * 0x20 - ARP/IPv4 Request Packet
+ * 0x40 - Direct IPv4 Packet
+ * 0x80 - Direct IPv6 Packet
+ *
+ * Setting any other flag will cause the sysctl to return an
+ * error.
+ */
+static int
+ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ int error = 0;
+ u32 new_wufc;
+
+ new_wufc = adapter->wufc;
+
+ error = sysctl_handle_int(oidp, &new_wufc, 0, req);
+ if ((error) || (req->newptr == NULL))
+ return (error);
+ if (new_wufc == adapter->wufc)
+ return (0);
+
+ if (new_wufc & 0xffffff00)
+ return (EINVAL);
+ else {
+ new_wufc &= 0xff;
+ new_wufc |= (0xffffff & adapter->wufc);
+ adapter->wufc = new_wufc;
+ }
+
+ return (0);
+}
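[Editorial note: per the flag table above, a caller would compose a filter value like this (the IXGBE_WUFC_* names are assumed to match the bit layout shown):

	/* wake on link status change (0x1) and magic packet (0x2) */
	u32 wufc = 0x1 | 0x2;	/* IXGBE_WUFC_LNKC | IXGBE_WUFC_MAG, assumed */
	/* any bits above 0xff are rejected by the handler with EINVAL */
]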
+
+/*
** Enable the hardware to drop packets when the buffer is
** full. This is useful when multiqueue, so that no single
** queue being full stalls the entire RX engine. We only
@@ -4154,6 +4782,7 @@ ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
case ixgbe_mac_X550:
+ case ixgbe_mac_X550EM_x:
mask = (queues & 0xFFFFFFFF);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
mask = (queues >> 32);
diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c
index 672077b..a550a85 100644
--- a/sys/dev/ixgbe/if_ixv.c
+++ b/sys/dev/ixgbe/if_ixv.c
@@ -60,7 +60,6 @@ static ixgbe_vendor_info_t ixv_vendor_info_array[] =
{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, 0, 0, 0},
{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, 0, 0, 0},
{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, 0, 0, 0},
- {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF, 0, 0, 0},
{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, 0, 0, 0},
/* required last entry */
{0, 0, 0, 0, 0}
@@ -882,7 +881,7 @@ ixv_msix_mbx(void *arg)
struct ixgbe_hw *hw = &adapter->hw;
u32 reg;
- ++adapter->vector_irq;
+ ++adapter->link_irq;
/* First get the cause */
reg = IXGBE_READ_REG(hw, IXGBE_VTEICS);
@@ -2034,8 +2033,8 @@ ixv_add_stats_sysctls(struct adapter *adapter)
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
CTLFLAG_RD, &(txr->total_packets),
"TX Packets");
- SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes",
- CTLFLAG_RD, &(txr->tx_bytes),
+ SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_bytes",
+ CTLFLAG_RD, &(txr->bytes), 0,
"TX Bytes");
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_no_desc",
CTLFLAG_RD, &(txr->no_desc_avail),
@@ -2083,7 +2082,7 @@ ixv_print_debug_info(struct adapter *adapter)
}
device_printf(dev,"MBX IRQ Handled: %lu\n",
- (long)adapter->vector_irq);
+ (long)adapter->link_irq);
return;
}
diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c
index 6c9c84a..bfe1607 100644
--- a/sys/dev/ixgbe/ix_txrx.c
+++ b/sys/dev/ixgbe/ix_txrx.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -210,7 +210,11 @@ ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
* If everything is setup correctly, it should be the
* same bucket that the current CPU we're on is.
*/
+#if __FreeBSD_version < 1100054
+ if (m->m_flags & M_FLOWID) {
+#else
if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+#endif
#ifdef RSS
if (rss_hash2bucket(m->m_pkthdr.flowid,
M_HASHTYPE_GET(m), &bucket_id) == 0)
@@ -276,7 +280,12 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
- if (next->m_flags & M_MCAST)
+ /*
+ * Since we're looking at the tx ring, we can check
+ * to see if we're a VF by examing our tail register
+	 * to see if we're a VF by examining our tail register
+ */
+ if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
@@ -312,8 +321,8 @@ ixgbe_deferred_mq_start(void *arg, int pending)
}
/*
-** Flush all ring buffers
-*/
+ * Flush all ring buffers
+ */
void
ixgbe_qflush(struct ifnet *ifp)
{
@@ -387,6 +396,10 @@ retry:
/* Try it again? - one try */
if (remap == TRUE) {
remap = FALSE;
+ /*
+ * XXX: m_defrag will choke on
+ * non-MCLBYTES-sized clusters
+ */
m = m_defrag(*m_headp, M_NOWAIT);
if (m == NULL) {
adapter->mbuf_defrag_failed++;
@@ -418,9 +431,9 @@ retry:
m_head = *m_headp;
/*
- ** Set up the appropriate offload context
- ** this will consume the first descriptor
- */
+ * Set up the appropriate offload context
+ * this will consume the first descriptor
+ */
error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
if (__predict_false(error)) {
if (error == ENOBUFS)
@@ -439,7 +452,6 @@ retry:
}
#endif
- olinfo_status |= IXGBE_ADVTXD_CC;
i = txr->next_avail_desc;
for (j = 0; j < nsegs; j++) {
bus_size_t seglen;
@@ -466,11 +478,11 @@ retry:
txbuf->m_head = m_head;
/*
- ** Here we swap the map so the last descriptor,
- ** which gets the completion interrupt has the
- ** real map, and the first descriptor gets the
- ** unused map from this descriptor.
- */
+ * Here we swap the map so the last descriptor,
+ * which gets the completion interrupt has the
+ * real map, and the first descriptor gets the
+ * unused map from this descriptor.
+ */
txr->tx_buffers[first].map = txbuf->map;
txbuf->map = map;
bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
@@ -493,7 +505,6 @@ retry:
txr->busy = 1;
return (0);
-
}
@@ -732,6 +743,7 @@ static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
u32 *cmd_type_len, u32 *olinfo_status)
{
+ struct adapter *adapter = txr->adapter;
struct ixgbe_adv_tx_context_desc *TXD;
struct ether_vlan_header *eh;
struct ip *ip;
@@ -766,6 +778,8 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
vtag = htole16(mp->m_pkthdr.ether_vtag);
vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
}
+ else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
+ return (0);
/*
* Determine where frame payload starts.
@@ -1727,9 +1741,6 @@ ixgbe_rx_discard(struct rx_ring *rxr, int i)
* the mbufs in the descriptor and sends data which has been
* dma'ed into host memory to upper layer.
*
- * We loop at most count times if count is > 0, or until done if
- * count < 0.
- *
* Return TRUE for more work, FALSE for all clean.
*********************************************************************/
bool
@@ -1792,10 +1803,9 @@ ixgbe_rxeof(struct ix_queue *que)
/* Make sure bad packets are discarded */
if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
-#if 0 // VF-only
#if __FreeBSD_version >= 1100036
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
-#endif
+ if (IXGBE_IS_VF(adapter))
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
rxr->rx_discarded++;
ixgbe_rx_discard(rxr, i);
@@ -1906,6 +1916,9 @@ ixgbe_rxeof(struct ix_queue *que)
#ifdef RSS
sendmp->m_pkthdr.flowid =
le32toh(cur->wb.lower.hi_dword.rss);
+#if __FreeBSD_version < 1100054
+ sendmp->m_flags |= M_FLOWID;
+#endif
switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
@@ -1939,7 +1952,11 @@ ixgbe_rxeof(struct ix_queue *que)
}
#else /* RSS */
sendmp->m_pkthdr.flowid = que->msix;
+#if __FreeBSD_version >= 1100054
M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
+#else
+ sendmp->m_flags |= M_FLOWID;
+#endif
#endif /* RSS */
#endif /* FreeBSD_version */
}
diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h
index 813ee4f..44e7c3c 100644
--- a/sys/dev/ixgbe/ixgbe.h
+++ b/sys/dev/ixgbe/ixgbe.h
@@ -90,8 +90,11 @@
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
+#include <sys/sbuf.h>
#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
#include "ixgbe_vf.h"
/* Tunables */
@@ -146,7 +149,11 @@
#define IXGBE_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8)
#define IXGBE_TX_OP_THRESHOLD (adapter->num_tx_desc / 32)
-#define IXGBE_MAX_FRAME_SIZE 0x3F00
+/* These defines are used in MTU calculations */
+#define IXGBE_MAX_FRAME_SIZE 9728
+#define IXGBE_MTU_HDR (ETHER_HDR_LEN + ETHER_CRC_LEN + \
+ ETHER_VLAN_ENCAP_LEN)
+#define IXGBE_MAX_MTU (IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR)
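[Editorial note: with ETHER_HDR_LEN (14), ETHER_CRC_LEN (4) and ETHER_VLAN_ENCAP_LEN (4), these work out to:

	/* IXGBE_MTU_HDR = 14 + 4 + 4 = 22 bytes of framing */
	/* IXGBE_MAX_MTU = 9728 - 22  = 9706 bytes, the jumbo-frame MTU cap */
]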
/* Flow control constants */
#define IXGBE_FC_PAUSE 0xFFFF
@@ -227,6 +234,17 @@
#define IXGBE_BULK_LATENCY 1200
#define IXGBE_LINK_ITR 2000
+/* MAC type macros */
+#define IXGBE_IS_X550VF(_adapter) \
+ ((_adapter->hw.mac.type == ixgbe_mac_X550_vf) || \
+ (_adapter->hw.mac.type == ixgbe_mac_X550EM_x_vf))
+
+#define IXGBE_IS_VF(_adapter) \
+ (IXGBE_IS_X550VF(_adapter) || \
+ (_adapter->hw.mac.type == ixgbe_mac_X540_vf) || \
+ (_adapter->hw.mac.type == ixgbe_mac_82599_vf))
+
+
/*
*****************************************************************************
* vendor_info_array
@@ -323,8 +341,8 @@ struct tx_ring {
u32 bytes; /* used for AIM */
u32 packets;
/* Soft Stats */
- u64 tx_bytes;
unsigned long tso_tx;
+ unsigned long no_tx_map_avail;
unsigned long no_tx_dma_setup;
u64 no_desc_avail;
u64 total_packets;
@@ -419,6 +437,13 @@ struct adapter {
u32 link_speed;
bool link_up;
u32 vector;
+ u16 dmac;
+ bool eee_support;
+ bool eee_enabled;
+
+ /* Power management-related */
+ bool wol_support;
+ u32 wufc;
/* Mbuf cluster size */
u32 rx_mbuf_sz;
@@ -432,6 +457,7 @@ struct adapter {
int fdir_reinit;
struct task fdir_task;
#endif
+ struct task phy_task; /* PHY intr tasklet */
struct taskqueue *tq;
/*
@@ -467,7 +493,7 @@ struct adapter {
unsigned long mbuf_header_failed;
unsigned long mbuf_packet_failed;
unsigned long watchdog_events;
- unsigned long vector_irq;
+ unsigned long link_irq;
union {
struct ixgbe_hw_stats pf;
struct ixgbevf_hw_stats vf;
@@ -540,12 +566,17 @@ struct adapter {
#define IXGBE_SET_IQDROPS(sc, count) (sc)->ifp->if_iqdrops = (count)
#endif
+/* External PHY register addresses */
+#define IXGBE_PHY_CURRENT_TEMP 0xC820
+#define IXGBE_PHY_OVERTEMP_STATUS 0xC830
+
/* Sysctl help messages; displayed with sysctl -d */
#define IXGBE_SYSCTL_DESC_ADV_SPEED \
"\nControl advertised link speed using these flags:\n" \
"\t0x1 - advertise 100M\n" \
"\t0x2 - advertise 1G\n" \
- "\t0x4 - advertise 10G"
+ "\t0x4 - advertise 10G\n\n" \
+ "\t100M is only supported on certain 10GBaseT adapters.\n"
#define IXGBE_SYSCTL_DESC_SET_FC \
"\nSet flow control mode using these values:\n" \
diff --git a/sys/dev/ixgbe/ixgbe_82598.c b/sys/dev/ixgbe/ixgbe_82598.c
index d9b8985..46e64c5 100644
--- a/sys/dev/ixgbe/ixgbe_82598.c
+++ b/sys/dev/ixgbe/ixgbe_82598.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -260,6 +260,8 @@ s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
DEBUGFUNC("ixgbe_start_hw_82598");
ret_val = ixgbe_start_hw_generic(hw);
+ if (ret_val)
+ return ret_val;
/* Disable relaxed ordering */
for (i = 0; ((i < hw->mac.max_tx_queues) &&
@@ -278,8 +280,7 @@ s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
}
/* set the completion timeout for interface */
- if (ret_val == IXGBE_SUCCESS)
- ixgbe_set_pcie_completion_timeout(hw);
+ ixgbe_set_pcie_completion_timeout(hw);
return ret_val;
}
diff --git a/sys/dev/ixgbe/ixgbe_82598.h b/sys/dev/ixgbe/ixgbe_82598.h
index 621be41..d2241c7 100644
--- a/sys/dev/ixgbe/ixgbe_82598.h
+++ b/sys/dev/ixgbe/ixgbe_82598.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_82599.c b/sys/dev/ixgbe/ixgbe_82599.c
index d49d851..b38620f 100644
--- a/sys/dev/ixgbe/ixgbe_82599.c
+++ b/sys/dev/ixgbe/ixgbe_82599.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -421,6 +421,8 @@ s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
/* Check if 1G SFP module. */
if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
*speed = IXGBE_LINK_SPEED_1GB_FULL;
@@ -1803,7 +1805,6 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
switch (hw->mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, ~fdirtcpm);
break;
default:
@@ -2465,7 +2466,6 @@ reset_pipeline_out:
return ret_val;
}
-
/**
* ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C
* @hw: pointer to hardware structure
diff --git a/sys/dev/ixgbe/ixgbe_82599.h b/sys/dev/ixgbe/ixgbe_82599.h
index 8c973ac..bcfb043 100644
--- a/sys/dev/ixgbe/ixgbe_82599.h
+++ b/sys/dev/ixgbe/ixgbe_82599.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_api.c b/sys/dev/ixgbe/ixgbe_api.c
index 2535664..9784e3c 100644
--- a/sys/dev/ixgbe/ixgbe_api.c
+++ b/sys/dev/ixgbe/ixgbe_api.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,22 @@
#include "ixgbe_api.h"
#include "ixgbe_common.h"
+static const u32 ixgbe_mvals_base[IXGBE_MVALS_IDX_LIMIT] = {
+ IXGBE_MVALS_INIT()
+};
+
+static const u32 ixgbe_mvals_X540[IXGBE_MVALS_IDX_LIMIT] = {
+ IXGBE_MVALS_INIT(_X540)
+};
+
+static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = {
+ IXGBE_MVALS_INIT(_X550)
+};
+
+static const u32 ixgbe_mvals_X550EM_x[IXGBE_MVALS_IDX_LIMIT] = {
+ IXGBE_MVALS_INIT(_X550EM_x)
+};
+
/**
* ixgbe_dcb_get_rtrup2tc - read rtrup2tc reg
* @hw: pointer to hardware structure
@@ -81,20 +97,16 @@ s32 ixgbe_init_shared_code(struct ixgbe_hw *hw)
case ixgbe_mac_X540:
status = ixgbe_init_ops_X540(hw);
break;
-#if 0 //JFV temporary disable
case ixgbe_mac_X550:
status = ixgbe_init_ops_X550(hw);
break;
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
status = ixgbe_init_ops_X550EM(hw);
break;
-#endif
case ixgbe_mac_82599_vf:
case ixgbe_mac_X540_vf:
case ixgbe_mac_X550_vf:
case ixgbe_mac_X550EM_x_vf:
- case ixgbe_mac_X550EM_a_vf:
status = ixgbe_init_ops_vf(hw);
break;
default:
@@ -124,6 +136,8 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
return IXGBE_ERR_DEVICE_NOT_SUPPORTED;
}
+ hw->mvals = ixgbe_mvals_base;
+
switch (hw->device_id) {
case IXGBE_DEV_ID_82598:
case IXGBE_DEV_ID_82598_BX:
@@ -164,14 +178,17 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
case IXGBE_DEV_ID_X540_VF:
case IXGBE_DEV_ID_X540_VF_HV:
hw->mac.type = ixgbe_mac_X540_vf;
+ hw->mvals = ixgbe_mvals_X540;
break;
case IXGBE_DEV_ID_X540T:
case IXGBE_DEV_ID_X540T1:
case IXGBE_DEV_ID_X540_BYPASS:
hw->mac.type = ixgbe_mac_X540;
+ hw->mvals = ixgbe_mvals_X540;
break;
case IXGBE_DEV_ID_X550T:
hw->mac.type = ixgbe_mac_X550;
+ hw->mvals = ixgbe_mvals_X550;
break;
case IXGBE_DEV_ID_X550EM_X_KX4:
case IXGBE_DEV_ID_X550EM_X_KR:
@@ -179,21 +196,17 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
case IXGBE_DEV_ID_X550EM_X_1G_T:
case IXGBE_DEV_ID_X550EM_X_SFP:
hw->mac.type = ixgbe_mac_X550EM_x;
- break;
- case IXGBE_DEV_ID_X550EM_A_KR:
- hw->mac.type = ixgbe_mac_X550EM_a;
+ hw->mvals = ixgbe_mvals_X550EM_x;
break;
case IXGBE_DEV_ID_X550_VF:
case IXGBE_DEV_ID_X550_VF_HV:
hw->mac.type = ixgbe_mac_X550_vf;
+ hw->mvals = ixgbe_mvals_X550;
break;
case IXGBE_DEV_ID_X550EM_X_VF:
case IXGBE_DEV_ID_X550EM_X_VF_HV:
hw->mac.type = ixgbe_mac_X550EM_x_vf;
- break;
- case IXGBE_DEV_ID_X550EM_A_VF:
- case IXGBE_DEV_ID_X550EM_A_VF_HV:
- hw->mac.type = ixgbe_mac_X550EM_a_vf;
+ hw->mvals = ixgbe_mvals_X550EM_x;
break;
default:
ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
@@ -1283,6 +1296,23 @@ s32 ixgbe_enter_lplu(struct ixgbe_hw *hw)
}
/**
+ * ixgbe_handle_lasi - Handle external Base T PHY interrupt
+ * @hw: pointer to hardware structure
+ *
+ * Handle external Base T PHY interrupt. If the interrupt is a high
+ * temperature failure alarm, return an error; if it is a link status
+ * change, set up the internal/external PHY link.
+ *
+ * Returns IXGBE_ERR_OVERTEMP if the interrupt is a high temperature
+ * failure alarm; otherwise returns the PHY access status.
+ */
+s32 ixgbe_handle_lasi(struct ixgbe_hw *hw)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.handle_lasi, (hw),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
* ixgbe_read_analog_reg8 - Reads 8 bit analog register
* @hw: pointer to hardware structure
* @reg: analog register to read
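[Note: ixgbe_handle_lasi, like every other wrapper in ixgbe_api.c, dispatches through ixgbe_call_func, which invokes the op only when the PHY layer installed the function pointer and otherwise yields IXGBE_NOT_IMPLEMENTED. A standalone sketch of that guarded-dispatch idiom; the macro body is a plausible reconstruction of the shape, not the driver's exact definition:

    #include <stddef.h>
    #include <stdio.h>

    #define NOT_IMPLEMENTED 0x7FFFFFFF

    /* Invoke through the op pointer when set, else return the sentinel. */
    #define call_func(func, args, unsupported) \
        ((func) != NULL ? (func) args : (unsupported))

    struct phy_ops {
        int (*handle_lasi)(int port);
    };

    static int handle_lasi_impl(int port)
    {
        return port;  /* stand-in for the real interrupt handler */
    }

    int main(void)
    {
        struct phy_ops ops = { NULL };

        printf("%d\n", call_func(ops.handle_lasi, (0), NOT_IMPLEMENTED));
        ops.handle_lasi = handle_lasi_impl;
        printf("%d\n", call_func(ops.handle_lasi, (0), NOT_IMPLEMENTED));
        return 0;
    }
]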
@@ -1340,6 +1370,23 @@ s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
}
/**
+ * ixgbe_read_i2c_byte_unlocked - Reads 8 bit word via I2C from device address
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @dev_addr: I2C bus address to read from
+ * @data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_byte_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte_unlocked,
+ (hw, byte_offset, dev_addr, data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
* ixgbe_read_i2c_combined - Perform I2C read combined operation
* @hw: pointer to the hardware structure
* @addr: I2C bus address to read from
@@ -1355,6 +1402,23 @@ s32 ixgbe_read_i2c_combined(struct ixgbe_hw *hw, u8 addr, u16 reg, u16 *val)
}
/**
+ * ixgbe_read_i2c_combined_unlocked - Perform I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Returns an error code on error.
+ **/
+s32 ixgbe_read_i2c_combined_unlocked(struct ixgbe_hw *hw, u8 addr, u16 reg,
+ u16 *val)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.read_i2c_combined_unlocked,
+ (hw, addr, reg, val),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
* ixgbe_write_i2c_byte - Writes 8 bit word over I2C
* @hw: pointer to hardware structure
* @byte_offset: byte offset to write
@@ -1372,6 +1436,24 @@ s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
}
/**
+ * ixgbe_write_i2c_byte_unlocked - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @dev_addr: I2C bus address to write to
+ * @data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface
+ * at a specified device address.
+ **/
+s32 ixgbe_write_i2c_byte_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte_unlocked,
+ (hw, byte_offset, dev_addr, data),
+ IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
* ixgbe_write_i2c_combined - Perform I2C write combined operation
* @hw: pointer to the hardware structure
* @addr: I2C bus address to write to
@@ -1387,6 +1469,22 @@ s32 ixgbe_write_i2c_combined(struct ixgbe_hw *hw, u8 addr, u16 reg, u16 val)
}
/**
+ * ixgbe_write_i2c_combined_unlocked - Perform I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Returns an error code on error.
+ **/
+s32 ixgbe_write_i2c_combined_unlocked(struct ixgbe_hw *hw, u8 addr, u16 reg,
+ u16 val)
+{
+ return ixgbe_call_func(hw, hw->phy.ops.write_i2c_combined_unlocked,
+ (hw, addr, reg, val), IXGBE_NOT_IMPLEMENTED);
+}
+
+/**
* ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface
* @hw: pointer to hardware structure
* @byte_offset: EEPROM byte offset to write
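[Note: the four *_unlocked entry points added in this file exist for callers that already hold the SWFW semaphore and must not re-acquire it per bus transaction; the new CS4227 helpers in ixgbe_x550.c (later in this diff) are their first consumers. A self-contained sketch of the intended calling pattern, with a pthread mutex standing in for the hardware SWFW semaphore and hypothetical bus helpers:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t bus_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Locked variant: safe for one-shot, standalone calls. */
    static int bus_read(unsigned off, unsigned char *data)
    {
        pthread_mutex_lock(&bus_lock);
        *data = (unsigned char)off;  /* pretend I2C transfer */
        pthread_mutex_unlock(&bus_lock);
        return 0;
    }

    /* Unlocked variant: caller must already hold bus_lock. */
    static int bus_read_unlocked(unsigned off, unsigned char *data)
    {
        *data = (unsigned char)off;
        return 0;
    }

    int main(void)
    {
        unsigned char a, b;

        bus_read(0x2A, &a);              /* one-shot access */

        pthread_mutex_lock(&bus_lock);   /* multi-step sequence: */
        bus_read_unlocked(0x2A, &a);     /* hold the lock across */
        bus_read_unlocked(0x2B, &b);     /* both transfers */
        pthread_mutex_unlock(&bus_lock);

        printf("0x%02x 0x%02x\n", a, b);
        return 0;
    }
]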
diff --git a/sys/dev/ixgbe/ixgbe_api.h b/sys/dev/ixgbe/ixgbe_api.h
index 650ae67..8c2c4a8 100644
--- a/sys/dev/ixgbe/ixgbe_api.h
+++ b/sys/dev/ixgbe/ixgbe_api.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -173,10 +173,18 @@ u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw);
s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
u8 *data);
+s32 ixgbe_read_i2c_byte_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data);
s32 ixgbe_read_i2c_combined(struct ixgbe_hw *hw, u8 addr, u16 reg, u16 *val);
+s32 ixgbe_read_i2c_combined_unlocked(struct ixgbe_hw *hw, u8 addr, u16 reg,
+ u16 *val);
s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
u8 data);
+s32 ixgbe_write_i2c_byte_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data);
s32 ixgbe_write_i2c_combined(struct ixgbe_hw *hw, u8 addr, u16 reg, u16 val);
+s32 ixgbe_write_i2c_combined_unlocked(struct ixgbe_hw *hw, u8 addr, u16 reg,
+ u16 val);
s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data);
s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
@@ -203,6 +211,7 @@ void ixgbe_enable_mdd(struct ixgbe_hw *hw);
void ixgbe_mdd_event(struct ixgbe_hw *hw, u32 *vf_bitmap);
void ixgbe_restore_mdd_vf(struct ixgbe_hw *hw, u32 vf);
s32 ixgbe_enter_lplu(struct ixgbe_hw *hw);
+s32 ixgbe_handle_lasi(struct ixgbe_hw *hw);
void ixgbe_set_rate_select_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed);
void ixgbe_disable_rx(struct ixgbe_hw *hw);
void ixgbe_enable_rx(struct ixgbe_hw *hw);
diff --git a/sys/dev/ixgbe/ixgbe_common.c b/sys/dev/ixgbe/ixgbe_common.c
index 57fe1b5..f0a0776 100644
--- a/sys/dev/ixgbe/ixgbe_common.c
+++ b/sys/dev/ixgbe/ixgbe_common.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -188,6 +188,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
case IXGBE_DEV_ID_X540T1:
case IXGBE_DEV_ID_X540_BYPASS:
case IXGBE_DEV_ID_X550T:
+ case IXGBE_DEV_ID_X550EM_X_10G_T:
supported = TRUE;
break;
default:
@@ -1090,7 +1091,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
msec_delay(2);
/*
- * Prevent the PCI-E bus from from hanging by disabling PCI-E master
+ * Prevent the PCI-E bus from hanging by disabling PCI-E master
* access and verify no pending requests
*/
return ixgbe_disable_pcie_master(hw);
@@ -3573,7 +3574,6 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS;
max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599;
break;
diff --git a/sys/dev/ixgbe/ixgbe_common.h b/sys/dev/ixgbe/ixgbe_common.h
index 94c7f97..e685f5b 100644
--- a/sys/dev/ixgbe/ixgbe_common.h
+++ b/sys/dev/ixgbe/ixgbe_common.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_dcb.c b/sys/dev/ixgbe/ixgbe_dcb.c
index 6f848e7..3659d17 100644
--- a/sys/dev/ixgbe/ixgbe_dcb.c
+++ b/sys/dev/ixgbe/ixgbe_dcb.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -396,7 +396,6 @@ s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_get_tc_stats_82599(hw, stats, tc_count);
break;
@@ -427,7 +426,6 @@ s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_get_pfc_stats_82599(hw, stats, tc_count);
break;
@@ -469,7 +467,6 @@ s32 ixgbe_dcb_config_rx_arbiter_cee(struct ixgbe_hw *hw,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwgid,
tsa, map);
@@ -511,7 +508,6 @@ s32 ixgbe_dcb_config_tx_desc_arbiter_cee(struct ixgbe_hw *hw,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max,
bwgid, tsa);
@@ -555,7 +551,6 @@ s32 ixgbe_dcb_config_tx_data_arbiter_cee(struct ixgbe_hw *hw,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max,
bwgid, tsa,
@@ -593,7 +588,6 @@ s32 ixgbe_dcb_config_pfc_cee(struct ixgbe_hw *hw,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_config_pfc_82599(hw, pfc_en, map);
break;
@@ -622,7 +616,6 @@ s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *hw)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_config_tc_stats_82599(hw, NULL);
break;
@@ -670,7 +663,6 @@ s32 ixgbe_dcb_hw_config_cee(struct ixgbe_hw *hw,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ixgbe_dcb_config_82599(hw, dcb_config);
ret = ixgbe_dcb_hw_config_82599(hw, dcb_config->link_speed,
@@ -705,7 +697,6 @@ s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw, u8 pfc_en, u8 *map)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ret = ixgbe_dcb_config_pfc_82599(hw, pfc_en, map);
break;
@@ -731,7 +722,6 @@ s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw, u16 *refill, u16 *max,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
#if !defined(NO_82599_SUPPORT) || !defined(NO_X540_SUPPORT)
ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
tsa, map);
diff --git a/sys/dev/ixgbe/ixgbe_dcb.h b/sys/dev/ixgbe/ixgbe_dcb.h
index 878bbf8..871b784 100644
--- a/sys/dev/ixgbe/ixgbe_dcb.h
+++ b/sys/dev/ixgbe/ixgbe_dcb.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_dcb_82598.c b/sys/dev/ixgbe/ixgbe_dcb_82598.c
index a5a090d..fb946c9 100644
--- a/sys/dev/ixgbe/ixgbe_dcb_82598.c
+++ b/sys/dev/ixgbe/ixgbe_dcb_82598.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_dcb_82598.h b/sys/dev/ixgbe/ixgbe_dcb_82598.h
index 47f19f6..35974f7 100644
--- a/sys/dev/ixgbe/ixgbe_dcb_82598.h
+++ b/sys/dev/ixgbe/ixgbe_dcb_82598.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_dcb_82599.c b/sys/dev/ixgbe/ixgbe_dcb_82599.c
index 0232d3c..4443411 100644
--- a/sys/dev/ixgbe/ixgbe_dcb_82599.c
+++ b/sys/dev/ixgbe/ixgbe_dcb_82599.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_dcb_82599.h b/sys/dev/ixgbe/ixgbe_dcb_82599.h
index 7702dc9..bab7628 100644
--- a/sys/dev/ixgbe/ixgbe_dcb_82599.h
+++ b/sys/dev/ixgbe/ixgbe_dcb_82599.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_mbx.c b/sys/dev/ixgbe/ixgbe_mbx.c
index 067bba0..d8ba55a 100644
--- a/sys/dev/ixgbe/ixgbe_mbx.c
+++ b/sys/dev/ixgbe/ixgbe_mbx.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -607,7 +607,6 @@ static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number)
break;
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- case ixgbe_mac_X550EM_a:
case ixgbe_mac_X540:
vflre = IXGBE_READ_REG(hw, IXGBE_VFLREC(reg_offset));
break;
@@ -745,7 +744,6 @@ void ixgbe_init_mbx_params_pf(struct ixgbe_hw *hw)
if (hw->mac.type != ixgbe_mac_82599EB &&
hw->mac.type != ixgbe_mac_X550 &&
hw->mac.type != ixgbe_mac_X550EM_x &&
- hw->mac.type != ixgbe_mac_X550EM_a &&
hw->mac.type != ixgbe_mac_X540)
return;
diff --git a/sys/dev/ixgbe/ixgbe_mbx.h b/sys/dev/ixgbe/ixgbe_mbx.h
index 2cffb8a..ea75cbe 100644
--- a/sys/dev/ixgbe/ixgbe_mbx.h
+++ b/sys/dev/ixgbe/ixgbe_mbx.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_osdep.h b/sys/dev/ixgbe/ixgbe_osdep.h
index ebb55f4..95f6ed5c 100644
--- a/sys/dev/ixgbe/ixgbe_osdep.h
+++ b/sys/dev/ixgbe/ixgbe_osdep.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2013, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -108,13 +108,14 @@
#define UNREFERENCED_3PARAMETER(_p, _q, _r)
#define UNREFERENCED_4PARAMETER(_p, _q, _r, _s)
-
#define IXGBE_NTOHL(_i) ntohl(_i)
#define IXGBE_NTOHS(_i) ntohs(_i)
/* XXX these need to be revisited */
-#define IXGBE_CPU_TO_LE32 le32toh
-#define IXGBE_LE32_TO_CPUS le32dec
+#define IXGBE_CPU_TO_LE32 htole32
+#define IXGBE_LE32_TO_CPUS(x)
+#define IXGBE_CPU_TO_BE16 htobe16
+#define IXGBE_CPU_TO_BE32 htobe32
typedef uint8_t u8;
typedef int8_t s8;
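[Note: the osdep change above is more than churn in spirit, even though htole32 and le32toh are numerically the same transform (identity on little-endian hosts, a byte swap on big-endian ones): the new spelling states the intended conversion direction. IXGBE_LE32_TO_CPUS now expands to nothing, matching Linux's in-place le32_to_cpus on a little-endian host; on a big-endian host an empty expansion would be wrong, which is presumably why the "XXX these need to be revisited" comment stays. A small hosted test program illustrating the distinction, not driver code:

    #include <sys/endian.h>  /* FreeBSD; glibc puts these in <endian.h> */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t v = 0x11223344;

        /* htole32 and le32toh agree on every host, so the rename is
         * about intent; htobe32 shows an actually different result
         * on little-endian machines. */
        printf("htole32: 0x%08x\n", (unsigned)htole32(v));
        printf("le32toh: 0x%08x\n", (unsigned)le32toh(v));
        printf("htobe32: 0x%08x\n", (unsigned)htobe32(v));
        return 0;
    }
]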
diff --git a/sys/dev/ixgbe/ixgbe_phy.c b/sys/dev/ixgbe/ixgbe_phy.c
index 5ac719c..88206c7 100644
--- a/sys/dev/ixgbe/ixgbe_phy.c
+++ b/sys/dev/ixgbe/ixgbe_phy.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -101,16 +101,17 @@ static u8 ixgbe_ones_comp_byte_add(u8 add1, u8 add2)
}
/**
- * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation
+ * ixgbe_read_i2c_combined_generic_int - Perform I2C read combined operation
* @hw: pointer to the hardware structure
* @addr: I2C bus address to read from
* @reg: I2C device register to read from
* @val: pointer to location to receive read value
+ * @lock: TRUE to take and release the semaphore
*
* Returns an error code on error.
*/
-static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
- u16 reg, u16 *val)
+static s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+ u16 reg, u16 *val, bool lock)
{
u32 swfw_mask = hw->phy.phy_semaphore_mask;
int max_retry = 10;
@@ -121,11 +122,13 @@ static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
u8 reg_high;
u8 csum;
+ if (hw->mac.type >= ixgbe_mac_X550)
+ max_retry = 3;
reg_high = ((reg >> 7) & 0xFE) | 1; /* Indicate read combined */
csum = ixgbe_ones_comp_byte_add(reg_high, reg & 0xFF);
csum = ~csum;
do {
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+ if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
return IXGBE_ERR_SWFW_SYNC;
ixgbe_i2c_start(hw);
/* Device Address and write indication */
@@ -158,13 +161,15 @@ static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
if (ixgbe_clock_out_i2c_bit(hw, FALSE))
goto fail;
ixgbe_i2c_stop(hw);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ if (lock)
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
*val = (high_bits << 8) | low_bits;
return 0;
fail:
ixgbe_i2c_bus_clear(hw);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ if (lock)
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
retry++;
if (retry < max_retry)
DEBUGOUT("I2C byte read combined error - Retrying.\n");
@@ -176,17 +181,50 @@ fail:
}
/**
- * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation
+ * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Returns an error code on error.
+ **/
+static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
+ u16 reg, u16 *val)
+{
+ return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, TRUE);
+}
+
+/**
+ * ixgbe_read_i2c_combined_generic_unlocked - Do I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Returns an error code on error.
+ **/
+static s32
+ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
+ u16 reg, u16 *val)
+{
+ return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, FALSE);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic_int - Perform I2C write combined operation
* @hw: pointer to the hardware structure
* @addr: I2C bus address to write to
* @reg: I2C device register to write to
* @val: value to write
+ * @lock: TRUE to take and release the semaphore
*
* Returns an error code on error.
*/
-static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
- u8 addr, u16 reg, u16 val)
+static s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+ u16 reg, u16 val, bool lock)
{
+ u32 swfw_mask = hw->phy.phy_semaphore_mask;
int max_retry = 1;
int retry = 0;
u8 reg_high;
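[Note: the refactoring above is applied uniformly through ixgbe_phy.c: each former public routine becomes a *_int core taking a lock flag, and two thin wrappers fix the flag to TRUE or FALSE. A self-contained sketch of that shape, with a toy lock standing in for the SWFW semaphore and illustrative names:

    #include <stdio.h>

    static int lock_held;

    static int acquire(void)
    {
        if (lock_held)
            return -1;
        lock_held = 1;
        return 0;
    }

    static void release(void)
    {
        lock_held = 0;
    }

    /* Core does the bus work; 'lock' says whether to own the semaphore. */
    static int write_byte_int(unsigned off, unsigned char data, int lock)
    {
        if (lock && acquire() != 0)
            return -1;              /* SWFW_SYNC failure analogue */
        (void)off; (void)data;      /* pretend I2C transfer */
        if (lock)
            release();
        return 0;
    }

    /* Thin wrappers: identical signatures, fixed lock policy. */
    static int write_byte(unsigned off, unsigned char d)
    {
        return write_byte_int(off, d, 1);
    }

    static int write_byte_unlocked(unsigned off, unsigned char d)
    {
        return write_byte_int(off, d, 0);
    }

    int main(void)
    {
        printf("%d %d\n", write_byte(0x2A, 1), write_byte_unlocked(0x2B, 2));
        return 0;
    }
]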
@@ -198,6 +236,8 @@ static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
csum = ixgbe_ones_comp_byte_add(csum, val & 0xFF);
csum = ~csum;
do {
+ if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+ return IXGBE_ERR_SWFW_SYNC;
ixgbe_i2c_start(hw);
/* Device Address and write indication */
if (ixgbe_out_i2c_byte_ack(hw, addr))
@@ -218,10 +258,14 @@ static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
if (ixgbe_out_i2c_byte_ack(hw, csum))
goto fail;
ixgbe_i2c_stop(hw);
+ if (lock)
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
return 0;
fail:
ixgbe_i2c_bus_clear(hw);
+ if (lock)
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
retry++;
if (retry < max_retry)
DEBUGOUT("I2C byte write combined error - Retrying.\n");
@@ -233,6 +277,37 @@ fail:
}
/**
+ * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Returns an error code on error.
+ **/
+static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
+ u8 addr, u16 reg, u16 val)
+{
+ return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, TRUE);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic_unlocked - Do I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Returns an error code on error.
+ **/
+static s32
+ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
+ u8 addr, u16 reg, u16 val)
+{
+ return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, FALSE);
+}
+
+/**
* ixgbe_init_phy_ops_generic - Inits PHY function ptrs
* @hw: pointer to the hardware structure
*
@@ -265,6 +340,13 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw)
phy->sfp_type = ixgbe_sfp_type_unknown;
phy->ops.read_i2c_combined = ixgbe_read_i2c_combined_generic;
phy->ops.write_i2c_combined = ixgbe_write_i2c_combined_generic;
+ phy->ops.read_i2c_combined_unlocked =
+ ixgbe_read_i2c_combined_generic_unlocked;
+ phy->ops.write_i2c_combined_unlocked =
+ ixgbe_write_i2c_combined_generic_unlocked;
+ phy->ops.read_i2c_byte_unlocked = ixgbe_read_i2c_byte_generic_unlocked;
+ phy->ops.write_i2c_byte_unlocked =
+ ixgbe_write_i2c_byte_generic_unlocked;
phy->ops.check_overtemp = ixgbe_tn_check_overtemp;
return IXGBE_SUCCESS;
}
@@ -1363,6 +1445,13 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
else
hw->phy.sfp_type =
ixgbe_sfp_type_1g_sx_core1;
+ } else if (comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) {
+ if (hw->bus.lan_id == 0)
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_lx_core0;
+ else
+ hw->phy.sfp_type =
+ ixgbe_sfp_type_1g_lx_core1;
} else {
hw->phy.sfp_type = ixgbe_sfp_type_unknown;
}
@@ -1450,6 +1539,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
if (comp_codes_10g == 0 &&
!(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
hw->phy.type = ixgbe_phy_sfp_unsupported;
@@ -1467,6 +1558,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) &&
!(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
/* Make sure we're a supported PHY type */
@@ -1600,6 +1693,9 @@ s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
goto out;
}
+ /* LAN ID is needed for I2C access */
+ hw->mac.ops.set_lan_id(hw);
+
status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_IDENTIFIER,
&identifier);
@@ -1614,9 +1710,6 @@ s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
hw->phy.id = identifier;
- /* LAN ID is needed for sfp_type determination */
- hw->mac.ops.set_lan_id(hw);
-
status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_QSFP_10GBE_COMP,
&comp_codes_10g);
@@ -1804,10 +1897,12 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
* SR modules
*/
if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 ||
+ sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
sfp_type == ixgbe_sfp_type_1g_sx_core0)
sfp_type = ixgbe_sfp_type_srlr_core0;
else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 ||
+ sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
sfp_type == ixgbe_sfp_type_1g_sx_core1)
sfp_type = ixgbe_sfp_type_srlr_core1;
@@ -1932,16 +2027,17 @@ static bool ixgbe_is_sfp_probe(struct ixgbe_hw *hw, u8 offset, u8 addr)
}
/**
- * ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C
+ * ixgbe_read_i2c_byte_generic_int - Reads 8 bit word over I2C
* @hw: pointer to hardware structure
* @byte_offset: byte offset to read
* @data: value read
+ * @lock: TRUE to take and release the semaphore
*
* Performs byte read operation to SFP module's EEPROM over I2C interface at
* a specified device address.
**/
-s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 *data)
+static s32 ixgbe_read_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data, bool lock)
{
s32 status;
u32 max_retry = 10;
@@ -1952,11 +2048,13 @@ s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
DEBUGFUNC("ixgbe_read_i2c_byte_generic");
+ if (hw->mac.type >= ixgbe_mac_X550)
+ max_retry = 3;
if (ixgbe_is_sfp_probe(hw, byte_offset, dev_addr))
max_retry = IXGBE_SFP_DETECT_RETRIES;
do {
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+ if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
return IXGBE_ERR_SWFW_SYNC;
ixgbe_i2c_start(hw);
@@ -1998,13 +2096,16 @@ s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
goto fail;
ixgbe_i2c_stop(hw);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ if (lock)
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
return IXGBE_SUCCESS;
fail:
ixgbe_i2c_bus_clear(hw);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
- msec_delay(100);
+ if (lock) {
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ msec_delay(100);
+ }
retry++;
if (retry < max_retry)
DEBUGOUT("I2C byte read error - Retrying.\n");
@@ -2017,28 +2118,60 @@ fail:
}
/**
- * ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C
+ * ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data)
+{
+ return ixgbe_read_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+ data, TRUE);
+}
+
+/**
+ * ixgbe_read_i2c_byte_generic_unlocked - Reads 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to read
+ * @data: value read
+ *
+ * Performs byte read operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data)
+{
+ return ixgbe_read_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+ data, FALSE);
+}
+
+/**
+ * ixgbe_write_i2c_byte_generic_int - Writes 8 bit word over I2C
* @hw: pointer to hardware structure
* @byte_offset: byte offset to write
* @data: value to write
+ * @lock: TRUE to take and release the semaphore
*
* Performs byte write operation to SFP module's EEPROM over I2C interface at
* a specified device address.
**/
-s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
- u8 dev_addr, u8 data)
+static s32 ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data, bool lock)
{
- s32 status = IXGBE_SUCCESS;
+ s32 status;
u32 max_retry = 1;
u32 retry = 0;
u32 swfw_mask = hw->phy.phy_semaphore_mask;
DEBUGFUNC("ixgbe_write_i2c_byte_generic");
- if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != IXGBE_SUCCESS) {
- status = IXGBE_ERR_SWFW_SYNC;
- goto write_byte_out;
- }
+ if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) !=
+ IXGBE_SUCCESS)
+ return IXGBE_ERR_SWFW_SYNC;
do {
ixgbe_i2c_start(hw);
@@ -2068,7 +2201,8 @@ s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
goto fail;
ixgbe_i2c_stop(hw);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ if (lock)
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
return IXGBE_SUCCESS;
fail:
@@ -2080,13 +2214,45 @@ fail:
DEBUGOUT("I2C byte write error.\n");
} while (retry < max_retry);
- hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ if (lock)
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
-write_byte_out:
return status;
}
/**
+ * ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data)
+{
+ return ixgbe_write_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+ data, TRUE);
+}
+
+/**
+ * ixgbe_write_i2c_byte_generic_unlocked - Writes 8 bit word over I2C
+ * @hw: pointer to hardware structure
+ * @byte_offset: byte offset to write
+ * @data: value to write
+ *
+ * Performs byte write operation to SFP module's EEPROM over I2C interface at
+ * a specified device address.
+ **/
+s32 ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data)
+{
+ return ixgbe_write_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+ data, FALSE);
+}
+
+/**
* ixgbe_i2c_start - Sets I2C start condition
* @hw: pointer to hardware structure
*
diff --git a/sys/dev/ixgbe/ixgbe_phy.h b/sys/dev/ixgbe/ixgbe_phy.h
index 021d5f0..fad31bd 100644
--- a/sys/dev/ixgbe/ixgbe_phy.h
+++ b/sys/dev/ixgbe/ixgbe_phy.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -89,9 +89,24 @@
#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3
#define IXGBE_CS4227 0xBE /* CS4227 address */
-#define IXGBE_CS4227_SPARE24_LSB 0x12B0 /* Reg to program EDC */
+#define IXGBE_CS4227_GLOBAL_ID_LSB 0
+#define IXGBE_CS4227_SCRATCH 2
+#define IXGBE_CS4227_GLOBAL_ID_VALUE 0x03E5
+#define IXGBE_CS4227_SCRATCH_VALUE 0x5aa5
+#define IXGBE_CS4227_RETRIES 5
+#define IXGBE_CS4227_LINE_SPARE22_MSB 0x12AD /* Reg to program speed */
+#define IXGBE_CS4227_LINE_SPARE24_LSB 0x12B0 /* Reg to program EDC */
+#define IXGBE_CS4227_HOST_SPARE22_MSB 0x1AAD /* Reg to program speed */
+#define IXGBE_CS4227_HOST_SPARE24_LSB 0x1AB0 /* Reg to program EDC */
#define IXGBE_CS4227_EDC_MODE_CX1 0x0002
#define IXGBE_CS4227_EDC_MODE_SR 0x0004
+#define IXGBE_CS4227_RESET_HOLD 500 /* microseconds */
+#define IXGBE_CS4227_RESET_DELAY 500 /* milliseconds */
+#define IXGBE_CS4227_CHECK_DELAY 30 /* milliseconds */
+#define IXGBE_PE 0xE0 /* Port expander address */
+#define IXGBE_PE_OUTPUT 1 /* Output register offset */
+#define IXGBE_PE_CONFIG 3 /* Config register offset */
+#define IXGBE_PE_BIT1 (1 << 1)
/* Flow control defines */
#define IXGBE_TAF_SYM_PAUSE 0x400
@@ -175,8 +190,12 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw);
s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 *data);
+s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 *data);
s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data);
+s32 ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+ u8 dev_addr, u8 data);
s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
u8 *eeprom_data);
s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
diff --git a/sys/dev/ixgbe/ixgbe_type.h b/sys/dev/ixgbe/ixgbe_type.h
index 24ba046..2a53952 100644
--- a/sys/dev/ixgbe/ixgbe_type.h
+++ b/sys/dev/ixgbe/ixgbe_type.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -111,6 +111,7 @@
#define IXGBE_SUBDEV_ID_82599_LOM_SNAP6 0x2159
#define IXGBE_SUBDEV_ID_82599_SFP_1OCP 0x000D
#define IXGBE_SUBDEV_ID_82599_SFP_2OCP 0x0008
+#define IXGBE_SUBDEV_ID_82599_SFP_LOM 0x06EE
#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A
#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529
#define IXGBE_DEV_ID_82599_SFP_EM 0x1507
@@ -130,8 +131,6 @@
#define IXGBE_DEV_ID_X540_BYPASS 0x155C
#define IXGBE_DEV_ID_X540T1 0x1560
#define IXGBE_DEV_ID_X550T 0x1563
-/* Placeholder value, pending official value. */
-#define IXGBE_DEV_ID_X550EM_A_KR 0xABCD
#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA
#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB
#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC
@@ -139,11 +138,13 @@
#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE
#define IXGBE_DEV_ID_X550_VF_HV 0x1564
#define IXGBE_DEV_ID_X550_VF 0x1565
-#define IXGBE_DEV_ID_X550EM_A_VF 0x15B3
-#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4
#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8
#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9
+#define IXGBE_CAT(r,m) IXGBE_##r##m
+
+#define IXGBE_BY_MAC(_hw, r) ((_hw)->mvals[IXGBE_CAT(r, _IDX)])
+
/* General Registers */
#define IXGBE_CTRL 0x00000
#define IXGBE_STATUS 0x00008
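[Note: IXGBE_CAT is a plain token-pasting helper, and IXGBE_BY_MAC composes it with the per-MAC table: IXGBE_BY_MAC(hw, I2CCTL) pastes to IXGBE_I2CCTL_IDX and indexes hw->mvals with it, replacing the old runtime mac.type ternaries. A standalone sketch of the pasting mechanics with illustrative register names:

    #include <stdio.h>

    #define CAT(r, m)       REG_##r##m

    /* Index constants the paste can resolve to. */
    #define REG_I2CCTL_IDX  0
    #define REG_EEC_IDX     1

    static const unsigned int mvals_x550[] = { 0x15F5C, 0x10010 };

    #define BY_MAC(vals, r) ((vals)[CAT(r, _IDX)])

    int main(void)
    {
        /* BY_MAC(mvals_x550, I2CCTL) -> mvals_x550[REG_I2CCTL_IDX] */
        printf("0x%X\n", BY_MAC(mvals_x550, I2CCTL));
        return 0;
    }
]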
@@ -151,9 +152,11 @@
#define IXGBE_ESDP 0x00020
#define IXGBE_EODSDP 0x00028
#define IXGBE_I2CCTL_82599 0x00028
+#define IXGBE_I2CCTL IXGBE_I2CCTL_82599
+#define IXGBE_I2CCTL_X540 IXGBE_I2CCTL_82599
#define IXGBE_I2CCTL_X550 0x15F5C
-#define IXGBE_I2CCTL_BY_MAC(_hw) ((((_hw)->mac.type >= ixgbe_mac_X550) ? \
- IXGBE_I2CCTL_X550 : IXGBE_I2CCTL_82599))
+#define IXGBE_I2CCTL_X550EM_x IXGBE_I2CCTL_X550
+#define IXGBE_I2CCTL_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2CCTL)
#define IXGBE_PHY_GPIO 0x00028
#define IXGBE_MAC_GPIO 0x00030
#define IXGBE_PHYINT_STATUS0 0x00100
@@ -166,18 +169,40 @@
#define IXGBE_EXVET 0x05078
/* NVM Registers */
-#define IXGBE_EEC 0x10010
-#define IXGBE_EERD 0x10014
-#define IXGBE_EEWR 0x10018
-#define IXGBE_FLA 0x1001C
+#define IXGBE_EEC 0x10010
+#define IXGBE_EEC_X540 IXGBE_EEC
+#define IXGBE_EEC_X550 IXGBE_EEC
+#define IXGBE_EEC_X550EM_x IXGBE_EEC
+#define IXGBE_EEC_BY_MAC(_hw) IXGBE_EEC
+
+#define IXGBE_EERD 0x10014
+#define IXGBE_EEWR 0x10018
+
+#define IXGBE_FLA 0x1001C
+#define IXGBE_FLA_X540 IXGBE_FLA
+#define IXGBE_FLA_X550 IXGBE_FLA
+#define IXGBE_FLA_X550EM_x IXGBE_FLA
+#define IXGBE_FLA_BY_MAC(_hw) IXGBE_FLA
+
#define IXGBE_EEMNGCTL 0x10110
#define IXGBE_EEMNGDATA 0x10114
#define IXGBE_FLMNGCTL 0x10118
#define IXGBE_FLMNGDATA 0x1011C
#define IXGBE_FLMNGCNT 0x10120
#define IXGBE_FLOP 0x1013C
-#define IXGBE_GRC 0x10200
-#define IXGBE_SRAMREL 0x10210
+
+#define IXGBE_GRC 0x10200
+#define IXGBE_GRC_X540 IXGBE_GRC
+#define IXGBE_GRC_X550 IXGBE_GRC
+#define IXGBE_GRC_X550EM_x IXGBE_GRC
+#define IXGBE_GRC_BY_MAC(_hw) IXGBE_GRC
+
+#define IXGBE_SRAMREL 0x10210
+#define IXGBE_SRAMREL_X540 IXGBE_SRAMREL
+#define IXGBE_SRAMREL_X550 IXGBE_SRAMREL
+#define IXGBE_SRAMREL_X550EM_x IXGBE_SRAMREL
+#define IXGBE_SRAMREL_BY_MAC(_hw) IXGBE_SRAMREL
+
#define IXGBE_PHYDBG 0x10218
/* General Receive Control */
@@ -188,20 +213,48 @@
#define IXGBE_VPDDIAG1 0x10208
/* I2CCTL Bit Masks */
-#define IXGBE_I2C_CLK_IN_BY_MAC(_hw)(((_hw)->mac.type) >= ixgbe_mac_X550 ? \
- 0x00004000 : 0x00000001)
-#define IXGBE_I2C_CLK_OUT_BY_MAC(_hw)(((_hw)->mac.type) >= ixgbe_mac_X550 ? \
- 0x00000200 : 0x00000002)
-#define IXGBE_I2C_DATA_IN_BY_MAC(_hw)(((_hw)->mac.type) >= ixgbe_mac_X550 ? \
- 0x00001000 : 0x00000004)
-#define IXGBE_I2C_DATA_OUT_BY_MAC(_hw)(((_hw)->mac.type) >= ixgbe_mac_X550 ? \
- 0x00000400 : 0x00000008)
-#define IXGBE_I2C_BB_EN_BY_MAC(hw) ((hw)->mac.type >= ixgbe_mac_X550 ? \
- 0x00000100 : 0)
-#define IXGBE_I2C_DATA_OE_N_EN_BY_MAC(hw) ((hw)->mac.type >= ixgbe_mac_X550 ? \
- 0x00000800 : 0)
-#define IXGBE_I2C_CLK_OE_N_EN_BY_MAC(hw) ((hw)->mac.type >= ixgbe_mac_X550 ? \
- 0x00002000 : 0)
+#define IXGBE_I2C_CLK_IN 0x00000001
+#define IXGBE_I2C_CLK_IN_X540 IXGBE_I2C_CLK_IN
+#define IXGBE_I2C_CLK_IN_X550 0x00004000
+#define IXGBE_I2C_CLK_IN_X550EM_x IXGBE_I2C_CLK_IN_X550
+#define IXGBE_I2C_CLK_IN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2C_CLK_IN)
+
+#define IXGBE_I2C_CLK_OUT 0x00000002
+#define IXGBE_I2C_CLK_OUT_X540 IXGBE_I2C_CLK_OUT
+#define IXGBE_I2C_CLK_OUT_X550 0x00000200
+#define IXGBE_I2C_CLK_OUT_X550EM_x IXGBE_I2C_CLK_OUT_X550
+#define IXGBE_I2C_CLK_OUT_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2C_CLK_OUT)
+
+#define IXGBE_I2C_DATA_IN 0x00000004
+#define IXGBE_I2C_DATA_IN_X540 IXGBE_I2C_DATA_IN
+#define IXGBE_I2C_DATA_IN_X550 0x00001000
+#define IXGBE_I2C_DATA_IN_X550EM_x IXGBE_I2C_DATA_IN_X550
+#define IXGBE_I2C_DATA_IN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2C_DATA_IN)
+
+#define IXGBE_I2C_DATA_OUT 0x00000008
+#define IXGBE_I2C_DATA_OUT_X540 IXGBE_I2C_DATA_OUT
+#define IXGBE_I2C_DATA_OUT_X550 0x00000400
+#define IXGBE_I2C_DATA_OUT_X550EM_x IXGBE_I2C_DATA_OUT_X550
+#define IXGBE_I2C_DATA_OUT_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2C_DATA_OUT)
+
+#define IXGBE_I2C_DATA_OE_N_EN 0
+#define IXGBE_I2C_DATA_OE_N_EN_X540 IXGBE_I2C_DATA_OE_N_EN
+#define IXGBE_I2C_DATA_OE_N_EN_X550 0x00000800
+#define IXGBE_I2C_DATA_OE_N_EN_X550EM_x IXGBE_I2C_DATA_OE_N_EN_X550
+#define IXGBE_I2C_DATA_OE_N_EN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2C_DATA_OE_N_EN)
+
+#define IXGBE_I2C_BB_EN 0
+#define IXGBE_I2C_BB_EN_X540 IXGBE_I2C_BB_EN
+#define IXGBE_I2C_BB_EN_X550 0x00000100
+#define IXGBE_I2C_BB_EN_X550EM_x IXGBE_I2C_BB_EN_X550
+
+#define IXGBE_I2C_BB_EN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2C_BB_EN)
+
+#define IXGBE_I2C_CLK_OE_N_EN 0
+#define IXGBE_I2C_CLK_OE_N_EN_X540 IXGBE_I2C_CLK_OE_N_EN
+#define IXGBE_I2C_CLK_OE_N_EN_X550 0x00002000
+#define IXGBE_I2C_CLK_OE_N_EN_X550EM_x IXGBE_I2C_CLK_OE_N_EN_X550
+#define IXGBE_I2C_CLK_OE_N_EN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), I2C_CLK_OE_N_EN)
#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT 500
@@ -612,6 +665,7 @@ struct ixgbe_dmac_config {
#define IXGBE_EEER 0x043A0 /* EEE register */
#define IXGBE_EEE_STAT 0x04398 /* EEE Status */
#define IXGBE_EEE_SU 0x04380 /* EEE Set up */
+#define IXGBE_EEE_SU_TEEE_DLY_SHIFT 26
#define IXGBE_TLPIC 0x041F4 /* EEE Tx LPI count */
#define IXGBE_RLPIC 0x041F8 /* EEE Rx LPI count */
@@ -989,14 +1043,34 @@ struct ixgbe_dmac_config {
#define IXGBE_GSCN_2 0x11028
#define IXGBE_GSCN_3 0x1102C
#define IXGBE_FACTPS 0x10150
+#define IXGBE_FACTPS_X540 IXGBE_FACTPS
+#define IXGBE_FACTPS_X550 IXGBE_FACTPS
+#define IXGBE_FACTPS_X550EM_x IXGBE_FACTPS
+#define IXGBE_FACTPS_BY_MAC(_hw) IXGBE_FACTPS
+
#define IXGBE_PCIEANACTL 0x11040
#define IXGBE_SWSM 0x10140
+#define IXGBE_SWSM_X540 IXGBE_SWSM
+#define IXGBE_SWSM_X550 IXGBE_SWSM
+#define IXGBE_SWSM_X550EM_x IXGBE_SWSM
+#define IXGBE_SWSM_BY_MAC(_hw) IXGBE_SWSM
+
#define IXGBE_FWSM 0x10148
+#define IXGBE_FWSM_X540 IXGBE_FWSM
+#define IXGBE_FWSM_X550 IXGBE_FWSM
+#define IXGBE_FWSM_X550EM_x IXGBE_FWSM
+#define IXGBE_FWSM_BY_MAC(_hw) IXGBE_FWSM
+
+#define IXGBE_SWFW_SYNC IXGBE_GSSR
+#define IXGBE_SWFW_SYNC_X540 IXGBE_SWFW_SYNC
+#define IXGBE_SWFW_SYNC_X550 IXGBE_SWFW_SYNC
+#define IXGBE_SWFW_SYNC_X550EM_x IXGBE_SWFW_SYNC
+#define IXGBE_SWFW_SYNC_BY_MAC(_hw) IXGBE_SWFW_SYNC
+
#define IXGBE_GSSR 0x10160
#define IXGBE_MREVID 0x11064
#define IXGBE_DCA_ID 0x11070
#define IXGBE_DCA_CTRL 0x11074
-#define IXGBE_SWFW_SYNC IXGBE_GSSR
/* PCI-E registers 82599-Specific */
#define IXGBE_GCR_EXT 0x11050
@@ -1008,14 +1082,18 @@ struct ixgbe_dmac_config {
#define IXGBE_PHYDAT_82599 0x11044
#define IXGBE_PHYCTL_82599 0x11048
#define IXGBE_PBACLR_82599 0x11068
-#define IXGBE_CIAA_82599 0x11088
-#define IXGBE_CIAD_82599 0x1108C
+#define IXGBE_CIAA 0x11088
+#define IXGBE_CIAD 0x1108C
+#define IXGBE_CIAA_82599 IXGBE_CIAA
+#define IXGBE_CIAD_82599 IXGBE_CIAD
+#define IXGBE_CIAA_X540 IXGBE_CIAA
+#define IXGBE_CIAD_X540 IXGBE_CIAD
#define IXGBE_CIAA_X550 0x11508
#define IXGBE_CIAD_X550 0x11510
-#define IXGBE_CIAA_BY_MAC(_hw) ((((_hw)->mac.type >= ixgbe_mac_X550) ? \
- IXGBE_CIAA_X550 : IXGBE_CIAA_82599))
-#define IXGBE_CIAD_BY_MAC(_hw) ((((_hw)->mac.type >= ixgbe_mac_X550) ? \
- IXGBE_CIAD_X550 : IXGBE_CIAD_82599))
+#define IXGBE_CIAA_X550EM_x IXGBE_CIAA_X550
+#define IXGBE_CIAD_X550EM_x IXGBE_CIAD_X550
+#define IXGBE_CIAA_BY_MAC(_hw) IXGBE_BY_MAC((_hw), CIAA)
+#define IXGBE_CIAD_BY_MAC(_hw) IXGBE_BY_MAC((_hw), CIAD)
#define IXGBE_PICAUSE 0x110B0
#define IXGBE_PIENA 0x110B8
#define IXGBE_CDQ_MBR_82599 0x110B4
@@ -1365,6 +1443,8 @@ struct ixgbe_dmac_config {
#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */
#define IXGBE_MDIO_AUTO_NEG_VENDOR_STAT 0xC800 /* AUTO_NEG Vendor Status Reg */
#define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM 0xCC00 /* AUTO_NEG Vendor TX Reg */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2 0xCC01 /* AUTO_NEG Vendor Tx Reg */
+#define IXGBE_MDIO_AUTO_NEG_VEN_LSC 0x1 /* AUTO_NEG Vendor Tx LSC */
#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */
#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */
#define IXGBE_MDIO_AUTO_NEG_EEE_ADVT 0x3C /* AUTO_NEG EEE Advt Reg */
@@ -1393,11 +1473,24 @@ struct ixgbe_dmac_config {
#define IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK 0x3 /* PHY Reset Complete Mask */
#define IXGBE_MDIO_GLOBAL_RES_PR_10 0xC479 /* Global Resv Provisioning 10 Reg */
#define IXGBE_MDIO_POWER_UP_STALL 0x8000 /* Power Up Stall */
-
+#define IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK 0xFF00 /* int std mask */
+#define IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG 0xFC00 /* chip std int flag */
+#define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK 0xFF01 /* int chip-wide mask */
+#define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG 0xFC01 /* int chip-wide flag */
+#define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */
+#define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */
+#define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */
+#define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000 /* autoneg vendor alarm int enable */
+#define IXGBE_MDIO_GLOBAL_ALARM_1_INT 0x4 /* int in Global alarm 1 */
+#define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN 0x1 /* vendor alarm int enable */
+#define IXGBE_MDIO_GLOBAL_STD_ALM2_INT 0x200 /* vendor alarm2 int mask */
+#define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN 0x4000 /* int high temp enable */
#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */
#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */
#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */
#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT 0xC30C /* PHY_XS SDA/SCL Status Reg */
+#define IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK 0xD401 /* PHY TX Vendor LASI */
+#define IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN 0x1 /* PHY TX Vendor LASI enable */
#define IXGBE_MDIO_PMD_STD_TX_DISABLE_CNTR 0x9 /* Standard Transmit Dis Reg */
#define IXGBE_MDIO_PMD_GLOBAL_TX_DISABLE 0x0001 /* PMD Global Transmit Dis */
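[Note: the GLOBAL_ALARM_1 and LASI mask defines above feed the new handle_lasi op. The sketch below is a guess at the decode order based only on the handle_lasi description earlier in this diff (overtemp checked before link-status change); the constants mirror the defines above, but the flow is an assumption, not the driver's code:

    #include <stdio.h>

    #define ALM_HI_TMP_FAIL 0x4000  /* IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL */
    #define VEN_LSC         0x0001  /* IXGBE_MDIO_AUTO_NEG_VEN_LSC */

    /* Assumed priority: a high temperature alarm outranks a link
     * status change (returns -1 vs. 1; 0 means nothing to do). */
    static int decode_alarm(unsigned int alarm1, unsigned int an_stat)
    {
        if (alarm1 & ALM_HI_TMP_FAIL)
            return -1;          /* IXGBE_ERR_OVERTEMP analogue */
        if (an_stat & VEN_LSC)
            return 1;           /* re-run internal/external PHY link setup */
        return 0;
    }

    int main(void)
    {
        printf("%d\n", decode_alarm(0x4000, 0)); /* -1: overtemp */
        printf("%d\n", decode_alarm(0, 1));      /*  1: link change */
        return 0;
    }
]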
@@ -1479,12 +1572,16 @@ struct ixgbe_dmac_config {
#define IXGBE_SDP0_GPIEN_X540 0x00000002 /* SDP0 on X540 and X550 */
#define IXGBE_SDP1_GPIEN_X540 0x00000004 /* SDP1 on X540 and X550 */
#define IXGBE_SDP2_GPIEN_X540 0x00000008 /* SDP2 on X540 and X550 */
-#define IXGBE_SDP0_GPIEN_BY_MAC(_hw) ((_hw)->mac.type >= ixgbe_mac_X540 ? \
- IXGBE_SDP0_GPIEN_X540 : IXGBE_SDP0_GPIEN)
-#define IXGBE_SDP1_GPIEN_BY_MAC(_hw) ((_hw)->mac.type >= ixgbe_mac_X540 ? \
- IXGBE_SDP1_GPIEN_X540 : IXGBE_SDP1_GPIEN)
-#define IXGBE_SDP2_GPIEN_BY_MAC(_hw) ((_hw)->mac.type >= ixgbe_mac_X540 ? \
- IXGBE_SDP2_GPIEN_X540 : IXGBE_SDP2_GPIEN)
+#define IXGBE_SDP0_GPIEN_X550 IXGBE_SDP0_GPIEN_X540
+#define IXGBE_SDP1_GPIEN_X550 IXGBE_SDP1_GPIEN_X540
+#define IXGBE_SDP2_GPIEN_X550 IXGBE_SDP2_GPIEN_X540
+#define IXGBE_SDP0_GPIEN_X550EM_x IXGBE_SDP0_GPIEN_X540
+#define IXGBE_SDP1_GPIEN_X550EM_x IXGBE_SDP1_GPIEN_X540
+#define IXGBE_SDP2_GPIEN_X550EM_x IXGBE_SDP2_GPIEN_X540
+#define IXGBE_SDP0_GPIEN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), SDP0_GPIEN)
+#define IXGBE_SDP1_GPIEN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), SDP1_GPIEN)
+#define IXGBE_SDP2_GPIEN_BY_MAC(_hw) IXGBE_BY_MAC((_hw), SDP2_GPIEN)
+
#define IXGBE_GPIE_MSIX_MODE 0x00000010 /* MSI-X mode */
#define IXGBE_GPIE_OCD 0x00000020 /* Other Clear Disable */
#define IXGBE_GPIE_EIMEN 0x00000040 /* Immediate Interrupt Enable */
@@ -1665,15 +1762,16 @@ enum {
#define IXGBE_EICR_GPI_SDP0_X540 0x02000000 /* Gen Purpose Interrupt on SDP0 */
#define IXGBE_EICR_GPI_SDP1_X540 0x04000000 /* Gen Purpose Interrupt on SDP1 */
#define IXGBE_EICR_GPI_SDP2_X540 0x08000000 /* Gen Purpose Interrupt on SDP2 */
-#define IXGBE_EICR_GPI_SDP0_BY_MAC(_hw) ((_hw)->mac.type >= ixgbe_mac_X540 ? \
- IXGBE_EICR_GPI_SDP0_X540 : \
- IXGBE_EICR_GPI_SDP0)
-#define IXGBE_EICR_GPI_SDP1_BY_MAC(_hw) ((_hw)->mac.type >= ixgbe_mac_X540 ? \
- IXGBE_EICR_GPI_SDP1_X540 : \
- IXGBE_EICR_GPI_SDP1)
-#define IXGBE_EICR_GPI_SDP2_BY_MAC(_hw) ((_hw)->mac.type >= ixgbe_mac_X540 ? \
- IXGBE_EICR_GPI_SDP2_X540 : \
- IXGBE_EICR_GPI_SDP2)
+#define IXGBE_EICR_GPI_SDP0_X550 IXGBE_EICR_GPI_SDP0_X540
+#define IXGBE_EICR_GPI_SDP1_X550 IXGBE_EICR_GPI_SDP1_X540
+#define IXGBE_EICR_GPI_SDP2_X550 IXGBE_EICR_GPI_SDP2_X540
+#define IXGBE_EICR_GPI_SDP0_X550EM_x IXGBE_EICR_GPI_SDP0_X540
+#define IXGBE_EICR_GPI_SDP1_X550EM_x IXGBE_EICR_GPI_SDP1_X540
+#define IXGBE_EICR_GPI_SDP2_X550EM_x IXGBE_EICR_GPI_SDP2_X540
+#define IXGBE_EICR_GPI_SDP0_BY_MAC(_hw) IXGBE_BY_MAC((_hw), EICR_GPI_SDP0)
+#define IXGBE_EICR_GPI_SDP1_BY_MAC(_hw) IXGBE_BY_MAC((_hw), EICR_GPI_SDP1)
+#define IXGBE_EICR_GPI_SDP2_BY_MAC(_hw) IXGBE_BY_MAC((_hw), EICR_GPI_SDP2)
+
#define IXGBE_EICR_PBUR 0x10000000 /* Packet Buffer Handler Error */
#define IXGBE_EICR_DHER 0x20000000 /* Descriptor Handler Error */
#define IXGBE_EICR_TCP_TIMER 0x40000000 /* TCP Timer */
@@ -1901,6 +1999,9 @@ enum {
#define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i)
#define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i)
#define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i)
+#define IXGBE_X557_LED_MANUAL_SET_MASK (1 << 8)
+#define IXGBE_X557_MAX_LED_INDEX 3
+#define IXGBE_X557_LED_PROVISIONING 0xC430
/* LED modes */
#define IXGBE_LED_LINK_UP 0x0
@@ -2784,6 +2885,7 @@ enum ixgbe_fdir_pballoc_type {
#define IXGBE_HI_FLASH_ERASE_TIMEOUT 1000 /* Process Erase command limit */
#define IXGBE_HI_FLASH_UPDATE_TIMEOUT 5000 /* Process Update command limit */
#define IXGBE_HI_FLASH_APPLY_TIMEOUT 0 /* Process Apply command limit */
+#define IXGBE_HI_PHY_MGMT_REQ_TIMEOUT 2000 /* Wait up to 2 seconds */
/* CEM Support */
#define FW_CEM_HDR_LEN 0x4
@@ -2804,6 +2906,7 @@ enum ixgbe_fdir_pballoc_type {
#define FW_MAX_READ_BUFFER_SIZE 1024
#define FW_DISABLE_RXEN_CMD 0xDE
#define FW_DISABLE_RXEN_LEN 0x1
+#define FW_PHY_MGMT_REQ_CMD 0x20
/* Host Interface Command Structures */
struct ixgbe_hic_hdr {
@@ -3200,6 +3303,36 @@ union ixgbe_atr_hash_dword {
};
+#define IXGBE_MVALS_INIT(m) \
+ IXGBE_CAT(EEC, m), \
+ IXGBE_CAT(FLA, m), \
+ IXGBE_CAT(GRC, m), \
+ IXGBE_CAT(SRAMREL, m), \
+ IXGBE_CAT(FACTPS, m), \
+ IXGBE_CAT(SWSM, m), \
+ IXGBE_CAT(FWSM, m), \
+ IXGBE_CAT(SDP0_GPIEN, m), \
+ IXGBE_CAT(SDP1_GPIEN, m), \
+ IXGBE_CAT(SDP2_GPIEN, m), \
+ IXGBE_CAT(EICR_GPI_SDP0, m), \
+ IXGBE_CAT(EICR_GPI_SDP1, m), \
+ IXGBE_CAT(EICR_GPI_SDP2, m), \
+ IXGBE_CAT(CIAA, m), \
+ IXGBE_CAT(CIAD, m), \
+ IXGBE_CAT(I2C_CLK_IN, m), \
+ IXGBE_CAT(I2C_CLK_OUT, m), \
+ IXGBE_CAT(I2C_DATA_IN, m), \
+ IXGBE_CAT(I2C_DATA_OUT, m), \
+ IXGBE_CAT(I2C_DATA_OE_N_EN, m), \
+ IXGBE_CAT(I2C_BB_EN, m), \
+ IXGBE_CAT(I2C_CLK_OE_N_EN, m), \
+ IXGBE_CAT(I2CCTL, m)
+
+enum ixgbe_mvals {
+ IXGBE_MVALS_INIT(_IDX),
+ IXGBE_MVALS_IDX_LIMIT
+};
+
/*
* Unavailable: The FCoE Boot Option ROM is not present in the flash.
* Disabled: Present; boot order is not set for any targets on the port.
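[Note: IXGBE_MVALS_INIT above is the X-macro that ties the scheme together: expanded with _IDX inside the enum it yields the index constants, and expanded with _X540, _X550, or _X550EM_x inside an array initializer it yields that generation's values in exactly the same order, so the indices and tables cannot drift apart. A minimal standalone reproduction of the trick, two registers instead of twenty-three:

    #include <stdio.h>

    #define CAT(r, m)       REG_##r##m

    #define REG_EEC         0x10010
    #define REG_EEC_X550    REG_EEC
    #define REG_I2CCTL      0x00028
    #define REG_I2CCTL_X550 0x15F5C

    /* Single list of registers; every expansion walks it in one order. */
    #define MVALS_INIT(m) \
        CAT(EEC, m), \
        CAT(I2CCTL, m)

    enum mvals {
        MVALS_INIT(_IDX),   /* REG_EEC_IDX = 0, REG_I2CCTL_IDX = 1 */
        MVALS_IDX_LIMIT
    };

    static const unsigned int mvals_x550[MVALS_IDX_LIMIT] = {
        MVALS_INIT(_X550)
    };

    int main(void)
    {
        printf("0x%X\n", mvals_x550[REG_I2CCTL_IDX]);  /* 0x15F5C */
        return 0;
    }
]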
@@ -3225,17 +3358,10 @@ enum ixgbe_mac_type {
ixgbe_mac_82599_vf,
ixgbe_mac_X540,
ixgbe_mac_X540_vf,
- /*
- * X550EM MAC type decoder:
- * ixgbe_mac_X550EM_x: "x" = Xeon
- * ixgbe_mac_X550EM_a: "a" = Atom
- */
ixgbe_mac_X550,
ixgbe_mac_X550EM_x,
- ixgbe_mac_X550EM_a,
ixgbe_mac_X550_vf,
ixgbe_mac_X550EM_x_vf,
- ixgbe_mac_X550EM_a_vf,
ixgbe_num_macs
};
@@ -3294,6 +3420,8 @@ enum ixgbe_sfp_type {
ixgbe_sfp_type_1g_cu_core1 = 10,
ixgbe_sfp_type_1g_sx_core0 = 11,
ixgbe_sfp_type_1g_sx_core1 = 12,
+ ixgbe_sfp_type_1g_lx_core0 = 13,
+ ixgbe_sfp_type_1g_lx_core1 = 14,
ixgbe_sfp_type_not_present = 0xFFFE,
ixgbe_sfp_type_unknown = 0xFFFF
};
@@ -3611,6 +3739,15 @@ struct ixgbe_phy_operations {
s32 (*check_overtemp)(struct ixgbe_hw *);
s32 (*set_phy_power)(struct ixgbe_hw *, bool on);
s32 (*enter_lplu)(struct ixgbe_hw *);
+ s32 (*handle_lasi)(struct ixgbe_hw *hw);
+ s32 (*read_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+ u16 *value);
+ s32 (*write_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+ u16 value);
+ s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+ u8 *value);
+ s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+ u8 value);
};
struct ixgbe_eeprom_info {
@@ -3674,6 +3811,7 @@ struct ixgbe_phy_info {
bool multispeed_fiber;
bool reset_if_overtemp;
bool qsfp_shared_i2c_bus;
+ u32 nw_mng_if_sel;
};
#include "ixgbe_mbx.h"
@@ -3717,6 +3855,7 @@ struct ixgbe_hw {
struct ixgbe_eeprom_info eeprom;
struct ixgbe_bus_info bus;
struct ixgbe_mbx_info mbx;
+ const u32 *mvals;
u16 device_id;
u16 vendor_id;
u16 subsystem_device_id;
@@ -3775,6 +3914,10 @@ struct ixgbe_hw {
#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF
+#define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4))
+#define IXGBE_FUSES0_300MHZ (1 << 5)
+#define IXGBE_FUSES0_REV1 (1 << 6)
+
#define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P == 0) ? (0x4010) : (0x8010))
#define IXGBE_KRM_LINK_CTRL_1(P) ((P == 0) ? (0x420C) : (0x820C))
#define IXGBE_KRM_AN_CNTL_1(P) ((P == 0) ? (0x422C) : (0x822C))
@@ -3842,8 +3985,10 @@ struct ixgbe_hw {
#define IXGBE_SB_IOSF_CTRL_BUSY_SHIFT 31
#define IXGBE_SB_IOSF_CTRL_BUSY (1 << IXGBE_SB_IOSF_CTRL_BUSY_SHIFT)
#define IXGBE_SB_IOSF_TARGET_KR_PHY 0
-#define IXGBE_SB_IOSF_TARGET_KX4_UNIPHY 1
-#define IXGBE_SB_IOSF_TARGET_KX4_PCS0 2
-#define IXGBE_SB_IOSF_TARGET_KX4_PCS1 3
+#define IXGBE_SB_IOSF_TARGET_KX4_PHY 1
+#define IXGBE_SB_IOSF_TARGET_KX4_PCS 2
+
+#define IXGBE_NW_MNG_IF_SEL 0x00011178
+#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE (1 << 24)
#endif /* _IXGBE_TYPE_H_ */
diff --git a/sys/dev/ixgbe/ixgbe_vf.c b/sys/dev/ixgbe/ixgbe_vf.c
index 964514c..c010cf4 100644
--- a/sys/dev/ixgbe/ixgbe_vf.c
+++ b/sys/dev/ixgbe/ixgbe_vf.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -669,4 +669,3 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
UNREFERENCED_3PARAMETER(hw, num_tcs, default_tc);
return IXGBE_SUCCESS;
}
-
diff --git a/sys/dev/ixgbe/ixgbe_vf.h b/sys/dev/ixgbe/ixgbe_vf.h
index 8f4d46e..edc8013 100644
--- a/sys/dev/ixgbe/ixgbe_vf.h
+++ b/sys/dev/ixgbe/ixgbe_vf.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_x540.c b/sys/dev/ixgbe/ixgbe_x540.c
index 93c6ca4..ddf0674 100644
--- a/sys/dev/ixgbe/ixgbe_x540.c
+++ b/sys/dev/ixgbe/ixgbe_x540.c
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -932,14 +932,14 @@ static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw)
/* Release both semaphores by writing 0 to the bits REGSMP and SMBI */
- swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
- swsm &= ~IXGBE_SWSM_SMBI;
- IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
-
swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC);
swsm &= ~IXGBE_SWFW_REGSMP;
IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm);
+ swsm = IXGBE_READ_REG(hw, IXGBE_SWSM);
+ swsm &= ~IXGBE_SWSM_SMBI;
+ IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm);
+
IXGBE_WRITE_FLUSH(hw);
}
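[Note: the reordering in ixgbe_release_swfw_sync_semaphore above looks deliberate rather than cosmetic: the acquire path (not shown in this hunk) presumably takes SMBI first and then REGSMP, so the release now drops REGSMP before SMBI, following the usual rule of releasing locks in reverse acquisition order. Sketched as that lock-ordering rule in hosted code, with pthread mutexes standing in for the hardware semaphore bits:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t smbi = PTHREAD_MUTEX_INITIALIZER;    /* outer */
    static pthread_mutex_t regsmp = PTHREAD_MUTEX_INITIALIZER;  /* inner */

    int main(void)
    {
        pthread_mutex_lock(&smbi);      /* acquire: SMBI first... */
        pthread_mutex_lock(&regsmp);    /* ...then REGSMP */

        /* critical section: EEPROM/flash access */

        pthread_mutex_unlock(&regsmp);  /* release: REGSMP first... */
        pthread_mutex_unlock(&smbi);    /* ...then SMBI */
        puts("released in reverse acquisition order");
        return 0;
    }
]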
@@ -1011,5 +1011,3 @@ s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
return IXGBE_SUCCESS;
}
-
-
diff --git a/sys/dev/ixgbe/ixgbe_x540.h b/sys/dev/ixgbe/ixgbe_x540.h
index 12da827..efd0d41 100644
--- a/sys/dev/ixgbe/ixgbe_x540.h
+++ b/sys/dev/ixgbe/ixgbe_x540.h
@@ -1,6 +1,6 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/ixgbe/ixgbe_x550.c b/sys/dev/ixgbe/ixgbe_x550.c
new file mode 100644
index 0000000..65daa17
--- /dev/null
+++ b/sys/dev/ixgbe/ixgbe_x550.c
@@ -0,0 +1,3191 @@
+/******************************************************************************
+
+ Copyright (c) 2001-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD$*/
+
+#include "ixgbe_x550.h"
+#include "ixgbe_x540.h"
+#include "ixgbe_type.h"
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed);
+
+/**
+ * ixgbe_init_ops_X550 - Inits func ptrs and MAC type
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the function pointers and assign the MAC type for X550.
+ * Does not touch the hardware.
+ **/
+s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ s32 ret_val;
+
+ DEBUGFUNC("ixgbe_init_ops_X550");
+
+ ret_val = ixgbe_init_ops_X540(hw);
+ mac->ops.dmac_config = ixgbe_dmac_config_X550;
+ mac->ops.dmac_config_tcs = ixgbe_dmac_config_tcs_X550;
+ mac->ops.dmac_update_tcs = ixgbe_dmac_update_tcs_X550;
+ mac->ops.setup_eee = ixgbe_setup_eee_X550;
+ mac->ops.set_source_address_pruning =
+ ixgbe_set_source_address_pruning_X550;
+ mac->ops.set_ethertype_anti_spoofing =
+ ixgbe_set_ethertype_anti_spoofing_X550;
+
+ mac->ops.get_rtrup2tc = ixgbe_dcb_get_rtrup2tc_generic;
+ eeprom->ops.init_params = ixgbe_init_eeprom_params_X550;
+ eeprom->ops.calc_checksum = ixgbe_calc_eeprom_checksum_X550;
+ eeprom->ops.read = ixgbe_read_ee_hostif_X550;
+ eeprom->ops.read_buffer = ixgbe_read_ee_hostif_buffer_X550;
+ eeprom->ops.write = ixgbe_write_ee_hostif_X550;
+ eeprom->ops.write_buffer = ixgbe_write_ee_hostif_buffer_X550;
+ eeprom->ops.update_checksum = ixgbe_update_eeprom_checksum_X550;
+ eeprom->ops.validate_checksum = ixgbe_validate_eeprom_checksum_X550;
+
+ mac->ops.disable_mdd = ixgbe_disable_mdd_X550;
+ mac->ops.enable_mdd = ixgbe_enable_mdd_X550;
+ mac->ops.mdd_event = ixgbe_mdd_event_X550;
+ mac->ops.restore_mdd_vf = ixgbe_restore_mdd_vf_X550;
+ mac->ops.disable_rx = ixgbe_disable_rx_x550;
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
+ hw->mac.ops.led_on = ixgbe_led_on_t_X550em;
+ hw->mac.ops.led_off = ixgbe_led_off_t_X550em;
+ }
+ return ret_val;
+}
+
+/**
+ * ixgbe_read_cs4227 - Read CS4227 register
+ * @hw: pointer to hardware structure
+ * @reg: register number to write
+ * @value: pointer to receive value read
+ *
+ * Returns status code
+ **/
+static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
+{
+ return ixgbe_read_i2c_combined_unlocked(hw, IXGBE_CS4227, reg, value);
+}
+
+/**
+ * ixgbe_write_cs4227 - Write CS4227 register
+ * @hw: pointer to hardware structure
+ * @reg: register number to write
+ * @value: value to write to register
+ *
+ * Returns status code
+ **/
+static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
+{
+ return ixgbe_write_i2c_combined_unlocked(hw, IXGBE_CS4227, reg, value);
+}
+
+/**
+ * ixgbe_get_cs4227_status - Return CS4227 status
+ * @hw: pointer to hardware structure
+ *
+ * Returns error if CS4227 not successfully initialized
+ **/
+static s32 ixgbe_get_cs4227_status(struct ixgbe_hw *hw)
+{
+ s32 status;
+ u16 value = 0;
+ u16 reg_slice, reg_val;
+ u8 retry;
+
+ for (retry = 0; retry < IXGBE_CS4227_RETRIES; ++retry) {
+ status = ixgbe_read_cs4227(hw, IXGBE_CS4227_GLOBAL_ID_LSB,
+ &value);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ if (value == IXGBE_CS4227_GLOBAL_ID_VALUE)
+ break;
+ msec_delay(IXGBE_CS4227_CHECK_DELAY);
+ }
+ if (value != IXGBE_CS4227_GLOBAL_ID_VALUE)
+ return IXGBE_ERR_PHY;
+
+ status = ixgbe_read_cs4227(hw, IXGBE_CS4227_SCRATCH, &value);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If this is the first time after power-on, check the ucode.
+ * Repeating this later would disrupt link on all ports, so it
+ * can only be done once; because of that, we must check all
+ * ports, not just our own.
+ */
+ if (value != IXGBE_CS4227_SCRATCH_VALUE) {
+ reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB;
+ reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+ status = ixgbe_write_cs4227(hw, reg_slice,
+ reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg_slice = IXGBE_CS4227_HOST_SPARE24_LSB;
+ reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+ status = ixgbe_write_cs4227(hw, reg_slice,
+ reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
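+ /* The 1 << 12 offset addresses the other port's copy of these
+ * registers, so both ports get configured (cf. the lan_id << 12
+ * addressing used later in this file).
+ */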
+ reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (1 << 12);
+ reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
+ status = ixgbe_write_cs4227(hw, reg_slice,
+ reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg_slice = IXGBE_CS4227_HOST_SPARE24_LSB + (1 << 12);
+ reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
+ status = ixgbe_write_cs4227(hw, reg_slice,
+ reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ msec_delay(10);
+ }
+
+ /* Verify that the ucode is operational on all ports. */
+ reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB;
+ reg_val = 0xFFFF;
+ status = ixgbe_read_cs4227(hw, reg_slice, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ if (reg_val != 0)
+ return IXGBE_ERR_PHY;
+
+ reg_slice = IXGBE_CS4227_HOST_SPARE24_LSB;
+ reg_val = 0xFFFF;
+ status = ixgbe_read_cs4227(hw, reg_slice, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ if (reg_val != 0)
+ return IXGBE_ERR_PHY;
+
+ reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (1 << 12);
+ reg_val = 0xFFFF;
+ status = ixgbe_read_cs4227(hw, reg_slice, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ if (reg_val != 0)
+ return IXGBE_ERR_PHY;
+
+ reg_slice = IXGBE_CS4227_HOST_SPARE24_LSB + (1 << 12);
+ reg_val = 0xFFFF;
+ status = ixgbe_read_cs4227(hw, reg_slice, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ if (reg_val != 0)
+ return IXGBE_ERR_PHY;
+
+ /* Set scratch for next time. */
+ status = ixgbe_write_cs4227(hw, IXGBE_CS4227_SCRATCH,
+ IXGBE_CS4227_SCRATCH_VALUE);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ status = ixgbe_read_cs4227(hw, IXGBE_CS4227_SCRATCH, &value);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ if (value != IXGBE_CS4227_SCRATCH_VALUE)
+ return IXGBE_ERR_PHY;
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_read_pe - Read register from port expander
+ * @hw: pointer to hardware structure
+ * @reg: register number to read
+ * @value: pointer to receive read value
+ *
+ * Returns status code
+ **/
+static s32 ixgbe_read_pe(struct ixgbe_hw *hw, u8 reg, u8 *value)
+{
+ s32 status;
+
+ status = ixgbe_read_i2c_byte_unlocked(hw, reg, IXGBE_PE, value);
+ if (status != IXGBE_SUCCESS)
+ ERROR_REPORT2(IXGBE_ERROR_CAUTION,
+ "port expander access failed with %d\n", status);
+ return status;
+}
+
+/**
+ * ixgbe_write_pe - Write register to port expander
+ * @hw: pointer to hardware structure
+ * @reg: register number to write
+ * @value: value to write
+ *
+ * Returns status code
+ **/
+static s32 ixgbe_write_pe(struct ixgbe_hw *hw, u8 reg, u8 value)
+{
+ s32 status;
+
+ status = ixgbe_write_i2c_byte_unlocked(hw, reg, IXGBE_PE, value);
+ if (status != IXGBE_SUCCESS)
+ ERROR_REPORT2(IXGBE_ERROR_CAUTION,
+ "port expander access failed with %d\n", status);
+ return status;
+}
+
+/**
+ * ixgbe_reset_cs4227 - Reset CS4227 using port expander
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ **/
+static s32 ixgbe_reset_cs4227(struct ixgbe_hw *hw)
+{
+ s32 status;
+ u8 reg;
+
+ status = ixgbe_read_pe(hw, IXGBE_PE_OUTPUT, &reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg |= IXGBE_PE_BIT1;
+ status = ixgbe_write_pe(hw, IXGBE_PE_OUTPUT, reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ status = ixgbe_read_pe(hw, IXGBE_PE_CONFIG, &reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg &= ~IXGBE_PE_BIT1;
+ status = ixgbe_write_pe(hw, IXGBE_PE_CONFIG, reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ status = ixgbe_read_pe(hw, IXGBE_PE_OUTPUT, &reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg &= ~IXGBE_PE_BIT1;
+ status = ixgbe_write_pe(hw, IXGBE_PE_OUTPUT, reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ usec_delay(IXGBE_CS4227_RESET_HOLD);
+
+ status = ixgbe_read_pe(hw, IXGBE_PE_OUTPUT, &reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg |= IXGBE_PE_BIT1;
+ status = ixgbe_write_pe(hw, IXGBE_PE_OUTPUT, reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ msec_delay(IXGBE_CS4227_RESET_DELAY);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_check_cs4227 - Check CS4227 and reset as needed
+ * @hw: pointer to hardware structure
+ **/
+static void ixgbe_check_cs4227(struct ixgbe_hw *hw)
+{
+ u32 swfw_mask = hw->phy.phy_semaphore_mask;
+ s32 status;
+ u8 retry;
+
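+ /* Probe the CS4227 under the firmware semaphore; reset it and
+ * retry if the probe fails.
+ */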
+ for (retry = 0; retry < IXGBE_CS4227_RETRIES; retry++) {
+ status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+ if (status != IXGBE_SUCCESS) {
+ ERROR_REPORT2(IXGBE_ERROR_CAUTION,
+ "semaphore failed with %d\n", status);
+ return;
+ }
+ status = ixgbe_get_cs4227_status(hw);
+ if (status == IXGBE_SUCCESS) {
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ msec_delay(hw->eeprom.semaphore_delay);
+ return;
+ }
+ ixgbe_reset_cs4227(hw);
+ hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+ msec_delay(hw->eeprom.semaphore_delay);
+ }
+ ERROR_REPORT2(IXGBE_ERROR_CAUTION,
+ "Unable to initialize CS4227, err=%d\n", status);
+}
+
+/**
+ * ixgbe_setup_mux_ctl - Setup ESDP register for I2C mux control
+ * @hw: pointer to hardware structure
+ **/
+static void ixgbe_setup_mux_ctl(struct ixgbe_hw *hw)
+{
+ u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+ if (hw->bus.lan_id) {
+ esdp &= ~(IXGBE_ESDP_SDP1_NATIVE | IXGBE_ESDP_SDP1);
+ esdp |= IXGBE_ESDP_SDP1_DIR;
+ }
+ esdp &= ~(IXGBE_ESDP_SDP0_NATIVE | IXGBE_ESDP_SDP0_DIR);
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_identify_phy_x550em - Get PHY type based on device id
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ */
+static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
+{
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_X550EM_X_SFP:
+ /* set up for CS4227 usage */
+ hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM;
+ ixgbe_setup_mux_ctl(hw);
+ ixgbe_check_cs4227(hw);
+
+ return ixgbe_identify_module_generic(hw);
+ case IXGBE_DEV_ID_X550EM_X_KX4:
+ hw->phy.type = ixgbe_phy_x550em_kx4;
+ break;
+ case IXGBE_DEV_ID_X550EM_X_KR:
+ hw->phy.type = ixgbe_phy_x550em_kr;
+ break;
+ case IXGBE_DEV_ID_X550EM_X_1G_T:
+ case IXGBE_DEV_ID_X550EM_X_10G_T:
+ return ixgbe_identify_phy_generic(hw);
+ default:
+ break;
+ }
+ return IXGBE_SUCCESS;
+}
+
+static s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u16 *phy_data)
+{
+ UNREFERENCED_4PARAMETER(*hw, reg_addr, device_type, *phy_data);
+ return IXGBE_NOT_IMPLEMENTED;
+}
+
+static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u16 phy_data)
+{
+ UNREFERENCED_4PARAMETER(*hw, reg_addr, device_type, phy_data);
+ return IXGBE_NOT_IMPLEMENTED;
+}
+
+/**
+ * ixgbe_init_ops_X550EM - Inits func ptrs and MAC type
+ * @hw: pointer to hardware structure
+ *
+ * Initialize the function pointers and assign the MAC type for X550EM.
+ * Does not touch the hardware.
+ **/
+s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ struct ixgbe_phy_info *phy = &hw->phy;
+ s32 ret_val;
+
+ DEBUGFUNC("ixgbe_init_ops_X550EM");
+
+ /* Similar to X550 so start there. */
+ ret_val = ixgbe_init_ops_X550(hw);
+
+ /* Since this function eventually calls
+ * ixgbe_init_ops_X540 by design, we are setting
+ * the pointers to NULL explicitly here to overwrite
+ * the values assigned in the X540 function.
+ */
+
+ /* FCOE not supported in x550EM */
+ mac->ops.get_san_mac_addr = NULL;
+ mac->ops.set_san_mac_addr = NULL;
+ mac->ops.get_wwn_prefix = NULL;
+ mac->ops.get_fcoe_boot_status = NULL;
+
+ /* IPsec not supported in x550EM */
+ mac->ops.disable_sec_rx_path = NULL;
+ mac->ops.enable_sec_rx_path = NULL;
+
+ /* AUTOC register is not present in x550EM. */
+ mac->ops.prot_autoc_read = NULL;
+ mac->ops.prot_autoc_write = NULL;
+
+ /* X550EM bus type is internal */
+ hw->bus.type = ixgbe_bus_type_internal;
+ mac->ops.get_bus_info = ixgbe_get_bus_info_X550em;
+
+ mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550;
+ mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550;
+ mac->ops.get_media_type = ixgbe_get_media_type_X550em;
+ mac->ops.setup_sfp = ixgbe_setup_sfp_modules_X550em;
+ mac->ops.get_link_capabilities = ixgbe_get_link_capabilities_X550em;
+ mac->ops.reset_hw = ixgbe_reset_hw_X550em;
+ mac->ops.get_supported_physical_layer =
+ ixgbe_get_supported_physical_layer_X550em;
+
+ if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper)
+ mac->ops.setup_fc = ixgbe_setup_fc_generic;
+ else
+ mac->ops.setup_fc = ixgbe_setup_fc_X550em;
+
+ mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em;
+ mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em;
+
+ if (hw->device_id != IXGBE_DEV_ID_X550EM_X_KR)
+ mac->ops.setup_eee = NULL;
+
+ /* PHY */
+ phy->ops.init = ixgbe_init_phy_ops_X550em;
+ phy->ops.identify = ixgbe_identify_phy_x550em;
+ if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper)
+ phy->ops.set_phy_power = NULL;
+
+ /* EEPROM */
+ eeprom->ops.init_params = ixgbe_init_eeprom_params_X540;
+ eeprom->ops.read = ixgbe_read_ee_hostif_X550;
+ eeprom->ops.read_buffer = ixgbe_read_ee_hostif_buffer_X550;
+ eeprom->ops.write = ixgbe_write_ee_hostif_X550;
+ eeprom->ops.write_buffer = ixgbe_write_ee_hostif_buffer_X550;
+ eeprom->ops.update_checksum = ixgbe_update_eeprom_checksum_X550;
+ eeprom->ops.validate_checksum = ixgbe_validate_eeprom_checksum_X550;
+ eeprom->ops.calc_checksum = ixgbe_calc_eeprom_checksum_X550;
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_dmac_config_X550
+ * @hw: pointer to hardware structure
+ *
+ * Configure DMA coalescing. When enabling, DMA coalescing is activated;
+ * when disabling, the DMAC enable bit is cleared.
+ **/
+s32 ixgbe_dmac_config_X550(struct ixgbe_hw *hw)
+{
+ u32 reg, high_pri_tc;
+
+ DEBUGFUNC("ixgbe_dmac_config_X550");
+
+ /* Disable DMA coalescing before configuring */
+ reg = IXGBE_READ_REG(hw, IXGBE_DMACR);
+ reg &= ~IXGBE_DMACR_DMAC_EN;
+ IXGBE_WRITE_REG(hw, IXGBE_DMACR, reg);
+
+ /* Disable DMA Coalescing if the watchdog timer is 0 */
+ if (!hw->mac.dmac_config.watchdog_timer)
+ goto out;
+
+ ixgbe_dmac_config_tcs_X550(hw);
+
+ /* Configure DMA Coalescing Control Register */
+ reg = IXGBE_READ_REG(hw, IXGBE_DMACR);
+
+ /* Set the watchdog timer in units of 40.96 usec */
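+ /* (dmac_config.watchdog_timer is in usec; x 100 / 4096 divides by 40.96) */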
+ reg &= ~IXGBE_DMACR_DMACWT_MASK;
+ reg |= (hw->mac.dmac_config.watchdog_timer * 100) / 4096;
+
+ reg &= ~IXGBE_DMACR_HIGH_PRI_TC_MASK;
+ /* If fcoe is enabled, set high priority traffic class */
+ if (hw->mac.dmac_config.fcoe_en) {
+ high_pri_tc = 1 << hw->mac.dmac_config.fcoe_tc;
+ reg |= ((high_pri_tc << IXGBE_DMACR_HIGH_PRI_TC_SHIFT) &
+ IXGBE_DMACR_HIGH_PRI_TC_MASK);
+ }
+ reg |= IXGBE_DMACR_EN_MNG_IND;
+
+ /* Enable DMA coalescing after configuration */
+ reg |= IXGBE_DMACR_DMAC_EN;
+ IXGBE_WRITE_REG(hw, IXGBE_DMACR, reg);
+
+out:
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_dmac_config_tcs_X550
+ * @hw: pointer to hardware structure
+ *
+ * Configure DMA coalescing threshold per TC. The dmac enable bit must
+ * be cleared before configuring.
+ **/
+s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw)
+{
+ u32 tc, reg, pb_headroom, rx_pb_size, maxframe_size_kb;
+
+ DEBUGFUNC("ixgbe_dmac_config_tcs_X550");
+
+ /* Select packet buffer headroom based on the configured link speed */
+ switch (hw->mac.dmac_config.link_speed) {
+ case IXGBE_LINK_SPEED_100_FULL:
+ pb_headroom = IXGBE_DMACRXT_100M;
+ break;
+ case IXGBE_LINK_SPEED_1GB_FULL:
+ pb_headroom = IXGBE_DMACRXT_1G;
+ break;
+ default:
+ pb_headroom = IXGBE_DMACRXT_10G;
+ break;
+ }
+
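+ /* MAXFRS holds the max frame size in bytes; divide by 1024 to get
+ * kilobytes for comparison with the Rx packet buffer threshold.
+ */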
+ maxframe_size_kb = ((IXGBE_READ_REG(hw, IXGBE_MAXFRS) >>
+ IXGBE_MHADD_MFS_SHIFT) / 1024);
+
+ /* Set the per Rx packet buffer receive threshold */
+ for (tc = 0; tc < IXGBE_DCB_MAX_TRAFFIC_CLASS; tc++) {
+ reg = IXGBE_READ_REG(hw, IXGBE_DMCTH(tc));
+ reg &= ~IXGBE_DMCTH_DMACRXT_MASK;
+
+ if (tc < hw->mac.dmac_config.num_tcs) {
+ /* Get Rx PB size */
+ rx_pb_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(tc));
+ rx_pb_size = (rx_pb_size & IXGBE_RXPBSIZE_MASK) >>
+ IXGBE_RXPBSIZE_SHIFT;
+
+ /* Calculate receive buffer threshold in kilobytes */
+ if (rx_pb_size > pb_headroom)
+ rx_pb_size = rx_pb_size - pb_headroom;
+ else
+ rx_pb_size = 0;
+
+ /* DMCTH must be at least the max frame size (MFS) */
+ reg |= (rx_pb_size > maxframe_size_kb) ?
+ rx_pb_size : maxframe_size_kb;
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_DMCTH(tc), reg);
+ }
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_dmac_update_tcs_X550
+ * @hw: pointer to hardware structure
+ *
+ * Disables dmac, updates per TC settings, and then enables dmac.
+ **/
+s32 ixgbe_dmac_update_tcs_X550(struct ixgbe_hw *hw)
+{
+ u32 reg;
+
+ DEBUGFUNC("ixgbe_dmac_update_tcs_X550");
+
+ /* Disable DMA coalescing before configuring */
+ reg = IXGBE_READ_REG(hw, IXGBE_DMACR);
+ reg &= ~IXGBE_DMACR_DMAC_EN;
+ IXGBE_WRITE_REG(hw, IXGBE_DMACR, reg);
+
+ ixgbe_dmac_config_tcs_X550(hw);
+
+ /* Enable DMA coalescing after configuration */
+ reg = IXGBE_READ_REG(hw, IXGBE_DMACR);
+ reg |= IXGBE_DMACR_DMAC_EN;
+ IXGBE_WRITE_REG(hw, IXGBE_DMACR, reg);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_init_eeprom_params_X550 - Initialize EEPROM params
+ * @hw: pointer to hardware structure
+ *
+ * Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ * ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
+{
+ struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+ u32 eec;
+ u16 eeprom_size;
+
+ DEBUGFUNC("ixgbe_init_eeprom_params_X550");
+
+ if (eeprom->type == ixgbe_eeprom_uninitialized) {
+ eeprom->semaphore_delay = 10;
+ eeprom->type = ixgbe_flash;
+
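+ /* The EEC size field encodes the EEPROM word count as a power of two. */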
+ eec = IXGBE_READ_REG(hw, IXGBE_EEC);
+ eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+ IXGBE_EEC_SIZE_SHIFT);
+ eeprom->word_size = 1 << (eeprom_size +
+ IXGBE_EEPROM_WORD_SIZE_SHIFT);
+
+ DEBUGOUT2("Eeprom params: type = %d, size = %d\n",
+ eeprom->type, eeprom->word_size);
+ }
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_setup_eee_X550 - Enable/disable EEE support
+ * @hw: pointer to the HW structure
+ * @enable_eee: boolean flag to enable EEE
+ *
+ * Enable/disable EEE based on enable_eee flag.
+ * Auto-negotiation must be started after BASE-T EEE bits in PHY register 7.3C
+ * are modified.
+ *
+ **/
+s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee)
+{
+ u32 eeer;
+ u16 autoneg_eee_reg;
+ u32 link_reg;
+ s32 status;
+ u32 fuse;
+
+ DEBUGFUNC("ixgbe_setup_eee_X550");
+
+ eeer = IXGBE_READ_REG(hw, IXGBE_EEER);
+ /* Enable or disable EEE per flag */
+ if (enable_eee) {
+ eeer |= (IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN);
+
+ if (hw->device_id == IXGBE_DEV_ID_X550T) {
+ /* Advertise EEE capability */
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_eee_reg);
+
+ autoneg_eee_reg |= (IXGBE_AUTO_NEG_10GBASE_EEE_ADVT |
+ IXGBE_AUTO_NEG_1000BASE_EEE_ADVT |
+ IXGBE_AUTO_NEG_100BASE_EEE_ADVT);
+
+ hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_eee_reg);
+ } else if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) {
+ /* Not supported on first revision. */
+ fuse = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0));
+ if (!(fuse & IXGBE_FUSES0_REV1))
+ return IXGBE_SUCCESS;
+
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR |
+ IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX;
+
+ /* Don't advertise FEC capability when EEE enabled. */
+ link_reg &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC;
+
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ }
+ } else {
+ eeer &= ~(IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN);
+
+ if (hw->device_id == IXGBE_DEV_ID_X550T) {
+ /* Disable advertised EEE capability */
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_eee_reg);
+
+ autoneg_eee_reg &= ~(IXGBE_AUTO_NEG_10GBASE_EEE_ADVT |
+ IXGBE_AUTO_NEG_1000BASE_EEE_ADVT |
+ IXGBE_AUTO_NEG_100BASE_EEE_ADVT);
+
+ hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_eee_reg);
+ } else if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) {
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ link_reg &= ~(IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR |
+ IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX);
+
+ /* Advertise FEC capability when EEE is disabled. */
+ link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC;
+
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ }
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_EEER, eeer);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_set_source_address_pruning_X550 - Enable/Disable source address pruning
+ * @hw: pointer to hardware structure
+ * @enable: enable or disable source address pruning
+ * @pool: Rx pool to set source address pruning for
+ **/
+void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, bool enable,
+ unsigned int pool)
+{
+ u64 pfflp;
+
+ /* max rx pool is 63 */
+ if (pool > 63)
+ return;
+
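+ /* PFFLPL/PFFLPH together form a 64-bit bitmap with one
+ * pruning-enable bit per Rx pool.
+ */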
+ pfflp = (u64)IXGBE_READ_REG(hw, IXGBE_PFFLPL);
+ pfflp |= (u64)IXGBE_READ_REG(hw, IXGBE_PFFLPH) << 32;
+
+ if (enable)
+ pfflp |= (1ULL << pool);
+ else
+ pfflp &= ~(1ULL << pool);
+
+ IXGBE_WRITE_REG(hw, IXGBE_PFFLPL, (u32)pfflp);
+ IXGBE_WRITE_REG(hw, IXGBE_PFFLPH, (u32)(pfflp >> 32));
+}
+
+/**
+ * ixgbe_set_ethertype_anti_spoofing_X550 - Enable/Disable Ethertype anti-spoofing
+ * @hw: pointer to hardware structure
+ * @enable: enable or disable switch for Ethertype anti-spoofing
+ * @vf: VF pool to set for Ethertype anti-spoofing
+ *
+ **/
+void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw,
+ bool enable, int vf)
+{
+ int vf_target_reg = vf >> 3;
+ int vf_target_shift = vf % 8 + IXGBE_SPOOF_ETHERTYPEAS_SHIFT;
+ u32 pfvfspoof;
+
+ DEBUGFUNC("ixgbe_set_ethertype_anti_spoofing_X550");
+
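+ /* Each PFVFSPOOF register covers eight VFs: vf >> 3 selects the
+ * register and vf % 8 the bit within the Ethertype anti-spoofing field.
+ */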
+ pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
+ if (enable)
+ pfvfspoof |= (1 << vf_target_shift);
+ else
+ pfvfspoof &= ~(1 << vf_target_shift);
+
+ IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
+}
+
+/**
+ * ixgbe_iosf_wait - Wait for IOSF command completion
+ * @hw: pointer to hardware structure
+ * @ctrl: pointer to location to receive final IOSF control value
+ *
+ * Returns failing status on timeout
+ *
+ * Note: ctrl can be NULL if the IOSF control register value is not needed
+ **/
+static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl)
+{
+ u32 i, command;
+
+ /* Check every 10 usec to see if the address cycle completed.
+ * The SB IOSF BUSY bit will clear when the operation is
+ * complete
+ */
+ for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+ command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL);
+ if ((command & IXGBE_SB_IOSF_CTRL_BUSY) == 0)
+ break;
+ usec_delay(10);
+ }
+ if (ctrl)
+ *ctrl = command;
+ if (i == IXGBE_MDIO_COMMAND_TIMEOUT) {
+ ERROR_REPORT1(IXGBE_ERROR_POLLING, "Wait timed out\n");
+ return IXGBE_ERR_PHY;
+ }
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_write_iosf_sb_reg_x550 - Writes a value to the specified register of the
+ * device
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to write
+ * @device_type: 3 bit device type
+ * @data: Data to write to the register
+ **/
+s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u32 data)
+{
+ u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM;
+ u32 command, error;
+ s32 ret;
+
+ ret = ixgbe_acquire_swfw_semaphore(hw, gssr);
+ if (ret != IXGBE_SUCCESS)
+ return ret;
+
+ ret = ixgbe_iosf_wait(hw, NULL);
+ if (ret != IXGBE_SUCCESS)
+ goto out;
+
+ command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) |
+ (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT));
+
+ /* Write IOSF control register */
+ IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL, command);
+
+ /* Write IOSF data register */
+ IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA, data);
+
+ ret = ixgbe_iosf_wait(hw, &command);
+
+ if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
+ error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
+ IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
+ ERROR_REPORT2(IXGBE_ERROR_POLLING,
+ "Failed to write, error %x\n", error);
+ ret = IXGBE_ERR_PHY;
+ }
+
+out:
+ ixgbe_release_swfw_semaphore(hw, gssr);
+ return ret;
+}
+
+/**
+ * ixgbe_read_iosf_sb_reg_x550 - Reads a value from the specified register of the
+ * IOSF device
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to read
+ * @device_type: 3 bit device type
+ * @data: pointer to location to receive the read value
+ **/
+s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u32 *data)
+{
+ u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM;
+ u32 command, error;
+ s32 ret;
+
+ ret = ixgbe_acquire_swfw_semaphore(hw, gssr);
+ if (ret != IXGBE_SUCCESS)
+ return ret;
+
+ ret = ixgbe_iosf_wait(hw, NULL);
+ if (ret != IXGBE_SUCCESS)
+ goto out;
+
+ command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) |
+ (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT));
+
+ /* Write IOSF control register */
+ IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL, command);
+
+ ret = ixgbe_iosf_wait(hw, &command);
+
+ if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
+ error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
+ IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
+ ERROR_REPORT2(IXGBE_ERROR_POLLING,
+ "Failed to read, error %x\n", error);
+ ret = IXGBE_ERR_PHY;
+ }
+
+ if (ret == IXGBE_SUCCESS)
+ *data = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA);
+
+out:
+ ixgbe_release_swfw_semaphore(hw, gssr);
+ return ret;
+}
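+
+/* Usage sketch (illustrative, mirroring the KR setup code later in this
+ * file): the two helpers above form a read-modify-write pair, e.g.
+ *
+ * ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ * IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ * reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+ * ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ * IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ */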
+
+/**
+ * ixgbe_disable_mdd_X550
+ * @hw: pointer to hardware structure
+ *
+ * Disable malicious driver detection
+ **/
+void ixgbe_disable_mdd_X550(struct ixgbe_hw *hw)
+{
+ u32 reg;
+
+ DEBUGFUNC("ixgbe_disable_mdd_X550");
+
+ /* Disable MDD for TX DMA and interrupt */
+ reg = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
+ reg &= ~(IXGBE_DMATXCTL_MDP_EN | IXGBE_DMATXCTL_MBINTEN);
+ IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, reg);
+
+ /* Disable MDD for RX and interrupt */
+ reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+ reg &= ~(IXGBE_RDRXCTL_MDP_EN | IXGBE_RDRXCTL_MBINTEN);
+ IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg);
+}
+
+/**
+ * ixgbe_enable_mdd_X550
+ * @hw: pointer to hardware structure
+ *
+ * Enable malicious driver detection
+ **/
+void ixgbe_enable_mdd_X550(struct ixgbe_hw *hw)
+{
+ u32 reg;
+
+ DEBUGFUNC("ixgbe_enable_mdd_X550");
+
+ /* Enable MDD for TX DMA and interrupt */
+ reg = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
+ reg |= (IXGBE_DMATXCTL_MDP_EN | IXGBE_DMATXCTL_MBINTEN);
+ IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, reg);
+
+ /* Enable MDD for RX and interrupt */
+ reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+ reg |= (IXGBE_RDRXCTL_MDP_EN | IXGBE_RDRXCTL_MBINTEN);
+ IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg);
+}
+
+/**
+ * ixgbe_restore_mdd_vf_X550
+ * @hw: pointer to hardware structure
+ * @vf: vf index
+ *
+ * Restore VF that was disabled during malicious driver detection event
+ **/
+void ixgbe_restore_mdd_vf_X550(struct ixgbe_hw *hw, u32 vf)
+{
+ u32 idx, reg, num_qs, start_q, bitmask;
+
+ DEBUGFUNC("ixgbe_restore_mdd_vf_X550");
+
+ /* Map VF to queues */
+ reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
+ switch (reg & IXGBE_MRQC_MRQE_MASK) {
+ case IXGBE_MRQC_VMDQRT8TCEN:
+ num_qs = 8; /* 16 VFs / pools */
+ bitmask = 0x000000FF;
+ break;
+ case IXGBE_MRQC_VMDQRSS32EN:
+ case IXGBE_MRQC_VMDQRT4TCEN:
+ num_qs = 4; /* 32 VFs / pools */
+ bitmask = 0x0000000F;
+ break;
+ default: /* 64 VFs / pools */
+ num_qs = 2;
+ bitmask = 0x00000003;
+ break;
+ }
+ start_q = vf * num_qs;
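+ /* Example: in 32-pool mode num_qs is 4, so VF 5 owns queues 20-23
+ * and the write below targets exactly those WQBR bits.
+ */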
+
+ /* Release the VF's queues by clearing WQBR_TX and WQBR_RX (RW1C) */
+ idx = start_q / 32;
+ reg = 0;
+ reg |= (bitmask << (start_q % 32));
+ IXGBE_WRITE_REG(hw, IXGBE_WQBR_TX(idx), reg);
+ IXGBE_WRITE_REG(hw, IXGBE_WQBR_RX(idx), reg);
+}
+
+/**
+ * ixgbe_mdd_event_X550
+ * @hw: pointer to hardware structure
+ * @vf_bitmap: vf bitmap of malicious vfs
+ *
+ * Handle malicious driver detection event.
+ **/
+void ixgbe_mdd_event_X550(struct ixgbe_hw *hw, u32 *vf_bitmap)
+{
+ u32 wqbr;
+ u32 i, j, reg, q, shift, vf, idx;
+
+ DEBUGFUNC("ixgbe_mdd_event_X550");
+
+ /* Figure out pool size for mapping queues to VFs */
+ reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
+ switch (reg & IXGBE_MRQC_MRQE_MASK) {
+ case IXGBE_MRQC_VMDQRT8TCEN:
+ shift = 3; /* 16 VFs / pools */
+ break;
+ case IXGBE_MRQC_VMDQRSS32EN:
+ case IXGBE_MRQC_VMDQRT4TCEN:
+ shift = 2; /* 32 VFs / pools */
+ break;
+ default:
+ shift = 1; /* 64 VFs / pools */
+ break;
+ }
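+ /* Example: with shift = 2 (32 pools), malicious queue 9 maps to
+ * VF 9 >> 2 = 2.
+ */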
+
+ /* Read WQBR_TX and WQBR_RX and check for malicious queues */
+ for (i = 0; i < 4; i++) {
+ wqbr = IXGBE_READ_REG(hw, IXGBE_WQBR_TX(i));
+ wqbr |= IXGBE_READ_REG(hw, IXGBE_WQBR_RX(i));
+
+ if (!wqbr)
+ continue;
+
+ /* Get malicious queue */
+ for (j = 0; j < 32 && wqbr; j++) {
+
+ if (!(wqbr & (1 << j)))
+ continue;
+
+ /* Get queue from bitmask */
+ q = j + (i * 32);
+
+ /* Map queue to vf */
+ vf = (q >> shift);
+
+ /* Set vf bit in vf_bitmap */
+ idx = vf / 32;
+ vf_bitmap[idx] |= (1 << (vf % 32));
+ wqbr &= ~(1 << j);
+ }
+ }
+}
+
+/**
+ * ixgbe_get_media_type_X550em - Get media type
+ * @hw: pointer to hardware structure
+ *
+ * Returns the media type (fiber, copper, backplane)
+ */
+enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw)
+{
+ enum ixgbe_media_type media_type;
+
+ DEBUGFUNC("ixgbe_get_media_type_X550em");
+
+ /* Detect if there is a copper PHY attached. */
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_X550EM_X_KR:
+ case IXGBE_DEV_ID_X550EM_X_KX4:
+ media_type = ixgbe_media_type_backplane;
+ break;
+ case IXGBE_DEV_ID_X550EM_X_SFP:
+ media_type = ixgbe_media_type_fiber;
+ break;
+ case IXGBE_DEV_ID_X550EM_X_1G_T:
+ case IXGBE_DEV_ID_X550EM_X_10G_T:
+ media_type = ixgbe_media_type_copper;
+ break;
+ default:
+ media_type = ixgbe_media_type_unknown;
+ break;
+ }
+ return media_type;
+}
+
+/**
+ * ixgbe_supported_sfp_modules_X550em - Check if SFP module type is supported
+ * @hw: pointer to hardware structure
+ * @linear: TRUE if SFP module is linear
+ */
+static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear)
+{
+ DEBUGFUNC("ixgbe_supported_sfp_modules_X550em");
+
+ switch (hw->phy.sfp_type) {
+ case ixgbe_sfp_type_not_present:
+ return IXGBE_ERR_SFP_NOT_PRESENT;
+ case ixgbe_sfp_type_da_cu_core0:
+ case ixgbe_sfp_type_da_cu_core1:
+ *linear = TRUE;
+ break;
+ case ixgbe_sfp_type_srlr_core0:
+ case ixgbe_sfp_type_srlr_core1:
+ case ixgbe_sfp_type_da_act_lmt_core0:
+ case ixgbe_sfp_type_da_act_lmt_core1:
+ case ixgbe_sfp_type_1g_sx_core0:
+ case ixgbe_sfp_type_1g_sx_core1:
+ case ixgbe_sfp_type_1g_lx_core0:
+ case ixgbe_sfp_type_1g_lx_core1:
+ *linear = FALSE;
+ break;
+ case ixgbe_sfp_type_unknown:
+ case ixgbe_sfp_type_1g_cu_core0:
+ case ixgbe_sfp_type_1g_cu_core1:
+ default:
+ return IXGBE_ERR_SFP_NOT_SUPPORTED;
+ }
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_identify_sfp_module_X550em - Identifies SFP modules
+ * @hw: pointer to hardware structure
+ *
+ * Searches for and identifies the SFP module and assigns appropriate PHY type.
+ **/
+s32 ixgbe_identify_sfp_module_X550em(struct ixgbe_hw *hw)
+{
+ s32 status;
+ bool linear;
+
+ DEBUGFUNC("ixgbe_identify_sfp_module_X550em");
+
+ status = ixgbe_identify_module_generic(hw);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Check if SFP module is supported */
+ status = ixgbe_supported_sfp_modules_X550em(hw, &linear);
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_sfp_modules_X550em - Setup MAC link ops
+ * @hw: pointer to hardware structure
+ */
+s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw)
+{
+ s32 status;
+ bool linear;
+
+ DEBUGFUNC("ixgbe_setup_sfp_modules_X550em");
+
+ /* Check if SFP module is supported */
+ status = ixgbe_supported_sfp_modules_X550em(hw, &linear);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ ixgbe_init_mac_link_ops_X550em(hw);
+ hw->phy.ops.reset = NULL;
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_init_mac_link_ops_X550em - init mac link function pointers
+ * @hw: pointer to hardware structure
+ */
+void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
+{
+ struct ixgbe_mac_info *mac = &hw->mac;
+
+ DEBUGFUNC("ixgbe_init_mac_link_ops_X550em");
+
+ switch (hw->mac.ops.get_media_type(hw)) {
+ case ixgbe_media_type_fiber:
+ /* CS4227 does not support autoneg, so disable the laser control
+ * functions for SFP+ fiber
+ */
+ mac->ops.disable_tx_laser = NULL;
+ mac->ops.enable_tx_laser = NULL;
+ mac->ops.flap_tx_laser = NULL;
+ mac->ops.setup_link = ixgbe_setup_mac_link_multispeed_fiber;
+ mac->ops.setup_mac_link = ixgbe_setup_mac_link_sfp_x550em;
+ mac->ops.set_rate_select_speed =
+ ixgbe_set_soft_rate_select_speed;
+ break;
+ case ixgbe_media_type_copper:
+ mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em;
+ mac->ops.check_link = ixgbe_check_link_t_X550em;
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * ixgbe_get_link_capabilities_X550em - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: TRUE when autoneg or autotry is enabled
+ */
+s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed,
+ bool *autoneg)
+{
+ DEBUGFUNC("ixgbe_get_link_capabilities_X550em");
+
+ /* SFP */
+ if (hw->phy.media_type == ixgbe_media_type_fiber) {
+
+ /* CS4227 SFP must not enable auto-negotiation */
+ *autoneg = FALSE;
+
+ /* Check if 1G SFP module. */
+ if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1
+ || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+ hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) {
+ *speed = IXGBE_LINK_SPEED_1GB_FULL;
+ return IXGBE_SUCCESS;
+ }
+
+ /* Link capabilities are based on SFP */
+ if (hw->phy.multispeed_fiber)
+ *speed = IXGBE_LINK_SPEED_10GB_FULL |
+ IXGBE_LINK_SPEED_1GB_FULL;
+ else
+ *speed = IXGBE_LINK_SPEED_10GB_FULL;
+ } else {
+ *speed = IXGBE_LINK_SPEED_10GB_FULL |
+ IXGBE_LINK_SPEED_1GB_FULL;
+ *autoneg = TRUE;
+ }
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_get_lasi_ext_t_x550em - Determine external Base T PHY interrupt cause
+ * @hw: pointer to hardware structure
+ * @lsc: pointer to boolean flag which indicates whether external Base T
+ * PHY interrupt is lsc
+ *
+ * Determine whether the external Base T PHY interrupt cause is a high
+ * temperature failure alarm or a link status change.
+ *
+ * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature
+ * failure alarm, else return PHY access status.
+ */
+static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
+{
+ u32 status;
+ u16 reg;
+
+ *lsc = FALSE;
+
+ /* Vendor alarm triggered */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS ||
+ !(reg & IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN))
+ return status;
+
+ /* Vendor Auto-Neg alarm triggered or Global alarm 1 triggered */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS ||
+ !(reg & (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN |
+ IXGBE_MDIO_GLOBAL_ALARM_1_INT)))
+ return status;
+
+ /* High temperature failure alarm triggered */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If high temperature failure, then return over temp error and exit */
+ if (reg & IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL) {
+ /* power down the PHY in case the PHY FW didn't already */
+ ixgbe_set_copper_phy_power(hw, FALSE);
+ return IXGBE_ERR_OVERTEMP;
+ }
+
+ /* Vendor alarm 2 triggered */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+
+ if (status != IXGBE_SUCCESS ||
+ !(reg & IXGBE_MDIO_GLOBAL_STD_ALM2_INT))
+ return status;
+
+ /* link connect/disconnect event occurred */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Indicate LSC */
+ if (reg & IXGBE_MDIO_AUTO_NEG_VEN_LSC)
+ *lsc = TRUE;
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_enable_lasi_ext_t_x550em - Enable external Base T PHY interrupts
+ * @hw: pointer to hardware structure
+ *
+ * Enable link status change and temperature failure alarm for the external
+ * Base T PHY
+ *
+ * Returns PHY access status
+ */
+static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
+{
+ u32 status;
+ u16 reg;
+ bool lsc;
+
+ /* Clear interrupt flags */
+ status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc);
+
+ /* Enable link status change alarm */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+
+ status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Enable high temperature failure alarm */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN;
+
+ status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Enable vendor Auto-Neg alarm and Global Interrupt Mask 1 alarm */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg |= (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN |
+ IXGBE_MDIO_GLOBAL_ALARM_1_INT);
+
+ status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Enable chip-wide vendor alarm */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg |= IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN;
+
+ status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ reg);
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_kr_speed_x550em - Configure the KR PHY for link speed.
+ * @hw: pointer to hardware structure
+ * @speed: link speed
+ *
+ * Configures the integrated KR PHY.
+ **/
+static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed)
+{
+ s32 status;
+ u32 reg_val;
+
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status)
+ return status;
+
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+ reg_val &= ~(IXGBE_KRM_LINK_CTRL_1_TETH_AN_FEC_REQ |
+ IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC);
+ reg_val &= ~(IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR |
+ IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX);
+
+ /* Advertise 10G support. */
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR;
+
+ /* Advertise 1G support. */
+ if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX;
+
+ /* Restart auto-negotiation. */
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ return status;
+}
+
+/**
+ * ixgbe_init_phy_ops_X550em - PHY/SFP specific init
+ * @hw: pointer to hardware structure
+ *
+ * Initialize any function pointers that were not able to be
+ * set during init_shared_code because the PHY/SFP type was
+ * not known. Perform the SFP init if necessary.
+ */
+s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
+{
+ struct ixgbe_phy_info *phy = &hw->phy;
+ ixgbe_link_speed speed;
+ s32 ret_val;
+
+ DEBUGFUNC("ixgbe_init_phy_ops_X550em");
+
+ hw->mac.ops.set_lan_id(hw);
+
+ if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) {
+ phy->phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM;
+ ixgbe_setup_mux_ctl(hw);
+
+ /* Save NW management interface connected on board. This is used
+ * to determine internal PHY mode.
+ */
+ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL);
+
+ /* If internal PHY mode is KR, then initialize KR link */
+ if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
+ speed = IXGBE_LINK_SPEED_10GB_FULL |
+ IXGBE_LINK_SPEED_1GB_FULL;
+ ret_val = ixgbe_setup_kr_speed_x550em(hw, speed);
+ }
+
+ phy->ops.identify_sfp = ixgbe_identify_sfp_module_X550em;
+ }
+
+ /* Identify the PHY or SFP module */
+ ret_val = phy->ops.identify(hw);
+ if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ return ret_val;
+
+ /* Setup function pointers based on detected hardware */
+ ixgbe_init_mac_link_ops_X550em(hw);
+ if (phy->sfp_type != ixgbe_sfp_type_unknown)
+ phy->ops.reset = NULL;
+
+ /* Set functions pointers based on phy type */
+ switch (hw->phy.type) {
+ case ixgbe_phy_x550em_kx4:
+ phy->ops.setup_link = ixgbe_setup_kx4_x550em;
+ phy->ops.read_reg = ixgbe_read_phy_reg_x550em;
+ phy->ops.write_reg = ixgbe_write_phy_reg_x550em;
+ break;
+ case ixgbe_phy_x550em_kr:
+ phy->ops.setup_link = ixgbe_setup_kr_x550em;
+ phy->ops.read_reg = ixgbe_read_phy_reg_x550em;
+ phy->ops.write_reg = ixgbe_write_phy_reg_x550em;
+ break;
+ case ixgbe_phy_x550em_ext_t:
+ /* Save NW management interface connected on board. This is used
+ * to determine internal PHY mode
+ */
+ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL);
+
+ /* If internal link mode is XFI, then setup iXFI internal link,
+ * else setup KR now.
+ */
+ if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+ phy->ops.setup_internal_link =
+ ixgbe_setup_internal_phy_t_x550em;
+ } else {
+ speed = IXGBE_LINK_SPEED_10GB_FULL |
+ IXGBE_LINK_SPEED_1GB_FULL;
+ ret_val = ixgbe_setup_kr_speed_x550em(hw, speed);
+ }
+
+ phy->ops.enter_lplu = ixgbe_enter_lplu_t_x550em;
+ phy->ops.handle_lasi = ixgbe_handle_lasi_ext_t_x550em;
+ phy->ops.reset = ixgbe_reset_phy_t_X550em;
+ break;
+ default:
+ break;
+ }
+ return ret_val;
+}
+
+/**
+ * ixgbe_reset_hw_X550em - Perform hardware reset
+ * @hw: pointer to hardware structure
+ *
+ * Resets the hardware by resetting the transmit and receive units, masking
+ * and clearing all interrupts, performing a PHY reset, and performing a
+ * link (MAC) reset.
+ */
+s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
+{
+ ixgbe_link_speed link_speed;
+ s32 status;
+ u32 ctrl = 0;
+ u32 i;
+ u32 hlreg0;
+ bool link_up = FALSE;
+
+ DEBUGFUNC("ixgbe_reset_hw_X550em");
+
+ /* Call adapter stop to disable Tx/Rx and clear interrupts */
+ status = hw->mac.ops.stop_adapter(hw);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* flush pending Tx transactions */
+ ixgbe_clear_tx_pending(hw);
+
+ /* PHY ops must be identified and initialized prior to reset */
+
+ /* Identify PHY and related function pointers */
+ status = hw->phy.ops.init(hw);
+
+ if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ return status;
+
+ /* start the external PHY */
+ if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
+ status = ixgbe_init_ext_t_x550em(hw);
+ if (status)
+ return status;
+ }
+
+ /* Setup SFP module if there is one present. */
+ if (hw->phy.sfp_setup_needed) {
+ status = hw->mac.ops.setup_sfp(hw);
+ hw->phy.sfp_setup_needed = FALSE;
+ }
+
+ if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+ return status;
+
+ /* Reset PHY */
+ if (!hw->phy.reset_disable && hw->phy.ops.reset)
+ hw->phy.ops.reset(hw);
+
+mac_reset_top:
+ /* Issue global reset to the MAC. Needs to be SW reset if link is up.
+ * If link reset is used when link is up, it might reset the PHY when
+ * mng is using it. If link is down or the flag to force full link
+ * reset is set, then perform link reset.
+ */
+ ctrl = IXGBE_CTRL_LNK_RST;
+ if (!hw->force_full_reset) {
+ hw->mac.ops.check_link(hw, &link_speed, &link_up, FALSE);
+ if (link_up)
+ ctrl = IXGBE_CTRL_RST;
+ }
+
+ ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* Poll for reset bit to self-clear meaning reset is complete */
+ for (i = 0; i < 10; i++) {
+ usec_delay(1);
+ ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+ if (!(ctrl & IXGBE_CTRL_RST_MASK))
+ break;
+ }
+
+ if (ctrl & IXGBE_CTRL_RST_MASK) {
+ status = IXGBE_ERR_RESET_FAILED;
+ DEBUGOUT("Reset polling failed to complete.\n");
+ }
+
+ msec_delay(50);
+
+ /* Double resets are required for recovery from certain error
+ * conditions. Between resets, it is necessary to stall to
+ * allow time for any pending HW events to complete.
+ */
+ if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+ hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+ goto mac_reset_top;
+ }
+
+ /* Store the permanent mac address */
+ hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+ /* Store MAC address from RAR0, clear receive address registers, and
+ * clear the multicast table. Also reset num_rar_entries to 128,
+ * since we modify this value when programming the SAN MAC address.
+ */
+ hw->mac.num_rar_entries = 128;
+ hw->mac.ops.init_rx_addrs(hw);
+
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
+ /* Config MDIO clock speed. */
+ hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ hlreg0 &= ~IXGBE_HLREG0_MDCSPD;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+ }
+
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
+ ixgbe_setup_mux_ctl(hw);
+
+ return status;
+}
+
+/**
+ * ixgbe_init_ext_t_x550em - Start (unstall) the external Base T PHY.
+ * @hw: pointer to hardware structure
+ */
+s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
+{
+ u32 status;
+ u16 reg;
+
+ status = hw->phy.ops.read_reg(hw,
+ IXGBE_MDIO_TX_VENDOR_ALARMS_3,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If the PHY FW reset-completed bit is set, this is the first
+ * SW instance after a power-on, so the PHY FW must be un-stalled.
+ */
+ if (reg & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) {
+ status = hw->phy.ops.read_reg(hw,
+ IXGBE_MDIO_GLOBAL_RES_PR_10,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ &reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg &= ~IXGBE_MDIO_POWER_UP_STALL;
+
+ status = hw->phy.ops.write_reg(hw,
+ IXGBE_MDIO_GLOBAL_RES_PR_10,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+ reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_kr_x550em - Configure the KR PHY.
+ * @hw: pointer to hardware structure
+ *
+ * Configures the integrated KR PHY.
+ **/
+s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw)
+{
+ return ixgbe_setup_kr_speed_x550em(hw, hw->phy.autoneg_advertised);
+}
+
+/**
+ * ixgbe_setup_kx4_x550em - Configure the KX4 PHY.
+ * @hw: pointer to hardware structure
+ *
+ * Configures the integrated KX4 PHY.
+ **/
+s32 ixgbe_setup_kx4_x550em(struct ixgbe_hw *hw)
+{
+ s32 status;
+ u32 reg_val;
+
+ status = ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KX4_LINK_CNTL_1,
+ IXGBE_SB_IOSF_TARGET_KX4_PCS, &reg_val);
+ if (status)
+ return status;
+
+ reg_val &= ~(IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX4 |
+ IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX);
+
+ reg_val |= IXGBE_KX4_LINK_CNTL_1_TETH_AN_ENABLE;
+
+ /* Advertise 10G support. */
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
+ reg_val |= IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX4;
+
+ /* Advertise 1G support. */
+ if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
+ reg_val |= IXGBE_KX4_LINK_CNTL_1_TETH_AN_CAP_KX;
+
+ /* Restart auto-negotiation. */
+ reg_val |= IXGBE_KX4_LINK_CNTL_1_TETH_AN_RESTART;
+ status = ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KX4_LINK_CNTL_1,
+ IXGBE_SB_IOSF_TARGET_KX4_PCS, reg_val);
+
+ return status;
+}
+
+/**
+ * ixgbe_setup_mac_link_sfp_x550em - Set up the internal/external PHY for SFP
+ * @hw: pointer to hardware structure
+ * @speed: link speed to configure
+ * @autoneg_wait_to_complete: unused in this implementation
+ *
+ * Configure the external PHY and the integrated KR PHY for SFP support.
+ **/
+s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg_wait_to_complete)
+{
+ s32 ret_val;
+ u16 reg_slice, reg_val;
+ bool setup_linear = FALSE;
+ UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
+
+ /* Check if SFP module is supported and linear */
+ ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+
+ /* If no SFP module is present, return success: there is nothing to
+ * configure on the CS4227, and an SFP-not-present error is not
+ * treated as fatal in the setup MAC link flow.
+ */
+ if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT)
+ return IXGBE_SUCCESS;
+
+ if (ret_val != IXGBE_SUCCESS)
+ return ret_val;
+
+ /* Configure CS4227 for LINE connection rate then type. */
+ reg_slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12);
+ reg_val = (speed & IXGBE_LINK_SPEED_10GB_FULL) ? 0 : 0x8000;
+ ret_val = ixgbe_write_i2c_combined(hw, IXGBE_CS4227, reg_slice,
+ reg_val);
+
+ reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
+ if (setup_linear)
+ reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+ else
+ reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
+ ret_val = ixgbe_write_i2c_combined(hw, IXGBE_CS4227, reg_slice,
+ reg_val);
+
+ /* Configure CS4227 for HOST connection rate then type. */
+ reg_slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12);
+ reg_val = (speed & IXGBE_LINK_SPEED_10GB_FULL) ? 0 : 0x8000;
+ ret_val = ixgbe_write_i2c_combined(hw, IXGBE_CS4227, reg_slice,
+ reg_val);
+
+ reg_slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12);
+ if (setup_linear)
+ reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+ else
+ reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
+ ret_val = ixgbe_write_i2c_combined(hw, IXGBE_CS4227, reg_slice,
+ reg_val);
+
+ /* If internal link mode is XFI, then setup XFI internal link. */
+ if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))
+ ret_val = ixgbe_setup_ixfi_x550em(hw, &speed);
+
+ return ret_val;
+}
+
+/**
+ * ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode.
+ * @hw: pointer to hardware structure
+ * @speed: the link speed to force
+ *
+ * Configures the integrated KR PHY to use iXFI mode. Used to connect an
+ * internal and external PHY at a specific speed, without autonegotiation.
+ **/
+static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+ s32 status;
+ u32 reg_val;
+
+ /* Disable AN and force speed to 10G Serial. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+ reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+
+ /* Select forced link speed for internal PHY. */
+ switch (*speed) {
+ case IXGBE_LINK_SPEED_10GB_FULL:
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
+ break;
+ case IXGBE_LINK_SPEED_1GB_FULL:
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
+ break;
+ default:
+ /* Other link speeds are not supported by internal KR PHY. */
+ return IXGBE_ERR_LINK_SETUP;
+ }
+
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Disable training protocol FSM. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Disable Flex from training TXFFE. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN;
+ reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN;
+ reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN;
+ reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN;
+ reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Enable override for coefficients. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_OVRRD_EN;
+ reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN;
+ reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN;
+ reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Toggle port SW reset by AN reset. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ return status;
+}
+
+/**
+ * ixgbe_ext_phy_t_x550em_get_link - Get ext phy link status
+ * @hw: address of hardware structure
+ * @link_up: address of boolean to indicate link status
+ *
+ * Returns error code if unable to get link status.
+ */
+static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up)
+{
+ u32 ret;
+ u16 autoneg_status;
+
+ *link_up = FALSE;
+
+ /* The link status bit latches, so read twice back to back; the
+ * second read reflects the current status */
+ ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_status);
+ if (ret != IXGBE_SUCCESS)
+ return ret;
+
+ ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_status);
+ if (ret != IXGBE_SUCCESS)
+ return ret;
+
+ *link_up = !!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_setup_internal_phy_t_x550em - Configure KR PHY to X557 link
+ * @hw: pointer to hardware structure
+ *
+ * Configures the link between the integrated KR PHY and the external X557 PHY
+ * The driver will call this function when it gets a link status change
+ * interrupt from the X557 PHY. This function configures the link speed
+ * between the PHYs to match the link speed of the BASE-T link.
+ *
+ * A return of a non-zero value indicates an error, and the base driver should
+ * not report link up.
+ */
+s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
+{
+ ixgbe_link_speed force_speed;
+ bool link_up;
+ u32 status;
+ u16 speed;
+
+ if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
+ return IXGBE_ERR_CONFIG;
+
+ /* If link is not up, then there is no setup necessary so return */
+ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ if (!link_up)
+ return IXGBE_SUCCESS;
+
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &speed);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If link is no longer up, no setup is necessary, so return */
+ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ if (!link_up)
+ return IXGBE_SUCCESS;
+
+ /* clear everything but the speed and duplex bits */
+ speed &= IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_MASK;
+
+ switch (speed) {
+ case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB_FULL:
+ force_speed = IXGBE_LINK_SPEED_10GB_FULL;
+ break;
+ case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB_FULL:
+ force_speed = IXGBE_LINK_SPEED_1GB_FULL;
+ break;
+ default:
+ /* Internal PHY does not support anything else */
+ return IXGBE_ERR_INVALID_LINK_SETTINGS;
+ }
+
+ return ixgbe_setup_ixfi_x550em(hw, &force_speed);
+}
+
+/**
+ * ixgbe_setup_phy_loopback_x550em - Configure the KR PHY for loopback.
+ * @hw: pointer to hardware structure
+ *
+ * Configures the integrated KR PHY to use internal loopback mode.
+ **/
+s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw)
+{
+ s32 status;
+ u32 reg_val;
+
+ /* Disable AN and force speed to 10G Serial. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+ reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+ reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Set near-end loopback clocks. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B;
+ reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Set loopback enable. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val |= IXGBE_KRM_PMD_DFX_BURNIN_TX_RX_KR_LB_MASK;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Training bypass. */
+ status = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (status != IXGBE_SUCCESS)
+ return status;
+ reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_PROTOCOL_BYPASS;
+ status = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ return status;
+}
+
+/**
+ * ixgbe_read_ee_hostif_data_X550 - Read EEPROM word using a host interface command
+ * assuming that the semaphore is already obtained.
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the hostif.
+ **/
+s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 *data)
+{
+ s32 status;
+ struct ixgbe_hic_read_shadow_ram buffer;
+
+ DEBUGFUNC("ixgbe_read_ee_hostif_data_X550");
+ buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* convert offset from words to bytes */
+ buffer.address = IXGBE_CPU_TO_BE32(offset * 2);
+ /* one word */
+ buffer.length = IXGBE_CPU_TO_BE16(sizeof(u16));
+
+ status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ IXGBE_HI_COMMAND_TIMEOUT, FALSE);
+
+ if (status)
+ return status;
+
+ *data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG,
+ FW_NVM_DATA_OFFSET);
+
+ return 0;
+}
+
+/**
+ * ixgbe_read_ee_hostif_X550 - Read EEPROM word using a host interface command
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the hostif.
+ **/
+s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 *data)
+{
+ s32 status = IXGBE_SUCCESS;
+
+ DEBUGFUNC("ixgbe_read_ee_hostif_X550");
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ IXGBE_SUCCESS) {
+ status = ixgbe_read_ee_hostif_data_X550(hw, offset, data);
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ } else {
+ status = IXGBE_ERR_SWFW_SYNC;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_read_ee_hostif_buffer_X550 - Read EEPROM word(s) using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words
+ * @data: word(s) read from the EEPROM
+ *
+ * Reads 16 bit word(s) from the EEPROM using the hostif.
+ **/
+s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+ u16 offset, u16 words, u16 *data)
+{
+ struct ixgbe_hic_read_shadow_ram buffer;
+ u32 current_word = 0;
+ u16 words_to_read;
+ s32 status;
+ u32 i;
+
+ DEBUGFUNC("ixgbe_read_ee_hostif_buffer_X550");
+
+ /* Take semaphore for the entire operation. */
+ status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ if (status) {
+ DEBUGOUT("EEPROM read buffer - semaphore failed\n");
+ return status;
+ }
+ while (words) {
+ if (words > FW_MAX_READ_BUFFER_SIZE / 2)
+ words_to_read = FW_MAX_READ_BUFFER_SIZE / 2;
+ else
+ words_to_read = words;
+
+ buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* convert offset from words to bytes */
+ buffer.address = IXGBE_CPU_TO_BE32((offset + current_word) * 2);
+ buffer.length = IXGBE_CPU_TO_BE16(words_to_read * 2);
+
+ status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ IXGBE_HI_COMMAND_TIMEOUT,
+ FALSE);
+
+ if (status) {
+ DEBUGOUT("Host interface command failed\n");
+ goto out;
+ }
+
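+ /* Each 32-bit register read returns two 16-bit EEPROM words, so
+ * the loop below stores the low half, then the high half, advancing
+ * i twice per register.
+ */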
+ for (i = 0; i < words_to_read; i++) {
+ u32 reg = IXGBE_FLEX_MNG + (FW_NVM_DATA_OFFSET << 2) +
+ 2 * i;
+ u32 value = IXGBE_READ_REG(hw, reg);
+
+ data[current_word] = (u16)(value & 0xffff);
+ current_word++;
+ i++;
+ if (i < words_to_read) {
+ value >>= 16;
+ data[current_word] = (u16)(value & 0xffff);
+ current_word++;
+ }
+ }
+ words -= words_to_read;
+ }
+
+out:
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ return status;
+}
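+
+/*
+ * Example use (an illustrative sketch; the offset and word count are
+ * hypothetical, not taken from this driver):
+ *
+ *	u16 words[8];
+ *
+ *	if (ixgbe_read_ee_hostif_buffer_X550(hw, 0x10, 8, words) ==
+ *	    IXGBE_SUCCESS) {
+ *		... words[0..7] now hold EEPROM words 0x10..0x17 ...
+ *	}
+ *
+ * The semaphore is taken once for the whole transfer, so the caller
+ * does not need to hold IXGBE_GSSR_EEP_SM itself.
+ */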
+
+/**
+ * ixgbe_write_ee_hostif_data_X550 - Write EEPROM word using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: word to write to the EEPROM
+ *
+ * Writes a 16 bit word to the EEPROM using the hostif.
+ **/
+s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 data)
+{
+ s32 status;
+ struct ixgbe_hic_write_shadow_ram buffer;
+
+ DEBUGFUNC("ixgbe_write_ee_hostif_data_X550");
+
+ buffer.hdr.req.cmd = FW_WRITE_SHADOW_RAM_CMD;
+ buffer.hdr.req.buf_lenh = 0;
+ buffer.hdr.req.buf_lenl = FW_WRITE_SHADOW_RAM_LEN;
+ buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ /* one word */
+ buffer.length = IXGBE_CPU_TO_BE16(sizeof(u16));
+ buffer.data = data;
+ buffer.address = IXGBE_CPU_TO_BE32(offset * 2);
+
+ status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ IXGBE_HI_COMMAND_TIMEOUT, FALSE);
+
+ return status;
+}
+
+/**
+ * ixgbe_write_ee_hostif_X550 - Write EEPROM word using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @data: word to write to the EEPROM
+ *
+ * Writes a 16 bit word to the EEPROM using the hostif.
+ **/
+s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 data)
+{
+ s32 status = IXGBE_SUCCESS;
+
+ DEBUGFUNC("ixgbe_write_ee_hostif_X550");
+
+ if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) ==
+ IXGBE_SUCCESS) {
+ status = ixgbe_write_ee_hostif_data_X550(hw, offset, data);
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ } else {
+ DEBUGOUT("write ee hostif failed to get semaphore");
+ status = IXGBE_ERR_SWFW_SYNC;
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_write_ee_hostif_buffer_X550 - Write EEPROM word(s) using hostif
+ * @hw: pointer to hardware structure
+ * @offset: offset of word in the EEPROM to write
+ * @words: number of words
+ * @data: word(s) to write to the EEPROM
+ *
+ * Writes 16 bit word(s) to the EEPROM using the hostif.
+ **/
+s32 ixgbe_write_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+ u16 offset, u16 words, u16 *data)
+{
+ s32 status = IXGBE_SUCCESS;
+ u32 i = 0;
+
+ DEBUGFUNC("ixgbe_write_ee_hostif_buffer_X550");
+
+ /* Take semaphore for the entire operation. */
+ status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+ if (status != IXGBE_SUCCESS) {
+ DEBUGOUT("EEPROM write buffer - semaphore failed\n");
+ goto out;
+ }
+
+ for (i = 0; i < words; i++) {
+ status = ixgbe_write_ee_hostif_data_X550(hw, offset + i,
+ data[i]);
+
+ if (status != IXGBE_SUCCESS) {
+ DEBUGOUT("Eeprom buffered write failed\n");
+ break;
+ }
+ }
+
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+out:
+
+ return status;
+}
+
+/**
+ * ixgbe_checksum_ptr_x550 - Checksum one pointer region
+ * @hw: pointer to hardware structure
+ * @ptr: pointer offset in eeprom
+ * @size: size of the section pointed to by ptr; if 0, the first word is used as the size
+ * @csum: address of checksum to update
+ *
+ * Returns error status for any failure
+ */
+static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr,
+ u16 size, u16 *csum, u16 *buffer,
+ u32 buffer_size)
+{
+ u16 buf[256];
+ s32 status;
+ u16 length, bufsz, i, start;
+ u16 *local_buffer;
+
+ bufsz = sizeof(buf) / sizeof(buf[0]);
+
+ /* Read a chunk at the pointer location */
+ if (!buffer) {
+ status = ixgbe_read_ee_hostif_buffer_X550(hw, ptr, bufsz, buf);
+ if (status) {
+ DEBUGOUT("Failed to read EEPROM image\n");
+ return status;
+ }
+ local_buffer = buf;
+ } else {
+ if (buffer_size < ptr)
+ return IXGBE_ERR_PARAM;
+ local_buffer = &buffer[ptr];
+ }
+
+ if (size) {
+ start = 0;
+ length = size;
+ } else {
+ start = 1;
+ length = local_buffer[0];
+
+ /* Skip pointer section if length is invalid. */
+ if (length == 0xFFFF || length == 0 ||
+ (ptr + length) >= hw->eeprom.word_size)
+ return IXGBE_SUCCESS;
+ }
+
+ if (buffer && ((u32)start + (u32)length > buffer_size))
+ return IXGBE_ERR_PARAM;
+
+ for (i = start; length; i++, length--) {
+ if (i == bufsz && !buffer) {
+ ptr += bufsz;
+ i = 0;
+ if (length < bufsz)
+ bufsz = length;
+
+ /* Read a chunk at the pointer location */
+ status = ixgbe_read_ee_hostif_buffer_X550(hw, ptr,
+ bufsz, buf);
+ if (status) {
+ DEBUGOUT("Failed to read EEPROM image\n");
+ return status;
+ }
+ }
+ *csum += local_buffer[i];
+ }
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_calc_checksum_X550 - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ * @buffer: pointer to buffer containing EEPROM data to checksum (NULL to read from the EEPROM)
+ * @buffer_size: size of buffer
+ *
+ * Returns a negative error code on error, or the 16-bit checksum
+ **/
+s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer, u32 buffer_size)
+{
+ u16 eeprom_ptrs[IXGBE_EEPROM_LAST_WORD + 1];
+ u16 *local_buffer;
+ s32 status;
+ u16 checksum = 0;
+ u16 pointer, i, size;
+
+ DEBUGFUNC("ixgbe_calc_eeprom_checksum_X550");
+
+ hw->eeprom.ops.init_params(hw);
+
+ if (!buffer) {
+ /* Read pointer area */
+ status = ixgbe_read_ee_hostif_buffer_X550(hw, 0,
+ IXGBE_EEPROM_LAST_WORD + 1,
+ eeprom_ptrs);
+ if (status) {
+ DEBUGOUT("Failed to read EEPROM image\n");
+ return status;
+ }
+ local_buffer = eeprom_ptrs;
+ } else {
+ if (buffer_size < IXGBE_EEPROM_LAST_WORD)
+ return IXGBE_ERR_PARAM;
+ local_buffer = buffer;
+ }
+
+ /*
+ * For X550 hardware include 0x0-0x41 in the checksum, skip the
+ * checksum word itself
+ */
+ for (i = 0; i <= IXGBE_EEPROM_LAST_WORD; i++)
+ if (i != IXGBE_EEPROM_CHECKSUM)
+ checksum += local_buffer[i];
+
+ /*
+ * Include all data from pointers 0x3, 0x6-0xE. This excludes the
+ * FW, PHY module, and PCIe Expansion/Option ROM pointers.
+ */
+ for (i = IXGBE_PCIE_ANALOG_PTR_X550; i < IXGBE_FW_PTR; i++) {
+ if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR)
+ continue;
+
+ pointer = local_buffer[i];
+
+ /* Skip pointer section if the pointer is invalid. */
+ if (pointer == 0xFFFF || pointer == 0 ||
+ pointer >= hw->eeprom.word_size)
+ continue;
+
+ switch (i) {
+ case IXGBE_PCIE_GENERAL_PTR:
+ size = IXGBE_IXGBE_PCIE_GENERAL_SIZE;
+ break;
+ case IXGBE_PCIE_CONFIG0_PTR:
+ case IXGBE_PCIE_CONFIG1_PTR:
+ size = IXGBE_PCIE_CONFIG_SIZE;
+ break;
+ default:
+ size = 0;
+ break;
+ }
+
+ status = ixgbe_checksum_ptr_x550(hw, pointer, size, &checksum,
+ buffer, buffer_size);
+ if (status)
+ return status;
+ }
+
+ checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+ return (s32)checksum;
+}
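+
+/*
+ * The checksum arithmetic, illustrated with a hypothetical sum: the
+ * stored checksum word is chosen so that the 16-bit sum of every
+ * covered word plus the checksum itself equals IXGBE_EEPROM_SUM:
+ *
+ *	u16 sum = 0x1234;	(16-bit sum of all covered words)
+ *	u16 csum = (u16)(IXGBE_EEPROM_SUM - sum);
+ *
+ * so that (u16)(sum + csum) == IXGBE_EEPROM_SUM, which is the
+ * invariant ixgbe_validate_eeprom_checksum_X550() relies on.
+ */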
+
+/**
+ * ixgbe_calc_eeprom_checksum_X550 - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ *
+ * Returns a negative error code on error, or the 16-bit checksum
+ **/
+s32 ixgbe_calc_eeprom_checksum_X550(struct ixgbe_hw *hw)
+{
+ return ixgbe_calc_checksum_X550(hw, NULL, 0);
+}
+
+/**
+ * ixgbe_validate_eeprom_checksum_X550 - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum_val: calculated checksum
+ *
+ * Performs checksum calculation and validates the EEPROM checksum. If the
+ * caller does not need checksum_val, the value can be NULL.
+ **/
+s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw, u16 *checksum_val)
+{
+ s32 status;
+ u16 checksum;
+ u16 read_checksum = 0;
+
+ DEBUGFUNC("ixgbe_validate_eeprom_checksum_X550");
+
+ /* Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = hw->eeprom.ops.read(hw, 0, &checksum);
+ if (status) {
+ DEBUGOUT("EEPROM read failed\n");
+ return status;
+ }
+
+ status = hw->eeprom.ops.calc_checksum(hw);
+ if (status < 0)
+ return status;
+
+ checksum = (u16)(status & 0xffff);
+
+ status = ixgbe_read_ee_hostif_X550(hw, IXGBE_EEPROM_CHECKSUM,
+ &read_checksum);
+ if (status)
+ return status;
+
+ /* Verify read checksum from EEPROM is the same as
+ * calculated checksum
+ */
+ if (read_checksum != checksum) {
+ status = IXGBE_ERR_EEPROM_CHECKSUM;
+ ERROR_REPORT1(IXGBE_ERROR_INVALID_STATE,
+ "Invalid EEPROM checksum");
+ }
+
+ /* If the user cares, return the calculated checksum */
+ if (checksum_val)
+ *checksum_val = checksum;
+
+ return status;
+}
+
+/**
+ * ixgbe_update_eeprom_checksum_X550 - Updates the EEPROM checksum and flash
+ * @hw: pointer to hardware structure
+ *
+ * After writing EEPROM to shadow RAM using EEWR register, software calculates
+ * checksum and updates the EEPROM and instructs the hardware to update
+ * the flash.
+ **/
+s32 ixgbe_update_eeprom_checksum_X550(struct ixgbe_hw *hw)
+{
+ s32 status;
+ u16 checksum = 0;
+
+ DEBUGFUNC("ixgbe_update_eeprom_checksum_X550");
+
+ /* Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ status = ixgbe_read_ee_hostif_X550(hw, 0, &checksum);
+ if (status) {
+ DEBUGOUT("EEPROM read failed\n");
+ return status;
+ }
+
+ status = ixgbe_calc_eeprom_checksum_X550(hw);
+ if (status < 0)
+ return status;
+
+ checksum = (u16)(status & 0xffff);
+
+ status = ixgbe_write_ee_hostif_X550(hw, IXGBE_EEPROM_CHECKSUM,
+ checksum);
+ if (status)
+ return status;
+
+ status = ixgbe_update_flash_X550(hw);
+
+ return status;
+}
+
+/**
+ * ixgbe_update_flash_X550 - Instruct HW to copy EEPROM to Flash device
+ * @hw: pointer to hardware structure
+ *
+ * Issue a shadow RAM dump to FW to copy EEPROM from shadow RAM to the flash.
+ **/
+s32 ixgbe_update_flash_X550(struct ixgbe_hw *hw)
+{
+ s32 status = IXGBE_SUCCESS;
+ union ixgbe_hic_hdr2 buffer;
+
+ DEBUGFUNC("ixgbe_update_flash_X550");
+
+ buffer.req.cmd = FW_SHADOW_RAM_DUMP_CMD;
+ buffer.req.buf_lenh = 0;
+ buffer.req.buf_lenl = FW_SHADOW_RAM_DUMP_LEN;
+ buffer.req.checksum = FW_DEFAULT_CHECKSUM;
+
+ status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
+ sizeof(buffer),
+ IXGBE_HI_COMMAND_TIMEOUT, FALSE);
+
+ return status;
+}
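+
+/*
+ * A sketch of a complete NVM update using the pieces above (offset
+ * and value are hypothetical):
+ *
+ *	ixgbe_write_ee_hostif_X550(hw, offset, value);	(shadow RAM)
+ *	ixgbe_update_eeprom_checksum_X550(hw);	(re-checksum and flash)
+ *
+ * ixgbe_update_eeprom_checksum_X550() ends with the shadow RAM dump
+ * issued by ixgbe_update_flash_X550() above, so no separate flash
+ * call is needed.
+ */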
+
+/**
+ * ixgbe_get_supported_physical_layer_X550em - Returns physical layer type
+ * @hw: pointer to hardware structure
+ *
+ * Determines physical layer capabilities of the current configuration.
+ **/
+u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw)
+{
+ u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN;
+ u16 ext_ability = 0;
+
+ DEBUGFUNC("ixgbe_get_supported_physical_layer_X550em");
+
+ hw->phy.ops.identify(hw);
+
+ switch (hw->phy.type) {
+ case ixgbe_phy_x550em_kr:
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR |
+ IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+ break;
+ case ixgbe_phy_x550em_kx4:
+ physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
+ IXGBE_PHYSICAL_LAYER_1000BASE_KX;
+ break;
+ case ixgbe_phy_x550em_ext_t:
+ hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY,
+ IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+ &ext_ability);
+ if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T;
+ if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY)
+ physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T;
+ break;
+ default:
+ break;
+ }
+
+ if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber)
+ physical_layer = ixgbe_get_supported_phy_sfp_layer_generic(hw);
+
+ return physical_layer;
+}
+
+/**
+ * ixgbe_get_bus_info_X550em - Set PCI bus info
+ * @hw: pointer to hardware structure
+ *
+ * Sets bus link width and speed to unknown because X550em is
+ * not a PCI device.
+ **/
+s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw)
+{
+
+ DEBUGFUNC("ixgbe_get_bus_info_x550em");
+
+ hw->bus.width = ixgbe_bus_width_unknown;
+ hw->bus.speed = ixgbe_bus_speed_unknown;
+
+ hw->mac.ops.set_lan_id(hw);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_disable_rx_x550 - Disable RX unit
+ * @hw: pointer to hardware structure
+ *
+ * Disables the Rx DMA unit for x550.
+ **/
+void ixgbe_disable_rx_x550(struct ixgbe_hw *hw)
+{
+ u32 rxctrl, pfdtxgswc;
+ s32 status;
+ struct ixgbe_hic_disable_rxen fw_cmd;
+
+ DEBUGFUNC("ixgbe_enable_rx_dma_x550");
+
+ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+ if (rxctrl & IXGBE_RXCTRL_RXEN) {
+ pfdtxgswc = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC);
+ if (pfdtxgswc & IXGBE_PFDTXGSWC_VT_LBEN) {
+ pfdtxgswc &= ~IXGBE_PFDTXGSWC_VT_LBEN;
+ IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, pfdtxgswc);
+ hw->mac.set_lben = TRUE;
+ } else {
+ hw->mac.set_lben = FALSE;
+ }
+
+ fw_cmd.hdr.cmd = FW_DISABLE_RXEN_CMD;
+ fw_cmd.hdr.buf_len = FW_DISABLE_RXEN_LEN;
+ fw_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+ fw_cmd.port_number = (u8)hw->bus.lan_id;
+
+ status = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
+ sizeof(struct ixgbe_hic_disable_rxen),
+ IXGBE_HI_COMMAND_TIMEOUT, TRUE);
+
+ /* If we fail - disable RX using register write */
+ if (status) {
+ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+ if (rxctrl & IXGBE_RXCTRL_RXEN) {
+ rxctrl &= ~IXGBE_RXCTRL_RXEN;
+ IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl);
+ }
+ }
+ }
+}
+
+/**
+ * ixgbe_enter_lplu_t_x550em - Transition to low power states
+ * @hw: pointer to hardware structure
+ *
+ * Configures Low Power Link Up on transition to low power states
+ * (from D0 to non-D0). Link is required to enter LPLU so avoid resetting the
+ * X557 PHY immediately prior to entering LPLU.
+ **/
+s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
+{
+ u16 an_10g_cntl_reg, autoneg_reg, speed;
+ s32 status;
+ ixgbe_link_speed lcd_speed;
+ u32 save_autoneg;
+ bool link_up;
+
+ /* If blocked by MNG FW, then don't restart AN */
+ if (ixgbe_check_reset_blocked(hw))
+ return IXGBE_SUCCESS;
+
+ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ status = ixgbe_read_eeprom(hw, NVM_INIT_CTRL_3, &hw->eeprom.ctrl_word_3);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If link is down, LPLU is disabled in the NVM, or both WoL and
+ * manageability are disabled, then force link down by entering low
+ * power mode.
+ */
+ if (!link_up || !(hw->eeprom.ctrl_word_3 & NVM_INIT_CTRL_3_LPLU) ||
+ !(hw->wol_enabled || ixgbe_mng_present(hw)))
+ return ixgbe_set_copper_phy_power(hw, FALSE);
+
+ /* Determine LCD */
+ status = ixgbe_get_lcd_t_x550em(hw, &lcd_speed);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If no valid LCD link speed, then force link down and exit. */
+ if (lcd_speed == IXGBE_LINK_SPEED_UNKNOWN)
+ return ixgbe_set_copper_phy_power(hw, FALSE);
+
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &speed);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If no link now, speed is invalid so take link down */
+ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+ if (status != IXGBE_SUCCESS)
+ return ixgbe_set_copper_phy_power(hw, FALSE);
+
+ /* clear everything but the speed bits */
+ speed &= IXGBE_MDIO_AUTO_NEG_VEN_STAT_SPEED_MASK;
+
+ /* If current speed is already LCD, then exit. */
+ if (((speed == IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB) &&
+ (lcd_speed == IXGBE_LINK_SPEED_1GB_FULL)) ||
+ ((speed == IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB) &&
+ (lcd_speed == IXGBE_LINK_SPEED_10GB_FULL)))
+ return status;
+
+ /* Clear AN completed indication */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ status = hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &an_10g_cntl_reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ status = hw->phy.ops.read_reg(hw,
+ IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_reg);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ save_autoneg = hw->phy.autoneg_advertised;
+
+ /* Set up link at the lowest common link speed */
+ status = hw->mac.ops.setup_link(hw, lcd_speed, FALSE);
+
+ /* restore autoneg from before setting lplu speed */
+ hw->phy.autoneg_advertised = save_autoneg;
+
+ return status;
+}
+
+/**
+ * ixgbe_get_lcd_t_x550em - Determine lowest common denominator
+ * @hw: pointer to hardware structure
+ * @lcd_speed: pointer to lowest common link speed
+ *
+ * Determine lowest common link speed with link partner.
+ **/
+s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *lcd_speed)
+{
+ u16 an_lp_status;
+ s32 status;
+ u16 word = hw->eeprom.ctrl_word_3;
+
+ *lcd_speed = IXGBE_LINK_SPEED_UNKNOWN;
+
+ status = hw->phy.ops.read_reg(hw, IXGBE_AUTO_NEG_LP_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &an_lp_status);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If link partner advertised 1G, return 1G */
+ if (an_lp_status & IXGBE_AUTO_NEG_LP_1000BASE_CAP) {
+ *lcd_speed = IXGBE_LINK_SPEED_1GB_FULL;
+ return status;
+ }
+
+ /* If 10G disabled for LPLU via NVM D10GMP, then return no valid LCD */
+ if ((hw->bus.lan_id && (word & NVM_INIT_CTRL_3_D10GMP_PORT1)) ||
+ (word & NVM_INIT_CTRL_3_D10GMP_PORT0))
+ return status;
+
+ /* Link partner not capable of lower speeds, return 10G */
+ *lcd_speed = IXGBE_LINK_SPEED_10GB_FULL;
+ return status;
+}
+
+/**
+ * ixgbe_setup_fc_X550em - Set up flow control
+ * @hw: pointer to hardware structure
+ *
+ * Called at init time to set up flow control.
+ **/
+s32 ixgbe_setup_fc_X550em(struct ixgbe_hw *hw)
+{
+ s32 ret_val = IXGBE_SUCCESS;
+ u32 pause, asm_dir, reg_val;
+
+ DEBUGFUNC("ixgbe_setup_fc_X550em");
+
+ /* Validate the requested mode */
+ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+ ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED,
+ "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+ ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
+ goto out;
+ }
+
+ /* 10gig parts do not have a word in the EEPROM to determine the
+ * default flow control setting, so we explicitly set it to full.
+ */
+ if (hw->fc.requested_mode == ixgbe_fc_default)
+ hw->fc.requested_mode = ixgbe_fc_full;
+
+ /* Determine PAUSE and ASM_DIR bits. */
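+ /* The (pause, asm_dir) pairs below follow the IEEE 802.3 Annex 28B
+ * pause advertisement encoding: none = (0,0), Tx pause only = (0,1),
+ * and symmetric/asymmetric Rx = (1,1).
+ */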
+ switch (hw->fc.requested_mode) {
+ case ixgbe_fc_none:
+ pause = 0;
+ asm_dir = 0;
+ break;
+ case ixgbe_fc_tx_pause:
+ pause = 0;
+ asm_dir = 1;
+ break;
+ case ixgbe_fc_rx_pause:
+ /* Rx Flow control is enabled and Tx Flow control is
+ * disabled by software override. Since there really
+ * isn't a way to advertise that we are capable of RX
+ * Pause ONLY, we will advertise that we support both
+ * symmetric and asymmetric Rx PAUSE, as such we fall
+ * through to the fc_full statement. Later, we will
+ * disable the adapter's ability to send PAUSE frames.
+ */
+ case ixgbe_fc_full:
+ pause = 1;
+ asm_dir = 1;
+ break;
+ default:
+ ERROR_REPORT1(IXGBE_ERROR_ARGUMENT,
+ "Flow control param set incorrectly\n");
+ ret_val = IXGBE_ERR_CONFIG;
+ goto out;
+ }
+
+ if (hw->phy.media_type == ixgbe_media_type_backplane) {
+ ret_val = ixgbe_read_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+ if (ret_val != IXGBE_SUCCESS)
+ goto out;
+ reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+ IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+ if (pause)
+ reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+ if (asm_dir)
+ reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+ ret_val = ixgbe_write_iosf_sb_reg_x550(hw,
+ IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+ /* Not all devices fully support AN. */
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR)
+ hw->fc.disable_fc_autoneg = TRUE;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * ixgbe_set_mux - Set mux for port 1 access with CS4227
+ * @hw: pointer to hardware structure
+ * @state: set mux if 1, clear if 0
+ */
+static void ixgbe_set_mux(struct ixgbe_hw *hw, u8 state)
+{
+ u32 esdp;
+
+ if (!hw->bus.lan_id)
+ return;
+ esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+ if (state)
+ esdp |= IXGBE_ESDP_SDP1;
+ else
+ esdp &= ~IXGBE_ESDP_SDP1;
+ IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_acquire_swfw_sync_X550em - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore and sets the I2C MUX
+ **/
+s32 ixgbe_acquire_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
+{
+ s32 status;
+
+ DEBUGFUNC("ixgbe_acquire_swfw_sync_X550em");
+
+ status = ixgbe_acquire_swfw_sync_X540(hw, mask);
+ if (status)
+ return status;
+
+ if (mask & IXGBE_GSSR_I2C_MASK)
+ ixgbe_set_mux(hw, 1);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_release_swfw_sync_X550em - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore and sets the I2C MUX
+ **/
+void ixgbe_release_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
+{
+ DEBUGFUNC("ixgbe_release_swfw_sync_X550em");
+
+ if (mask & IXGBE_GSSR_I2C_MASK)
+ ixgbe_set_mux(hw, 0);
+
+ ixgbe_release_swfw_sync_X540(hw, mask);
+}
+
+/**
+ * ixgbe_handle_lasi_ext_t_x550em - Handle external Base T PHY interrupt
+ * @hw: pointer to hardware structure
+ *
+ * Handles an external Base T PHY interrupt. If the interrupt is a high
+ * temperature failure alarm, returns an error; if it is a link status
+ * change, sets up the internal/external PHY link.
+ *
+ * Returns IXGBE_ERR_OVERTEMP if the interrupt is a high temperature
+ * failure alarm, else returns the PHY access status.
+ */
+s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw)
+{
+ bool lsc;
+ u32 status;
+
+ status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ if (lsc)
+ return ixgbe_setup_internal_phy(hw);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_setup_mac_link_t_X550em - Sets the auto advertised link speed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg_wait_to_complete: TRUE when waiting for completion is needed
+ *
+ * Setup internal/external PHY link speed based on link speed, then set
+ * external PHY auto advertised link speed.
+ *
+ * Returns error status for any failure
+ **/
+s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg_wait_to_complete)
+{
+ s32 status;
+ ixgbe_link_speed force_speed;
+
+ DEBUGFUNC("ixgbe_setup_mac_link_t_X550em");
+
+ /* Set up the internal/external PHY link speed to iXFI (10G), unless
+ * only 1G is auto advertised, in which case set up the KX link.
+ */
+ if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+ force_speed = IXGBE_LINK_SPEED_10GB_FULL;
+ else
+ force_speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+ /* If internal link mode is XFI, then setup XFI internal link. */
+ if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+ status = ixgbe_setup_ixfi_x550em(hw, &force_speed);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+ }
+
+ return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait_to_complete);
+}
+
+/**
+ * ixgbe_check_link_t_X550em - Determine link and speed status
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @link_up: TRUE when link is up
+ * @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ * Check that both the MAC and X557 external PHY have link.
+ **/
+s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *link_up, bool link_up_wait_to_complete)
+{
+ u32 status;
+ u16 autoneg_status;
+
+ if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
+ return IXGBE_ERR_CONFIG;
+
+ status = ixgbe_check_mac_link_generic(hw, speed, link_up,
+ link_up_wait_to_complete);
+
+ /* If check link fails or MAC link is not up, then return */
+ if (status != IXGBE_SUCCESS || !(*link_up))
+ return status;
+
+ /* MAC link is up, so check external PHY link. The status bit is
+ * latched, so read it twice back to back so that the second read
+ * reflects the current status.
+ */
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_status);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
+ IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+ &autoneg_status);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* If external PHY link is not up, then indicate link not up */
+ if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS))
+ *link_up = FALSE;
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_reset_phy_t_X550em - Performs X557 PHY reset and enables LASI
+ * @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw)
+{
+ s32 status;
+
+ status = ixgbe_reset_phy_generic(hw);
+
+ if (status != IXGBE_SUCCESS)
+ return status;
+
+ /* Configure Link Status Alarm and Temperature Threshold interrupts */
+ return ixgbe_enable_lasi_ext_t_x550em(hw);
+}
+
+/**
+ * ixgbe_led_on_t_X550em - Turns on the software controllable LEDs.
+ * @hw: pointer to hardware structure
+ * @led_idx: led number to turn on
+ **/
+s32 ixgbe_led_on_t_X550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+ u16 phy_data;
+
+ DEBUGFUNC("ixgbe_led_on_t_X550em");
+
+ if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+ return IXGBE_ERR_PARAM;
+
+ /* To turn on the LED, set mode to ON. */
+ ixgbe_read_phy_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+ phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK;
+ ixgbe_write_phy_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+
+ return IXGBE_SUCCESS;
+}
+
+/**
+ * ixgbe_led_off_t_X550em - Turns off the software controllable LEDs.
+ * @hw: pointer to hardware structure
+ * @led_idx: led number to turn off
+ **/
+s32 ixgbe_led_off_t_X550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+ u16 phy_data;
+
+ DEBUGFUNC("ixgbe_led_off_t_X550em");
+
+ if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+ return IXGBE_ERR_PARAM;
+
+ /* To turn off the LED, set mode to OFF. */
+ ixgbe_read_phy_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+ phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK;
+ ixgbe_write_phy_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+ IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+
+ return IXGBE_SUCCESS;
+}
+
diff --git a/sys/dev/ixgbe/ixgbe_x550.h b/sys/dev/ixgbe/ixgbe_x550.h
new file mode 100644
index 0000000..8a544ec
--- /dev/null
+++ b/sys/dev/ixgbe/ixgbe_x550.h
@@ -0,0 +1,109 @@
+/******************************************************************************
+
+ Copyright (c) 2001-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD$*/
+
+#ifndef _IXGBE_X550_H_
+#define _IXGBE_X550_H_
+
+#include "ixgbe_type.h"
+
+s32 ixgbe_dmac_config_X550(struct ixgbe_hw *hw);
+s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw);
+s32 ixgbe_dmac_update_tcs_X550(struct ixgbe_hw *hw);
+
+s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw);
+s32 ixgbe_update_eeprom_checksum_X550(struct ixgbe_hw *hw);
+s32 ixgbe_calc_eeprom_checksum_X550(struct ixgbe_hw *hw);
+s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer, u32 buffer_size);
+s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw, u16 *checksum_val);
+s32 ixgbe_update_flash_X550(struct ixgbe_hw *hw);
+s32 ixgbe_write_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+ u16 offset, u16 words, u16 *data);
+s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 data);
+s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+ u16 offset, u16 words, u16 *data);
+s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 *data);
+s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 *data);
+s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
+ u16 data);
+s32 ixgbe_set_eee_X550(struct ixgbe_hw *hw, bool enable_eee);
+s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee);
+void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, bool enable,
+ unsigned int pool);
+void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw,
+ bool enable, int vf);
+s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u32 data);
+s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+ u32 device_type, u32 *data);
+void ixgbe_disable_mdd_X550(struct ixgbe_hw *hw);
+void ixgbe_enable_mdd_X550(struct ixgbe_hw *hw);
+void ixgbe_mdd_event_X550(struct ixgbe_hw *hw, u32 *vf_bitmap);
+void ixgbe_restore_mdd_vf_X550(struct ixgbe_hw *hw, u32 vf);
+enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
+ ixgbe_link_speed *speed, bool *autoneg);
+void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw);
+s32 ixgbe_setup_kx4_x550em(struct ixgbe_hw *hw);
+s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw);
+s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw);
+s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw);
+u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw);
+void ixgbe_disable_rx_x550(struct ixgbe_hw *hw);
+s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *lcd_speed);
+s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw);
+s32 ixgbe_acquire_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask);
+void ixgbe_release_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask);
+s32 ixgbe_setup_fc_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg_wait_to_complete);
+s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
+ ixgbe_link_speed speed,
+ bool autoneg_wait_to_complete);
+s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+ bool *link_up, bool link_up_wait_to_complete);
+s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_identify_sfp_module_X550em(struct ixgbe_hw *hw);
+s32 ixgbe_led_on_t_X550em(struct ixgbe_hw *hw, u32 led_idx);
+s32 ixgbe_led_off_t_X550em(struct ixgbe_hw *hw, u32 led_idx);
+#endif /* _IXGBE_X550_H_ */
diff --git a/sys/dev/ixl/ixl.h b/sys/dev/ixl/ixl.h
index df8f04f..1ddfbca3 100644
--- a/sys/dev/ixl/ixl.h
+++ b/sys/dev/ixl/ixl.h
@@ -324,7 +324,7 @@
#define IXL_SET_IMCASTS(vsi, count) (vsi)->imcasts = (count)
#define IXL_SET_OMCASTS(vsi, count) (vsi)->omcasts = (count)
#define IXL_SET_IQDROPS(vsi, count) (vsi)->iqdrops = (count)
-#define IXL_SET_OQDROPS(vsi, count) (vsi)->iqdrops = (count)
+#define IXL_SET_OQDROPS(vsi, count) (vsi)->oqdrops = (count)
#define IXL_SET_NOPROTO(vsi, count) (vsi)->noproto = (count)
#else
#define IXL_SET_IPACKETS(vsi, count) (vsi)->ifp->if_ipackets = (count)
diff --git a/sys/dev/mii/brgphy.c b/sys/dev/mii/brgphy.c
index 0d22430..32914a3 100644
--- a/sys/dev/mii/brgphy.c
+++ b/sys/dev/mii/brgphy.c
@@ -160,25 +160,33 @@ static const struct mii_phy_funcs brgphy_funcs = {
brgphy_reset
};
-#define HS21_PRODUCT_ID "IBM eServer BladeCenter HS21"
-#define HS21_BCM_CHIPID 0x57081021
+static const struct hs21_type {
+ const uint32_t id;
+ const char *prod;
+} hs21_type_lists[] = {
+ { 0x57081021, "IBM eServer BladeCenter HS21" },
+ { 0x57081011, "IBM eServer BladeCenter HS21 -[8853PAU]-" },
+};
static int
detect_hs21(struct bce_softc *bce_sc)
{
char *sysenv;
- int found;
+ int found, i;
found = 0;
- if (bce_sc->bce_chipid == HS21_BCM_CHIPID) {
- sysenv = kern_getenv("smbios.system.product");
- if (sysenv != NULL) {
- if (strncmp(sysenv, HS21_PRODUCT_ID,
- strlen(HS21_PRODUCT_ID)) == 0)
- found = 1;
- freeenv(sysenv);
+ sysenv = kern_getenv("smbios.system.product");
+ if (sysenv == NULL)
+ return (found);
+ for (i = 0; i < nitems(hs21_type_lists); i++) {
+ if (bce_sc->bce_chipid == hs21_type_lists[i].id &&
+ strncmp(sysenv, hs21_type_lists[i].prod,
+ strlen(hs21_type_lists[i].prod)) == 0) {
+ found++;
+ break;
}
}
+ freeenv(sysenv);
return (found);
}
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c
index fe8c08a..983caa3 100644
--- a/sys/dev/mxge/if_mxge.c
+++ b/sys/dev/mxge/if_mxge.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
+#include <sys/zlib.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -58,7 +59,6 @@ __FBSDID("$FreeBSD$");
#include <net/if_types.h>
#include <net/if_vlan_var.h>
-#include <net/zlib.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
diff --git a/sys/dev/pccbb/pccbb_pci.c b/sys/dev/pccbb/pccbb_pci.c
index d4f1baa..7dca418 100644
--- a/sys/dev/pccbb/pccbb_pci.c
+++ b/sys/dev/pccbb/pccbb_pci.c
@@ -259,32 +259,6 @@ cbb_pci_probe(device_t brdev)
}
/*
- * Still need this because the pci code only does power for type 0
- * header devices.
- */
-static void
-cbb_powerstate_d0(device_t dev)
-{
- u_int32_t membase, irq;
-
- if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
- /* Save important PCI config data. */
- membase = pci_read_config(dev, CBBR_SOCKBASE, 4);
- irq = pci_read_config(dev, PCIR_INTLINE, 4);
-
- /* Reset the power state. */
- device_printf(dev, "chip is in D%d power mode "
- "-- setting to D0\n", pci_get_powerstate(dev));
-
- pci_set_powerstate(dev, PCI_POWERSTATE_D0);
-
- /* Restore PCI config data. */
- pci_write_config(dev, CBBR_SOCKBASE, membase, 4);
- pci_write_config(dev, PCIR_INTLINE, irq, 4);
- }
-}
-
-/*
* Print out the config space
*/
static void
@@ -321,15 +295,15 @@ cbb_pci_attach(device_t brdev)
sc->cbdev = NULL;
sc->exca[0].pccarddev = NULL;
sc->domain = pci_get_domain(brdev);
- sc->bus.sec = pci_read_config(brdev, PCIR_SECBUS_2, 1);
- sc->bus.sub = pci_read_config(brdev, PCIR_SUBBUS_2, 1);
sc->pribus = pcib_get_bus(parent);
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
pci_write_config(brdev, PCIR_PRIBUS_2, sc->pribus, 1);
pcib_setup_secbus(brdev, &sc->bus, 1);
+#else
+ sc->bus.sec = pci_read_config(brdev, PCIR_SECBUS_2, 1);
+ sc->bus.sub = pci_read_config(brdev, PCIR_SUBBUS_2, 1);
#endif
SLIST_INIT(&sc->rl);
- cbb_powerstate_d0(brdev);
rid = CBBR_SOCKBASE;
sc->base_res = bus_alloc_resource_any(brdev, SYS_RES_MEMORY, &rid,
@@ -467,18 +441,13 @@ cbb_chipinit(struct cbb_softc *sc)
uint32_t mux, sysctrl, reg;
/* Set CardBus latency timer */
- if (pci_read_config(sc->dev, PCIR_SECLAT_1, 1) < 0x20)
- pci_write_config(sc->dev, PCIR_SECLAT_1, 0x20, 1);
+ if (pci_read_config(sc->dev, PCIR_SECLAT_2, 1) < 0x20)
+ pci_write_config(sc->dev, PCIR_SECLAT_2, 0x20, 1);
/* Set PCI latency timer */
if (pci_read_config(sc->dev, PCIR_LATTIMER, 1) < 0x20)
pci_write_config(sc->dev, PCIR_LATTIMER, 0x20, 1);
- /* Restore bus configuration */
- pci_write_config(sc->dev, PCIR_PRIBUS_2, sc->pribus, 1);
- pci_write_config(sc->dev, PCIR_SECBUS_2, sc->bus.sec, 1);
- pci_write_config(sc->dev, PCIR_SUBBUS_2, sc->bus.sub, 1);
-
/* Enable DMA, memory access for this card and I/O access for children */
pci_enable_busmaster(sc->dev);
pci_enable_io(sc->dev, SYS_RES_IOPORT);
@@ -906,15 +875,10 @@ cbb_pci_resume(device_t brdev)
* from D0 and back to D0 cause the bridge to lose its config space, so
* all the bus mappings and such are preserved.
*
- * For most drivers, the PCI layer handles this saving. However, since
- * there's much black magic and arcane art hidden in these few lines of
- * code that would be difficult to transition into the PCI
- * layer. chipinit was several years of trial and error to write.
+ * The PCI layer handles standard PCI registers like the
+ * command register and BARs, but cbb-specific registers are
+ * handled here.
*/
- pci_write_config(brdev, CBBR_SOCKBASE, rman_get_start(sc->base_res), 4);
- DEVPRINTF((brdev, "PCI Memory allocated: %08lx\n",
- rman_get_start(sc->base_res)));
-
sc->chipinit(sc);
/* reset interrupt -- Do we really need to do this? */
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 3fab486..b4c6151 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -583,12 +583,24 @@ pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
case PCIM_HDRTYPE_NORMAL:
cfg->subvendor = REG(PCIR_SUBVEND_0, 2);
cfg->subdevice = REG(PCIR_SUBDEV_0, 2);
+ cfg->mingnt = REG(PCIR_MINGNT, 1);
+ cfg->maxlat = REG(PCIR_MAXLAT, 1);
cfg->nummaps = PCI_MAXMAPS_0;
break;
case PCIM_HDRTYPE_BRIDGE:
+ cfg->bridge.br_seclat = REG(PCIR_SECLAT_1, 1);
+ cfg->bridge.br_subbus = REG(PCIR_SUBBUS_1, 1);
+ cfg->bridge.br_secbus = REG(PCIR_SECBUS_1, 1);
+ cfg->bridge.br_pribus = REG(PCIR_PRIBUS_1, 1);
+ cfg->bridge.br_control = REG(PCIR_BRIDGECTL_1, 2);
cfg->nummaps = PCI_MAXMAPS_1;
break;
case PCIM_HDRTYPE_CARDBUS:
+ cfg->bridge.br_seclat = REG(PCIR_SECLAT_2, 1);
+ cfg->bridge.br_subbus = REG(PCIR_SUBBUS_2, 1);
+ cfg->bridge.br_secbus = REG(PCIR_SECBUS_2, 1);
+ cfg->bridge.br_pribus = REG(PCIR_PRIBUS_2, 1);
+ cfg->bridge.br_control = REG(PCIR_BRIDGECTL_2, 2);
cfg->subvendor = REG(PCIR_SUBVEND_2, 2);
cfg->subdevice = REG(PCIR_SUBDEV_2, 2);
cfg->nummaps = PCI_MAXMAPS_2;
@@ -641,9 +653,6 @@ pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
cfg->intpin = REG(PCIR_INTPIN, 1);
cfg->intline = REG(PCIR_INTLINE, 1);
- cfg->mingnt = REG(PCIR_MINGNT, 1);
- cfg->maxlat = REG(PCIR_MAXLAT, 1);
-
cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 0;
cfg->hdrtype &= ~PCIM_MFDEV;
STAILQ_INIT(&cfg->maps);
@@ -4425,9 +4434,17 @@ pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
*result = cfg->cachelnsz;
break;
case PCI_IVAR_MINGNT:
+ if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
+ *result = -1;
+ return (EINVAL);
+ }
*result = cfg->mingnt;
break;
case PCI_IVAR_MAXLAT:
+ if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
+ *result = -1;
+ return (EINVAL);
+ }
*result = cfg->maxlat;
break;
case PCI_IVAR_LATTIMER:
@@ -5125,16 +5142,6 @@ pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
/*
- * Only do header type 0 devices. Type 1 devices are bridges,
- * which we know need special treatment. Type 2 devices are
- * cardbus bridges which also require special treatment.
- * Other types are unknown, and we err on the side of safety
- * by ignoring them.
- */
- if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
- return;
-
- /*
* Restore the device to full power mode. We must do this
* before we restore the registers because moving from D3 to
* D0 will cause the chip's BARs and some other registers to
@@ -5144,16 +5151,44 @@ pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
*/
if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
pci_set_powerstate(dev, PCI_POWERSTATE_D0);
- pci_restore_bars(dev);
pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
- pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
- pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
+ switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_NORMAL:
+ pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
+ pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
+ break;
+ case PCIM_HDRTYPE_BRIDGE:
+ pci_write_config(dev, PCIR_SECLAT_1,
+ dinfo->cfg.bridge.br_seclat, 1);
+ pci_write_config(dev, PCIR_SUBBUS_1,
+ dinfo->cfg.bridge.br_subbus, 1);
+ pci_write_config(dev, PCIR_SECBUS_1,
+ dinfo->cfg.bridge.br_secbus, 1);
+ pci_write_config(dev, PCIR_PRIBUS_1,
+ dinfo->cfg.bridge.br_pribus, 1);
+ pci_write_config(dev, PCIR_BRIDGECTL_1,
+ dinfo->cfg.bridge.br_control, 2);
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ pci_write_config(dev, PCIR_SECLAT_2,
+ dinfo->cfg.bridge.br_seclat, 1);
+ pci_write_config(dev, PCIR_SUBBUS_2,
+ dinfo->cfg.bridge.br_subbus, 1);
+ pci_write_config(dev, PCIR_SECBUS_2,
+ dinfo->cfg.bridge.br_secbus, 1);
+ pci_write_config(dev, PCIR_PRIBUS_2,
+ dinfo->cfg.bridge.br_pribus, 1);
+ pci_write_config(dev, PCIR_BRIDGECTL_2,
+ dinfo->cfg.bridge.br_control, 2);
+ break;
+ }
+ pci_restore_bars(dev);
/*
* Restore extended capabilities for PCI-Express and PCI-X
@@ -5222,40 +5257,57 @@ pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
int ps;
/*
- * Only do header type 0 devices. Type 1 devices are bridges, which
- * we know need special treatment. Type 2 devices are cardbus bridges
- * which also require special treatment. Other types are unknown, and
- * we err on the side of safety by ignoring them. Powering down
- * bridges should not be undertaken lightly.
- */
- if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
- return;
-
- /*
* Some drivers apparently write to these registers w/o updating our
* cached copy. No harm happens if we update the copy, so do so here
* so we can restore them. The COMMAND register is modified by the
* bus w/o updating the cache. This should represent the normally
- * writable portion of the 'defined' part of type 0 headers. In
- * theory we also need to save/restore the PCI capability structures
- * we know about, but apart from power we don't know any that are
- * writable.
+ * writable portion of the 'defined' part of type 0/1/2 headers.
*/
- dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
- dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
- dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
- dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
+ switch (dinfo->cfg.hdrtype & PCIM_HDRTYPE) {
+ case PCIM_HDRTYPE_NORMAL:
+ dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
+ dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
+ dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
+ dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
+ break;
+ case PCIM_HDRTYPE_BRIDGE:
+ dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
+ PCIR_SECLAT_1, 1);
+ dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
+ PCIR_SUBBUS_1, 1);
+ dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
+ PCIR_SECBUS_1, 1);
+ dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
+ PCIR_PRIBUS_1, 1);
+ dinfo->cfg.bridge.br_control = pci_read_config(dev,
+ PCIR_BRIDGECTL_1, 2);
+ break;
+ case PCIM_HDRTYPE_CARDBUS:
+ dinfo->cfg.bridge.br_seclat = pci_read_config(dev,
+ PCIR_SECLAT_2, 1);
+ dinfo->cfg.bridge.br_subbus = pci_read_config(dev,
+ PCIR_SUBBUS_2, 1);
+ dinfo->cfg.bridge.br_secbus = pci_read_config(dev,
+ PCIR_SECBUS_2, 1);
+ dinfo->cfg.bridge.br_pribus = pci_read_config(dev,
+ PCIR_PRIBUS_2, 1);
+ dinfo->cfg.bridge.br_control = pci_read_config(dev,
+ PCIR_BRIDGECTL_2, 2);
+ dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_2, 2);
+ dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_2, 2);
+ break;
+ }
if (dinfo->cfg.pcie.pcie_location != 0)
pci_cfg_save_pcie(dev, dinfo);
diff --git a/sys/dev/pci/pci_pci.c b/sys/dev/pci/pci_pci.c
index d0d8fb7..789a918 100644
--- a/sys/dev/pci/pci_pci.c
+++ b/sys/dev/pci/pci_pci.c
@@ -553,18 +553,22 @@ void
pcib_setup_secbus(device_t dev, struct pcib_secbus *bus, int min_count)
{
char buf[64];
- int error, rid;
+ int error, rid, sec_reg;
switch (pci_read_config(dev, PCIR_HDRTYPE, 1) & PCIM_HDRTYPE) {
case PCIM_HDRTYPE_BRIDGE:
+ sec_reg = PCIR_SECBUS_1;
bus->sub_reg = PCIR_SUBBUS_1;
break;
case PCIM_HDRTYPE_CARDBUS:
+ sec_reg = PCIR_SECBUS_2;
bus->sub_reg = PCIR_SUBBUS_2;
break;
default:
panic("not a PCI bridge");
}
+ bus->sec = pci_read_config(dev, sec_reg, 1);
+ bus->sub = pci_read_config(dev, bus->sub_reg, 1);
bus->dev = dev;
bus->rman.rm_start = 0;
bus->rman.rm_end = PCI_BUSMAX;
@@ -849,20 +853,16 @@ pcib_set_mem_decode(struct pcib_softc *sc)
static void
pcib_cfg_save(struct pcib_softc *sc)
{
+#ifndef NEW_PCIB
device_t dev;
+ uint16_t command;
dev = sc->dev;
- sc->command = pci_read_config(dev, PCIR_COMMAND, 2);
- sc->pribus = pci_read_config(dev, PCIR_PRIBUS_1, 1);
- sc->bus.sec = pci_read_config(dev, PCIR_SECBUS_1, 1);
- sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1);
- sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2);
- sc->seclat = pci_read_config(dev, PCIR_SECLAT_1, 1);
-#ifndef NEW_PCIB
- if (sc->command & PCIM_CMD_PORTEN)
+ command = pci_read_config(dev, PCIR_COMMAND, 2);
+ if (command & PCIM_CMD_PORTEN)
pcib_get_io_decode(sc);
- if (sc->command & PCIM_CMD_MEMEN)
+ if (command & PCIM_CMD_MEMEN)
pcib_get_mem_decode(sc);
#endif
}
@@ -874,21 +874,18 @@ static void
pcib_cfg_restore(struct pcib_softc *sc)
{
device_t dev;
-
+#ifndef NEW_PCIB
+ uint16_t command;
+#endif
dev = sc->dev;
- pci_write_config(dev, PCIR_COMMAND, sc->command, 2);
- pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1);
- pci_write_config(dev, PCIR_SECBUS_1, sc->bus.sec, 1);
- pci_write_config(dev, PCIR_SUBBUS_1, sc->bus.sub, 1);
- pci_write_config(dev, PCIR_BRIDGECTL_1, sc->bridgectl, 2);
- pci_write_config(dev, PCIR_SECLAT_1, sc->seclat, 1);
#ifdef NEW_PCIB
pcib_write_windows(sc, WIN_IO | WIN_MEM | WIN_PMEM);
#else
- if (sc->command & PCIM_CMD_PORTEN)
+ command = pci_read_config(dev, PCIR_COMMAND, 2);
+ if (command & PCIM_CMD_PORTEN)
pcib_set_io_decode(sc);
- if (sc->command & PCIM_CMD_MEMEN)
+ if (command & PCIM_CMD_MEMEN)
pcib_set_mem_decode(sc);
#endif
}
@@ -922,7 +919,11 @@ pcib_attach_common(device_t dev)
* Get current bridge configuration.
*/
sc->domain = pci_get_domain(dev);
- sc->secstat = pci_read_config(dev, PCIR_SECSTAT_1, 2);
+#if !(defined(NEW_PCIB) && defined(PCI_RES_BUS))
+ sc->bus.sec = pci_read_config(dev, PCIR_SECBUS_1, 1);
+ sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1);
+#endif
+ sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2);
pcib_cfg_save(sc);
/*
@@ -950,7 +951,7 @@ pcib_attach_common(device_t dev)
* Quirk handling.
*/
switch (pci_get_devid(dev)) {
-#if !defined(NEW_PCIB) && !defined(PCI_RES_BUS)
+#if !(defined(NEW_PCIB) && defined(PCI_RES_BUS))
case 0x12258086: /* Intel 82454KX/GX (Orion) */
{
uint8_t supbus;
@@ -976,7 +977,7 @@ pcib_attach_common(device_t dev)
sc->flags |= PCIB_SUBTRACTIVE;
break;
-#if !defined(NEW_PCIB) && !defined(PCI_RES_BUS)
+#if !(defined(NEW_PCIB) && defined(PCI_RES_BUS))
/* Compaq R3000 BIOS sets wrong subordinate bus number. */
case 0x00dd10de:
{
@@ -1101,32 +1102,15 @@ pcib_attach(device_t dev)
int
pcib_suspend(device_t dev)
{
- device_t pcib;
- int dstate, error;
pcib_cfg_save(device_get_softc(dev));
- error = bus_generic_suspend(dev);
- if (error == 0 && pci_do_power_suspend) {
- dstate = PCI_POWERSTATE_D3;
- pcib = device_get_parent(device_get_parent(dev));
- if (PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
- pci_set_powerstate(dev, dstate);
- }
- return (error);
+ return (bus_generic_suspend(dev));
}
int
pcib_resume(device_t dev)
{
- device_t pcib;
- int dstate;
-
- if (pci_do_power_resume) {
- pcib = device_get_parent(device_get_parent(dev));
- dstate = PCI_POWERSTATE_D0;
- if (PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
- pci_set_powerstate(dev, dstate);
- }
+
pcib_cfg_restore(device_get_softc(dev));
return (bus_generic_resume(dev));
}
diff --git a/sys/dev/pci/pci_subr.c b/sys/dev/pci/pci_subr.c
index 5d0db36..03bcdc0 100644
--- a/sys/dev/pci/pci_subr.c
+++ b/sys/dev/pci/pci_subr.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2011 Advanced Computing Technologies LLC
+ * Copyright (c) 2011 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/dev/pci/pcib_private.h b/sys/dev/pci/pcib_private.h
index d8d82b6..20cb014 100644
--- a/sys/dev/pci/pcib_private.h
+++ b/sys/dev/pci/pcib_private.h
@@ -106,7 +106,6 @@ struct pcib_softc
#define PCIB_DISABLE_MSI 0x2
#define PCIB_DISABLE_MSIX 0x4
#define PCIB_ENABLE_ARI 0x8
- uint16_t command; /* command register */
u_int domain; /* domain number */
u_int pribus; /* primary bus number */
struct pcib_secbus bus; /* secondary bus numbers */
@@ -122,9 +121,7 @@ struct pcib_softc
uint32_t iobase; /* base address of port window */
uint32_t iolimit; /* topmost address of port window */
#endif
- uint16_t secstat; /* secondary bus status register */
uint16_t bridgectl; /* bridge control register */
- uint8_t seclat; /* secondary bus latency timer */
};
#define PCIB_SUPPORTED_ARI_VER 1
diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
index 478c98e..2fd76b6 100644
--- a/sys/dev/pci/pcivar.h
+++ b/sys/dev/pci/pcivar.h
@@ -41,6 +41,15 @@ typedef uint64_t pci_addr_t;
struct nvlist;
+/* Config registers for PCI-PCI and PCI-Cardbus bridges. */
+struct pcicfg_bridge {
+ uint8_t br_seclat;
+ uint8_t br_subbus;
+ uint8_t br_secbus;
+ uint8_t br_pribus;
+ uint16_t br_control;
+};
+
/* Interesting values for PCI power management */
struct pcicfg_pp {
uint16_t pp_cap; /* PCI power management capabilities */
@@ -190,6 +199,7 @@ typedef struct pcicfg {
uint32_t flags; /* flags defined above */
size_t devinfo_size; /* Size of devinfo for this bus type. */
+ struct pcicfg_bridge bridge; /* Bridges */
struct pcicfg_pp pp; /* Power management */
struct pcicfg_vpd vpd; /* Vital product data */
struct pcicfg_msi msi; /* PCI MSI */
diff --git a/sys/dev/smbus/smb.c b/sys/dev/smbus/smb.c
index 579204d..e842441 100644
--- a/sys/dev/smbus/smb.c
+++ b/sys/dev/smbus/smb.c
@@ -26,10 +26,6 @@
* $FreeBSD$
*/
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include "opt_compat.h"
-#endif
-
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
@@ -104,19 +100,24 @@ smb_identify(driver_t *driver, device_t parent)
static int
smb_probe(device_t dev)
{
- device_set_desc(dev, "SMBus generic I/O");
+ if (smbus_get_addr(dev) != -1)
+ return (ENXIO);
- return (0);
+ device_set_desc(dev, "SMBus generic I/O");
+ return (BUS_PROBE_NOWILDCARD);
}
static int
smb_attach(device_t dev)
{
struct smb_softc *sc = device_get_softc(dev);
-
+ int unit;
+
+ unit = device_get_unit(dev);
sc->sc_dev = dev;
- sc->sc_devnode = make_dev(&smb_cdevsw, device_get_unit(dev),
- UID_ROOT, GID_WHEEL, 0600, "smb%d", device_get_unit(dev));
+
+ sc->sc_devnode = make_dev(&smb_cdevsw, unit, UID_ROOT, GID_WHEEL,
+ 0600, "smb%d", unit);
sc->sc_devnode->si_drv1 = sc;
mtx_init(&sc->sc_lock, device_get_nameunit(dev), NULL, MTX_DEF);
@@ -174,9 +175,16 @@ smbioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, struct thread *t
struct smb_softc *sc = dev->si_drv1;
device_t smbdev = sc->sc_dev;
int error;
- short w;
- u_char count;
- char c;
+ int unit;
+ u_char bcount;
+
+ /*
+ * If a specific slave device is being used, override any passed-in
+ * slave.
+ */
+ unit = dev2unit(dev);
+ if (unit & 0x0400)
+ s->slave = unit & 0x03ff;
parent = device_get_parent(smbdev);
@@ -208,77 +216,101 @@ smbioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, struct thread *t
case SMB_WRITEB:
error = smbus_error(smbus_writeb(parent, s->slave, s->cmd,
- s->data.byte));
+ s->wdata.byte));
break;
case SMB_WRITEW:
error = smbus_error(smbus_writew(parent, s->slave,
- s->cmd, s->data.word));
+ s->cmd, s->wdata.word));
break;
case SMB_READB:
- if (s->data.byte_ptr) {
- error = smbus_error(smbus_readb(parent, s->slave,
- s->cmd, &c));
- if (error)
- break;
- error = copyout(&c, s->data.byte_ptr,
- sizeof(*(s->data.byte_ptr)));
+ error = smbus_error(smbus_readb(parent, s->slave, s->cmd,
+ &s->rdata.byte));
+ if (error)
+ break;
+ if (s->rbuf && s->rcount >= 1) {
+ error = copyout(&s->rdata.byte, s->rbuf, 1);
+ s->rcount = 1;
}
break;
case SMB_READW:
- if (s->data.word_ptr) {
- error = smbus_error(smbus_readw(parent, s->slave,
- s->cmd, &w));
- if (error == 0) {
- error = copyout(&w, s->data.word_ptr,
- sizeof(*(s->data.word_ptr)));
- }
+ error = smbus_error(smbus_readw(parent, s->slave, s->cmd,
+ &s->rdata.word));
+ if (error)
+ break;
+ if (s->rbuf && s->rcount >= 2) {
+ buf[0] = (u_char)s->rdata.word;
+ buf[1] = (u_char)(s->rdata.word >> 8);
+ error = copyout(buf, s->rbuf, 2);
+ s->rcount = 2;
}
break;
case SMB_PCALL:
- if (s->data.process.rdata) {
-
- error = smbus_error(smbus_pcall(parent, s->slave, s->cmd,
- s->data.process.sdata, &w));
- if (error)
- break;
- error = copyout(&w, s->data.process.rdata,
- sizeof(*(s->data.process.rdata)));
+ error = smbus_error(smbus_pcall(parent, s->slave, s->cmd,
+ s->wdata.word, &s->rdata.word));
+ if (error)
+ break;
+ if (s->rbuf && s->rcount >= 2) {
+ buf[0] = (u_char)s->rdata.word;
+ buf[1] = (u_char)(s->rdata.word >> 8);
+ error = copyout(buf, s->rbuf, 2);
+ s->rcount = 2;
}
-
+
break;
case SMB_BWRITE:
- if (s->count && s->data.byte_ptr) {
- if (s->count > SMB_MAXBLOCKSIZE)
- s->count = SMB_MAXBLOCKSIZE;
- error = copyin(s->data.byte_ptr, buf, s->count);
- if (error)
- break;
- error = smbus_error(smbus_bwrite(parent, s->slave,
- s->cmd, s->count, buf));
+ if (s->wcount < 0) {
+ error = EINVAL;
+ break;
}
+ if (s->wcount > SMB_MAXBLOCKSIZE)
+ s->wcount = SMB_MAXBLOCKSIZE;
+ if (s->wcount)
+ error = copyin(s->wbuf, buf, s->wcount);
+ if (error)
+ break;
+ error = smbus_error(smbus_bwrite(parent, s->slave, s->cmd,
+ s->wcount, buf));
break;
-#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD6)
- case SMB_OLD_BREAD:
-#endif
case SMB_BREAD:
- if (s->count && s->data.byte_ptr) {
- count = min(s->count, SMB_MAXBLOCKSIZE);
- error = smbus_error(smbus_bread(parent, s->slave,
- s->cmd, &count, buf));
- if (error)
- break;
- error = copyout(buf, s->data.byte_ptr,
- min(count, s->count));
- s->count = count;
+ if (s->rcount < 0) {
+ error = EINVAL;
+ break;
+ }
+ if (s->rcount > SMB_MAXBLOCKSIZE)
+ s->rcount = SMB_MAXBLOCKSIZE;
+ error = smbus_error(smbus_bread(parent, s->slave, s->cmd,
+ &bcount, buf));
+ if (error)
+ break;
+ if (s->rcount > bcount)
+ s->rcount = bcount;
+ error = copyout(buf, s->rbuf, s->rcount);
+ break;
+
+ case SMB_TRANS:
+ if (s->rcount < 0 || s->wcount < 0) {
+ error = EINVAL;
+ break;
}
+ if (s->rcount > SMB_MAXBLOCKSIZE)
+ s->rcount = SMB_MAXBLOCKSIZE;
+ if (s->wcount > SMB_MAXBLOCKSIZE)
+ s->wcount = SMB_MAXBLOCKSIZE;
+ if (s->wcount)
+ error = copyin(s->wbuf, buf, s->wcount);
+ if (error)
+ break;
+ error = smbus_error(smbus_trans(parent, s->slave, s->cmd,
+ s->op, buf, s->wcount, buf, s->rcount, &s->rcount));
+ if (error == 0)
+ error = copyout(buf, s->rbuf, s->rcount);
break;
-
default:
error = ENOTTY;
}
diff --git a/sys/dev/smbus/smb.h b/sys/dev/smbus/smb.h
index 32347f8..8007653 100644
--- a/sys/dev/smbus/smb.h
+++ b/sys/dev/smbus/smb.h
@@ -32,27 +32,33 @@
#include <sys/ioccom.h>
struct smbcmd {
- char cmd;
- int count;
- u_char slave;
+ u_char cmd;
+ u_char reserved;
+ u_short op;
union {
- char byte;
- short word;
-
- char *byte_ptr;
- short *word_ptr;
-
- struct {
- short sdata;
- short *rdata;
- } process;
- } data;
+ char byte;
+ char buf[2];
+ short word;
+ } wdata;
+ union {
+ char byte;
+ char buf[2];
+ short word;
+ } rdata;
+ int slave;
+ char *wbuf; /* use wdata if NULL */
+ int wcount;
+ char *rbuf; /* use rdata if NULL */
+ int rcount;
};
/*
* SMBus spec 2.0 says block transfers may be at most 32 bytes.
+ * We use SMBus for i2c as well, so make the size limit something more
+ * reasonable. Keep in mind that a char buf array of this size is
+ * declared on the kernel stack.
*/
-#define SMB_MAXBLOCKSIZE 32
+#define SMB_MAXBLOCKSIZE 1024
#define SMB_QUICK_WRITE _IOW('i', 1, struct smbcmd)
#define SMB_QUICK_READ _IOW('i', 2, struct smbcmd)
@@ -66,5 +72,6 @@ struct smbcmd {
#define SMB_BWRITE _IOW('i', 10, struct smbcmd)
#define SMB_OLD_BREAD _IOW('i', 11, struct smbcmd)
#define SMB_BREAD _IOWR('i', 11, struct smbcmd)
+#define SMB_TRANS _IOWR('i', 12, struct smbcmd)
#endif
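With the reworked struct smbcmd, callers use separate rbuf/rcount and
wbuf/wcount pairs instead of the old anonymous data union.  A hedged
user-space sketch of the new SMB_TRANS ioctl follows; the SMB_TRANS_*
flag values are declared in smbconf.h, and read_reg16() is a hypothetical
helper, not part of the patch:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <dev/smbus/smb.h>

	/*
	 * Read a 16-bit register: transmit the register number as 'cmd',
	 * then read two raw bytes back with no SMBus count byte.
	 */
	int
	read_reg16(int fd, int slave, unsigned char reg, char out[2])
	{
		struct smbcmd c;

		memset(&c, 0, sizeof(c));
		c.slave = slave;
		c.cmd = reg;
		c.op = SMB_TRANS_NOCNT;
		c.rbuf = out;
		c.rcount = 2;
		return (ioctl(fd, SMB_TRANS, &c));
	}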
diff --git a/sys/dev/smbus/smbconf.h b/sys/dev/smbus/smbconf.h
index a3d403d..f39c442 100644
--- a/sys/dev/smbus/smbconf.h
+++ b/sys/dev/smbus/smbconf.h
@@ -68,9 +68,30 @@
#define SMB_QREAD 0x1
/*
+ * smbus transaction op with pass-through capabilities
+ *
+ * This smbus function can perform an smbus command transaction (read
+ * or write). It can be flagged not to issue the 'cmd' field, not to
+ * transmit or expect the count field, and/or to chain transactions
+ * (no STOP), which gives it an i2c pass-through capability.
+ *
+ * NOSTOP - Caller is chaining transactions; do not issue STOP
+ * NOCMD  - Do not transmit the command field
+ * NOCNT  - Do not transmit (wr) or expect (rd) the count field
+ */
+#define SMB_TRANS_NOSTOP 0x0001 /* do not send STOP at end */
+#define SMB_TRANS_NOCMD 0x0002 /* ignore cmd field (do not tx) */
+#define SMB_TRANS_NOCNT 0x0004 /* do not tx or rx count field */
+#define SMB_TRANS_7BIT 0x0008 /* change address mode to 7-bit */
+#define SMB_TRANS_10BIT 0x0010 /* change address mode to 10-bit */
+#define SMB_TRANS_NOREPORT 0x0020 /* do not report errors */
+
+/*
* ivars codes
*/
-#define SMBUS_IVAR_ADDR 0x1 /* slave address of the device */
+enum smbus_ivars {
+ SMBUS_IVAR_ADDR, /* slave address of the device */
+};
int smbus_request_bus(device_t, device_t, int);
int smbus_release_bus(device_t, device_t);
@@ -79,7 +100,12 @@ int smbus_error(int error);
void smbus_intr(device_t, u_char, char low, char high, int error);
-u_char smbus_get_addr(device_t);
+#define SMBUS_ACCESSOR(var, ivar, type) \
+ __BUS_ACCESSOR(smbus, var, SMBUS, ivar, type)
+
+SMBUS_ACCESSOR(addr, ADDR, int)
+
+#undef SMBUS_ACCESSOR
extern driver_t smbus_driver;
extern devclass_t smbus_devclass;
@@ -104,6 +130,9 @@ extern devclass_t smbus_devclass;
(SMBUS_BWRITE(device_get_parent(bus), slave, cmd, count, buf))
#define smbus_bread(bus,slave,cmd,count,buf) \
(SMBUS_BREAD(device_get_parent(bus), slave, cmd, count, buf))
+#define smbus_trans(bus,slave,cmd,op,wbuf,wcount,rbuf,rcount,actualp) \
+ (SMBUS_TRANS(device_get_parent(bus), slave, cmd, op, \
+ wbuf, wcount, rbuf, rcount, actualp))
#define SMBUS_MODVER 1
#define SMBUS_MINVER 1
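The SMBUS_ACCESSOR/__BUS_ACCESSOR pair replaces the old smbus_get_addr()
prototype with generated inline accessors.  Simplified, the getter expands
to roughly the following (the setter is generated the same way):

	static __inline int
	smbus_get_addr(device_t dev)
	{
		uintptr_t v;

		BUS_READ_IVAR(device_get_parent(dev), dev,
		    SMBUS_IVAR_ADDR, &v);
		return ((int)v);
	}

This is what lets the new smb_probe() compare against -1: the smbus
bus_read_ivar method reports -1 for children that carry no address ivar.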
diff --git a/sys/dev/smbus/smbus.c b/sys/dev/smbus/smbus.c
index a111332..389efac 100644
--- a/sys/dev/smbus/smbus.c
+++ b/sys/dev/smbus/smbus.c
@@ -33,11 +33,15 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
-#include <sys/bus.h>
+#include <sys/bus.h>
#include <dev/smbus/smbconf.h>
#include <dev/smbus/smbus.h>
+#include "smbus_if.h"
+#include "bus_if.h"
+
/*
* Autoconfiguration and support routines for System Management bus
*/
@@ -49,6 +53,13 @@ static int smbus_probe(device_t);
static int smbus_attach(device_t);
static int smbus_detach(device_t);
+static int smbus_child_location_str(device_t parent, device_t child,
+ char *buf, size_t buflen);
+static int smbus_print_child(device_t parent, device_t child);
+static void smbus_probe_device(device_t dev, u_char* addr);
+static int smbus_read_ivar(device_t parent, device_t child, int which,
+ uintptr_t *result);
+
static device_method_t smbus_methods[] = {
/* device interface */
DEVMETHOD(device_probe, smbus_probe),
@@ -57,6 +68,10 @@ static device_method_t smbus_methods[] = {
/* bus interface */
DEVMETHOD(bus_add_child, bus_generic_add_child),
+ DEVMETHOD(bus_child_location_str, smbus_child_location_str),
+ DEVMETHOD(bus_driver_added, bus_generic_driver_added),
+ DEVMETHOD(bus_print_child, smbus_print_child),
+ DEVMETHOD(bus_read_ivar, smbus_read_ivar),
DEVMETHOD_END
};
@@ -87,9 +102,14 @@ static int
smbus_attach(device_t dev)
{
struct smbus_softc *sc = device_get_softc(dev);
+ unsigned char addr;
mtx_init(&sc->lock, device_get_nameunit(dev), "smbus", MTX_DEF);
bus_generic_probe(dev);
+ for (addr = SMBUS_ADDR_MIN; addr < SMBUS_ADDR_MAX; ++addr) {
+ sc->addrs[addr] = addr;
+ smbus_probe_device(dev, &sc->addrs[addr]);
+ }
bus_generic_attach(dev);
return (0);
@@ -114,4 +134,70 @@ smbus_generic_intr(device_t dev, u_char devaddr, char low, char high, int err)
{
}
+static void
+smbus_probe_device(device_t dev, u_char* addr)
+{
+ device_t child;
+ int error;
+ u_char cmd;
+ u_char buf[2];
+
+ cmd = 0x01;
+ error = smbus_trans(dev, *addr, cmd,
+ SMB_TRANS_NOCNT | SMB_TRANS_NOREPORT,
+ NULL, 0, buf, 1, NULL);
+ if (error == 0) {
+ if (bootverbose)
+ device_printf(dev, "Probed address 0x%02x\n", *addr);
+ child = device_add_child(dev, NULL, -1);
+ device_set_ivars(child, addr);
+ }
+}
+
+static int
+smbus_child_location_str(device_t parent, device_t child, char *buf,
+ size_t buflen)
+{
+ unsigned char *addr;
+
+ addr = device_get_ivars(child);
+ if (addr)
+ snprintf(buf, buflen, "addr=0x%x", *addr);
+ else if (buflen)
+ buf[0] = 0;
+ return (0);
+}
+
+static int
+smbus_print_child(device_t parent, device_t child)
+{
+ unsigned char *addr;
+ int retval;
+
+ addr = device_get_ivars(child);
+ retval = bus_print_child_header(parent, child);
+ if (addr)
+ retval += printf(" at addr 0x%x", *addr);
+ retval += bus_print_child_footer(parent, child);
+
+ return (retval);
+}
+
+static int
+smbus_read_ivar(device_t parent, device_t child, int which,
+ uintptr_t *result)
+{
+ unsigned char *addr;
+
+ addr = device_get_ivars(child);
+ switch (which) {
+ case SMBUS_IVAR_ADDR:
+ *result = (addr == NULL) ? -1 : *addr;
+ break;
+ default:
+ return (ENOENT);
+ }
+ return (0);
+}
+
MODULE_VERSION(smbus, SMBUS_MODVER);
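Children enumerated by smbus_probe_device() get their ivar as a pointer
into the softc's addrs[] array, so the address stays valid for the life
of the bus.  A hypothetical slave driver can then key its probe off the
address (foo_probe() is illustrative only):

	static int
	foo_probe(device_t dev)
	{
		/* Claim only the child enumerated at slave 0x2d. */
		if (smbus_get_addr(dev) != 0x2d)
			return (ENXIO);
		device_set_desc(dev, "Foo SMBus sensor");
		return (BUS_PROBE_DEFAULT);
	}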
diff --git a/sys/dev/smbus/smbus.h b/sys/dev/smbus/smbus.h
index 5626835..2093aa0 100644
--- a/sys/dev/smbus/smbus.h
+++ b/sys/dev/smbus/smbus.h
@@ -29,9 +29,13 @@
#ifndef __SMBUS_H
#define __SMBUS_H
+#define SMBUS_ADDR_MIN 0x10
+#define SMBUS_ADDR_MAX 0x70
+
struct smbus_softc {
device_t owner; /* smbus owner device structure */
struct mtx lock;
+ unsigned char addrs[SMBUS_ADDR_MAX];
};
void smbus_generic_intr(device_t dev, u_char devaddr, char low, char high, int err);
diff --git a/sys/dev/smbus/smbus_if.m b/sys/dev/smbus/smbus_if.m
index d969e25..6a5acf5 100644
--- a/sys/dev/smbus/smbus_if.m
+++ b/sys/dev/smbus/smbus_if.m
@@ -149,3 +149,20 @@ METHOD int bread {
u_char *count;
char *buf;
};
+
+#
+# SMB roll-up transaction with flags that also allow it to be
+# used for (mostly) i2c pass-through and with 10-bit addresses.
+# This function can be used to roll up all of the above functions.
+#
+METHOD int trans {
+ device_t dev;
+ int slave;
+ char cmd;
+ int op;
+ char *wbuf;
+ int wcount;
+ char *rbuf;
+ int rcount;
+ int *actualp;
+};
diff --git a/sys/dev/sound/pcm/dsp.c b/sys/dev/sound/pcm/dsp.c
index 68204ff..5b92963 100644
--- a/sys/dev/sound/pcm/dsp.c
+++ b/sys/dev/sound/pcm/dsp.c
@@ -48,6 +48,11 @@ SYSCTL_INT(_hw_snd, OID_AUTO, compat_linux_mmap, CTLFLAG_RWTUN,
&dsp_mmap_allow_prot_exec, 0,
"linux mmap compatibility (-1=force disable 0=auto 1=force enable)");
+static int dsp_basename_clone = 1;
+SYSCTL_INT(_hw_snd, OID_AUTO, basename_clone, CTLFLAG_RWTUN,
+ &dsp_basename_clone, 0,
+ "DSP basename cloning (0: Disable; 1: Enabled)");
+
struct dsp_cdevinfo {
struct pcm_channel *rdch, *wrch;
struct pcm_channel *volch;
@@ -2357,9 +2362,10 @@ dsp_clone(void *arg,
devname = devcmp;
devhw = dsp_cdevs[i].hw;
devcmax = dsp_cdevs[i].max - 1;
- if (strcmp(name, devcmp) == 0)
- unit = snd_unit;
- else if (dsp_stdclone(name, devcmp, devsep,
+ if (strcmp(name, devcmp) == 0) {
+ if (dsp_basename_clone != 0)
+ unit = snd_unit;
+ } else if (dsp_stdclone(name, devcmp, devsep,
dsp_cdevs[i].use_sep, &unit, &cunit) != 0) {
unit = -1;
cunit = -1;
diff --git a/sys/dev/usb/controller/dwc_otg.c b/sys/dev/usb/controller/dwc_otg.c
index 05d2f71..37b5333 100644
--- a/sys/dev/usb/controller/dwc_otg.c
+++ b/sys/dev/usb/controller/dwc_otg.c
@@ -3876,20 +3876,18 @@ dwc_otg_init(struct dwc_otg_softc *sc)
if (temp & GHWCFG2_MPI) {
uint8_t x;
- DPRINTF("Multi Process Interrupts\n");
+ DPRINTF("Disable Multi Process Interrupts\n");
for (x = 0; x != sc->sc_dev_in_ep_max; x++) {
- DWC_OTG_WRITE_4(sc, DOTG_DIEPEACHINTMSK(x),
- DIEPMSK_XFERCOMPLMSK);
+ DWC_OTG_WRITE_4(sc, DOTG_DIEPEACHINTMSK(x), 0);
DWC_OTG_WRITE_4(sc, DOTG_DOEPEACHINTMSK(x), 0);
}
- DWC_OTG_WRITE_4(sc, DOTG_DEACHINTMSK, 0xFFFF);
- } else {
- DWC_OTG_WRITE_4(sc, DOTG_DIEPMSK,
- DIEPMSK_XFERCOMPLMSK);
- DWC_OTG_WRITE_4(sc, DOTG_DOEPMSK, 0);
- DWC_OTG_WRITE_4(sc, DOTG_DAINTMSK, 0xFFFF);
+ DWC_OTG_WRITE_4(sc, DOTG_DEACHINTMSK, 0);
}
+ DWC_OTG_WRITE_4(sc, DOTG_DIEPMSK,
+ DIEPMSK_XFERCOMPLMSK);
+ DWC_OTG_WRITE_4(sc, DOTG_DOEPMSK, 0);
+ DWC_OTG_WRITE_4(sc, DOTG_DAINTMSK, 0xFFFF);
}
if (sc->sc_mode == DWC_MODE_OTG || sc->sc_mode == DWC_MODE_HOST) {
diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs
index 4a10610..d972bdc 100644
--- a/sys/dev/usb/usbdevs
+++ b/sys/dev/usb/usbdevs
@@ -3755,6 +3755,7 @@ product REALTEK RTL8188CU_1 0x817a RTL8188CU
product REALTEK RTL8188CU_2 0x817b RTL8188CU
product REALTEK RTL8187 0x8187 RTL8187 Wireless Adapter
product REALTEK RTL8187B_0 0x8189 RTL8187B Wireless Adapter
+product REALTEK RTL8188CU_3 0x8191 RTL8188CU
product REALTEK RTL8196EU 0x8196 RTL8196EU
product REALTEK RTL8187B_1 0x8197 RTL8187B Wireless Adapter
product REALTEK RTL8187B_2 0x8198 RTL8187B Wireless Adapter
diff --git a/sys/dev/usb/wlan/if_urtwn.c b/sys/dev/usb/wlan/if_urtwn.c
index a25114f..72004d9 100644
--- a/sys/dev/usb/wlan/if_urtwn.c
+++ b/sys/dev/usb/wlan/if_urtwn.c
@@ -137,6 +137,7 @@ static const STRUCT_USB_HOST_ID urtwn_devs[] = {
URTWN_DEV(REALTEK, RTL8188CU_0),
URTWN_DEV(REALTEK, RTL8188CU_1),
URTWN_DEV(REALTEK, RTL8188CU_2),
+ URTWN_DEV(REALTEK, RTL8188CU_3),
URTWN_DEV(REALTEK, RTL8188CU_COMBO),
URTWN_DEV(REALTEK, RTL8188CUS),
URTWN_DEV(REALTEK, RTL8188RU_1),
@@ -145,7 +146,6 @@ static const STRUCT_USB_HOST_ID urtwn_devs[] = {
URTWN_DEV(REALTEK, RTL8191CU),
URTWN_DEV(REALTEK, RTL8192CE),
URTWN_DEV(REALTEK, RTL8192CU),
- URTWN_DEV(REALTEK, RTL8188CU_0),
URTWN_DEV(SITECOMEU, RTL8188CU_1),
URTWN_DEV(SITECOMEU, RTL8188CU_2),
URTWN_DEV(SITECOMEU, RTL8192CU),
@@ -1236,9 +1236,11 @@ urtwn_efuse_read(struct urtwn_softc *sc)
static void
urtwn_efuse_switch_power(struct urtwn_softc *sc)
{
- uint8_t vol;
uint32_t reg;
+ if (sc->chip & URTWN_CHIP_88E)
+ urtwn_write_1(sc, R92C_EFUSE_ACCESS, R92C_EFUSE_ACCESS_ON);
+
reg = urtwn_read_2(sc, R92C_SYS_ISO_CTRL);
if (!(reg & R92C_SYS_ISO_CTRL_PWC_EV12V)) {
urtwn_write_2(sc, R92C_SYS_ISO_CTRL,
@@ -1256,11 +1258,15 @@ urtwn_efuse_switch_power(struct urtwn_softc *sc)
reg | R92C_SYS_CLKR_LOADER_EN | R92C_SYS_CLKR_ANA8M);
}
- /* Enable LDO 2.5V. */
- vol = urtwn_read_1(sc, R92C_EFUSE_TEST + 3);
- vol &= 0x0f;
- vol |= 0x30;
- urtwn_write_1(sc, R92C_EFUSE_TEST + 3, (vol | 0x80));
+ if (!(sc->chip & URTWN_CHIP_88E)) {
+ uint8_t vol;
+
+ /* Enable LDO 2.5V. */
+ vol = urtwn_read_1(sc, R92C_EFUSE_TEST + 3);
+ vol &= 0x0f;
+ vol |= 0x30;
+ urtwn_write_1(sc, R92C_EFUSE_TEST + 3, (vol | 0x80));
+ }
}
static int
@@ -1328,7 +1334,7 @@ urtwn_r88e_read_rom(struct urtwn_softc *sc)
/* Read full ROM image. */
memset(&sc->r88e_rom, 0xff, sizeof(sc->r88e_rom));
- while (addr < 1024) {
+ while (addr < 512) {
reg = urtwn_efuse_read_1(sc, addr);
if (reg == 0xff)
break;
@@ -1354,6 +1360,8 @@ urtwn_r88e_read_rom(struct urtwn_softc *sc)
}
}
+ urtwn_write_1(sc, R92C_EFUSE_ACCESS, R92C_EFUSE_ACCESS_OFF);
+
addr = 0x10;
for (i = 0; i < 6; i++)
sc->cck_tx_pwr[i] = sc->r88e_rom[addr++];
@@ -2193,14 +2201,12 @@ urtwn_r92c_power_on(struct urtwn_softc *sc)
static int
urtwn_r88e_power_on(struct urtwn_softc *sc)
{
- uint8_t val;
uint32_t reg;
int ntries;
/* Wait for power ready bit. */
for (ntries = 0; ntries < 5000; ntries++) {
- val = urtwn_read_1(sc, 0x6) & 0x2;
- if (val == 0x2)
+ if (urtwn_read_4(sc, R92C_APS_FSMCO) & R92C_APS_FSMCO_SUS_HOST)
break;
urtwn_ms_delay(sc);
}
@@ -2215,17 +2221,23 @@ urtwn_r88e_power_on(struct urtwn_softc *sc)
urtwn_read_1(sc, R92C_SYS_FUNC_EN) & ~(R92C_SYS_FUNC_EN_BBRSTB |
R92C_SYS_FUNC_EN_BB_GLB_RST));
- urtwn_write_1(sc, 0x26, urtwn_read_1(sc, 0x26) | 0x80);
+ urtwn_write_1(sc, R92C_AFE_XTAL_CTRL + 2,
+ urtwn_read_1(sc, R92C_AFE_XTAL_CTRL + 2) | 0x80);
/* Disable HWPDN. */
- urtwn_write_1(sc, 0x5, urtwn_read_1(sc, 0x5) & ~0x80);
+ urtwn_write_2(sc, R92C_APS_FSMCO,
+ urtwn_read_2(sc, R92C_APS_FSMCO) & ~R92C_APS_FSMCO_APDM_HPDN);
/* Disable WL suspend. */
- urtwn_write_1(sc, 0x5, urtwn_read_1(sc, 0x5) & ~0x18);
+ urtwn_write_2(sc, R92C_APS_FSMCO,
+ urtwn_read_2(sc, R92C_APS_FSMCO) &
+ ~(R92C_APS_FSMCO_AFSM_HSUS | R92C_APS_FSMCO_AFSM_PCIE));
- urtwn_write_1(sc, 0x5, urtwn_read_1(sc, 0x5) | 0x1);
+ urtwn_write_2(sc, R92C_APS_FSMCO,
+ urtwn_read_2(sc, R92C_APS_FSMCO) | R92C_APS_FSMCO_APFM_ONMAC);
for (ntries = 0; ntries < 5000; ntries++) {
- if (!(urtwn_read_1(sc, 0x5) & 0x1))
+ if (!(urtwn_read_2(sc, R92C_APS_FSMCO) &
+ R92C_APS_FSMCO_APFM_ONMAC))
break;
urtwn_ms_delay(sc);
}
@@ -2233,7 +2245,8 @@ urtwn_r88e_power_on(struct urtwn_softc *sc)
return (ETIMEDOUT);
/* Enable LDO normal mode. */
- urtwn_write_1(sc, 0x23, urtwn_read_1(sc, 0x23) & ~0x10);
+ urtwn_write_1(sc, R92C_LPLDO_CTRL,
+ urtwn_read_1(sc, R92C_LPLDO_CTRL) & ~0x10);
/* Enable MAC DMA/WMAC/SCHEDULE/SEC blocks. */
urtwn_write_2(sc, R92C_CR, 0);
@@ -2565,7 +2578,6 @@ urtwn_r88e_dma_init(struct urtwn_softc *sc)
return (EIO);
/* Set number of pages for normal priority queue. */
- urtwn_write_2(sc, R92C_RQPN_NPQ, 0);
urtwn_write_2(sc, R92C_RQPN_NPQ, 0x000d);
urtwn_write_4(sc, R92C_RQPN, 0x808e000d);
@@ -3384,16 +3396,17 @@ urtwn_init_locked(void *arg)
urtwn_write_1(sc, R92C_TRXDMA_CTRL,
urtwn_read_1(sc, R92C_TRXDMA_CTRL) |
R92C_TRXDMA_CTRL_RXDMA_AGG_EN);
- urtwn_write_1(sc, R92C_USB_SPECIAL_OPTION,
- urtwn_read_1(sc, R92C_USB_SPECIAL_OPTION) |
- R92C_USB_SPECIAL_OPTION_AGG_EN);
urtwn_write_1(sc, R92C_RXDMA_AGG_PG_TH, 48);
if (sc->chip & URTWN_CHIP_88E)
urtwn_write_1(sc, R92C_RXDMA_AGG_PG_TH + 1, 4);
- else
+ else {
urtwn_write_1(sc, R92C_USB_DMA_AGG_TO, 4);
- urtwn_write_1(sc, R92C_USB_AGG_TH, 8);
- urtwn_write_1(sc, R92C_USB_AGG_TO, 6);
+ urtwn_write_1(sc, R92C_USB_SPECIAL_OPTION,
+ urtwn_read_1(sc, R92C_USB_SPECIAL_OPTION) |
+ R92C_USB_SPECIAL_OPTION_AGG_EN);
+ urtwn_write_1(sc, R92C_USB_AGG_TH, 8);
+ urtwn_write_1(sc, R92C_USB_AGG_TO, 6);
+ }
/* Initialize beacon parameters. */
urtwn_write_2(sc, R92C_BCN_CTRL, 0x1010);
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 39fc28d..704a3d9 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -2745,6 +2745,11 @@ vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
struct vtnet_txq *txq;
int i;
+#ifdef DEV_NETMAP
+ if (nm_native_on(NA(sc->vtnet_ifp)))
+ return;
+#endif /* DEV_NETMAP */
+
for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
rxq = &sc->vtnet_rxqs[i];
vtnet_rxq_free_mbufs(rxq);
diff --git a/sys/dev/vt/hw/fb/vt_fb.c b/sys/dev/vt/hw/fb/vt_fb.c
index 40f56fc..b2babd1 100644
--- a/sys/dev/vt/hw/fb/vt_fb.c
+++ b/sys/dev/vt/hw/fb/vt_fb.c
@@ -264,46 +264,40 @@ vt_fb_bitblt_bitmap(struct vt_device *vd, const struct vt_window *vw,
{
struct fb_info *info;
uint32_t fgc, bgc, cc, o;
- int c, l, bpp, bpl;
- u_long line;
- uint8_t b, m;
- const uint8_t *ch;
+ int bpp, bpl, xi, yi;
+ int bit, byte;
info = vd->vd_softc;
bpp = FBTYPE_GET_BYTESPP(info);
fgc = info->fb_cmap[fg];
bgc = info->fb_cmap[bg];
- b = m = 0;
- bpl = (width + 7) >> 3; /* Bytes per source line. */
+ bpl = (width + 7) / 8; /* Bytes per source line. */
if (info->fb_flags & FB_FLAG_NOWRITE)
return;
KASSERT((info->fb_vbase != 0), ("Unmapped framebuffer"));
- line = (info->fb_stride * y) + (x * bpp);
- for (l = 0;
- l < height && y + l < vw->vw_draw_area.tr_end.tp_row;
- l++) {
- ch = pattern;
- for (c = 0;
- c < width && x + c < vw->vw_draw_area.tr_end.tp_col;
- c++) {
- if (c % 8 == 0)
- b = *ch++;
- else
- b <<= 1;
- if (mask != NULL) {
- if (c % 8 == 0)
- m = *mask++;
- else
- m <<= 1;
- /* Skip pixel write, if mask has no bit set. */
- if ((m & 0x80) == 0)
- continue;
- }
- o = line + (c * bpp);
- cc = b & 0x80 ? fgc : bgc;
+ /* Bound by right and bottom edges. */
+ if (y + height > vw->vw_draw_area.tr_end.tp_row) {
+ if (y >= vw->vw_draw_area.tr_end.tp_row)
+ return;
+ height = vw->vw_draw_area.tr_end.tp_row - y;
+ }
+ if (x + width > vw->vw_draw_area.tr_end.tp_col) {
+ if (x >= vw->vw_draw_area.tr_end.tp_col)
+ return;
+ width = vw->vw_draw_area.tr_end.tp_col - x;
+ }
+ for (yi = 0; yi < height; yi++) {
+ for (xi = 0; xi < width; xi++) {
+ byte = yi * bpl + xi / 8;
+ bit = 0x80 >> (xi % 8);
+ /* Skip the pixel write if the mask bit is not set. */
+ if (mask != NULL && (mask[byte] & bit) == 0)
+ continue;
+ o = (y + yi) * info->fb_stride + (x + xi) * bpp;
+ cc = pattern[byte] & bit ? fgc : bgc;
switch(bpp) {
case 1:
@@ -326,8 +320,6 @@ vt_fb_bitblt_bitmap(struct vt_device *vd, const struct vt_window *vw,
break;
}
}
- line += info->fb_stride;
- pattern += bpl;
}
}
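The rewritten blitter clips height and width up front, then addresses the
1-bpp, MSB-first source bitmap directly.  A worked example of the indexing
used above:

	/*
	 * width = 12 -> bpl = (12 + 7) / 8 = 2 bytes per source row.
	 * Pixel (xi = 10, yi = 3):
	 *	byte = yi * bpl + xi / 8 = 3 * 2 + 1 = 7
	 *	bit  = 0x80 >> (xi % 8)  = 0x80 >> 2 = 0x20
	 */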
diff --git a/sys/dev/vt/hw/vga/vt_vga.c b/sys/dev/vt/hw/vga/vt_vga.c
index e939fdd..689692e 100644
--- a/sys/dev/vt/hw/vga/vt_vga.c
+++ b/sys/dev/vt/hw/vga/vt_vga.c
@@ -1035,11 +1035,12 @@ vga_initialize_graphics(struct vt_device *vd)
REG_WRITE1(sc, VGA_GC_DATA, 0xff);
}
-static void
+static int
vga_initialize(struct vt_device *vd, int textmode)
{
struct vga_softc *sc = vd->vd_softc;
uint8_t x;
+ int timeout;
/* Make sure the VGA adapter is not in monochrome emulation mode. */
x = REG_READ1(sc, VGA_GEN_MISC_OUTPUT_R);
@@ -1060,10 +1061,16 @@ vga_initialize(struct vt_device *vd, int textmode)
* code therefore also removes that guarantee and appropriate measures
* need to be taken.
*/
+ timeout = 10000;
do {
+ DELAY(10);
x = REG_READ1(sc, VGA_GEN_INPUT_STAT_1);
x &= VGA_GEN_IS1_VR | VGA_GEN_IS1_DE;
- } while (x != (VGA_GEN_IS1_VR | VGA_GEN_IS1_DE));
+ } while (x != (VGA_GEN_IS1_VR | VGA_GEN_IS1_DE) && --timeout != 0);
+ if (timeout == 0) {
+ printf("Timeout initializing vt_vga\n");
+ return (ENXIO);
+ }
/* Now, disable the sync. signals. */
REG_WRITE1(sc, VGA_CRTC_ADDRESS, VGA_CRTC_MODE_CONTROL);
@@ -1194,6 +1201,8 @@ vga_initialize(struct vt_device *vd, int textmode)
*/
sc->vga_curfg = sc->vga_curbg = 0xff;
}
+
+ return (0);
}
static int
@@ -1235,7 +1244,8 @@ vga_init(struct vt_device *vd)
vd->vd_width = VT_VGA_WIDTH;
vd->vd_height = VT_VGA_HEIGHT;
}
- vga_initialize(vd, textmode);
+ if (vga_initialize(vd, textmode) != 0)
+ return (CN_DEAD);
sc->vga_enabled = true;
return (CN_INTERNAL);
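The retrace wait that used to spin forever is now bounded: at worst
10000 iterations with DELAY(10) between polls, i.e. about 100 ms, after
which vga_initialize() returns ENXIO and vga_init() reports CN_DEAD.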
diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c
index 1f8731e..a637055 100644
--- a/sys/dev/vt/vt_core.c
+++ b/sys/dev/vt/vt_core.c
@@ -1029,7 +1029,7 @@ vt_determine_colors(term_char_t c, int cursor,
int
vt_is_cursor_in_area(const struct vt_device *vd, const term_rect_t *area)
{
- unsigned int mx, my, x1, y1, x2, y2;
+ unsigned int mx, my;
/*
* We use the cursor position saved during the current refresh,
@@ -1038,18 +1038,12 @@ vt_is_cursor_in_area(const struct vt_device *vd, const term_rect_t *area)
mx = vd->vd_mx_drawn + vd->vd_curwindow->vw_draw_area.tr_begin.tp_col;
my = vd->vd_my_drawn + vd->vd_curwindow->vw_draw_area.tr_begin.tp_row;
- x1 = area->tr_begin.tp_col;
- y1 = area->tr_begin.tp_row;
- x2 = area->tr_end.tp_col;
- y2 = area->tr_end.tp_row;
-
- if (((mx >= x1 && x2 - 1 >= mx) ||
- (mx < x1 && mx + vd->vd_mcursor->width >= x1)) &&
- ((my >= y1 && y2 - 1 >= my) ||
- (my < y1 && my + vd->vd_mcursor->height >= y1)))
- return (1);
-
- return (0);
+ if (mx >= area->tr_end.tp_col ||
+ mx + vd->vd_mcursor->width <= area->tr_begin.tp_col ||
+ my >= area->tr_end.tp_row ||
+ my + vd->vd_mcursor->height <= area->tr_begin.tp_row)
+ return (0);
+ return (1);
}
static void
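The rewritten cursor test is the standard axis-separation check for
half-open rectangles: the cursor box and the area overlap exactly when
neither lies entirely to one side of the other.  In positive form (a
sketch, with w and h standing for vd_mcursor->width and height):

	overlap = mx < area->tr_end.tp_col && mx + w > area->tr_begin.tp_col &&
	    my < area->tr_end.tp_row && my + h > area->tr_begin.tp_row;

The committed code returns the negation of the four disjointness cases,
which is the same predicate.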
diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c
index e113e2c..a6036d8 100644
--- a/sys/dev/xen/balloon/balloon.c
+++ b/sys/dev/xen/balloon/balloon.c
@@ -118,11 +118,6 @@ current_target(void)
static unsigned long
minimum_target(void)
{
-#ifdef XENHVM
-#define max_pfn realmem
-#else
-#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
-#endif
unsigned long min_pages, curr_pages = current_target();
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
@@ -139,16 +134,15 @@ minimum_target(void)
* 32768 1320
* 131072 4392
*/
- if (max_pfn < MB2PAGES(128))
- min_pages = MB2PAGES(8) + (max_pfn >> 1);
- else if (max_pfn < MB2PAGES(512))
- min_pages = MB2PAGES(40) + (max_pfn >> 2);
- else if (max_pfn < MB2PAGES(2048))
- min_pages = MB2PAGES(104) + (max_pfn >> 3);
+ if (realmem < MB2PAGES(128))
+ min_pages = MB2PAGES(8) + (realmem >> 1);
+ else if (realmem < MB2PAGES(512))
+ min_pages = MB2PAGES(40) + (realmem >> 2);
+ else if (realmem < MB2PAGES(2048))
+ min_pages = MB2PAGES(104) + (realmem >> 3);
else
- min_pages = MB2PAGES(296) + (max_pfn >> 5);
+ min_pages = MB2PAGES(296) + (realmem >> 5);
#undef MB2PAGES
-#undef max_pfn
/* Don't enforce growth */
return (min(min_pages, curr_pages));
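The floors computed here can be checked against the table in the comment.
With PAGE_SHIFT = 12, MB2PAGES(mb) = mb * 256 pages, so for a 32768 MB
domain realmem >= MB2PAGES(2048) and:

	min_pages = MB2PAGES(296) + realmem / 32
	          = MB2PAGES(296 + 32768 / 32)
	          = MB2PAGES(1320)

matching the 1320 MB row shown above.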
@@ -204,12 +198,9 @@ increase_reservation(unsigned long nr_pages)
bs.balloon_low--;
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
- KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
- !phys_to_machine_mapping_valid(pfn)),
+ KASSERT(xen_feature(XENFEAT_auto_translated_physmap),
("auto translated physmap but mapping is valid"));
- set_phys_to_machine(pfn, frame_list[i]);
-
vm_page_free(page);
}
@@ -258,9 +249,8 @@ decrease_reservation(unsigned long nr_pages)
}
pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
- frame_list[i] = PFNTOMFN(pfn);
+ frame_list[i] = pfn;
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
bs.balloon_low++;
}
@@ -393,21 +383,11 @@ static int
xenballoon_attach(device_t dev)
{
int err;
-#ifndef XENHVM
- vm_page_t page;
- unsigned long pfn;
-
-#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
-#endif
mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
-#ifndef XENHVM
- bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
-#else
bs.current_pages = xen_pv_domain() ?
HYPERVISOR_start_info->nr_pages : realmem;
-#endif
bs.target_pages = bs.current_pages;
bs.balloon_low = 0;
bs.balloon_high = 0;
@@ -416,16 +396,6 @@ xenballoon_attach(device_t dev)
kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");
-#ifndef XENHVM
- /* Initialise the balloon with excess memory space. */
- for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
- page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
- TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
- bs.balloon_low++;
- }
-#undef max_pfn
-#endif
-
target_watch.callback = watch_target;
err = xs_register_watch(&target_watch);
diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c
index 1273961..b647fec 100644
--- a/sys/dev/xen/blkback/blkback.c
+++ b/sys/dev/xen/blkback/blkback.c
@@ -742,7 +742,6 @@ struct xbb_softc {
/** Mutex protecting per-instance data. */
struct mtx lock;
-#ifdef XENHVM
/**
* Resource representing allocated physical address space
* associated with our per-instance kva region.
@@ -751,7 +750,6 @@ struct xbb_softc {
/** Resource id for allocated physical address space. */
int pseudo_phys_res_id;
-#endif
/**
* I/O statistics from BlockBack dispatch down. These are
@@ -2818,16 +2816,12 @@ static void
xbb_free_communication_mem(struct xbb_softc *xbb)
{
if (xbb->kva != 0) {
-#ifndef XENHVM
- kva_free(xbb->kva, xbb->kva_size);
-#else
if (xbb->pseudo_phys_res != NULL) {
bus_release_resource(xbb->dev, SYS_RES_MEMORY,
xbb->pseudo_phys_res_id,
xbb->pseudo_phys_res);
xbb->pseudo_phys_res = NULL;
}
-#endif
}
xbb->kva = 0;
xbb->gnt_base_addr = 0;
@@ -3055,12 +3049,6 @@ xbb_alloc_communication_mem(struct xbb_softc *xbb)
DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n",
device_get_nameunit(xbb->dev), xbb->kva_size,
xbb->reqlist_kva_size);
-#ifndef XENHVM
- xbb->kva = kva_alloc(xbb->kva_size);
- if (xbb->kva == 0)
- return (ENOMEM);
- xbb->gnt_base_addr = xbb->kva;
-#else /* XENHVM */
/*
* Reserve a range of pseudo physical memory that we can map
* into kva. These pages will only be backed by machine
@@ -3078,7 +3066,6 @@ xbb_alloc_communication_mem(struct xbb_softc *xbb)
}
xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res);
xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res);
-#endif /* XENHVM */
DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n",
device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva,
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index 665a5ac..2a0d459 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -138,9 +138,7 @@ __FBSDID("$FreeBSD$");
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
-#ifdef XENHVM
#include <xen/hvm.h>
-#endif
#include <xen/interface/event_channel.h>
#include <xen/interface/grant_table.h>
@@ -192,133 +190,6 @@ xctrl_reboot()
shutdown_nice(0);
}
-#ifndef XENHVM
-extern void xencons_suspend(void);
-extern void xencons_resume(void);
-
-/* Full PV mode suspension. */
-static void
-xctrl_suspend()
-{
- int i, j, k, fpp, suspend_cancelled;
- unsigned long max_pfn, start_info_mfn;
-
- EVENTHANDLER_INVOKE(power_suspend);
-
-#ifdef SMP
- struct thread *td;
- cpuset_t map;
- u_int cpuid;
-
- /*
- * Bind us to CPU 0 and stop any other VCPUs.
- */
- td = curthread;
- thread_lock(td);
- sched_bind(td, 0);
- thread_unlock(td);
- cpuid = PCPU_GET(cpuid);
- KASSERT(cpuid == 0, ("xen_suspend: not running on cpu 0"));
-
- map = all_cpus;
- CPU_CLR(cpuid, &map);
- CPU_NAND(&map, &stopped_cpus);
- if (!CPU_EMPTY(&map))
- stop_cpus(map);
-#endif
-
- /*
- * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
- * drivers need this.
- */
- mtx_lock(&Giant);
- if (DEVICE_SUSPEND(root_bus) != 0) {
- mtx_unlock(&Giant);
- printf("%s: device_suspend failed\n", __func__);
-#ifdef SMP
- if (!CPU_EMPTY(&map))
- restart_cpus(map);
-#endif
- return;
- }
- mtx_unlock(&Giant);
-
- local_irq_disable();
-
- xencons_suspend();
- gnttab_suspend();
- intr_suspend();
-
- max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
-
- void *shared_info = HYPERVISOR_shared_info;
- HYPERVISOR_shared_info = NULL;
- pmap_kremove((vm_offset_t) shared_info);
- PT_UPDATES_FLUSH();
-
- xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn);
- xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn);
-
- /*
- * We'll stop somewhere inside this hypercall. When it returns,
- * we'll start resuming after the restore.
- */
- start_info_mfn = VTOMFN(xen_start_info);
- pmap_suspend();
- suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
- pmap_resume();
-
- pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
- HYPERVISOR_shared_info = shared_info;
-
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- VTOMFN(xen_pfn_to_mfn_frame_list_list);
-
- fpp = PAGE_SIZE/sizeof(unsigned long);
- for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
- if ((j % fpp) == 0) {
- k++;
- xen_pfn_to_mfn_frame_list_list[k] =
- VTOMFN(xen_pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- xen_pfn_to_mfn_frame_list[k][j] =
- VTOMFN(&xen_phys_machine[i]);
- }
- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
-
- gnttab_resume(NULL);
- intr_resume(suspend_cancelled != 0);
- local_irq_enable();
- xencons_resume();
-
-#ifdef CONFIG_SMP
- for_each_cpu(i)
- vcpu_prepare(i);
-
-#endif
-
- /*
- * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
- * the VCPU hotplug callback can race with our vcpu_prepare
- */
- mtx_lock(&Giant);
- DEVICE_RESUME(root_bus);
- mtx_unlock(&Giant);
-
-#ifdef SMP
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- if (!CPU_EMPTY(&map))
- restart_cpus(map);
-#endif
- EVENTHANDLER_INVOKE(power_resume);
-}
-
-#else
-
-/* HVM mode suspension. */
static void
xctrl_suspend()
{
@@ -417,7 +288,6 @@ xctrl_suspend()
printf("System resumed after suspension\n");
}
-#endif
static void
xctrl_crash()
diff --git a/sys/dev/xen/grant_table/grant_table.c b/sys/dev/xen/grant_table/grant_table.c
index 2511657..ad65fe0 100644
--- a/sys/dev/xen/grant_table/grant_table.c
+++ b/sys/dev/xen/grant_table/grant_table.c
@@ -53,7 +53,6 @@ static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static struct mtx gnttab_list_lock;
-#ifdef XENHVM
/*
* Resource representing allocated physical address space
* for the grant table metainfo
@@ -62,7 +61,6 @@ static struct resource *gnttab_pseudo_phys_res;
/* Resource id for allocated physical address space. */
static int gnttab_pseudo_phys_res_id;
-#endif
static grant_entry_t *shared;
@@ -510,72 +508,6 @@ unmap_pte_fn(pte_t *pte, struct page *pmd_page,
}
#endif
-#ifndef XENHVM
-
-static int
-gnttab_map(unsigned int start_idx, unsigned int end_idx)
-{
- struct gnttab_setup_table setup;
- u_long *frames;
-
- unsigned int nr_gframes = end_idx + 1;
- int i, rc;
-
- frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT);
- if (!frames)
- return (ENOMEM);
-
- setup.dom = DOMID_SELF;
- setup.nr_frames = nr_gframes;
- set_xen_guest_handle(setup.frame_list, frames);
-
- rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
- if (rc == -ENOSYS) {
- free(frames, M_DEVBUF);
- return (ENOSYS);
- }
- KASSERT(!(rc || setup.status),
- ("unexpected result from grant_table_op"));
-
- if (shared == NULL) {
- vm_offset_t area;
-
- area = kva_alloc(PAGE_SIZE * max_nr_grant_frames());
- KASSERT(area, ("can't allocate VM space for grant table"));
- shared = (grant_entry_t *)area;
- }
-
- for (i = 0; i < nr_gframes; i++)
- PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE,
- ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V);
-
- free(frames, M_DEVBUF);
-
- return (0);
-}
-
-int
-gnttab_resume(device_t dev)
-{
-
- if (max_nr_grant_frames() < nr_grant_frames)
- return (ENOSYS);
- return (gnttab_map(0, nr_grant_frames - 1));
-}
-
-int
-gnttab_suspend(void)
-{
- int i;
-
- for (i = 0; i < nr_grant_frames; i++)
- pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE);
-
- return (0);
-}
-
-#else /* XENHVM */
-
static vm_paddr_t resume_frames;
static int
@@ -638,8 +570,6 @@ gnttab_resume(device_t dev)
return (gnttab_map(0, nr_gframes - 1));
}
-#endif
-
static int
gnttab_expand(unsigned int req_entries)
{
diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c
index 63337ad..b5c1c13 100644
--- a/sys/dev/xen/netback/netback.c
+++ b/sys/dev/xen/netback/netback.c
@@ -473,7 +473,6 @@ struct xnb_softc {
*/
gnttab_copy_table tx_gnttab;
-#ifdef XENHVM
/**
* Resource representing allocated physical address space
* associated with our per-instance kva region.
@@ -482,7 +481,6 @@ struct xnb_softc {
/** Resource id for allocated physical address space. */
int pseudo_phys_res_id;
-#endif
/** Ring mapping and interrupt configuration data. */
struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES];
@@ -626,16 +624,12 @@ static void
xnb_free_communication_mem(struct xnb_softc *xnb)
{
if (xnb->kva != 0) {
-#ifndef XENHVM
- kva_free(xnb->kva, xnb->kva_size);
-#else
if (xnb->pseudo_phys_res != NULL) {
bus_release_resource(xnb->dev, SYS_RES_MEMORY,
xnb->pseudo_phys_res_id,
xnb->pseudo_phys_res);
xnb->pseudo_phys_res = NULL;
}
-#endif /* XENHVM */
}
xnb->kva = 0;
xnb->gnt_base_addr = 0;
@@ -816,12 +810,7 @@ xnb_alloc_communication_mem(struct xnb_softc *xnb)
for (i=0; i < XNB_NUM_RING_TYPES; i++) {
xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE;
}
-#ifndef XENHVM
- xnb->kva = kva_alloc(xnb->kva_size);
- if (xnb->kva == 0)
- return (ENOMEM);
- xnb->gnt_base_addr = xnb->kva;
-#else /* defined XENHVM */
+
/*
* Reserve a range of pseudo physical memory that we can map
* into kva. These pages will only be backed by machine
@@ -840,7 +829,6 @@ xnb_alloc_communication_mem(struct xnb_softc *xnb)
}
xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res);
xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res);
-#endif /* !defined XENHVM */
return (0);
}
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c
index 27900ba..3c1f952 100644
--- a/sys/dev/xen/netfront/netfront.c
+++ b/sys/dev/xen/netfront/netfront.c
@@ -879,12 +879,11 @@ refill:
if (sc->copying_receiver == 0) {
gnttab_grant_foreign_transfer_ref(ref,
otherend_id, pfn);
- sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
+ sc->rx_pfn_array[nr_flips] = pfn;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Remove this page before passing
* back to Xen.
*/
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
MULTI_update_va_mapping(&sc->rx_mcl[i],
vaddr, 0, 0);
}
@@ -892,7 +891,7 @@ refill:
} else {
gnttab_grant_foreign_access_ref(ref,
otherend_id,
- PFNTOMFN(pfn), 0);
+ pfn, 0);
}
req->id = id;
req->gref = ref;
@@ -907,7 +906,6 @@ refill:
* We may have allocated buffers which have entries outstanding
* in the page * update queue -- make sure we flush those first!
*/
- PT_UPDATES_FLUSH();
if (nr_flips != 0) {
#ifdef notyet
/* Tell the ballon driver what is going on. */
@@ -1361,8 +1359,6 @@ xennet_get_responses(struct netfront_info *np,
mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
MMU_MACHPHYS_UPDATE;
mmu->val = pfn;
-
- set_phys_to_machine(pfn, mfn);
}
pages_flipped++;
} else {
@@ -1927,7 +1923,7 @@ network_connect(struct netfront_info *np)
} else {
gnttab_grant_foreign_access_ref(ref,
xenbus_get_otherend_id(np->xbdev),
- PFNTOMFN(pfn), 0);
+ pfn, 0);
}
req->gref = ref;
req->id = requeue_idx;
diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h
index 9f08854..42a7f94 100644
--- a/sys/fs/nfs/nfsport.h
+++ b/sys/fs/nfs/nfsport.h
@@ -950,7 +950,7 @@ struct nfsreq {
};
#ifndef NFS_MAXBSIZE
-#define NFS_MAXBSIZE MAXBSIZE
+#define NFS_MAXBSIZE MAXBCACHEBUF
#endif
/*
diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c
index 964ea1f..73c6eb6 100644
--- a/sys/fs/nfsclient/nfs_clvfsops.c
+++ b/sys/fs/nfsclient/nfs_clvfsops.c
@@ -201,16 +201,16 @@ newnfs_iosize(struct nfsmount *nmp)
}
if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
nmp->nm_rsize = maxio;
- if (nmp->nm_rsize > MAXBSIZE)
- nmp->nm_rsize = MAXBSIZE;
+ if (nmp->nm_rsize > NFS_MAXBSIZE)
+ nmp->nm_rsize = NFS_MAXBSIZE;
if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
nmp->nm_readdirsize = maxio;
if (nmp->nm_readdirsize > nmp->nm_rsize)
nmp->nm_readdirsize = nmp->nm_rsize;
if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
nmp->nm_wsize = maxio;
- if (nmp->nm_wsize > MAXBSIZE)
- nmp->nm_wsize = MAXBSIZE;
+ if (nmp->nm_wsize > NFS_MAXBSIZE)
+ nmp->nm_wsize = NFS_MAXBSIZE;
/*
* Calculate the size used for io buffers. Use the larger
diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c
index 4fb9c93..f9b8eb8 100644
--- a/sys/fs/nfsserver/nfs_nfsdkrpc.c
+++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c
@@ -117,7 +117,8 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
memset(&nd, 0, sizeof(nd));
if (rqst->rq_vers == NFS_VER2) {
- if (rqst->rq_proc > NFSV2PROC_STATFS) {
+ if (rqst->rq_proc > NFSV2PROC_STATFS ||
+ newnfs_nfsv3_procid[rqst->rq_proc] == NFSPROC_NOOP) {
svcerr_noproc(rqst);
svc_freereq(rqst);
goto out;
diff --git a/sys/geom/uncompress/g_uncompress.c b/sys/geom/uncompress/g_uncompress.c
index d819371..8c2d5cb 100644
--- a/sys/geom/uncompress/g_uncompress.c
+++ b/sys/geom/uncompress/g_uncompress.c
@@ -45,10 +45,10 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/systm.h>
+#include <sys/zlib.h>
#include <geom/geom.h>
-#include <net/zlib.h>
#include <contrib/xz-embedded/linux/include/linux/xz.h>
#ifdef GEOM_UNCOMPRESS_DEBUG
diff --git a/sys/geom/uzip/g_uzip.c b/sys/geom/uzip/g_uzip.c
index c2ed64b..732de9d 100644
--- a/sys/geom/uzip/g_uzip.c
+++ b/sys/geom/uzip/g_uzip.c
@@ -38,9 +38,9 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
+#include <sys/zlib.h>
#include <geom/geom.h>
-#include <net/zlib.h>
FEATURE(geom_uzip, "GEOM uzip read-only compressed disks support");
diff --git a/sys/i386/conf/DEFAULTS b/sys/i386/conf/DEFAULTS
index 78d807c..d5bdb1d 100644
--- a/sys/i386/conf/DEFAULTS
+++ b/sys/i386/conf/DEFAULTS
@@ -26,7 +26,6 @@ options GEOM_PART_EBR_COMPAT
options GEOM_PART_MBR
# enable support for native hardware
-options NATIVE
device atpic
options NEW_PCIB
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
index 086844a..b1740f3 100644
--- a/sys/i386/conf/GENERIC
+++ b/sys/i386/conf/GENERIC
@@ -358,7 +358,9 @@ device virtio_blk # VirtIO Block device
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
-# HyperV drivers
+# HyperV drivers and enhancement support
+# NOTE: HYPERV depends on hyperv. They must be added or removed together.
+options HYPERV # Hyper-V kernel infrastructure
device hyperv # HyperV drivers
# Xen HVM Guest Optimizations
diff --git a/sys/i386/conf/XEN b/sys/i386/conf/XEN
deleted file mode 100644
index dd83670..0000000
--- a/sys/i386/conf/XEN
+++ /dev/null
@@ -1,96 +0,0 @@
-#
-# XEN -- Kernel configuration for i386 XEN DomU
-#
-# $FreeBSD$
-
-cpu I686_CPU
-ident XEN
-
-makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
-
-# The following drivers don't build with PAE or XEN enabled.
-makeoptions WITHOUT_MODULES="ctl dpt drm drm2 hptmv ida"
-
-# The following drivers don't work with PAE enabled.
-makeoptions WITHOUT_MODULES+="ncr pst"
-
-options SCHED_ULE # ULE scheduler
-options PREEMPTION # Enable kernel thread preemption
-
-options INET # InterNETworking
-options INET6 # IPv6 communications protocols
-options SCTP # Stream Control Transmission Protocol
-options FFS # Berkeley Fast Filesystem
-options SOFTUPDATES # Enable FFS soft updates support
-options UFS_ACL # Support for access control lists
-options UFS_DIRHASH # Improve performance on big directories
-options UFS_GJOURNAL # Enable gjournal-based UFS journaling
-options NFSCL # Network Filesystem Client
-options NFSD # Network Filesystem Server
-options NFSLOCKD # Network Lock Manager
-options NFS_ROOT # NFS usable as /, requires NFSCL
-options MSDOSFS # MSDOS Filesystem
-options CD9660 # ISO 9660 Filesystem
-options PROCFS # Process filesystem (requires PSEUDOFS)
-options PSEUDOFS # Pseudo-filesystem framework
-options GEOM_PART_GPT # GUID Partition Tables.
-options GEOM_LABEL # Provides labelization
-options COMPAT_FREEBSD4 # Compatible with FreeBSD4
-options COMPAT_FREEBSD5 # Compatible with FreeBSD5
-options COMPAT_FREEBSD6 # Compatible with FreeBSD6
-options COMPAT_FREEBSD7 # Compatible with FreeBSD7
-options COMPAT_FREEBSD9 # Compatible with FreeBSD9
-options COMPAT_FREEBSD10 # Compatible with FreeBSD10
-options KTRACE # ktrace(1) support
-options STACK # stack(9) support
-options SYSVSHM # SYSV-style shared memory
-options SYSVMSG # SYSV-style message queues
-options SYSVSEM # SYSV-style semaphores
-options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
-options KBD_INSTALL_CDEV # install a CDEV entry in /dev
-options AUDIT # Security event auditing
-
-# Debugging for use in -current
-options KDB # Enable kernel debugger support.
-options DDB # Support DDB.
-options GDB # Support remote GDB.
-options DEADLKRES # Enable the deadlock resolver
-options INVARIANTS # Enable calls of extra sanity checking
-options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
-options WITNESS # Enable checks to detect deadlocks and cycles
-options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
-
-options PAE
-nooption NATIVE
-option XEN
-nodevice atpic
-nodevice isa
-options MCLSHIFT=12
-
-# To make an SMP kernel, the next two lines are needed
-options SMP # Symmetric MultiProcessor Kernel
-device apic # I/O APIC
-
-#device atkbdc # AT keyboard controller
-#device atkbd # AT keyboard
-device psm # PS/2 mouse
-device pci
-
-#device kbdmux # keyboard multiplexer
-
-# Pseudo devices.
-device loop # Network loopback
-device random # Entropy device
-device ether # Ethernet support
-device tun # Packet tunnel.
-device md # Memory "disks"
-device gif # IPv6 and IPv4 tunneling
-
-# Wireless cards
-options IEEE80211_SUPPORT_MESH
-options AH_SUPPORT_AR5416
-
-# The `bpf' device enables the Berkeley Packet Filter.
-# Be aware of the administrative consequences of enabling this!
-# Note that 'bpf' is required for DHCP.
-device bpf # Berkeley packet filter
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index 316bc7a..18b3c5d 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -181,6 +181,25 @@ IDTVEC(xen_intr_upcall)
jmp doreti
#endif
+#ifdef HYPERV
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when the kernel is running on Hyper-V.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(hv_vmbus_callback)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
+ FAKE_MCOUNT(TF_EIP(%esp))
+ pushl %esp
+ call hv_vector_handler
+ add $4, %esp
+ MEXITCOUNT
+ jmp doreti
+#endif
+
#ifdef SMP
/*
* Global address space TLB shootdown.
@@ -247,7 +266,6 @@ IDTVEC(invlcache)
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.
*/
-#ifndef XEN
.text
SUPERALIGN_TEXT
IDTVEC(ipi_intr_bitmap_handler)
@@ -262,7 +280,7 @@ IDTVEC(ipi_intr_bitmap_handler)
call ipi_bitmap_handler
MEXITCOUNT
jmp doreti
-#endif
+
/*
* Executed by a CPU when it receives an IPI_STOP from another CPU.
*/
@@ -282,7 +300,6 @@ IDTVEC(cpustop)
/*
* Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
*/
-#ifndef XEN
.text
SUPERALIGN_TEXT
IDTVEC(cpususpend)
@@ -295,7 +312,6 @@ IDTVEC(cpususpend)
POP_FRAME
jmp doreti_iret
-#endif
/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 97e2e97..7a00740 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -238,11 +238,6 @@ ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base));
ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
#endif
-#ifdef XEN
-ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
-ASSYM(XEN_HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_START);
-#endif
-
#ifdef HWPMC_HOOKS
ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);
#endif
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 72f7685..a5867c4 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -160,24 +160,6 @@ int arch_i386_is_xbox = 0;
uint32_t arch_i386_xbox_memsize = 0;
#endif
-#ifdef XEN
-/* XEN includes */
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/xen_intr.h>
-
-void Xhypervisor_callback(void);
-void failsafe_callback(void);
-
-extern trap_info_t trap_table[];
-struct proc_ldt default_proc_ldt;
-extern int init_first;
-int running_xen = 1;
-extern unsigned long physfree;
-#endif /* XEN */
-
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
@@ -356,9 +338,7 @@ cpu_startup(dummy)
*/
bufinit();
vm_pager_bufferinit();
-#ifndef XEN
cpu_setregs();
-#endif
}
/*
@@ -1291,13 +1271,8 @@ SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
int _default_ldt;
-#ifdef XEN
-union descriptor *gdt;
-union descriptor *ldt;
-#else
union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
union descriptor ldt[NLDT]; /* local descriptor table */
-#endif
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
struct region_descriptor r_gdt, r_idt; /* table descriptors */
@@ -1397,7 +1372,6 @@ struct soft_segment_descriptor gdt_segs[] = {
.ssd_xx = 0, .ssd_xx1 = 0,
.ssd_def32 = 1,
.ssd_gran = 1 },
-#ifndef XEN
/* GPROC0_SEL 9 Proc 0 Tss Descriptor */
{
.ssd_base = 0x0,
@@ -1489,7 +1463,6 @@ struct soft_segment_descriptor gdt_segs[] = {
.ssd_xx = 0, .ssd_xx1 = 0,
.ssd_def32 = 0,
.ssd_gran = 0 },
-#endif /* !XEN */
};
static struct soft_segment_descriptor ldt_segs[] = {
@@ -1641,7 +1614,7 @@ sdtossd(sd, ssd)
ssd->ssd_gran = sd->sd_gran;
}
-#if !defined(PC98) && !defined(XEN)
+#if !defined(PC98)
static int
add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap,
int *physmap_idxp)
@@ -1748,9 +1721,8 @@ add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
if (!add_smap_entry(smap, physmap, physmap_idxp))
break;
}
-#endif /* !PC98 && !XEN */
+#endif /* !PC98 */
-#ifndef XEN
static void
basemem_setup(void)
{
@@ -1798,7 +1770,6 @@ basemem_setup(void)
for (i = basemem / 4; i < 160; i++)
pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
}
-#endif /* !XEN */
/*
* Populate the (physmap) array with base/bound pairs describing the
@@ -2074,8 +2045,6 @@ do_next:
for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
off);
-
- PT_UPDATES_FLUSH();
}
#else /* PC98 */
static void
@@ -2086,7 +2055,6 @@ getmemsize(int first)
vm_paddr_t physmap[PHYSMAP_SIZE];
pt_entry_t *pte;
quad_t dcons_addr, dcons_size, physmem_tunable;
-#ifndef XEN
int hasbrokenint12, i, res;
u_int extmem;
struct vm86frame vmf;
@@ -2094,17 +2062,8 @@ getmemsize(int first)
vm_paddr_t pa;
struct bios_smap *smap, *smapbase;
caddr_t kmdp;
-#endif
has_smap = 0;
-#if defined(XEN)
- Maxmem = xen_start_info->nr_pages - init_first;
- physmem = Maxmem;
- basemem = 0;
- physmap[0] = init_first << PAGE_SHIFT;
- physmap[1] = ptoa(Maxmem) - round_page(msgbufsize);
- physmap_idx = 0;
-#else
#ifdef XBOX
if (arch_i386_is_xbox) {
/*
@@ -2247,7 +2206,6 @@ have_smap:
physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
physmap_done:
-#endif
/*
* Now, physmap contains a map of physical memory.
*/
@@ -2321,7 +2279,6 @@ physmap_done:
getenv_quad("dcons.size", &dcons_size) == 0)
dcons_addr = 0;
-#ifndef XEN
/*
* physmap is in bytes, so when converting to page boundaries,
* round up the start address and round down the end address.
@@ -2442,13 +2399,6 @@ do_next:
}
*pte = 0;
invltlb();
-#else
- phys_avail[0] = physfree;
- phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
- dump_avail[0] = 0;
- dump_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
-
-#endif
/*
* XXX
@@ -2472,272 +2422,9 @@ do_next:
for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE)
pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] +
off);
-
- PT_UPDATES_FLUSH();
}
#endif /* PC98 */
-#ifdef XEN
-#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
-
-register_t
-init386(first)
- int first;
-{
- unsigned long gdtmachpfn;
- int error, gsel_tss, metadata_missing, x, pa;
- struct pcpu *pc;
-#ifdef CPU_ENABLE_SSE
- struct xstate_hdr *xhdr;
-#endif
- struct callback_register event = {
- .type = CALLBACKTYPE_event,
- .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
- };
- struct callback_register failsafe = {
- .type = CALLBACKTYPE_failsafe,
- .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback },
- };
-
- thread0.td_kstack = proc0kstack;
- thread0.td_kstack_pages = KSTACK_PAGES;
-
- /*
- * This may be done better later if it gets more high level
- * components in it. If so just link td->td_proc here.
- */
- proc_linkup0(&proc0, &thread0);
-
- metadata_missing = 0;
- if (xen_start_info->mod_start) {
- preload_metadata = (caddr_t)xen_start_info->mod_start;
- preload_bootstrap_relocate(KERNBASE);
- } else {
- metadata_missing = 1;
- }
- if (envmode == 1)
- kern_envp = static_env;
- else if ((caddr_t)xen_start_info->cmd_line)
- kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);
-
- boothowto |= xen_boothowto(kern_envp);
-
- /* Init basic tunables, hz etc */
- init_param1();
-
- /*
- * XEN occupies a portion of the upper virtual address space
- * At its base it manages an array mapping machine page frames
- * to physical page frames - hence we need to be able to
- * access 4GB - (64MB - 4MB + 64k)
- */
- gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
- gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
-
- pc = &__pcpu[0];
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
-
- PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW);
- bzero(gdt, PAGE_SIZE);
- for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &gdt[x].sd);
-
- mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
-
- gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
- PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V);
- PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
- lgdt(&r_gdt);
- gdtset = 1;
-
- if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
- panic("set_trap_table failed - error %d\n", error);
- }
-
- error = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
- if (error == 0)
- error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
-#if CONFIG_XEN_COMPAT <= 0x030002
- if (error == -ENOXENSYS)
- HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL),
- (unsigned long)Xhypervisor_callback,
- GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
-#endif
- pcpu_init(pc, 0, sizeof(struct pcpu));
- for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
- pmap_kenter(pa + KERNBASE, pa);
- dpcpu_init((void *)(first + KERNBASE), 0);
- first += DPCPU_SIZE;
- physfree += DPCPU_SIZE;
- init_first += DPCPU_SIZE / PAGE_SIZE;
-
- PCPU_SET(prvspace, pc);
- PCPU_SET(curthread, &thread0);
-
- /*
- * Initialize mutexes.
- *
- * icu_lock: in order to allow an interrupt to occur in a critical
- * section, to set pcpu->ipending (etc...) properly, we
- * must be able to get the icu lock, so it can't be
- * under witness.
- */
- mutex_init();
- mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
-
- /* make ldt memory segments */
- PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW);
- bzero(ldt, PAGE_SIZE);
- ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
- ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
- for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
- ssdtosd(&ldt_segs[x], &ldt[x].sd);
-
- default_proc_ldt.ldt_base = (caddr_t)ldt;
- default_proc_ldt.ldt_len = 6;
- _default_ldt = (int)&default_proc_ldt;
- PCPU_SET(currentldt, _default_ldt);
- PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
- xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
-
-#if defined(XEN_PRIVILEGED)
- /*
- * Initialize the i8254 before the console so that console
- * initialization can use DELAY().
- */
- i8254_init();
-#endif
-
- /*
- * Initialize the console before we print anything out.
- */
- cninit();
-
- if (metadata_missing)
- printf("WARNING: loader(8) metadata is missing!\n");
-
-#ifdef DEV_ISA
-#ifdef DEV_ATPIC
- elcr_probe();
- atpic_startup();
-#else
- /* Reset and mask the atpics and leave them shut down. */
- atpic_reset();
-
- /*
- * Point the ICU spurious interrupt vectors at the APIC spurious
- * interrupt handler.
- */
- setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
-#endif
-#endif
-
-#ifdef DDB
- db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab);
-#endif
-
- kdb_init();
-
-#ifdef KDB
- if (boothowto & RB_KDB)
- kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
-#endif
-
- finishidentcpu(); /* Final stage of CPU initialization */
- setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- initializecpu(); /* Initialize CPU registers */
- initializecpucache();
-
- /* pointer to selector slot for %fs/%gs */
- PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
- dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
- dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
- dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
- dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#if defined(PAE) || defined(PAE_TABLES)
- dblfault_tss.tss_cr3 = (int)IdlePDPT;
-#else
- dblfault_tss.tss_cr3 = (int)IdlePTD;
-#endif
- dblfault_tss.tss_eip = (int)dblfault_handler;
- dblfault_tss.tss_eflags = PSL_KERNEL;
- dblfault_tss.tss_ds = dblfault_tss.tss_es =
- dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
- dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
- dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
- dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
-
- vm86_initialize();
- getmemsize(first);
- init_param2(physmem);
-
- /* now running on new page tables, configured,and u/iom is accessible */
-
- msgbufinit(msgbufp, msgbufsize);
-#ifdef DEV_NPX
- npxinit(true);
-#endif
- /*
- * Set up thread0 pcb after npxinit calculated pcb + fpu save
- * area size. Zero out the extended state header in fpu save
- * area.
- */
- thread0.td_pcb = get_pcb_td(&thread0);
- bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
-#ifdef CPU_ENABLE_SSE
- if (use_xsave) {
- xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
- 1);
- xhdr->xstate_bv = xsave_mask;
- }
-#endif
- PCPU_SET(curpcb, thread0.td_pcb);
- /* make an initial tss so cpu can get interrupt stack on syscall! */
- /* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
- PCPU_GET(common_tss.tss_esp0));
-
- /* transfer to user mode */
-
- _ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
- _udatasel = GSEL(GUDATA_SEL, SEL_UPL);
-
- /* setup proc 0's pcb */
- thread0.td_pcb->pcb_flags = 0;
-#if defined(PAE) || defined(PAE_TABLES)
- thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
-#else
- thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
-#endif
- thread0.td_pcb->pcb_ext = 0;
- thread0.td_frame = &proc0_tf;
- thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
- thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
-
- cpu_probe_amdc1e();
-
- /* Location of kernel stack for locore */
- return ((register_t)thread0.td_pcb);
-}
-
-#else
register_t
init386(first)
int first;
@@ -3061,7 +2748,6 @@ init386(first)
/* Location of kernel stack for locore */
return ((register_t)thread0.td_pcb);
}
-#endif
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
diff --git a/sys/i386/i386/minidump_machdep.c b/sys/i386/i386/minidump_machdep.c
index a7d2a69..8f78abc 100644
--- a/sys/i386/i386/minidump_machdep.c
+++ b/sys/i386/i386/minidump_machdep.c
@@ -68,10 +68,6 @@ static void *dump_va;
static uint64_t counter, progress;
CTASSERT(sizeof(*vm_page_dump) == 4);
-#ifndef XEN
-#define xpmap_mtop(x) (x)
-#define xpmap_ptom(x) (x)
-#endif
static int
@@ -205,7 +201,7 @@ minidumpsys(struct dumperinfo *di)
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
- pa = xpmap_mtop(pd[j] & PG_PS_FRAME);
+ pa = pd[j] & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
if (is_dumpable(pa))
dump_add_page(pa);
@@ -215,10 +211,10 @@ minidumpsys(struct dumperinfo *di)
}
if ((pd[j] & PG_V) == PG_V) {
/* set bit for each valid page in this 2MB block */
- pt = pmap_kenter_temporary(xpmap_mtop(pd[j] & PG_FRAME), 0);
+ pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0);
for (k = 0; k < NPTEPG; k++) {
if ((pt[k] & PG_V) == PG_V) {
- pa = xpmap_mtop(pt[k] & PG_FRAME);
+ pa = pt[k] & PG_FRAME;
if (is_dumpable(pa))
dump_add_page(pa);
}
@@ -318,24 +314,8 @@ minidumpsys(struct dumperinfo *di)
continue;
}
if ((pd[j] & PG_V) == PG_V) {
- pa = xpmap_mtop(pd[j] & PG_FRAME);
-#ifndef XEN
+ pa = pd[j] & PG_FRAME;
error = blk_write(di, 0, pa, PAGE_SIZE);
-#else
- pt = pmap_kenter_temporary(pa, 0);
- memcpy(fakept, pt, PAGE_SIZE);
- for (i = 0; i < NPTEPG; i++)
- fakept[i] = xpmap_mtop(fakept[i]);
- error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
- if (error)
- goto fail;
- /* flush, in case we reuse fakept in the same block */
- error = blk_flush(di);
- if (error)
- goto fail;
- bzero(fakept, sizeof(fakept));
-#endif
-
if (error)
goto fail;
} else {
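
With the xpmap_mtop() machine-to-physical indirection dropped, the minidump walker reads physical frames straight out of the page-table entries. A minimal user-space sketch of that masking, assuming illustrative non-PAE i386 constants and a made-up entry:

#include <stdint.h>
#include <stdio.h>

#define PG_V     0x001u        /* entry is valid */
#define PG_PS    0x080u        /* PDE maps a superpage */
#define PG_FRAME 0xfffff000u   /* 4KB frame bits */

int main(void)
{
        uint32_t pte = 0x12345067;  /* hypothetical entry: frame | flags */

        if ((pte & PG_V) == PG_V)
                printf("pa = 0x%08x\n", pte & PG_FRAME);   /* 0x12345000 */
        return (0);
}
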
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index b396752..bad69ee 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -130,331 +130,23 @@ __FBSDID("$FreeBSD$");
#endif /* CHECK_POINTS */
-/* lock region used by kernel profiling */
-int mcount_lock;
-
-int mp_naps; /* # of Applications processors */
-int boot_cpu_id = -1; /* designated BSP */
-
extern struct pcpu __pcpu[];
-/* AP uses this during bootstrap. Do not staticize. */
-char *bootSTK;
-static int bootAP;
-
-/* Free these after use */
-void *bootstacks[MAXCPU];
-static void *dpcpu;
-
-struct pcb stoppcbs[MAXCPU];
-struct susppcb **susppcbs;
-
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
-#ifdef COUNT_IPIS
-/* Interrupt counts. */
-static u_long *ipi_preempt_counts[MAXCPU];
-static u_long *ipi_ast_counts[MAXCPU];
-u_long *ipi_invltlb_counts[MAXCPU];
-u_long *ipi_invlrng_counts[MAXCPU];
-u_long *ipi_invlpg_counts[MAXCPU];
-u_long *ipi_invlcache_counts[MAXCPU];
-u_long *ipi_rendezvous_counts[MAXCPU];
-static u_long *ipi_hardclock_counts[MAXCPU];
-#endif
-
-/* Default cpu_ops implementation. */
-struct cpu_ops cpu_ops;
-
/*
* Local data and functions.
*/
-static volatile cpuset_t ipi_nmi_pending;
-
-/* used to hold the APs until we are ready to release them */
-static struct mtx ap_boot_mtx;
-
-/* Set to 1 once we're ready to let the APs out of the pen. */
-static volatile int aps_ready = 0;
-
-/*
- * Store data from cpu_add() until later in the boot when we actually setup
- * the APs.
- */
-struct cpu_info {
- int cpu_present:1;
- int cpu_bsp:1;
- int cpu_disabled:1;
- int cpu_hyperthread:1;
-} static cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
-int apic_cpuids[MAX_APIC_ID + 1];
-
-/* Holds pending bitmap based IPIs per CPU */
-volatile u_int cpu_ipi_pending[MAXCPU];
-
-static u_int boot_address;
-static int cpu_logical; /* logical cpus per core */
-static int cpu_cores; /* cores per package */
-
-static void assign_cpu_ids(void);
static void install_ap_tramp(void);
-static void set_interrupt_apic_ids(void);
static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
-static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
-static int hyperthreading_allowed = 1;
-
-static void
-mem_range_AP_init(void)
-{
- if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
- mem_range_softc.mr_op->initAP(&mem_range_softc);
-}
-
-static void
-topo_probe_amd(void)
-{
- int core_id_bits;
- int id;
-
- /* AMD processors do not support HTT. */
- cpu_logical = 1;
-
- if ((amd_feature2 & AMDID2_CMP) == 0) {
- cpu_cores = 1;
- return;
- }
-
- core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
- AMDID_COREID_SIZE_SHIFT;
- if (core_id_bits == 0) {
- cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
- return;
- }
-
- /* Fam 10h and newer should get here. */
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
- continue;
- cpu_cores++;
- }
-}
-
-/*
- * Round up to the next power of two, if necessary, and then
- * take log2.
- * Returns -1 if argument is zero.
- */
-static __inline int
-mask_width(u_int x)
-{
-
- return (fls(x << (1 - powerof2(x))) - 1);
-}
-
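
The mask_width() helper above compresses two steps into one expression: shift x left once unless it is already a power of two (or zero), then take fls() - 1, yielding ceil(log2(x)) and -1 for zero. A standalone sketch, with fls() and powerof2() reimplemented since the kernel takes them from its own headers:

#include <stdio.h>

#define powerof2(x) ((((x) - 1) & (x)) == 0)

static int
fls(unsigned int x)
{
        /* index of the highest set bit, 1-based; 0 for x == 0 */
        return (x == 0 ? 0 : 32 - __builtin_clz(x));
}

static int
mask_width(unsigned int x)
{
        return (fls(x << (1 - powerof2(x))) - 1);
}

int main(void)
{
        printf("%d %d %d\n", mask_width(0), mask_width(4), mask_width(6));
        /* prints: -1 2 3 -- 6 rounds up to 8, and log2(8) == 3 */
        return (0);
}
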
-static void
-topo_probe_0x4(void)
-{
- u_int p[4];
- int pkg_id_bits;
- int core_id_bits;
- int max_cores;
- int max_logical;
- int id;
-
- /* Both zero and one here mean one logical processor per package. */
- max_logical = (cpu_feature & CPUID_HTT) != 0 ?
- (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
- if (max_logical <= 1)
- return;
-
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP. Further, we count number of
- * logical processors that belong to the same core
- * as BSP thus deducing number of threads per core.
- */
- if (cpu_high >= 0x4) {
- cpuid_count(0x04, 0, p);
- max_cores = ((p[0] >> 26) & 0x3f) + 1;
- } else
- max_cores = 1;
- core_id_bits = mask_width(max_logical/max_cores);
- if (core_id_bits < 0)
- return;
- pkg_id_bits = core_id_bits + mask_width(max_cores);
-
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
- continue;
- cpu_cores++;
- /* Check if logical CPU has the same package and core IDs. */
- if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
- cpu_logical++;
- }
-
- KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
- ("topo_probe_0x4 couldn't find BSP"));
-
- cpu_cores /= cpu_logical;
- hyperthreading_cpus = cpu_logical;
-}
-
-static void
-topo_probe_0xb(void)
-{
- u_int p[4];
- int bits;
- int cnt;
- int i;
- int logical;
- int type;
- int x;
-
- /* We only support three levels for now. */
- for (i = 0; i < 3; i++) {
- cpuid_count(0x0b, i, p);
-
- /* Fall back if CPU leaf 11 doesn't really exist. */
- if (i == 0 && p[1] == 0) {
- topo_probe_0x4();
- return;
- }
-
- bits = p[0] & 0x1f;
- logical = p[1] &= 0xffff;
- type = (p[2] >> 8) & 0xff;
- if (type == 0 || logical == 0)
- break;
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP.
- */
- for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
- if (!cpu_info[x].cpu_present ||
- cpu_info[x].cpu_disabled)
- continue;
- if (x >> bits == boot_cpu_id >> bits)
- cnt++;
- }
- if (type == CPUID_TYPE_SMT)
- cpu_logical = cnt;
- else if (type == CPUID_TYPE_CORE)
- cpu_cores = cnt;
- }
- if (cpu_logical == 0)
- cpu_logical = 1;
- cpu_cores /= cpu_logical;
-}
-
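
topo_probe_0xb() walks CPUID leaf 0xB: EAX[4:0] gives the APIC ID shift for the level, EBX[15:0] the logical-CPU count, and ECX[15:8] the level type (SMT or core). A user-space sketch of the same walk, assuming a GCC/Clang <cpuid.h> and an x86 host:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int a, b, c, d, level;

        for (level = 0; level < 3; level++) {
                if (!__get_cpuid_count(0x0b, level, &a, &b, &c, &d) ||
                    (b & 0xffff) == 0)
                        break;          /* leaf 0xB absent or level empty */
                printf("level %u: shift %u, logical %u, type %u\n",
                    level, a & 0x1f, b & 0xffff, (c >> 8) & 0xff);
        }
        return (0);
}
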
-/*
- * Both topology discovery code and code that consumes topology
- * information assume top-down uniformity of the topology.
- * That is, all physical packages must be identical and each
- * core in a package must have the same number of threads.
- * Topology information is queried only on BSP, on which this
- * code runs and for which it can query CPUID information.
- * Then topology is extrapolated on all packages using the
- * uniformity assumption.
- */
-static void
-topo_probe(void)
-{
- static int cpu_topo_probed = 0;
-
- if (cpu_topo_probed)
- return;
-
- CPU_ZERO(&logical_cpus_mask);
- if (mp_ncpus <= 1)
- cpu_cores = cpu_logical = 1;
- else if (cpu_vendor_id == CPU_VENDOR_AMD)
- topo_probe_amd();
- else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
- /*
- * See Intel(R) 64 Architecture Processor
- * Topology Enumeration article for details.
- *
- * Note that 0x1 <= cpu_high < 4 case should be
- * compatible with topo_probe_0x4() logic when
- * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
- * or it should trigger the fallback otherwise.
- */
- if (cpu_high >= 0xb)
- topo_probe_0xb();
- else if (cpu_high >= 0x1)
- topo_probe_0x4();
- }
-
- /*
- * Fallback: assume each logical CPU is in separate
- * physical package. That is, no multi-core, no SMT.
- */
- if (cpu_cores == 0 || cpu_logical == 0)
- cpu_cores = cpu_logical = 1;
- cpu_topo_probed = 1;
-}
-
-struct cpu_group *
-cpu_topo(void)
-{
- int cg_flags;
-
- /*
- * Determine whether any threading flags are
- * necessary.
- */
- topo_probe();
- if (cpu_logical > 1 && hyperthreading_cpus)
- cg_flags = CG_FLAG_HTT;
- else if (cpu_logical > 1)
- cg_flags = CG_FLAG_SMT;
- else
- cg_flags = 0;
- if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
- printf("WARNING: Non-uniform processors.\n");
- printf("WARNING: Using suboptimal topology.\n");
- return (smp_topo_none());
- }
- /*
- * Neither multi-core nor hyper-threaded.
- */
- if (cpu_logical * cpu_cores == 1)
- return (smp_topo_none());
- /*
- * Only HTT, no multi-core.
- */
- if (cpu_logical > 1 && cpu_cores == 1)
- return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
- /*
- * Only multi-core, no HTT.
- */
- if (cpu_cores > 1 && cpu_logical == 1)
- return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
- /*
- * Both HTT and multi-core.
- */
- return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
- CG_SHARE_L1, cpu_logical, cg_flags));
-}
-
+static u_int boot_address;
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -470,85 +162,6 @@ mp_bootaddress(u_int basemem)
return boot_address;
}
-void
-cpu_add(u_int apic_id, char boot_cpu)
-{
-
- if (apic_id > MAX_APIC_ID) {
- panic("SMP: APIC ID %d too high", apic_id);
- return;
- }
- KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
- apic_id));
- cpu_info[apic_id].cpu_present = 1;
- if (boot_cpu) {
- KASSERT(boot_cpu_id == -1,
- ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
- boot_cpu_id));
- boot_cpu_id = apic_id;
- cpu_info[apic_id].cpu_bsp = 1;
- }
- if (mp_ncpus < MAXCPU) {
- mp_ncpus++;
- mp_maxid = mp_ncpus - 1;
- }
- if (bootverbose)
- printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
- "AP");
-}
-
-void
-cpu_mp_setmaxid(void)
-{
-
- /*
- * mp_maxid should be already set by calls to cpu_add().
- * Just sanity check its value here.
- */
- if (mp_ncpus == 0)
- KASSERT(mp_maxid == 0,
- ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
- else if (mp_ncpus == 1)
- mp_maxid = 0;
- else
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__,
- mp_maxid, mp_ncpus));
-}
-
-int
-cpu_mp_probe(void)
-{
-
- /*
- * Always record BSP in CPU map so that the mbuf init code works
- * correctly.
- */
- CPU_SETOF(0, &all_cpus);
- if (mp_ncpus == 0) {
- /*
- * No CPUs were found, so this must be a UP system. Setup
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- mp_maxid = 0;
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
-}
-
/*
* Initialize the IPI handlers and start up the AP's.
*/
@@ -610,48 +223,6 @@ cpu_mp_start(void)
set_interrupt_apic_ids();
}
-
-/*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
- const char *hyperthread;
- int i;
-
- printf("FreeBSD/SMP: %d package(s) x %d core(s)",
- mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
- if (hyperthreading_cpus > 1)
- printf(" x %d HTT threads", cpu_logical);
- else if (cpu_logical > 1)
- printf(" x %d SMT threads", cpu_logical);
- printf("\n");
-
- /* List active CPUs first. */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1; i < mp_ncpus; i++) {
- if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
- cpu_apic_ids[i]);
- }
-
- /* List disabled CPUs last. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
- continue;
- if (cpu_info[i].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
- i);
- }
-}
-
/*
* AP CPUs call this to initialize themselves.
*/
@@ -662,7 +233,7 @@ init_secondary(void)
vm_offset_t addr;
int gsel_tss;
int x, myid;
- u_int cpuid, cr0;
+ u_int cr0;
/* bootAP is set in start_ap() to our ID. */
myid = bootAP;
@@ -731,84 +302,7 @@ init_secondary(void)
lidt(&r_idt);
#endif
- /*
- * On real hardware, switch to x2apic mode if possible. Do it
- * after aps_ready was signalled, to avoid manipulating the
- * mode while BSP might still want to send some IPI to us
- * (second startup IPI is ignored on modern hardware etc).
- */
- lapic_xapic_mode();
-
- /* Initialize the PAT MSR if present. */
- pmap_init_pat();
-
- /* set up CPU registers and state */
- cpu_setregs();
-
- /* set up SSE/NX */
- initializecpu();
-
- /* set up FPU state on the AP */
- npxinit(false);
-
- if (cpu_ops.cpu_init)
- cpu_ops.cpu_init();
-
- /* A quick check from sanity claus */
- cpuid = PCPU_GET(cpuid);
- if (PCPU_GET(apic_id) != lapic_id()) {
- printf("SMP: cpuid = %d\n", cpuid);
- printf("SMP: actual apic_id = %d\n", lapic_id());
- printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- panic("cpuid mismatch! boom!!");
- }
-
- /* Initialize curthread. */
- KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- mca_init();
-
- mtx_lock_spin(&ap_boot_mtx);
-
- /* Init local apic for irq's */
- lapic_setup(1);
-
- /* Set memory range attributes for this CPU to match the BSP */
- mem_range_AP_init();
-
- smp_cpus++;
-
- CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
- printf("SMP: AP CPU #%d Launched!\n", cpuid);
-
- /* Determine if we are a logical CPU. */
- /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
- if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
- CPU_SET(cpuid, &logical_cpus_mask);
-
- if (bootverbose)
- lapic_dump("AP");
-
- if (smp_cpus == mp_ncpus) {
- /* enable IPI's, tlb shootdown, freezes etc */
- atomic_store_rel_int(&smp_started, 1);
- }
-
- mtx_unlock_spin(&ap_boot_mtx);
-
- /* Wait until all the APs are up. */
- while (smp_started == 0)
- ia32_pause();
-
- /* Start per-CPU event timers. */
- cpu_initclocks_ap();
-
- /* Enter the scheduler. */
- sched_throw(NULL);
-
- panic("scheduler returned us to %s", __func__);
- /* NOTREACHED */
+ init_secondary_tail();
}
/*******************************************************************
@@ -816,108 +310,6 @@ init_secondary(void)
*/
/*
- * We tell the I/O APIC code about all the CPUs we want to receive
- * interrupts. If we don't want certain CPUs to receive IRQs we
- * can simply not tell the I/O APIC code about them in this function.
- * We also do not tell it about the BSP since it tells itself about
- * the BSP internally to work with UP kernels and on UP machines.
- */
-static void
-set_interrupt_apic_ids(void)
-{
- u_int i, apic_id;
-
- for (i = 0; i < MAXCPU; i++) {
- apic_id = cpu_apic_ids[i];
- if (apic_id == -1)
- continue;
- if (cpu_info[apic_id].cpu_bsp)
- continue;
- if (cpu_info[apic_id].cpu_disabled)
- continue;
-
- /* Don't let hyperthreads service interrupts. */
- if (cpu_logical > 1 &&
- apic_id % cpu_logical != 0)
- continue;
-
- intr_add_cpu(i);
- }
-}
-
-/*
- * Assign logical CPU IDs to local APICs.
- */
-static void
-assign_cpu_ids(void)
-{
- u_int i;
-
- TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
- &hyperthreading_allowed);
-
- /* Check for explicitly disabled CPUs. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
- continue;
-
- if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
- cpu_info[i].cpu_hyperthread = 1;
-
- /*
- * Don't use HT CPU if it has been disabled by a
- * tunable.
- */
- if (hyperthreading_allowed == 0) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", i)) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
- hyperthreading_cpus = 0;
- cpu_logical = 1;
- }
-
- /*
- * Assign CPU IDs to local APIC IDs and disable any CPUs
- * beyond MAXCPU. CPU 0 is always assigned to the BSP.
- *
- * To minimize confusion for userland, we attempt to number
- * CPUs such that all threads and cores in a package are
- * grouped together. For now we assume that the BSP is always
- * the first thread in a package and just start adding APs
- * starting with the BSP's APIC ID.
- */
- mp_ncpus = 1;
- cpu_apic_ids[0] = boot_cpu_id;
- apic_cpuids[boot_cpu_id] = 0;
- for (i = boot_cpu_id + 1; i != boot_cpu_id;
- i == MAX_APIC_ID ? i = 0 : i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
- cpu_info[i].cpu_disabled)
- continue;
-
- if (mp_ncpus < MAXCPU) {
- cpu_apic_ids[mp_ncpus] = i;
- apic_cpuids[i] = mp_ncpus;
- mp_ncpus++;
- } else
- cpu_info[i].cpu_disabled = 1;
- }
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
- mp_ncpus));
-}
-
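
The assign_cpu_ids() loop above is a wrap-around scan: it starts just past the BSP's APIC ID, increments modulo MAX_APIC_ID + 1, and stops when it comes back to the BSP, so APs get IDs grouped after the boot CPU. A toy demonstration of the iteration pattern, with MAX_APIC_ID shrunk for illustration:

#include <stdio.h>

#define MAX_APIC_ID 31  /* illustrative; the kernel's is larger */

int main(void)
{
        int boot_cpu_id = 5, i, visited = 0;

        for (i = boot_cpu_id + 1; i != boot_cpu_id;
            i == MAX_APIC_ID ? i = 0 : i++)
                visited++;              /* every ID except the BSP's */
        printf("visited %d of %d other IDs\n", visited, MAX_APIC_ID);
        /* prints: visited 31 of 31 other IDs */
        return (0);
}
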
-/*
* start each AP in our list
*/
/* Lowest 1MB is already mapped: don't touch */
@@ -1094,129 +486,6 @@ start_ap(int apic_id)
return 0; /* return FAILURE */
}
-#ifdef COUNT_XINVLTLB_HITS
-u_int xhits_gbl[MAXCPU];
-u_int xhits_pg[MAXCPU];
-u_int xhits_rng[MAXCPU];
-static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
- sizeof(xhits_gbl), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
- sizeof(xhits_pg), "IU", "");
-SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
- sizeof(xhits_rng), "IU", "");
-
-u_int ipi_global;
-u_int ipi_page;
-u_int ipi_range;
-u_int ipi_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
- 0, "");
-
-u_int ipi_masked_global;
-u_int ipi_masked_page;
-u_int ipi_masked_range;
-u_int ipi_masked_range_size;
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
- &ipi_masked_global, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
- &ipi_masked_page, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
- &ipi_masked_range, 0, "");
-SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
- &ipi_masked_range_size, 0, "");
-#endif /* COUNT_XINVLTLB_HITS */
-
-/*
- * Init and startup IPI.
- */
-void
-ipi_startup(int apic_id, int vector)
-{
-
- /*
- * This attempts to follow the algorithm described in the
- * Intel Multiprocessor Specification v1.4 in section B.4.
- * For each IPI, we allow the local APIC ~20us to deliver the
- * IPI. If that times out, we panic.
- */
-
- /*
- * first we do an INIT IPI: this INIT IPI might be run, resetting
- * and running the target CPU. OR this INIT IPI might be latched (P5
- * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
- * ignored.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
- lapic_ipi_wait(100);
-
- /* Explicitly deassert the INIT IPI. */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
- APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
- apic_id);
-
- DELAY(10000); /* wait ~10mS */
-
- /*
- * next we do a STARTUP IPI: the previous INIT IPI might still be
- * latched, (P5 bug) this 1st STARTUP would then terminate
- * immediately, and the previously started INIT IPI would continue. OR
- * the previous INIT IPI has already run, and this STARTUP IPI will
- * run. OR the previous INIT IPI was ignored, and this STARTUP IPI
- * will run.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- if (!lapic_ipi_wait(100))
- panic("Failed to deliver first STARTUP IPI to APIC %d",
- apic_id);
- DELAY(200); /* wait ~200uS */
-
- /*
- * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
- * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
- * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
- * recognized after hardware RESET or INIT IPI.
- */
- lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
- APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
- vector, apic_id);
- if (!lapic_ipi_wait(100))
- panic("Failed to deliver second STARTUP IPI to APIC %d",
- apic_id);
-
- DELAY(200); /* wait ~200uS */
-}
-
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (old_pending)
- return;
- }
- lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
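
ipi_send_cpu() coalesces bitmap IPIs: the new bit is merged into the per-CPU pending word with a compare-and-swap loop, and the hardware vector is raised only when the word was previously empty, so concurrent senders share one interrupt. A user-space sketch using GCC/Clang builtins in place of atomic_cmpset_int():

#include <stdio.h>

static volatile unsigned int cpu_ipi_pending;

/* Returns 1 if the caller must raise the hardware IPI, 0 if one is
 * already on its way. */
static int
post_bitmap_ipi(unsigned int bit)
{
        unsigned int old, new;

        do {
                old = cpu_ipi_pending;
                new = old | (1u << bit);
        } while (!__sync_bool_compare_and_swap(&cpu_ipi_pending, old, new));
        return (old == 0);
}

int main(void)
{
        printf("%d %d\n", post_bitmap_ipi(2), post_bitmap_ipi(3));
        /* prints: 1 0 -- the second post piggybacks on the pending vector */
        return (0);
}
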
/*
* Flush the TLB on all other CPUs
*/
@@ -1280,14 +549,6 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_of
}
void
-smp_cache_flush(void)
-{
-
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
-}
-
-void
smp_invltlb(void)
{
@@ -1362,203 +623,11 @@ smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
}
void
-ipi_bitmap_handler(struct trapframe frame)
-{
- struct trapframe *oldframe;
- struct thread *td;
- int cpu = PCPU_GET(cpuid);
- u_int ipi_bitmap;
-
- critical_enter();
- td = curthread;
- td->td_intr_nesting_level++;
- oldframe = td->td_intr_frame;
- td->td_intr_frame = &frame;
- ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
- if (ipi_bitmap & (1 << IPI_PREEMPT)) {
-#ifdef COUNT_IPIS
- (*ipi_preempt_counts[cpu])++;
-#endif
- sched_preempt(td);
- }
- if (ipi_bitmap & (1 << IPI_AST)) {
-#ifdef COUNT_IPIS
- (*ipi_ast_counts[cpu])++;
-#endif
- /* Nothing to do for AST */
- }
- if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
-#ifdef COUNT_IPIS
- (*ipi_hardclock_counts[cpu])++;
-#endif
- hardclockintr();
- }
- td->td_intr_frame = oldframe;
- td->td_intr_nesting_level--;
- critical_exit();
-}
-
-/*
- * send an IPI to a set of cpus.
- */
-void
-ipi_selected(cpuset_t cpus, u_int ipi)
-{
- int cpu;
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
-
- while ((cpu = CPU_FFS(&cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &cpus);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
- }
-}
-
-/*
- * send an IPI to a specific CPU.
- */
-void
-ipi_cpu(int cpu, u_int ipi)
-{
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
-
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
-}
-
-/*
- * send an IPI to all CPUs EXCEPT myself
- */
-void
-ipi_all_but_self(u_int ipi)
-{
- cpuset_t other_cpus;
-
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- if (IPI_IS_BITMAPED(ipi)) {
- ipi_selected(other_cpus, ipi);
- return;
- }
-
- /*
- * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
- * of help in order to understand what is the source.
- * Set the mask of receiving CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
-}
-
-int
-ipi_nmi_handler()
-{
- u_int cpuid;
-
- /*
- * As long as there is not a simple way to know about a NMI's
- * source, if the bitmask for the current CPU is present in
- * the global pending bitword an IPI_STOP_HARD has been issued
- * and should be handled.
- */
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
- return (1);
-
- CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
- cpustop_handler();
- return (0);
-}
-
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpustop_handler(void)
-{
- u_int cpu;
-
- cpu = PCPU_GET(cpuid);
-
- savectx(&stoppcbs[cpu]);
-
- /* Indicate that we are stopped */
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
-
- /* Wait for restart */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
-
- if (cpu == 0 && cpustop_restartfunc != NULL) {
- cpustop_restartfunc();
- cpustop_restartfunc = NULL;
- }
-}
-
-/*
- * Handle an IPI_SUSPEND by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpususpend_handler(void)
+smp_cache_flush(void)
{
- u_int cpu;
-
- mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
-
- cpu = PCPU_GET(cpuid);
- if (savectx(&susppcbs[cpu]->sp_pcb)) {
- npxsuspend(susppcbs[cpu]->sp_fpususpend);
- wbinvd();
- CPU_SET_ATOMIC(cpu, &suspended_cpus);
- } else {
- npxresume(susppcbs[cpu]->sp_fpususpend);
- pmap_init_pat();
- initializecpu();
- PCPU_SET(switchtime, 0);
- PCPU_SET(switchticks, ticks);
-
- /* Indicate that we are resumed */
- CPU_CLR_ATOMIC(cpu, &suspended_cpus);
- }
-
- /* Wait for resume */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- if (cpu_ops.cpu_resume)
- cpu_ops.cpu_resume();
- /* Resume MCA and local APIC */
- lapic_xapic_mode();
- mca_resume();
- lapic_setup(0);
-
- /* Indicate that we are resumed */
- CPU_CLR_ATOMIC(cpu, &suspended_cpus);
- CPU_CLR_ATOMIC(cpu, &started_cpus);
+ if (smp_started)
+ smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}
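
The removed cpususpend_handler() leaned on savectx() returning twice: nonzero on the call that saves the context (the suspend branch above) and zero when the saved context is resumed, the inverse of setjmp()'s convention. A toy illustration of that control flow with setjmp/longjmp standing in:

#include <setjmp.h>
#include <stdio.h>

static jmp_buf pcb;             /* stand-in for the suspended CPU's PCB */

int main(void)
{
        if (setjmp(pcb) == 0) {
                /* first return: save side (savectx()'s nonzero branch) */
                printf("context saved, suspending\n");
                longjmp(pcb, 1);        /* stand-in for the resume/wakeup */
        }
        /* second return: resume side (savectx()'s zero branch) */
        printf("resumed from saved context\n");
        return (0);
}
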
/*
@@ -1614,62 +683,3 @@ invlrng_handler(void)
atomic_add_int(&smp_tlb_wait, 1);
}
-
-void
-invlcache_handler(void)
-{
-#ifdef COUNT_IPIS
- (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
-#endif /* COUNT_IPIS */
-
- wbinvd();
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-/*
- * This is called once the rest of the system is up and running and we're
- * ready to let the APs out of the pen.
- */
-static void
-release_aps(void *dummy __unused)
-{
-
- if (mp_ncpus == 1)
- return;
- atomic_store_rel_int(&aps_ready, 1);
- while (smp_started == 0)
- ia32_pause();
-}
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-
-#ifdef COUNT_IPIS
-/*
- * Setup interrupt counters for IPI handlers.
- */
-static void
-mp_ipi_intrcnt(void *dummy)
-{
- char buf[64];
- int i;
-
- CPU_FOREACH(i) {
- snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
- intrcnt_add(buf, &ipi_invltlb_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
- intrcnt_add(buf, &ipi_invlrng_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
- intrcnt_add(buf, &ipi_invlpg_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
- intrcnt_add(buf, &ipi_invlcache_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
- intrcnt_add(buf, &ipi_preempt_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:ast", i);
- intrcnt_add(buf, &ipi_ast_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
- intrcnt_add(buf, &ipi_rendezvous_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
- intrcnt_add(buf, &ipi_hardclock_counts[i]);
- }
-}
-SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
-#endif
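
The removed release_aps() is one half of a release/acquire handshake: the BSP publishes aps_ready with a store-release and then spins until the last AP sets smp_started. A toy version with C11 atomics and a pthread standing in for an AP (compile with -pthread; names mirror the kernel's, the mechanics are a sketch):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int aps_ready, smp_started;

static void *
ap_main(void *arg)
{
        while (!atomic_load_explicit(&aps_ready, memory_order_acquire))
                ;                       /* wait in the "pen" */
        atomic_store_explicit(&smp_started, 1, memory_order_release);
        return (NULL);
}

int main(void)
{
        pthread_t ap;

        pthread_create(&ap, NULL, ap_main, NULL);
        atomic_store_explicit(&aps_ready, 1, memory_order_release);
        while (!atomic_load_explicit(&smp_started, memory_order_acquire))
                ;                       /* ia32_pause() in the kernel */
        printf("APs released and running\n");
        pthread_join(ap, NULL);
        return (0);
}
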
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 0a08012..78d76ef 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -695,11 +695,9 @@ END(bcmp)
*/
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
-#ifndef XEN
/* reload the descriptor table */
movl 4(%esp),%eax
lgdt (%eax)
-#endif
/* flush the prefetch q */
jmp 1f
diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s
index 26bfd3b..6bedb48 100644
--- a/sys/i386/i386/swtch.s
+++ b/sys/i386/i386/swtch.s
@@ -88,7 +88,7 @@ ENTRY(cpu_throw)
movl 8(%esp),%ecx /* New thread */
movl TD_PCB(%ecx),%edx
movl PCB_CR3(%edx),%eax
- LOAD_CR3(%eax)
+ movl %eax,%cr3
/* set bit in new pm_active */
movl TD_PROC(%ecx),%eax
movl P_VMSPACE(%eax), %ebx
@@ -174,10 +174,10 @@ ENTRY(cpu_switch)
/* switch address space */
movl PCB_CR3(%edx),%eax
- READ_CR3(%ebx) /* The same address space? */
+ movl %cr3,%ebx /* The same address space? */
cmpl %ebx,%eax
je sw0
- LOAD_CR3(%eax) /* new address space */
+ movl %eax,%cr3 /* new address space */
movl %esi,%eax
movl PCPU(CPUID),%esi
SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */
@@ -204,18 +204,6 @@ sw0:
SETOP %esi,TD_LOCK(%edi) /* Switchout td_lock */
sw1:
BLOCK_SPIN(%ecx)
-#ifdef XEN
- pushl %eax
- pushl %ecx
- pushl %edx
- call xen_handle_thread_switch
- popl %edx
- popl %ecx
- popl %eax
- /*
- * XXX set IOPL
- */
-#else
/*
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
@@ -264,7 +252,7 @@ sw1:
movl 12(%esi), %ebx
movl %eax, 8(%edi)
movl %ebx, 12(%edi)
-#endif
+
/* Restore context. */
movl PCB_EBX(%edx),%ebx
movl PCB_ESP(%edx),%esp
@@ -290,7 +278,7 @@ sw1:
movl _default_ldt,%eax
cmpl PCPU(CURRENTLDT),%eax
je 2f
- LLDT(_default_ldt)
+ lldt _default_ldt
movl %eax,PCPU(CURRENTLDT)
jmp 2f
1:
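
With the Xen LOAD_CR3/READ_CR3 wrappers gone, cpu_switch() compares the incoming pcb_cr3 against %cr3 and skips the reload when they match, since writing %cr3 flushes the TLB. A toy model of that lazy reload, with a plain variable standing in for the control register:

#include <stdio.h>

static unsigned int cr3;          /* stand-in for %cr3 */
static int tlb_flushes;

static void
switch_address_space(unsigned int new_cr3)
{
        if (cr3 == new_cr3)
                return;                 /* same pmap: keep the TLB warm */
        cr3 = new_cr3;
        tlb_flushes++;
}

int main(void)
{
        switch_address_space(0x1000);
        switch_address_space(0x1000);   /* thread sharing the same pmap */
        switch_address_space(0x2000);
        printf("%d TLB flushes for 3 switches\n", tlb_flushes);
        /* prints: 2 TLB flushes for 3 switches */
        return (0);
}
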
diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c
index 9518862..9044d19 100644
--- a/sys/i386/i386/sys_machdep.c
+++ b/sys/i386/i386/sys_machdep.c
@@ -59,20 +59,6 @@ __FBSDID("$FreeBSD$");
#include <security/audit/audit.h>
-#ifdef XEN
-#include <machine/xen/xenfunc.h>
-
-void i386_reset_ldt(struct proc_ldt *pldt);
-
-void
-i386_reset_ldt(struct proc_ldt *pldt)
-{
- xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
-}
-#else
-#define i386_reset_ldt(x)
-#endif
-
#include <vm/vm_kern.h> /* for kernel_map */
#define MAX_LD 8192
@@ -211,12 +197,7 @@ sysarch(td, uap)
*/
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -226,12 +207,7 @@ sysarch(td, uap)
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_fsd = sd;
-#ifdef XEN
- HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
- *(uint64_t *)&sd);
-#else
PCPU_GET(fsgs_gdt)[0] = sd;
-#endif
critical_exit();
td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
}
@@ -252,12 +228,7 @@ sysarch(td, uap)
sd.sd_lobase = base & 0xffffff;
sd.sd_hibase = (base >> 24) & 0xff;
-#ifdef XEN
- /* need to do nosegneg like Linux */
- sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
-#else
sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */
-#endif
sd.sd_hilimit = 0xf;
sd.sd_type = SDT_MEMRWA;
sd.sd_dpl = SEL_UPL;
@@ -267,12 +238,7 @@ sysarch(td, uap)
sd.sd_gran = 1;
critical_enter();
td->td_pcb->pcb_gsd = sd;
-#ifdef XEN
- HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
- *(uint64_t *)&sd);
-#else
PCPU_GET(fsgs_gdt)[1] = sd;
-#endif
critical_exit();
load_gs(GSEL(GUGS_SEL, SEL_UPL));
}
@@ -434,10 +400,6 @@ set_user_ldt(struct mdproc *mdp)
}
pldt = mdp->md_ldt;
-#ifdef XEN
- i386_reset_ldt(pldt);
- PCPU_SET(currentldt, (int)pldt);
-#else
#ifdef SMP
gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
@@ -445,7 +407,6 @@ set_user_ldt(struct mdproc *mdp)
#endif
lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
-#endif /* XEN */
if (dtlocked)
mtx_unlock_spin(&dt_lock);
}
@@ -464,43 +425,6 @@ set_user_ldt_rv(struct vmspace *vmsp)
}
#endif
-#ifdef XEN
-
-/*
- * dt_lock must be held. Returns with dt_lock held.
- */
-struct proc_ldt *
-user_ldt_alloc(struct mdproc *mdp, int len)
-{
- struct proc_ldt *pldt, *new_ldt;
-
- mtx_assert(&dt_lock, MA_OWNED);
- mtx_unlock_spin(&dt_lock);
- new_ldt = malloc(sizeof(struct proc_ldt),
- M_SUBPROC, M_WAITOK);
-
- new_ldt->ldt_len = len = NEW_MAX_LD(len);
- new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
- round_page(len * sizeof(union descriptor)), M_WAITOK);
- new_ldt->ldt_refcnt = 1;
- new_ldt->ldt_active = 0;
-
- mtx_lock_spin(&dt_lock);
- if ((pldt = mdp->md_ldt)) {
- if (len > pldt->ldt_len)
- len = pldt->ldt_len;
- bcopy(pldt->ldt_base, new_ldt->ldt_base,
- len * sizeof(union descriptor));
- } else {
- bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
- }
- mtx_unlock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */
- pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
- new_ldt->ldt_len*sizeof(union descriptor));
- mtx_lock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */
- return (new_ldt);
-}
-#else
/*
* dt_lock must be held. Returns with dt_lock held.
*/
@@ -535,7 +459,6 @@ user_ldt_alloc(struct mdproc *mdp, int len)
return (new_ldt);
}
-#endif /* !XEN */
/*
* Must be called with dt_lock held. Returns with dt_lock unheld.
@@ -553,13 +476,8 @@ user_ldt_free(struct thread *td)
}
if (td == curthread) {
-#ifdef XEN
- i386_reset_ldt(&default_proc_ldt);
- PCPU_SET(currentldt, (int)&default_proc_ldt);
-#else
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
-#endif
}
mdp->md_ldt = NULL;
@@ -785,27 +703,7 @@ again:
td->td_retval[0] = uap->start;
return (error);
}
-#ifdef XEN
-static int
-i386_set_ldt_data(struct thread *td, int start, int num,
- union descriptor *descs)
-{
- struct mdproc *mdp = &td->td_proc->p_md;
- struct proc_ldt *pldt = mdp->md_ldt;
- mtx_assert(&dt_lock, MA_OWNED);
-
- while (num) {
- xen_update_descriptor(
- &((union descriptor *)(pldt->ldt_base))[start],
- descs);
- num--;
- start++;
- descs++;
- }
- return (0);
-}
-#else
static int
i386_set_ldt_data(struct thread *td, int start, int num,
union descriptor *descs)
@@ -821,7 +719,6 @@ i386_set_ldt_data(struct thread *td, int start, int num,
num * sizeof(union descriptor));
return (0);
}
-#endif /* !XEN */
static int
i386_ldt_grow(struct thread *td, int len)
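
The surviving sysarch() path builds the %fs/%gs descriptor by hand: the 32-bit base is split across sd_lobase/sd_hibase and the limit is set to 4GB with page granularity. A standalone sketch of the packing, using a struct whose field widths mirror the i386 segment descriptor (the exact bit layout is ABI-dependent and irrelevant here; the base value is hypothetical):

#include <stdint.h>
#include <stdio.h>

struct toy_segd {
        unsigned sd_lolimit:16;
        unsigned sd_lobase:24;
        unsigned sd_type:5;
        unsigned sd_dpl:2;
        unsigned sd_p:1;
        unsigned sd_hilimit:4;
        unsigned sd_xx:2;
        unsigned sd_def32:1;
        unsigned sd_gran:1;
        unsigned sd_hibase:8;
};

int main(void)
{
        uint32_t base = 0xbfbfd000;     /* hypothetical TLS base */
        struct toy_segd sd = {
                .sd_lobase = base & 0xffffff,
                .sd_hibase = (base >> 24) & 0xff,
                .sd_lolimit = 0xffff,   /* 4GB limit, wraps around */
                .sd_hilimit = 0xf,
                .sd_p = 1,
                .sd_def32 = 1,
                .sd_gran = 1,
        };

        printf("base = 0x%02x%06x\n", (unsigned)sd.sd_hibase,
            (unsigned)sd.sd_lobase);
        return (0);
}
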
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 5fd4a16..5671dc9 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -89,9 +89,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_map.h>
#include <vm/vm_param.h>
-#ifdef XEN
-#include <xen/hypervisor.h>
-#endif
#ifdef PC98
#include <pc98/cbus/cbus.h>
#else
@@ -304,10 +301,8 @@ cpu_fork(td1, p2, td2, flags)
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
- /*
- * XXX XEN need to check on PSL_USER is handled
- */
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+
/*
* Now, cpu_switch() can schedule the new process.
* pcb_esp is loaded pointing to the cpu_switch() stack frame
@@ -698,12 +693,6 @@ cpu_reset_real()
#endif
disable_intr();
-#ifdef XEN
- if (smp_processor_id() == 0)
- HYPERVISOR_shutdown(SHUTDOWN_reboot);
- else
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-#endif
#ifdef CPU_ELAN
if (elan_mmcr != NULL)
elan_mmcr->RESCFG = 1;
@@ -797,13 +786,8 @@ sf_buf_map(struct sf_buf *sf, int flags)
*/
ptep = vtopte(sf->kva);
opte = *ptep;
-#ifdef XEN
- PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
- | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
-#else
*ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
pmap_cache_bits(sf->m->md.pat_mode, 0);
-#endif
/*
* Avoid unnecessary TLB invalidations: If the sf_buf's old
@@ -854,15 +838,8 @@ sf_buf_shootdown(struct sf_buf *sf, int flags)
int
sf_buf_unmap(struct sf_buf *sf)
{
-#ifdef XEN
- /*
- * Xen doesn't like having dangling R/W mappings
- */
- pmap_qremove(sf->kva, 1);
- return (1);
-#else
+
return (0);
-#endif
}
static void
diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h
index c1c3f64..716915c 100644
--- a/sys/i386/include/asmacros.h
+++ b/sys/i386/include/asmacros.h
@@ -176,37 +176,6 @@
movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
movl %eax, %fs
-#ifdef XEN
-#define LOAD_CR3(reg) \
- movl reg,PCPU(CR3); \
- pushl %ecx ; \
- pushl %edx ; \
- pushl %esi ; \
- pushl reg ; \
- call xen_load_cr3 ; \
- addl $4,%esp ; \
- popl %esi ; \
- popl %edx ; \
- popl %ecx ; \
-
-#define READ_CR3(reg) movl PCPU(CR3),reg;
-#define LLDT(arg) \
- pushl %edx ; \
- pushl %eax ; \
- xorl %eax,%eax ; \
- movl %eax,%gs ; \
- call i386_reset_ldt ; \
- popl %eax ; \
- popl %edx
-#define CLI call ni_cli
-#else
-#define LOAD_CR3(reg) movl reg,%cr3;
-#define READ_CR3(reg) movl %cr3,reg;
-#define LLDT(arg) lldt arg;
-#define CLI cli
-#endif /* !XEN */
-
-
#endif /* LOCORE */
#ifdef __STDC__
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index f80a898..3bc25d4 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -42,17 +42,6 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
-#ifdef XEN
-extern void xen_cli(void);
-extern void xen_sti(void);
-extern u_int xen_rcr2(void);
-extern void xen_load_cr3(u_int data);
-extern void xen_tlb_flush(void);
-extern void xen_invlpg(u_int addr);
-extern void write_eflags(u_int eflags);
-extern u_int read_eflags(void);
-#endif
-
struct region_descriptor;
#define readb(va) (*(volatile uint8_t *) (va))
@@ -106,11 +95,8 @@ clts(void)
static __inline void
disable_intr(void)
{
-#ifdef XEN
- xen_cli();
-#else
+
__asm __volatile("cli" : : : "memory");
-#endif
}
static __inline void
@@ -132,11 +118,8 @@ cpuid_count(u_int ax, u_int cx, u_int *p)
static __inline void
enable_intr(void)
{
-#ifdef XEN
- xen_sti();
-#else
+
__asm __volatile("sti");
-#endif
}
static __inline void
@@ -325,11 +308,7 @@ ia32_pause(void)
}
static __inline u_int
-#ifdef XEN
-_read_eflags(void)
-#else
read_eflags(void)
-#endif
{
u_int ef;
@@ -389,11 +368,7 @@ wbinvd(void)
}
static __inline void
-#ifdef XEN
-_write_eflags(u_int ef)
-#else
write_eflags(u_int ef)
-#endif
{
__asm __volatile("pushl %0; popfl" : : "r" (ef));
}
@@ -425,9 +400,6 @@ rcr2(void)
{
u_int data;
-#ifdef XEN
- return (xen_rcr2());
-#endif
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
@@ -435,11 +407,8 @@ rcr2(void)
static __inline void
load_cr3(u_int data)
{
-#ifdef XEN
- xen_load_cr3(data);
-#else
+
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
-#endif
}
static __inline u_int
@@ -491,11 +460,8 @@ load_xcr(u_int reg, uint64_t val)
static __inline void
invltlb(void)
{
-#ifdef XEN
- xen_tlb_flush();
-#else
+
load_cr3(rcr3());
-#endif
}
/*
@@ -506,11 +472,7 @@ static __inline void
invlpg(u_int addr)
{
-#ifdef XEN
- xen_invlpg(addr);
-#else
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
-#endif
}
static __inline u_short
diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h
index 082b649..96ac06a 100644
--- a/sys/i386/include/intr_machdep.h
+++ b/sys/i386/include/intr_machdep.h
@@ -58,13 +58,7 @@
(FIRST_MSI_INT + NUM_MSI_INTS)
#define LAST_EVTCHN_INT \
(FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
-#elif defined(XEN)
-#include <xen/xen-os.h>
-#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS
-#define FIRST_EVTCHN_INT 0
-#define LAST_EVTCHN_INT \
- (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1)
-#else /* !XEN && !XENHVM */
+#else /* !XENHVM */
#define NUM_EVTCHN_INTS 0
#endif
#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS)
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index dc29b6d..231f80f 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -44,34 +44,6 @@
* other processors"
*/
-#if defined(XEN)
-
-/* These are periodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
- uint64_t tsc_timestamp; /* TSC at last update of time vals. */
- uint64_t system_timestamp; /* Time, in nanosecs, since boot. */
- uint32_t tsc_to_nsec_mul;
- uint32_t tsc_to_usec_mul;
- int tsc_shift;
- uint32_t version;
-};
-
-#define PCPU_XEN_FIELDS \
- ; \
- u_int pc_cr3; /* track cr3 for R1/R3*/ \
- vm_paddr_t *pc_pdir_shadow; \
- uint64_t pc_processed_system_time; \
- struct shadow_time_info pc_shadow_time; \
- char __pad[185]
-
-#else /* !XEN */
-
-#define PCPU_XEN_FIELDS \
- ; \
- char __pad[233]
-
-#endif
-
#define PCPU_MD_FIELDS \
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
@@ -85,8 +57,8 @@ struct shadow_time_info {
u_int pc_apic_id; \
int pc_private_tss; /* Flag indicating private tss*/\
u_int pc_cmci_mask; /* MCx banks for CMCI */ \
- u_int pc_vcpu_id /* Xen vCPU ID */ \
- PCPU_XEN_FIELDS
+ u_int pc_vcpu_id; /* Xen vCPU ID */ \
+ char __pad[233]
#ifdef _KERNEL
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index 0d8057f..76822b1 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -219,76 +219,6 @@ extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
*/
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
-#if defined(XEN)
-#include <sys/param.h>
-
-#include <xen/xen-os.h>
-
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenpmap.h>
-
-extern pt_entry_t pg_nx;
-
-#define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M)
-
-#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma)))
-#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m)))
-
-#define VTOM(va) xpmap_ptom(VTOP(va))
-
-static __inline vm_paddr_t
-pmap_kextract_ma(vm_offset_t va)
-{
- vm_paddr_t ma;
- if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) {
- ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1));
- } else {
- ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK);
- }
- return ma;
-}
-
-static __inline vm_paddr_t
-pmap_kextract(vm_offset_t va)
-{
- return xpmap_mtop(pmap_kextract_ma(va));
-}
-#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va)))
-
-vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va);
-
-void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa);
-void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len);
-void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len);
-
-static __inline pt_entry_t
-pte_load_store(pt_entry_t *ptep, pt_entry_t v)
-{
- pt_entry_t r;
-
- r = *ptep;
- PT_SET_VA(ptep, v, TRUE);
- return (r);
-}
-
-static __inline pt_entry_t
-pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v)
-{
- pt_entry_t r;
-
- r = *ptep;
- PT_SET_VA_MA(ptep, v, TRUE);
- return (r);
-}
-
-#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL)
-
-#define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte)
-#define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
-#define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte)
-
-#elif !defined(XEN)
-
/*
* KPTmap is a linear mapping of the kernel page table. It differs from the
* recursive mapping in two ways: (1) it only provides access to kernel page
@@ -328,13 +258,8 @@ pmap_kextract(vm_offset_t va)
}
return (pa);
}
-#endif
-
-#if !defined(XEN)
-#define PT_UPDATES_FLUSH()
-#endif
-#if (defined(PAE) || defined(PAE_TABLES)) && !defined(XEN)
+#if (defined(PAE) || defined(PAE_TABLES))
#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte)
@@ -343,7 +268,7 @@ pmap_kextract(vm_offset_t va)
extern pt_entry_t pg_nx;
-#elif !defined(PAE) && !defined(PAE_TABLES) && !defined(XEN)
+#else /* !(PAE || PAE_TABLES) */
#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte)
@@ -352,7 +277,7 @@ extern pt_entry_t pg_nx;
*(u_int *)(ptep) = (u_int)(pte); \
} while (0)
-#endif /* PAE */
+#endif /* !(PAE || PAE_TABLES) */
#define pte_clear(ptep) pte_store(ptep, 0)
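
pmap_kextract(), kept above in its native-only form, resolves a kernel VA in two steps: a PG_PS directory entry supplies a whole 4MB frame plus the in-superpage offset, otherwise the 4KB PTE frame plus the page offset. A user-space sketch of the address math with illustrative non-PAE constants and made-up entries:

#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK 0x00000fffu
#define PG_PS     0x00000080u
#define PG_FRAME  0xfffff000u
#define NBPDR     (1u << 22)    /* bytes mapped by one PDE (4MB) */

static uint32_t
toy_kextract(uint32_t va, uint32_t pde, uint32_t pte)
{
        if (pde & PG_PS)        /* superpage: 4MB frame + offset */
                return ((pde & ~(NBPDR - 1)) | (va & (NBPDR - 1)));
        return ((pte & PG_FRAME) | (va & PAGE_MASK));
}

int main(void)
{
        printf("0x%08x\n", toy_kextract(0xc0123456, 0x08000083, 0));
        /* 4MB PDE at 0x08000000 -> pa 0x08123456 */
        return (0);
}
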
diff --git a/sys/i386/include/segments.h b/sys/i386/include/segments.h
index d67f2e0..635dffc 100644
--- a/sys/i386/include/segments.h
+++ b/sys/i386/include/segments.h
@@ -82,14 +82,8 @@ struct region_descriptor {
#ifdef _KERNEL
extern int _default_ldt;
-#ifdef XEN
-extern struct proc_ldt default_proc_ldt;
-extern union descriptor *gdt;
-extern union descriptor *ldt;
-#else
extern union descriptor gdt[];
extern union descriptor ldt[NLDT];
-#endif
extern struct soft_segment_descriptor gdt_segs[];
extern struct gate_descriptor *idt;
extern struct region_descriptor r_gdt, r_idt;
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 0abbca4..71c830e 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -36,6 +36,37 @@ extern int mp_naps;
extern int boot_cpu_id;
extern struct pcb stoppcbs[];
extern int cpu_apic_ids[];
+extern int bootAP;
+extern void *dpcpu;
+extern char *bootSTK;
+extern int bootAP;
+extern void *bootstacks[];
+extern volatile u_int cpu_ipi_pending[];
+extern volatile int aps_ready;
+extern struct mtx ap_boot_mtx;
+extern int cpu_logical;
+extern int cpu_cores;
+extern volatile int smp_tlb_wait;
+extern u_int xhits_gbl[];
+extern u_int xhits_pg[];
+extern u_int xhits_rng[];
+extern u_int ipi_global;
+extern u_int ipi_page;
+extern u_int ipi_range;
+extern u_int ipi_range_size;
+extern u_int ipi_masked_global;
+extern u_int ipi_masked_page;
+extern u_int ipi_masked_range;
+extern u_int ipi_masked_range_size;
+
+struct cpu_info {
+ int cpu_present:1;
+ int cpu_bsp:1;
+ int cpu_disabled:1;
+ int cpu_hyperthread:1;
+};
+extern struct cpu_info cpu_info[];
+
#ifdef COUNT_IPIS
extern u_long *ipi_invltlb_counts[MAXCPU];
extern u_long *ipi_invlrng_counts[MAXCPU];
@@ -56,11 +87,11 @@ inthand_t
IDTVEC(rendezvous); /* handle CPU rendezvous */
/* functions in mp_machdep.c */
+void assign_cpu_ids(void);
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
-#ifndef XEN
void cpususpend_handler(void);
-#endif
+void init_secondary_tail(void);
void invltlb_handler(void);
void invlpg_handler(void);
void invlrng_handler(void);
@@ -68,13 +99,12 @@ void invlcache_handler(void);
void init_secondary(void);
void ipi_startup(int apic_id, int vector);
void ipi_all_but_self(u_int ipi);
-#ifndef XEN
void ipi_bitmap_handler(struct trapframe frame);
-#endif
void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
+void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
@@ -83,10 +113,10 @@ void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
void smp_invltlb(void);
void smp_masked_invltlb(cpuset_t mask);
+void mem_range_AP_init(void);
+void topo_probe(void);
+void ipi_send_cpu(int cpu, u_int ipi);
-#ifdef XEN
-void ipi_to_irq_init(void);
-#endif
#endif /* !LOCORE */
#endif /* SMP */
diff --git a/sys/i386/include/vm.h b/sys/i386/include/vm.h
index 6573e37..22d2eca 100644
--- a/sys/i386/include/vm.h
+++ b/sys/i386/include/vm.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Copyright (c) 2009 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h
index 7775669..cb23b05 100644
--- a/sys/i386/include/vmparam.h
+++ b/sys/i386/include/vmparam.h
@@ -135,11 +135,7 @@
* Kernel physical load address.
*/
#ifndef KERNLOAD
-#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST)
-#define KERNLOAD 0
-#else
#define KERNLOAD (1 << PDRSHIFT)
-#endif
#endif /* !defined(KERNLOAD) */
/*
@@ -149,11 +145,7 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
-#ifdef XEN
-#define VM_MAX_KERNEL_ADDRESS HYPERVISOR_VIRT_START
-#else
#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
-#endif
#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
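
KERNLOAD now has a single definition: the kernel loads one page-directory entry into physical memory rather than at 0 as the PV guest did. A quick check of the arithmetic (PDRSHIFT is 22 on non-PAE i386 and 21 with PAE):

#include <stdio.h>

int main(void)
{
        printf("non-PAE: %u MB, PAE: %u MB\n",
            (1u << 22) >> 20, (1u << 21) >> 20);
        /* prints: non-PAE: 4 MB, PAE: 2 MB */
        return (0);
}
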
diff --git a/sys/i386/include/xen/features.h b/sys/i386/include/xen/features.h
deleted file mode 100644
index fb4f680..0000000
--- a/sys/i386/include/xen/features.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/******************************************************************************
- * features.h
- *
- * Query the features reported by Xen.
- *
- * Copyright (c) 2006, Ian Campbell
- *
- * $FreeBSD$
- */
-
-#ifndef __ASM_XEN_FEATURES_H__
-#define __ASM_XEN_FEATURES_H__
-
-#include <xen/interface/version.h>
-
-extern void setup_xen_features(void);
-
-extern uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32];
-
-#define xen_feature(flag) (xen_features[flag])
-
-#endif /* __ASM_XEN_FEATURES_H__ */
diff --git a/sys/i386/include/xen/hypercall.h b/sys/i386/include/xen/hypercall.h
index c7e2a00..1c4d529 100644
--- a/sys/i386/include/xen/hypercall.h
+++ b/sys/i386/include/xen/hypercall.h
@@ -246,14 +246,8 @@ HYPERVISOR_memory_op(
return _hypercall2(int, memory_op, cmd, arg);
}
-#if defined(XEN)
-int HYPERVISOR_multicall(multicall_entry_t *, int);
-static inline int
-_HYPERVISOR_multicall(
-#else /* XENHVM */
static inline int
HYPERVISOR_multicall(
-#endif
void *call_list, int nr_calls)
{
return _hypercall2(int, multicall, call_list, nr_calls);
diff --git a/sys/i386/include/xen/xen-os.h b/sys/i386/include/xen/xen-os.h
index 3d1ef04..9b9b63f 100644
--- a/sys/i386/include/xen/xen-os.h
+++ b/sys/i386/include/xen/xen-os.h
@@ -44,105 +44,6 @@ static inline void rep_nop(void)
}
#define cpu_relax() rep_nop()
-#ifndef XENHVM
-
-#ifdef SMP
-extern int gdtset;
-
-#include <sys/time.h> /* XXX for pcpu.h */
-#include <sys/pcpu.h> /* XXX for PCPU_GET */
-static inline int
-smp_processor_id(void)
-{
- if (__predict_true(gdtset))
- return PCPU_GET(cpuid);
- return 0;
-}
-
-#else
-#define smp_processor_id() 0
-#endif
-
-#ifndef PANIC_IF
-#define PANIC_IF(exp) if (__predict_false(exp)) {printf("panic - %s: %s:%d\n",#exp, __FILE__, __LINE__); panic("%s: %s:%d", #exp, __FILE__, __LINE__);}
-#endif
-
-/*
- * Crude memory allocator for memory allocation early in boot.
- */
-void *bootmem_alloc(unsigned int size);
-void bootmem_free(void *ptr, unsigned int size);
-
-/*
- * STI/CLI equivalents. These basically set and clear the virtual
- * event_enable flag in the shared_info structure. Note that when
- * the enable bit is set, there may be pending events to be handled.
- * We may therefore call into do_hypervisor_callback() directly.
- */
-
-#define __cli() \
-do { \
- vcpu_info_t *_vcpu; \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- _vcpu->evtchn_upcall_mask = 1; \
- barrier(); \
-} while (0)
-
-#define __sti() \
-do { \
- vcpu_info_t *_vcpu; \
- barrier(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- _vcpu->evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if (__predict_false(_vcpu->evtchn_upcall_pending)) \
- force_evtchn_callback(); \
-} while (0)
-
-#define __restore_flags(x) \
-do { \
- vcpu_info_t *_vcpu; \
- barrier(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
- barrier(); /* unmask then check (avoid races) */ \
- if (__predict_false(_vcpu->evtchn_upcall_pending)) \
- force_evtchn_callback(); \
- } \
-} while (0)
-
-/*
- * Add critical_{enter, exit}?
- *
- */
-#define __save_and_cli(x) \
-do { \
- vcpu_info_t *_vcpu; \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- (x) = _vcpu->evtchn_upcall_mask; \
- _vcpu->evtchn_upcall_mask = 1; \
- barrier(); \
-} while (0)
-
-
-#define cli() __cli()
-#define sti() __sti()
-#define save_flags(x) __save_flags(x)
-#define restore_flags(x) __restore_flags(x)
-#define save_and_cli(x) __save_and_cli(x)
-
-#define local_irq_save(x) __save_and_cli(x)
-#define local_irq_restore(x) __restore_flags(x)
-#define local_irq_disable() __cli()
-#define local_irq_enable() __sti()
-
-#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));}
-#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); }
-#define spin_lock_irqsave mtx_lock_irqsave
-#define spin_unlock_irqrestore mtx_unlock_irqrestore
-
-#endif /* !XENHVM */
-
/* This is a barrier for the compiler only, NOT the processor! */
#define barrier() __asm__ __volatile__("": : :"memory")
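
The deleted __sti() shows the classic unmask-then-check pattern: clear evtchn_upcall_mask, place a barrier, then re-test evtchn_upcall_pending, because Xen raises no new upcall for events that arrived while masked. A toy reproduction of the ordering (the struct and names are stand-ins, not the hypervisor ABI):

#include <stdio.h>

struct toy_vcpu {
        volatile unsigned char upcall_mask;
        volatile unsigned char upcall_pending;
};

static void
force_callback(struct toy_vcpu *v)
{
        v->upcall_pending = 0;
        printf("handled event that arrived while masked\n");
}

static void
toy_sti(struct toy_vcpu *v)
{
        v->upcall_mask = 0;
        __asm__ __volatile__("" ::: "memory");  /* unmask before check */
        if (v->upcall_pending)
                force_callback(v);      /* would be force_evtchn_callback() */
}

int main(void)
{
        struct toy_vcpu v = { 1, 1 };   /* masked with an event pending */

        toy_sti(&v);
        return (0);
}
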
diff --git a/sys/i386/include/xen/xenfunc.h b/sys/i386/include/xen/xenfunc.h
index f02ee12..f48b1f1 100644
--- a/sys/i386/include/xen/xenfunc.h
+++ b/sys/i386/include/xen/xenfunc.h
@@ -34,7 +34,6 @@
#include <vm/pmap.h>
-#include <machine/xen/xenpmap.h>
#include <machine/segments.h>
#include <sys/pcpu.h>
diff --git a/sys/i386/include/xen/xenpmap.h b/sys/i386/include/xen/xenpmap.h
deleted file mode 100644
index 8287e72..0000000
--- a/sys/i386/include/xen/xenpmap.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004,2005 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * $FreeBSD$
- */
-
-#ifndef _XEN_XENPMAP_H_
-#define _XEN_XENPMAP_H_
-
-#if defined(XEN)
-void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
-void xen_pt_switch(vm_paddr_t);
-void xen_set_ldt(vm_paddr_t, unsigned long);
-void xen_pgdpt_pin(vm_paddr_t);
-void xen_pgd_pin(vm_paddr_t);
-void xen_pgd_unpin(vm_paddr_t);
-void xen_pt_pin(vm_paddr_t);
-void xen_pt_unpin(vm_paddr_t);
-void xen_flush_queue(void);
-void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
-void pmap_suspend(void);
-void pmap_resume(void);
-void xen_check_queue(void);
-
-#ifdef INVARIANTS
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
-#else
-#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
-#endif
-
-
-#include <sys/param.h>
-#include <sys/pcpu.h>
-
-#ifdef PMAP_DEBUG
-#define PMAP_REF pmap_ref
-#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
-#define PMAP_MARK_PRIV pmap_mark_privileged
-#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
-#else
-#define PMAP_MARK_PRIV(a)
-#define PMAP_MARK_UNPRIV(a)
-#define PMAP_REF(a, b)
-#define PMAP_DEC_REF_PAGE(a)
-#endif
-
-#define ALWAYS_SYNC 0
-
-#ifdef PT_DEBUG
-#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
-#else
-#define PT_LOG()
-#endif
-
-#define INVALID_P2M_ENTRY (~0UL)
-
-#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-
-#define SH_PD_SET_VA 1
-#define SH_PD_SET_VA_MA 2
-#define SH_PD_SET_VA_CLEAR 3
-
-struct pmap;
-void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
-#ifdef notyet
-static vm_paddr_t
-vptetomachpte(vm_paddr_t *pte)
-{
- vm_offset_t offset, ppte;
- vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
- int pgindex;
-
- ppte = (vm_offset_t)pte;
- pgoffset = (ppte & PAGE_MASK);
- offset = ppte - (vm_offset_t)PTmap;
- pgindex = ppte >> PDRSHIFT;
-
- pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
- retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
- return (retval);
-}
-#endif
-#define PT_GET(_ptp) \
- (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
-
-#ifdef WRITABLE_PAGETABLES
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- PT_LOG(); \
- *(_ptp) = xpmap_ptom((_npte)); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- PT_LOG(); \
- *(_ptp) = (_npte); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- PT_LOG(); \
- *(_ptp) = 0; \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#else /* !WRITABLE_PAGETABLES */
-
-#define PT_SET_VA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- xen_queue_pt_update(vtomach(_ptp), \
- xpmap_ptom(_npte)); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- xen_queue_pt_update(vtomach(_ptp), _npte); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PT_CLEAR_VA(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- xen_queue_pt_update(vtomach(_ptp), 0); \
- if (sync || ALWAYS_SYNC) \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \
- PMAP_REF((_ptp), (_npte)); \
- pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \
- PMAP_REF((pt_entry_t *)(_ptp), 0); \
- pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \
- if (sync || ALWAYS_SYNC) xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-#endif
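
Either way the call sites look the same; a hedged usage sketch (vtopte() and the va/pa values are assumptions patterned on the i386 pmap, not lines from this header):

	pt_entry_t *pte = vtopte(va);

	/* Install a writable mapping; sync == TRUE flushes immediately. */
	PT_SET_VA(pte, pa | PG_V | PG_RW, TRUE);

With WRITABLE_PAGETABLES the store hits the PTE directly and the hypervisor validates the trapped write; otherwise xen_queue_pt_update() batches the update and xen_flush_queue() pushes the queue in a single hypercall.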
-
-#define PT_SET_MA(_va, _ma) \
-do { \
- PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
- (_ma), \
- UVMF_INVLPG| UVMF_ALL) < 0); \
-} while (/*CONSTCOND*/0)
-
-#define PT_UPDATES_FLUSH() do { \
- xen_flush_queue(); \
-} while (/*CONSTCOND*/0)
-
-static __inline vm_paddr_t
-xpmap_mtop(vm_paddr_t mpa)
-{
- vm_paddr_t tmp = (mpa & PG_FRAME);
-
- return machtophys(tmp) | (mpa & ~PG_FRAME);
-}
-
-static __inline vm_paddr_t
-xpmap_ptom(vm_paddr_t ppa)
-{
- vm_paddr_t tmp = (ppa & PG_FRAME);
-
- return phystomach(tmp) | (ppa & ~PG_FRAME);
-}
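
Both helpers translate only the frame number selected by PG_FRAME; the page offset and PTE flag bits pass through untouched, which is why they can be applied to raw PTE values. A minimal sketch, assuming consistently populated p2m/m2p tables:

	vm_paddr_t pte_bits = pa | PG_V | PG_RW;

	/* Frame remapped out and back; the low bits survive unchanged. */
	KASSERT(xpmap_mtop(xpmap_ptom(pte_bits)) == pte_bits,
	    ("p2m and m2p tables disagree"));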
-
-static __inline void
-set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-#ifdef notyet
- PANIC_IF(max_mapnr && pfn >= max_mapnr);
-#endif
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef notyet
- PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
-#endif
- return;
- }
- xen_phys_machine[pfn] = mfn;
-}
-
-static __inline int
-phys_to_machine_mapping_valid(unsigned long pfn)
-{
- return xen_phys_machine[pfn] != INVALID_P2M_ENTRY;
-}
-
-#endif /* !XEN */
-
-#endif /* _XEN_XENPMAP_H_ */
diff --git a/sys/i386/include/xen/xenstored.h b/sys/i386/include/xen/xenstored.h
deleted file mode 100644
index e584fa5..0000000
--- a/sys/i386/include/xen/xenstored.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Simple prototype Xen Store Daemon providing a simple tree-like database.
- * Copyright (C) 2005 Rusty Russell IBM Corporation
- *
- * This file may be distributed separately from the Linux kernel, or
- * incorporated into other software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef _XENSTORED_H
-#define _XENSTORED_H
-
-enum xsd_sockmsg_type
-{
- XS_DEBUG,
- XS_SHUTDOWN,
- XS_DIRECTORY,
- XS_READ,
- XS_GET_PERMS,
- XS_WATCH,
- XS_WATCH_ACK,
- XS_UNWATCH,
- XS_TRANSACTION_START,
- XS_TRANSACTION_END,
- XS_OP_READ_ONLY = XS_TRANSACTION_END,
- XS_INTRODUCE,
- XS_RELEASE,
- XS_GETDOMAINPATH,
- XS_WRITE,
- XS_MKDIR,
- XS_RM,
- XS_SET_PERMS,
- XS_WATCH_EVENT,
- XS_ERROR,
-};
-
-#define XS_WRITE_NONE "NONE"
-#define XS_WRITE_CREATE "CREATE"
-#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
-
-/* We hand errors as strings, for portability. */
-struct xsd_errors
-{
- int errnum;
- const char *errstring;
-};
-#define XSD_ERROR(x) { x, #x }
-static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
- XSD_ERROR(EINVAL),
- XSD_ERROR(EACCES),
- XSD_ERROR(EEXIST),
- XSD_ERROR(EISDIR),
- XSD_ERROR(ENOENT),
- XSD_ERROR(ENOMEM),
- XSD_ERROR(ENOSPC),
- XSD_ERROR(EIO),
- XSD_ERROR(ENOTEMPTY),
- XSD_ERROR(ENOSYS),
- XSD_ERROR(EROFS),
- XSD_ERROR(EBUSY),
- XSD_ERROR(ETIMEDOUT),
- XSD_ERROR(EISCONN),
-};
-struct xsd_sockmsg
-{
- uint32_t type;
- uint32_t len; /* Length of data following this. */
-
- /* Generally followed by nul-terminated string(s). */
-};
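
A hedged sketch of how a client might serialize a request against this early, prototype message format (build_xs_read and its buffer are illustrative names, not part of the header):

	#include <string.h>

	/*
	 * Lay out an XS_READ request: the fixed header, then the
	 * nul-terminated path as payload (len counts the nul byte).
	 */
	static size_t
	build_xs_read(char *buf, const char *path)
	{
		struct xsd_sockmsg hdr;

		hdr.type = XS_READ;
		hdr.len = strlen(path) + 1;
		memcpy(buf, &hdr, sizeof(hdr));
		memcpy(buf + sizeof(hdr), path, hdr.len);
		return (sizeof(hdr) + hdr.len);
	}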
-
-#endif /* _XENSTORED_H */
diff --git a/sys/i386/include/xen/xenvar.h b/sys/i386/include/xen/xenvar.h
index 5694607..484c279 100644
--- a/sys/i386/include/xen/xenvar.h
+++ b/sys/i386/include/xen/xenvar.h
@@ -29,91 +29,8 @@
#ifndef XENVAR_H_
#define XENVAR_H_
-#include <machine/xen/features.h>
-
-#if defined(XEN)
-
-#define XBOOTUP 0x1
-#define XPMAP 0x2
-extern int xendebug_flags;
-#ifndef NOXENDEBUG
-/* Print directly to the Xen console during debugging. */
-#define XENPRINTF xc_printf
-#else
-#define XENPRINTF printf
-#endif
-
-extern xen_pfn_t *xen_phys_machine;
-extern xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
-extern xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
-
-#if 0
-#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__)
-#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__)
-#define TRACE_DEBUG(argflags, _f, _a...) \
-if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a);
-#else
-#define TRACE_ENTER
-#define TRACE_EXIT
-#define TRACE_DEBUG(argflags, _f, _a...)
-#endif
-
-extern xen_pfn_t *xen_machine_phys;
-/*
- * Xen starts physical pages after the 4MB ISA hole; FreeBSD doesn't.
- */
-
-
-#undef ADD_ISA_HOLE /* XXX */
-
-#ifdef ADD_ISA_HOLE
-#define ISA_INDEX_OFFSET 1024
-#define ISA_PDR_OFFSET 1
-#else
-#define ISA_INDEX_OFFSET 0
-#define ISA_PDR_OFFSET 0
-#endif
-
-
-#define PFNTOMFN(i) (xen_phys_machine[(i)])
-#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
-
-#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
-#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
-
-#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
-#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT)
-
-#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
-
-#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
-#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
-
-
-void xpq_init(void);
-
-#define BITS_PER_LONG 32
-#define NR_CPUS XEN_LEGACY_MAX_VCPUS
-
-#define BITS_TO_LONGS(bits) \
- (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
-#define DECLARE_BITMAP(name,bits) \
- unsigned long name[BITS_TO_LONGS(bits)]
-
-int xen_create_contiguous_region(vm_page_t pages, int npages);
-
-void xen_destroy_contiguous_region(void * addr, int npages);
-
-#elif defined(XENHVM)
+#include <xen/features.h>
#define vtomach(va) pmap_kextract((vm_offset_t) (va))
-#define PFNTOMFN(pa) (pa)
-#define MFNTOPFN(ma) (ma)
-
-#define set_phys_to_machine(pfn, mfn) ((void)0)
-#define phys_to_machine_mapping_valid(pfn) (TRUE)
-
-#endif /* !XEN && !XENHVM */
#endif
diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c
index b0ea4e7..a7113e2 100644
--- a/sys/i386/isa/npx.c
+++ b/sys/i386/isa/npx.c
@@ -69,10 +69,6 @@ __FBSDID("$FreeBSD$");
#include <machine/ucontext.h>
#include <machine/intr_machdep.h>
-#ifdef XEN
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#endif
#ifdef DEV_ISA
#include <isa/isavar.h>
@@ -157,13 +153,8 @@ void xsaveopt(char *addr, uint64_t mask);
#endif /* __GNUCLIKE_ASM && !lint */
-#ifdef XEN
-#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
-#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
-#else
#define start_emulating() load_cr0(rcr0() | CR0_TS)
#define stop_emulating() clts()
-#endif
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(thread) \
diff --git a/sys/i386/pci/pci_cfgreg.c b/sys/i386/pci/pci_cfgreg.c
index 5d57e89..2716a7a 100644
--- a/sys/i386/pci/pci_cfgreg.c
+++ b/sys/i386/pci/pci_cfgreg.c
@@ -93,9 +93,7 @@ static uint32_t pci_docfgregread(int bus, int slot, int func, int reg,
int bytes);
static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
-#ifndef XEN
static int pcireg_cfgopen(void);
-#endif
static int pciereg_cfgread(int bus, unsigned slot, unsigned func,
unsigned reg, unsigned bytes);
static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func,
@@ -116,7 +114,6 @@ pci_i386_map_intline(int line)
return (line);
}
-#ifndef XEN
static u_int16_t
pcibios_get_version(void)
{
@@ -137,7 +134,6 @@ pcibios_get_version(void)
}
return (args.ebx & 0xffff);
}
-#endif
/*
* Initialise access to PCI configuration space
@@ -145,9 +141,6 @@ pcibios_get_version(void)
int
pci_cfgregopen(void)
{
-#ifdef XEN
- return (0);
-#else
static int opened = 0;
uint64_t pciebar;
u_int16_t vid, did;
@@ -202,7 +195,6 @@ pci_cfgregopen(void)
}
return(1);
-#endif
}
static uint32_t
@@ -390,7 +382,6 @@ pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes)
mtx_unlock_spin(&pcicfg_mtx);
}
-#ifndef XEN
/* check whether the configuration mechanism has been correctly identified */
static int
pci_cfgcheck(int maxdev)
@@ -607,7 +598,6 @@ pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus)
return (1);
}
-#endif /* !XEN */
#define PCIE_PADDR(base, reg, bus, slot, func) \
((base) + \
diff --git a/sys/i386/pci/pci_pir.c b/sys/i386/pci/pci_pir.c
index 0d64cab..6aeaae3 100644
--- a/sys/i386/pci/pci_pir.c
+++ b/sys/i386/pci/pci_pir.c
@@ -137,9 +137,6 @@ pci_pir_open(void)
int i;
uint8_t ck, *cv;
-#ifdef XEN
- return;
-#else
/* Don't try if we've already found a table. */
if (pci_route_table != NULL)
return;
@@ -150,7 +147,7 @@ pci_pir_open(void)
sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0);
if (sigaddr == 0)
return;
-#endif
+
/* If we found something, check the checksum and length. */
/* XXX - Use pmap_mapdev()? */
pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr);
@@ -481,11 +478,7 @@ pci_pir_biosroute(int bus, int device, int func, int pin, int irq)
args.eax = PCIBIOS_ROUTE_INTERRUPT;
args.ebx = (bus << 8) | (device << 3) | func;
args.ecx = (irq << 8) | (0xa + pin);
-#ifdef XEN
- return (0);
-#else
return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL)));
-#endif
}
diff --git a/sys/i386/xen/clock.c b/sys/i386/xen/clock.c
deleted file mode 100644
index ffb436e..0000000
--- a/sys/i386/xen/clock.c
+++ /dev/null
@@ -1,570 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz and Don Ahn.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)clock.c 7.2 (Berkeley) 5/12/91
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/* #define DELAYDEBUG */
-/*
- * Routines to handle clock hardware.
- */
-
-#include "opt_ddb.h"
-#include "opt_clock.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/clock.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/time.h>
-#include <sys/timeet.h>
-#include <sys/timetc.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-#include <sys/sysctl.h>
-#include <sys/cons.h>
-#include <sys/power.h>
-
-#include <machine/clock.h>
-#include <machine/cputypes.h>
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/psl.h>
-#include <machine/pvclock.h>
-#if defined(SMP)
-#include <machine/smp.h>
-#endif
-#include <machine/specialreg.h>
-#include <machine/timerreg.h>
-
-#include <x86/isa/icu.h>
-#include <isa/isareg.h>
-#include <isa/rtc.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/pmap.h>
-#include <xen/hypervisor.h>
-#include <xen/xen-os.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/interface/vcpu.h>
-#include <machine/cpu.h>
-#include <xen/xen_intr.h>
-
-/*
- * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
- * can use a simple formula for leap years.
- */
-#define LEAPYEAR(y) (!((y) % 4))
-#define DAYSPERYEAR (28+30*4+31*7)
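
The shortcut is safe because the century exceptions to the divisible-by-four rule (1900 and 2100 are not leap years) fall outside the window a 32-bit time_t can reach; a purely illustrative check:

	#include <assert.h>

	static void
	leapyear_check(void)
	{
		assert(LEAPYEAR(2000));		/* divisible by 4, truly leap */
		assert(!LEAPYEAR(1999));	/* not divisible by 4 */
		/* 1900 would wrongly test leap, but time_t cannot reach it */
	}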
-
-#ifndef TIMER_FREQ
-#define TIMER_FREQ 1193182
-#endif
-
-#ifdef CYC2NS_SCALE_FACTOR
-#undef CYC2NS_SCALE_FACTOR
-#endif
-#define CYC2NS_SCALE_FACTOR 10
-
-/* Values for timerX_state: */
-#define RELEASED 0
-#define RELEASE_PENDING 1
-#define ACQUIRED 2
-#define ACQUIRE_PENDING 3
-
-struct mtx clock_lock;
-#define RTC_LOCK_INIT \
- mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE)
-#define RTC_LOCK mtx_lock_spin(&clock_lock)
-#define RTC_UNLOCK mtx_unlock_spin(&clock_lock)
-#define NS_PER_TICK (1000000000ULL/hz)
-
-int adjkerntz; /* local offset from UTC in seconds */
-int clkintr_pending;
-int pscnt = 1;
-int psdiv = 1;
-int wall_cmos_clock;
-u_int timer_freq = TIMER_FREQ;
-static u_long cyc2ns_scale;
-static uint64_t processed_system_time; /* stime (ns) at last processing. */
-
-#define do_div(n,base) ({ \
- unsigned long __upper, __low, __high, __mod, __base; \
- __base = (base); \
- __asm("":"=a" (__low), "=d" (__high):"A" (n)); \
- __upper = __high; \
- if (__high) { \
- __upper = __high % (__base); \
- __high = __high / (__base); \
- } \
- __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
- __asm("":"=A" (n):"a" (__low),"d" (__high)); \
- __mod; \
-})
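
For reference, a portable C restatement of what the inline assembly computes (illustrative only; the macro open-codes divl so the 64-by-32 division stays in registers instead of going through a 64-bit division helper):

	#include <stdint.h>

	/* Divide *n in place by base; return the remainder, as do_div() does. */
	static inline unsigned long
	do_div_equiv(uint64_t *n, unsigned long base)
	{
		unsigned long rem = (unsigned long)(*n % base);

		*n /= base;
		return (rem);
	}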
-
-
-/* convert from cycles (64 bits) => nanoseconds (64 bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return ((cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
-}
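
Plugging in numbers makes the scaling concrete (illustrative values, not from the source):

	/*
	 * cpu_mhz = 2400:
	 *   cyc2ns_scale = (1000 << 10) / 2400           = 426
	 *   cycles_2_ns(1000000) = (1000000 * 426) >> 10 = 416015 ns
	 * exact: 1000000 * 1000 / 2400                   = 416666 ns,
	 * so the shift-for-divide substitution costs about 0.16% here.
	 */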
-
-static uint32_t
-getit(void)
-{
- return (pvclock_get_last_cycles());
-}
-
-
-/*
- * XXX: timer needs more SMP work.
- */
-void
-i8254_init(void)
-{
-
- RTC_LOCK_INIT;
-}
-
-/*
- * Wait "n" microseconds.
- * Relies on timer 1 counting down from (timer_freq / hz)
- * Note: timer had better have been programmed before this is first used!
- */
-void
-i8254_delay(int n)
-{
- int delta, ticks_left;
- uint32_t tick, prev_tick;
-#ifdef DELAYDEBUG
- int getit_calls = 1;
- int n1;
- static int state = 0;
-
- if (state == 0) {
- state = 1;
- for (n1 = 1; n1 <= 10000000; n1 *= 10)
- DELAY(n1);
- state = 2;
- }
- if (state == 1)
- printf("DELAY(%d)...", n);
-#endif
- /*
- * Read the counter first, so that the rest of the setup overhead is
- * counted. Guess the initial overhead is 20 usec (on most systems it
- * takes about 1.5 usec for each of the i/o's in getit(). The loop
- * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The
- * multiplications and divisions to scale the count take a while).
- *
- * However, if ddb is active then use a fake counter since reading
- * the i8254 counter involves acquiring a lock. ddb must not grab
- * locks for many reasons, but it calls here for at least atkbd
- * input.
- */
- prev_tick = getit();
-
- n -= 0; /* XXX actually guess no initial overhead */
- /*
- * Calculate (n * (timer_freq / 1e6)) without using floating point
- * and without any avoidable overflows.
- */
- if (n <= 0)
- ticks_left = 0;
- else if (n < 256)
- /*
- * Use fixed point to avoid a slow division by 1000000.
- * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
- * 2^15 is the first power of 2 that gives exact results
- * for n between 0 and 256.
- */
- ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
- else
- /*
- * Don't bother using fixed point, although gcc-2.7.2
- * generates particularly poor code for the long long
- * division, since even the slow way will complete long
- * before the delay is up (unless we're interrupted).
- */
- ticks_left = ((u_int)n * (long long)timer_freq + 999999)
- / 1000000;
-
- while (ticks_left > 0) {
- tick = getit();
-#ifdef DELAYDEBUG
- ++getit_calls;
-#endif
- delta = tick - prev_tick;
- prev_tick = tick;
- if (delta < 0) {
- /*
- * Guard against timer0_max_count being wrong.
- * This shouldn't happen in normal operation,
- * but it may happen if set_timer_freq() is
- * traced.
- */
- /* delta += timer0_max_count; ??? */
- if (delta < 0)
- delta = 0;
- }
- ticks_left -= delta;
- }
-#ifdef DELAYDEBUG
- if (state == 1)
- printf(" %d calls to getit() at %d usec each\n",
- getit_calls, (n + 5) / getit_calls);
-#endif
-}
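
A quick numeric check of the fixed-point branch above (illustrative):

	/*
	 * n = 100 us:
	 *   ticks_left = (100 * 39099 + (1 << 15) - 1) >> 15
	 *              = 3942667 >> 15 = 120
	 * exact: 100e-6 * 1193182 = 119.32 ticks,
	 * so the computation errs slightly high and DELAY() never
	 * returns early.
	 */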
-
-void
-startrtclock()
-{
- uint64_t __cpu_khz;
- uint32_t cpu_khz;
- struct vcpu_time_info *info;
-
- __cpu_khz = 1000000ULL << 32;
- info = &HYPERVISOR_shared_info->vcpu_info[0].time;
-
- (void)do_div(__cpu_khz, info->tsc_to_system_mul);
-	if (info->tsc_shift < 0)
- cpu_khz = __cpu_khz << -info->tsc_shift;
- else
- cpu_khz = __cpu_khz >> info->tsc_shift;
-
- printf("Xen reported: %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
-
- /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
- (2^32 * 1 / (clocks/us)) */
-
- set_cyc2ns_scale(cpu_khz/1000);
- tsc_freq = cpu_khz * 1000;
-}
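
To see what the do_div()/shift pair recovers: pvclock's tsc_to_system_mul is a 32.32 fixed-point nanoseconds-per-TSC-tick multiplier, further scaled by 2^tsc_shift. An illustrative case (values assumed, not from the source):

	/*
	 * tsc_to_system_mul = 0x80000000 (0.5 ns), tsc_shift = 1:
	 * each tick is 0.5 * 2^1 = 1 ns, i.e. a 1 GHz TSC.
	 *   __cpu_khz = (10^6 << 32) / 0x80000000 = 2000000
	 *   cpu_khz   = 2000000 >> 1              = 1000000 kHz
	 */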
-
-/*
- * RTC support routines
- */
-
-
-static __inline int
-readrtc(int port)
-{
- return(bcd2bin(rtcin(port)));
-}
-
-
-#ifdef XEN_PRIVILEGED_GUEST
-
-/*
- * Initialize the time of day register, based on the time base which is, e.g.
- * from a filesystem.
- */
-static void
-domu_inittodr(time_t base)
-{
- unsigned long sec;
- int s, y;
- struct timespec ts;
-
- update_wallclock();
- add_uptime_to_wallclock();
-
- RTC_LOCK;
-
- if (base) {
- ts.tv_sec = base;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- }
-
- sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- y = time_second - shadow_tv.tv_sec;
- if (y <= -2 || y >= 2) {
- /* badly off, adjust it */
- tc_setclock(&shadow_tv);
- }
- RTC_UNLOCK;
-}
-
-/*
- * Write system time back to RTC.
- */
-static void
-domu_resettodr(void)
-{
- unsigned long tm;
- int s;
- dom0_op_t op;
- struct shadow_time_info *shadow;
- struct pcpu *pc;
-
- pc = pcpu_find(smp_processor_id());
- shadow = &pc->pc_shadow_time;
- if (xen_disable_rtc_set)
- return;
-
- s = splclock();
- tm = time_second;
- splx(s);
-
- tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- if ((xen_start_info->flags & SIF_INITDOMAIN) &&
- !independent_wallclock)
- {
- op.cmd = DOM0_SETTIME;
- op.u.settime.secs = tm;
- op.u.settime.nsecs = 0;
- op.u.settime.system_time = shadow->system_timestamp;
- HYPERVISOR_dom0_op(&op);
- update_wallclock();
- add_uptime_to_wallclock();
- } else if (independent_wallclock) {
- /* notyet */
- ;
- }
-}
-
-/*
- * Initialize the time of day register, based on the time base which is, e.g.
- * from a filesystem.
- */
-void
-inittodr(time_t base)
-{
- unsigned long sec, days;
- int year, month;
- int y, m, s;
- struct timespec ts;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- domu_inittodr(base);
- return;
- }
-
- if (base) {
- s = splclock();
- ts.tv_sec = base;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- splx(s);
- }
-
- /* Look if we have a RTC present and the time is valid */
- if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
- goto wrong_time;
-
- /* wait for time update to complete */
- /* If RTCSA_TUP is zero, we have at least 244us before next update */
- s = splhigh();
- while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
- splx(s);
- s = splhigh();
- }
-
- days = 0;
-#ifdef USE_RTC_CENTURY
- year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
-#else
- year = readrtc(RTC_YEAR) + 1900;
- if (year < 1970)
- year += 100;
-#endif
- if (year < 1970) {
- splx(s);
- goto wrong_time;
- }
- month = readrtc(RTC_MONTH);
- for (m = 1; m < month; m++)
- days += daysinmonth[m-1];
- if ((month > 2) && LEAPYEAR(year))
- days ++;
- days += readrtc(RTC_DAY) - 1;
- for (y = 1970; y < year; y++)
- days += DAYSPERYEAR + LEAPYEAR(y);
- sec = ((( days * 24 +
- readrtc(RTC_HRS)) * 60 +
- readrtc(RTC_MIN)) * 60 +
- readrtc(RTC_SEC));
-	/* sec now contains the number of seconds since Jan 1 1970,
-	   in the local time zone */
-
- sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- y = time_second - sec;
- if (y <= -2 || y >= 2) {
- /* badly off, adjust it */
- ts.tv_sec = sec;
- ts.tv_nsec = 0;
- tc_setclock(&ts);
- }
- splx(s);
- return;
-
- wrong_time:
- printf("Invalid time in real time clock.\n");
- printf("Check and reset the date immediately!\n");
-}
-
-
-/*
- * Write system time back to RTC
- */
-void
-resettodr()
-{
- unsigned long tm;
- int y, m, s;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
- domu_resettodr();
- return;
- }
-
- if (xen_disable_rtc_set)
- return;
-
- s = splclock();
- tm = time_second;
- splx(s);
-
- /* Disable RTC updates and interrupts. */
- writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
-
- /* Calculate local time to put in RTC */
-
- tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
-
- writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */
- writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */
- writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */
-
-	/* We now have the days since 01-01-1970 in tm */
- writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */
- for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
- tm >= m;
- y++, m = DAYSPERYEAR + LEAPYEAR(y))
- tm -= m;
-
- /* Now we have the years in y and the day-of-the-year in tm */
- writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */
-#ifdef USE_RTC_CENTURY
- writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */
-#endif
- for (m = 0; ; m++) {
- int ml;
-
- ml = daysinmonth[m];
- if (m == 1 && LEAPYEAR(y))
- ml++;
- if (tm < ml)
- break;
- tm -= ml;
- }
-
- writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */
- writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */
-
- /* Reenable RTC updates and interrupts. */
- writertc(RTC_STATUSB, RTCSB_24HR);
- rtcin(RTC_INTR);
-}
-#endif
-
-/*
- * Start clocks running.
- */
-void
-cpu_initclocks(void)
-{
- cpu_initclocks_bsp();
-}
-
-/* Return system time offset by ticks */
-uint64_t
-get_system_time(int ticks)
-{
- return (processed_system_time + (ticks * NS_PER_TICK));
-}
-
-int
-timer_spkr_acquire(void)
-{
-
- return (0);
-}
-
-int
-timer_spkr_release(void)
-{
-
- return (0);
-}
-
-void
-timer_spkr_setfreq(int freq)
-{
-
-}
-
diff --git a/sys/i386/xen/exception.s b/sys/i386/xen/exception.s
deleted file mode 100644
index 95f1c0e..0000000
--- a/sys/i386/xen/exception.s
+++ /dev/null
@@ -1,494 +0,0 @@
-/*-
- * Copyright (c) 1989, 1990 William F. Jolitz.
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include "opt_apic.h"
-#include "opt_npx.h"
-
-#include <machine/asmacros.h>
-#include <machine/psl.h>
-#include <machine/trap.h>
-
-#include "assym.s"
-
-#define SEL_RPL_MASK 0x0002
-#define __HYPERVISOR_iret 23
-
-/* Offsets into shared_info_t. */
-
-#define evtchn_upcall_pending /* 0 */
-#define evtchn_upcall_mask 1
-
-#define sizeof_vcpu_shift 6
-
-
-#ifdef SMP
-#define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \
- shl $sizeof_vcpu_shift,reg ; \
- addl HYPERVISOR_shared_info,reg
-#else
-#define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
-#endif
-
-#define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
- __DISABLE_INTERRUPTS(reg)
-#define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \
- __ENABLE_INTERRUPTS(reg)
-#define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
-
-#define POPA \
- popl %edi; \
- popl %esi; \
- popl %ebp; \
- popl %ebx; \
- popl %ebx; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
- .text
-
-/*****************************************************************************/
-/* Trap handling */
-/*****************************************************************************/
-/*
- * Trap and fault vector routines.
- *
- * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we use are interrupt gates,
- * SDT_SYS386IGT, which are nearly the same thing except interrupts are
- * disabled on entry.
- *
- * The cpu will push a certain amount of state onto the kernel stack for
- * the current process. The amount of state depends on the type of trap
- * and whether the trap crossed rings or not. See i386/include/frame.h.
- * At the very least the current EFLAGS (status register, which includes
- * the interrupt disable state prior to the trap), the code segment register,
- * and the return instruction pointer are pushed by the cpu. The cpu
- * will also push an 'error' code for certain traps. We push a dummy
- * error code for those traps where the cpu doesn't, in order to maintain
- * a consistent frame. We also push a contrived 'trap number'.
- *
- * The cpu does not push the general registers, we must do that, and we
- * must restore them prior to calling 'iret'. The cpu adjusts the %cs and
- * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
- * must load them with appropriate values for supervisor mode operation.
- */
-
-MCOUNT_LABEL(user)
-MCOUNT_LABEL(btrap)
-
-#define TRAP(a) pushl $(a) ; jmp alltraps
-
-IDTVEC(div)
- pushl $0; TRAP(T_DIVIDE)
-IDTVEC(dbg)
- pushl $0; TRAP(T_TRCTRAP)
-IDTVEC(nmi)
- pushl $0; TRAP(T_NMI)
-IDTVEC(bpt)
- pushl $0; TRAP(T_BPTFLT)
-IDTVEC(ofl)
- pushl $0; TRAP(T_OFLOW)
-IDTVEC(bnd)
- pushl $0; TRAP(T_BOUND)
-IDTVEC(ill)
- pushl $0; TRAP(T_PRIVINFLT)
-IDTVEC(dna)
- pushl $0; TRAP(T_DNA)
-IDTVEC(fpusegm)
- pushl $0; TRAP(T_FPOPFLT)
-IDTVEC(tss)
- TRAP(T_TSSFLT)
-IDTVEC(missing)
- TRAP(T_SEGNPFLT)
-IDTVEC(stk)
- TRAP(T_STKFLT)
-IDTVEC(prot)
- TRAP(T_PROTFLT)
-IDTVEC(page)
- TRAP(T_PAGEFLT)
-IDTVEC(mchk)
- pushl $0; TRAP(T_MCHK)
-IDTVEC(rsvd)
- pushl $0; TRAP(T_RESERVED)
-IDTVEC(fpu)
- pushl $0; TRAP(T_ARITHTRAP)
-IDTVEC(align)
- TRAP(T_ALIGNFLT)
-IDTVEC(xmm)
- pushl $0; TRAP(T_XMMFLT)
-
-IDTVEC(hypervisor_callback)
- pushl $0;
- pushl $0;
- pushal
- pushl %ds
- pushl %es
- pushl %fs
-upcall_with_regs_pushed:
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
-call_evtchn_upcall:
- movl TF_EIP(%esp),%eax
- cmpl $scrit,%eax
- jb 10f
- cmpl $ecrit,%eax
- jb critical_region_fixup
-
-10: pushl %esp
- call xen_intr_handle_upcall
- addl $4,%esp
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-
-hypervisor_callback_pending:
- DISABLE_INTERRUPTS(%esi) /* cli */
- jmp 10b
- /*
- * alltraps entry point. Interrupts are enabled if this was a trap
- * gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Only page faults
- * use an interrupt gate.
- */
- SUPERALIGN_TEXT
- .globl alltraps
- .type alltraps,@function
-alltraps:
- pushal
- pushl %ds
- pushl %es
- pushl %fs
-
-alltraps_with_regs_pushed:
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
-
-calltrap:
- push %esp
- call trap
- add $4, %esp
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-/*
- * SYSCALL CALL GATE (old entry point for a.out binaries)
- *
- * The intersegment call has been set up to specify one dummy parameter.
- *
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
- */
- SUPERALIGN_TEXT
-IDTVEC(lcall_syscall)
- pushfl /* save eflags */
- popl 8(%esp) /* shuffle into tf_eflags */
- pushl $7 /* sizeof "lcall 7,0" */
- subl $4,%esp /* skip over tf_trapno */
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
- pushl %esp
- call syscall
- add $4, %esp
- MEXITCOUNT
- jmp doreti
-
-/*
- * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
- *
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather than an IGT (interrupt gate). Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
- */
- SUPERALIGN_TEXT
-IDTVEC(int0x80_syscall)
- pushl $2 /* sizeof "int 0x80" */
- pushl $0xBEEF /* for debug */
- pushal
- pushl %ds
- pushl %es
- pushl %fs
- SET_KERNEL_SREGS
- FAKE_MCOUNT(TF_EIP(%esp))
- pushl %esp
- call syscall
- add $4, %esp
- MEXITCOUNT
- jmp doreti
-
-ENTRY(fork_trampoline)
- pushl %esp /* trapframe pointer */
- pushl %ebx /* arg1 */
- pushl %esi /* function */
- call fork_exit
- addl $12,%esp
- /* cut from syscall */
-
- /*
- * Return via doreti to handle ASTs.
- */
- MEXITCOUNT
- jmp doreti
-
-
-/*
- * To efficiently implement classification of trap and interrupt handlers
- * for profiling, there must be only trap handlers between the labels btrap
- * and bintr, and only interrupt handlers between the labels bintr and
- * eintr. This is implemented (partly) by including files that contain
- * some of the handlers. Before including the files, set up a normal asm
- * environment so that the included files don't need to know that they are
- * included.
- */
-
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-MCOUNT_LABEL(bintr)
-
-#ifdef DEV_APIC
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-
-#include <i386/i386/apic_vector.s>
-#endif
-
- .data
- .p2align 4
- .text
- SUPERALIGN_TEXT
-#include <i386/i386/vm86bios.s>
-
- .text
-MCOUNT_LABEL(eintr)
-
-/*
- * void doreti(struct trapframe)
- *
- * Handle return from interrupts, traps and syscalls.
- */
- .text
- SUPERALIGN_TEXT
- .type doreti,@function
-doreti:
- FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
-doreti_next:
-#ifdef notyet
- /*
- * Check if ASTs can be handled now. PSL_VM must be checked first
- * since segment registers only have an RPL in non-VM86 mode.
- */
- testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */
- jz doreti_notvm86
- movl PCPU(CURPCB),%ecx
- testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */
- jz doreti_ast /* can handle ASTS now if not */
- jmp doreti_exit
-
-doreti_notvm86:
-#endif
- testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
- jz doreti_exit /* can't handle ASTs now if not */
-
-doreti_ast:
- /*
- * Check for ASTs atomically with returning. Disabling CPU
- * interrupts provides sufficient locking even in the SMP case,
- * since we will be informed of any new ASTs by an IPI.
- */
- DISABLE_INTERRUPTS(%esi) /* cli */
- movl PCPU(CURTHREAD),%eax
- testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
- je doreti_exit
- ENABLE_INTERRUPTS(%esi) /* sti */
- pushl %esp /* pass a pointer to the trapframe */
- call ast
- add $4,%esp
- jmp doreti_ast
-
- /*
- * doreti_exit: pop registers, iret.
- *
- * The segment register pop is a special case, since it may
- * fault if (for example) a sigreturn specifies bad segment
- * registers. The fault is handled in trap.c.
- */
-doreti_exit:
- ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti)
-
- .globl scrit
-scrit:
- __TEST_PENDING(%esi)
- jnz hypervisor_callback_pending /* More to go */
-
- MEXITCOUNT
-
- .globl doreti_popl_fs
-doreti_popl_fs:
- popl %fs
- .globl doreti_popl_es
-doreti_popl_es:
- popl %es
- .globl doreti_popl_ds
-doreti_popl_ds:
- popl %ds
-
- /*
- * This is important: as nothing is atomic over here (we can get
- * interrupted any time), we use the critical_region_fixup() in
- * order to figure out where our stack is. Therefore, do NOT use
- * 'popal' here without fixing up the table!
- */
- POPA
- addl $8,%esp
- .globl doreti_iret
-doreti_iret:
- jmp hypercall_page + (__HYPERVISOR_iret * 32)
- .globl ecrit
-ecrit:
- /*
- * doreti_iret_fault and friends. Alternative return code for
- * the case where we get a fault in the doreti_exit code
- * above. trap() (i386/i386/trap.c) catches this specific
- * case, sends the process a signal and continues in the
- * corresponding place in the code below.
- */
- ALIGN_TEXT
- .globl doreti_iret_fault
-doreti_iret_fault:
- subl $8,%esp
- pushal
- pushl %ds
- .globl doreti_popl_ds_fault
-doreti_popl_ds_fault:
- pushl %es
- .globl doreti_popl_es_fault
-doreti_popl_es_fault:
- pushl %fs
- .globl doreti_popl_fs_fault
-doreti_popl_fs_fault:
- movl $0,TF_ERR(%esp) /* XXX should be the error code */
- movl $T_PROTFLT,TF_TRAPNO(%esp)
- jmp alltraps_with_regs_pushed
-
-/*
- * [How we do the fixup].  We want to merge the current stack frame with
- * the just-interrupted frame.  How we do this depends on where in the
- * critical region the interrupted handler was executing, and so how many
- * saved registers are in each frame.  We do this quickly using the lookup
- * table 'critical_fixup_table'.  For each byte offset in the critical
- * region, it provides the number of bytes which have already been popped
- * from the interrupted stack frame.
- */
-
-.globl critical_region_fixup
-critical_region_fixup:
- addl $critical_fixup_table-scrit,%eax
- movzbl (%eax),%eax # %eax contains num bytes popped
- movl %esp,%esi
- add %eax,%esi # %esi points at end of src region
- movl %esp,%edi
- add $0x40,%edi # %edi points at end of dst region
- movl %eax,%ecx
- shr $2,%ecx # convert bytes to words
- je 16f # skip loop if nothing to copy
-15: subl $4,%esi # pre-decrementing copy loop
- subl $4,%edi
- movl (%esi),%eax
- movl %eax,(%edi)
- loop 15b
-16: movl %edi,%esp # final %edi is top of merged stack
- jmp hypervisor_callback_pending
-
-
-critical_fixup_table:
-.byte 0x0,0x0,0x0 #testb $0x1,(%esi)
-.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea
-.byte 0x0,0x0 #pop %fs
-.byte 0x04 #pop %es
-.byte 0x08 #pop %ds
-.byte 0x0c #pop %edi
-.byte 0x10 #pop %esi
-.byte 0x14 #pop %ebp
-.byte 0x18 #pop %ebx
-.byte 0x1c #pop %ebx
-.byte 0x20 #pop %edx
-.byte 0x24 #pop %ecx
-.byte 0x28 #pop %eax
-.byte 0x2c,0x2c,0x2c #add $0x8,%esp
-#if 0
- .byte 0x34 #iret
-#endif
-.byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret
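
Reading one entry makes the mechanism concrete (an illustrative walk-through, not from the source):

	/*
	 * An upcall landing just after "popl %es" indexes to entry 0x08:
	 * %fs and %es of the interrupted frame are already popped.  The
	 * fixup splices the handler's freshly pushed copies of those two
	 * registers onto the not-yet-popped remainder of the interrupted
	 * frame, leaving one complete trapframe at the merged %esp before
	 * re-entering hypervisor_callback_pending.
	 */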
-
-
-/* Hypervisor uses this for application faults while it executes. */
-ENTRY(failsafe_callback)
- pushal
- call xen_failsafe_handler
-/*# call install_safe_pf_handler */
- movl 28(%esp),%ebx
-1: movl %ebx,%ds
- movl 32(%esp),%ebx
-2: movl %ebx,%es
- movl 36(%esp),%ebx
-3: movl %ebx,%fs
- movl 40(%esp),%ebx
-4: movl %ebx,%gs
-/*# call install_normal_pf_handler */
- popal
- addl $12,%esp
- iret
-
-
diff --git a/sys/i386/xen/locore.s b/sys/i386/xen/locore.s
deleted file mode 100644
index 7e67684..0000000
--- a/sys/i386/xen/locore.s
+++ /dev/null
@@ -1,360 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)locore.s 7.3 (Berkeley) 5/13/91
- * $FreeBSD$
- *
- * originally from: locore.s, by William F. Jolitz
- *
- * Substantially rewritten by David Greenman, Rod Grimes,
- * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
- * and many others.
- */
-
-#include "opt_bootp.h"
-#include "opt_compat.h"
-#include "opt_nfsroot.h"
-#include "opt_pmap.h"
-
-#include <sys/syscall.h>
-#include <sys/reboot.h>
-
-#include <machine/asmacros.h>
-#include <machine/cputypes.h>
-#include <machine/psl.h>
-#include <machine/pmap.h>
-#include <machine/specialreg.h>
-
-#define __ASSEMBLY__
-#include <xen/interface/elfnote.h>
-
-/* The defines below have been lifted out of <machine/xen-public/arch-x86_32.h> */
-#define FLAT_RING1_CS 0xe019 /* GDT index 259 */
-#define FLAT_RING1_DS 0xe021 /* GDT index 260 */
-#define KERNEL_CS FLAT_RING1_CS
-#define KERNEL_DS FLAT_RING1_DS
-
-#include "assym.s"
-
-.section __xen_guest
- .ascii "LOADER=generic,GUEST_OS=freebsd,GUEST_VER=7.0,XEN_VER=xen-3.0,BSD_SYMTAB,VIRT_BASE=0xc0000000"
- .byte 0
-
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "FreeBSD")
- ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "HEAD")
- ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
- ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, KERNBASE)
- ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, KERNBASE)
- ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, btext)
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, XEN_HYPERVISOR_VIRT_START)
-#if 0
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|supervisor_mode_kernel|writable_descriptor_tables")
-
-#ifdef PAE
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V)
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no")
- ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V)
-#endif
- ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
- ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
-
-
-
-/*
- * XXX
- *
- * Note: This version has been greatly munged to avoid various assembler
- * errors that may be fixed in newer versions of gas.  Perhaps newer
- * versions will have a more pleasant appearance.
- */
-
-/*
- * PTmap is recursive pagemap at top of virtual address space.
- * Within PTmap, the page directory can be found (third indirection).
- */
- .globl PTmap,PTD,PTDpde
- .set PTmap,(PTDPTDI << PDRSHIFT)
- .set PTD,PTmap + (PTDPTDI * PAGE_SIZE)
- .set PTDpde,PTD + (PTDPTDI * PDESIZE)
-
-/*
- * Compiled KERNBASE location and the kernel load address
- */
- .globl kernbase
- .set kernbase,KERNBASE
- .globl kernload
- .set kernload,KERNLOAD
-
-/*
- * Globals
- */
- .data
- ALIGN_DATA /* just to be sure */
-
- .space 0x2000 /* space for tmpstk - temporary stack */
-tmpstk:
-
- .globl bootinfo
-bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
-
- .globl KERNend
-KERNend: .long 0 /* phys addr end of kernel (just after bss) */
- .globl physfree
-physfree: .long 0 /* phys addr of next free page */
-
- .globl IdlePTD
-IdlePTD: .long 0 /* phys addr of kernel PTD */
-
-#ifdef PAE
- .globl IdlePDPT
-IdlePDPT: .long 0 /* phys addr of kernel PDPT */
-#endif
-
-#ifdef SMP
- .globl KPTphys
-#endif
-KPTphys: .long 0 /* phys addr of kernel page tables */
- .globl gdtset
-gdtset: .long 0 /* GDT is valid */
-
- .globl proc0kstack
-proc0kstack: .long 0 /* address of proc 0 kstack space */
-p0kpa: .long 0 /* phys addr of proc0's STACK */
-
-vm86phystk: .long 0 /* PA of vm86/bios stack */
-
- .globl vm86paddr, vm86pa
-vm86paddr: .long 0 /* address of vm86 region */
-vm86pa: .long 0 /* phys addr of vm86 region */
-
-#ifdef PC98
- .globl pc98_system_parameter
-pc98_system_parameter:
- .space 0x240
-#endif
-
- .globl avail_space
-avail_space: .long 0
-
-/**********************************************************************
- *
- * Some handy macros
- *
- */
-
-/*
- * We're already in protected mode, so no remapping is needed.
- */
-#define R(foo) (foo)
-
-#define ALLOCPAGES(foo) \
- movl R(physfree), %esi ; \
- movl $((foo)*PAGE_SIZE), %eax ; \
- addl %esi, %eax ; \
- movl %eax, R(physfree) ; \
- movl %esi, %edi ; \
- movl $((foo)*PAGE_SIZE),%ecx ; \
- xorl %eax,%eax ; \
- cld ; \
- rep ; \
- stosb
-
-/*
- * fillkpt
- * eax = page frame address
- * ebx = index into page table
- * ecx = how many pages to map
- * base = base address of page dir/table
- * prot = protection bits
- */
-#define fillkpt(base, prot) \
- shll $PTESHIFT,%ebx ; \
- addl base,%ebx ; \
- orl $PG_V,%eax ; \
- orl prot,%eax ; \
-1: movl %eax,(%ebx) ; \
- addl $PAGE_SIZE,%eax ; /* increment physical address */ \
- addl $PTESIZE,%ebx ; /* next pte */ \
- loop 1b
-
-/*
- * fillkptphys(prot)
- * eax = physical address
- * ecx = how many pages to map
- * prot = protection bits
- */
-#define fillkptphys(prot) \
- movl %eax, %ebx ; \
- shrl $PAGE_SHIFT, %ebx ; \
- fillkpt(R(KPTphys), prot)
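
For orientation, a hedged usage sketch in the file's own dialect (operand values are assumptions, patterned on the callers in the stock i386 locore.s):

	#	xorl	%eax,%eax		# start at physical address 0
	#	movl	R(KERNend),%ecx		# ...up to the end of the kernel
	#	shrl	$PAGE_SHIFT,%ecx	# byte count -> page count
	#	fillkptphys($PG_RW)		# map it writable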
-
-/* Temporary stack */
-.space 8192
-tmpstack:
- .long tmpstack, KERNEL_DS
-
- .text
-
-.p2align 12, 0x90
-
-#define HYPERCALL_PAGE_OFFSET 0x1000
-.org HYPERCALL_PAGE_OFFSET
-ENTRY(hypercall_page)
- .cfi_startproc
- .skip 0x1000
- .cfi_endproc
-
-/**********************************************************************
- *
- * This is where the bootblocks start us, set the ball rolling...
- *
- */
-NON_GPROF_ENTRY(btext)
- /* At the end of our stack, we shall have free space - so store it */
- movl %esp,%ebx
- movl %ebx,R(avail_space)
-
- lss tmpstack,%esp
-
- pushl %esi
- call initvalues
- popl %esi
-
- /* Store the CPUID information */
- xorl %eax,%eax
- cpuid # cpuid 0
- movl %eax,R(cpu_high) # highest capability
- movl %ebx,R(cpu_vendor) # store vendor string
- movl %edx,R(cpu_vendor+4)
- movl %ecx,R(cpu_vendor+8)
- movb $0,R(cpu_vendor+12)
-
- movl $1,%eax
- cpuid # cpuid 1
- movl %eax,R(cpu_id) # store cpu_id
- movl %ebx,R(cpu_procinfo) # store cpu_procinfo
- movl %edx,R(cpu_feature) # store cpu_feature
- movl %ecx,R(cpu_feature2) # store cpu_feature2
- rorl $8,%eax # extract family type
- andl $15,%eax
- cmpl $5,%eax
- movl $CPU_686,R(cpu)
-
- movl proc0kstack,%eax
- leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
- xorl %ebp,%ebp /* mark end of frames */
-#ifdef PAE
- movl IdlePDPT,%esi
-#else
- movl IdlePTD,%esi
-#endif
- movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
- pushl physfree
- call init386
- addl $4, %esp
- call mi_startup
- /* NOTREACHED */
- int $3
-
-/*
- * Signal trampoline, copied to top of user stack
- */
-NON_GPROF_ENTRY(sigcode)
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC_EFLAGS(%eax)
- jne 1f
- mov UC_GS(%eax), %gs /* restore %gs */
-1:
- movl $SYS_sigreturn,%eax
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-
-#ifdef COMPAT_FREEBSD4
- ALIGN_TEXT
-freebsd4_sigcode:
- calll *SIGF_HANDLER(%esp)
- leal SIGF_UC4(%esp),%eax /* get ucontext */
- pushl %eax
- testl $PSL_VM,UC4_EFLAGS(%eax)
- jne 1f
- mov UC4_GS(%eax),%gs /* restore %gs */
-1:
- movl $344,%eax /* 4.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
- /* on stack */
-1:
- jmp 1b
-#endif
-
-#ifdef COMPAT_43
- ALIGN_TEXT
-osigcode:
- call *SIGF_HANDLER(%esp) /* call signal handler */
- lea SIGF_SC(%esp),%eax /* get sigcontext */
- pushl %eax
- testl $PSL_VM,SC_PS(%eax)
- jne 9f
- movl SC_GS(%eax),%gs /* restore %gs */
-9:
- movl $103,%eax /* 3.x SYS_sigreturn */
- pushl %eax /* junk to fake return addr. */
- int $0x80 /* enter kernel with args */
-0: jmp 0b
-#endif /* COMPAT_43 */
-
- ALIGN_TEXT
-esigcode:
-
- .data
- .globl szsigcode
-szsigcode:
- .long esigcode-sigcode
-#ifdef COMPAT_FREEBSD4
- .globl szfreebsd4_sigcode
-szfreebsd4_sigcode:
- .long esigcode-freebsd4_sigcode
-#endif
-#ifdef COMPAT_43
- .globl szosigcode
-szosigcode:
- .long esigcode-osigcode
-#endif
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
deleted file mode 100644
index 329bba1..0000000
--- a/sys/i386/xen/mp_machdep.c
+++ /dev/null
@@ -1,1300 +0,0 @@
-/*-
- * Copyright (c) 1996, by Steve Passe
- * Copyright (c) 2008, by Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_apic.h"
-#include "opt_cpu.h"
-#include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
-#include "opt_pmap.h"
-#include "opt_sched.h"
-#include "opt_smp.h"
-
-#if !defined(lint)
-#if !defined(SMP)
-#error How did you get here?
-#endif
-
-#ifndef DEV_APIC
-#error The apic device is required for SMP, add "device apic" to your config file.
-#endif
-#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
-#error SMP not supported with CPU_DISABLE_CMPXCHG
-#endif
-#endif /* not lint */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/cons.h> /* cngetc() */
-#include <sys/cpuset.h>
-#ifdef GPROF
-#include <sys/gmon.h>
-#endif
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/memrange.h>
-#include <sys/mutex.h>
-#include <sys/pcpu.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/sched.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_page.h>
-
-#include <x86/apicreg.h>
-#include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
-#include <machine/pcb.h>
-#include <machine/psl.h>
-#include <machine/smp.h>
-#include <machine/specialreg.h>
-#include <machine/pcpu.h>
-
-#include <xen/xen-os.h>
-#include <xen/evtchn.h>
-#include <xen/xen_intr.h>
-#include <xen/hypervisor.h>
-#include <xen/interface/vcpu.h>
-
-/*---------------------------- Extern Declarations ---------------------------*/
-extern struct pcpu __pcpu[];
-
-extern void Xhypervisor_callback(void);
-extern void failsafe_callback(void);
-
-/*--------------------------- Forward Declarations ---------------------------*/
-static driver_filter_t smp_reschedule_interrupt;
-static driver_filter_t smp_call_function_interrupt;
-static void assign_cpu_ids(void);
-static void set_interrupt_apic_ids(void);
-static int start_all_aps(void);
-static int start_ap(int apic_id);
-static void release_aps(void *dummy);
-
-/*---------------------------------- Macros ----------------------------------*/
-#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
-
-/*-------------------------------- Local Types -------------------------------*/
-typedef void call_data_func_t(uintptr_t , uintptr_t);
-
-struct cpu_info {
- int cpu_present:1;
- int cpu_bsp:1;
- int cpu_disabled:1;
-};
-
-struct xen_ipi_handler
-{
- driver_filter_t *filter;
- const char *description;
-};
-
-enum {
- RESCHEDULE_VECTOR,
- CALL_FUNCTION_VECTOR,
-};
-
-/*-------------------------------- Global Data -------------------------------*/
-static u_int hyperthreading_cpus;
-static cpuset_t hyperthreading_cpus_mask;
-
-int mp_naps; /* # of Applications processors */
-int boot_cpu_id = -1; /* designated BSP */
-
-static int bootAP;
-static union descriptor *bootAPgdt;
-
-/* Free these after use */
-void *bootstacks[MAXCPU];
-
-struct pcb stoppcbs[MAXCPU];
-
-/* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr1;
-vm_offset_t smp_tlb_addr2;
-volatile int smp_tlb_wait;
-
-static u_int logical_cpus;
-static volatile cpuset_t ipi_nmi_pending;
-
-/* used to hold the AP's until we are ready to release them */
-static struct mtx ap_boot_mtx;
-
-/* Set to 1 once we're ready to let the APs out of the pen. */
-static volatile int aps_ready = 0;
-
-/*
- * Store data from cpu_add() until later in the boot when we actually setup
- * the APs.
- */
-static struct cpu_info cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
-int apic_cpuids[MAX_APIC_ID + 1];
-
-/* Holds pending bitmap based IPIs per CPU */
-static volatile u_int cpu_ipi_pending[MAXCPU];
-
-static int cpu_logical;
-static int cpu_cores;
-
-static const struct xen_ipi_handler xen_ipis[] =
-{
- [RESCHEDULE_VECTOR] = { smp_reschedule_interrupt, "resched" },
- [CALL_FUNCTION_VECTOR] = { smp_call_function_interrupt,"callfunc" }
-};
-
-/*------------------------------- Per-CPU Data -------------------------------*/
-DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
-DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
-
-/*------------------------------ Implementation ------------------------------*/
-struct cpu_group *
-cpu_topo(void)
-{
- if (cpu_cores == 0)
- cpu_cores = 1;
- if (cpu_logical == 0)
- cpu_logical = 1;
- if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
- printf("WARNING: Non-uniform processors.\n");
- printf("WARNING: Using suboptimal topology.\n");
- return (smp_topo_none());
- }
- /*
- * No multi-core or hyper-threaded.
- */
- if (cpu_logical * cpu_cores == 1)
- return (smp_topo_none());
- /*
- * Only HTT no multi-core.
- */
- if (cpu_logical > 1 && cpu_cores == 1)
- return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
- /*
- * Only multi-core no HTT.
- */
- if (cpu_cores > 1 && cpu_logical == 1)
- return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
- /*
- * Both HTT and multi-core.
- */
- return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
- CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
-}
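-
-/*
- * Worked example (illustrative; the counts are assumed, not taken from a
- * real machine): with cpu_cores == 2 and cpu_logical == 2, an mp_ncpus of
- * 8 passes the uniformity check (8 % (2 * 2) == 0) and both counts exceed
- * one, so the final case applies:
- *
- *	smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L1, 2, CG_FLAG_HTT);
- *
- * i.e. cores share nothing, while the two hardware threads of each core
- * share an L1 cache and are flagged CG_FLAG_HTT.
- */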
-
-/*
- * Calculate usable address in base memory for AP trampoline code.
- */
-u_int
-mp_bootaddress(u_int basemem)
-{
-
- return (basemem);
-}
-
-void
-cpu_add(u_int apic_id, char boot_cpu)
-{
-
- if (apic_id > MAX_APIC_ID) {
- panic("SMP: APIC ID %d too high", apic_id);
- return;
- }
- KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
- apic_id));
- cpu_info[apic_id].cpu_present = 1;
- if (boot_cpu) {
- KASSERT(boot_cpu_id == -1,
- ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
- boot_cpu_id));
- boot_cpu_id = apic_id;
- cpu_info[apic_id].cpu_bsp = 1;
- }
- if (mp_ncpus < MAXCPU)
- mp_ncpus++;
- if (bootverbose)
- printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
- "AP");
-}
-
-void
-cpu_mp_setmaxid(void)
-{
-
- mp_maxid = MAXCPU - 1;
-}
-
-int
-cpu_mp_probe(void)
-{
-
- /*
- * Always record BSP in CPU map so that the mbuf init code works
- * correctly.
- */
- CPU_SETOF(0, &all_cpus);
- if (mp_ncpus == 0) {
- /*
-		 * No CPUs were found, so this must be a UP system.  Set up
- * the variables to represent a system with a single CPU
- * with an id of 0.
- */
- mp_ncpus = 1;
- return (0);
- }
-
- /* At least one CPU was found. */
- if (mp_ncpus == 1) {
- /*
- * One CPU was found, so this must be a UP system with
- * an I/O APIC.
- */
- return (0);
- }
-
- /* At least two CPUs were found. */
- return (1);
-}
-
-/*
- * Initialize the IPI handlers and start up the APs.
- */
-void
-cpu_mp_start(void)
-{
- int i;
-
- /* Initialize the logical ID to APIC ID table. */
- for (i = 0; i < MAXCPU; i++) {
- cpu_apic_ids[i] = -1;
- cpu_ipi_pending[i] = 0;
- }
-
- /* Set boot_cpu_id if needed. */
- if (boot_cpu_id == -1) {
- boot_cpu_id = PCPU_GET(apic_id);
- cpu_info[boot_cpu_id].cpu_bsp = 1;
- } else
- KASSERT(boot_cpu_id == PCPU_GET(apic_id),
- ("BSP's APIC ID doesn't match boot_cpu_id"));
- cpu_apic_ids[0] = boot_cpu_id;
- apic_cpuids[boot_cpu_id] = 0;
-
- assign_cpu_ids();
-
- /* Start each Application Processor */
- start_all_aps();
-
- /* Setup the initial logical CPUs info. */
- logical_cpus = 0;
- CPU_ZERO(&logical_cpus_mask);
- if (cpu_feature & CPUID_HTT)
- logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
-
- set_interrupt_apic_ids();
-}
-
-static void
-iv_rendezvous(uintptr_t a, uintptr_t b)
-{
- smp_rendezvous_action();
-}
-
-static void
-iv_invltlb(uintptr_t a, uintptr_t b)
-{
- xen_tlb_flush();
-}
-
-static void
-iv_invlpg(uintptr_t a, uintptr_t b)
-{
- xen_invlpg(a);
-}
-
-static void
-iv_invlrng(uintptr_t a, uintptr_t b)
-{
- vm_offset_t start = (vm_offset_t)a;
- vm_offset_t end = (vm_offset_t)b;
-
- while (start < end) {
- xen_invlpg(start);
- start += PAGE_SIZE;
- }
-}
-
-static void
-iv_invlcache(uintptr_t a, uintptr_t b)
-{
-
- wbinvd();
- atomic_add_int(&smp_tlb_wait, 1);
-}
-
-/*
- * These start from the "IPI offset" APIC_IPI_INTS; see IPI_TO_IDX().
- */
-static call_data_func_t *ipi_vectors[5] =
-{
- iv_rendezvous,
- iv_invltlb,
- iv_invlpg,
- iv_invlrng,
- iv_invlcache,
-};
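-
-/*
- * Worked example of the dispatch (the concrete numbers are assumed purely
- * for illustration): if APIC_IPI_INTS were 0xf0, a call_data->func_id of
- * 0xf2 would select
- *
- *	ipi_vectors[IPI_TO_IDX(0xf2)] == ipi_vectors[2] == iv_invlpg
- *
- * in smp_call_function_interrupt() below.
- */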
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-static int
-smp_reschedule_interrupt(void *unused)
-{
- int cpu = PCPU_GET(cpuid);
- u_int ipi_bitmap;
-
- ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
- if (ipi_bitmap & (1 << IPI_PREEMPT)) {
-#ifdef COUNT_IPIS
- (*ipi_preempt_counts[cpu])++;
-#endif
- sched_preempt(curthread);
- }
-
- if (ipi_bitmap & (1 << IPI_AST)) {
-#ifdef COUNT_IPIS
- (*ipi_ast_counts[cpu])++;
-#endif
- /* Nothing to do for AST */
- }
- return (FILTER_HANDLED);
-}
-
-struct _call_data {
- uint16_t func_id;
- uint16_t wait;
- uintptr_t arg1;
- uintptr_t arg2;
- atomic_t started;
- atomic_t finished;
-};
-
-static struct _call_data *call_data;
-
-static int
-smp_call_function_interrupt(void *unused)
-{
- call_data_func_t *func;
- uintptr_t arg1 = call_data->arg1;
- uintptr_t arg2 = call_data->arg2;
- int wait = call_data->wait;
- atomic_t *started = &call_data->started;
- atomic_t *finished = &call_data->finished;
-
- /* We only handle function IPIs, not bitmap IPIs */
- if (call_data->func_id < APIC_IPI_INTS ||
- call_data->func_id > IPI_BITMAP_VECTOR)
- panic("invalid function id %u", call_data->func_id);
-
- func = ipi_vectors[IPI_TO_IDX(call_data->func_id)];
- /*
-	 * Notify the initiating CPU that we have grabbed the data and
-	 * are about to execute the function.
- */
- mb();
- atomic_inc(started);
- /*
-	 * At this point the call_data structure may be out of scope,
-	 * unless wait == 1.
- */
- (*func)(arg1, arg2);
-
- if (wait) {
- mb();
- atomic_inc(finished);
- }
- atomic_add_int(&smp_tlb_wait, 1);
- return (FILTER_HANDLED);
-}
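-
-/*
- * Sketch of the two halves of the call-function handshake implemented in
- * this file (initiator side as in smp_tlb_shootdown() below; the ordering
- * of the steps is the point):
- *
- *	initiator				target (handler above)
- *	fill in call_data (func_id, args)
- *	smp_tlb_wait = 0; send IPI	-->	read call_data fields
- *						mb(); atomic_inc(started)
- *	spin: smp_tlb_wait < ncpu	<--	func(arg1, arg2)
- *						atomic_add_int(&smp_tlb_wait, 1)
- *	call_data = NULL
- */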
-
-/*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
- int i, x;
-
- /* List CPUs */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
- if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
- continue;
- if (cpu_info[x].cpu_disabled)
- printf(" cpu (AP): APIC ID: %2d (disabled)\n", x);
- else {
- KASSERT(i < mp_ncpus,
- ("mp_ncpus and actual cpus are out of whack"));
- printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
- }
- }
-}
-
-static int
-xen_smp_cpu_init(unsigned int cpu)
-{
- xen_intr_handle_t *ipi_handle;
- const struct xen_ipi_handler *ipi;
- int idx, rc;
-
- ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
- for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
-
- /*
- * The PCPU variable pc_device is not initialized on i386 PV,
-		 * so we have to use the root_bus device in order to set up
- * the IPIs.
- */
- rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu,
- ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]);
- if (rc != 0) {
- printf("Unable to allocate a XEN IPI port. "
- "Error %d\n", rc);
- break;
- }
- xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
- }
-
-	for (; idx < nitems(xen_ipis); idx++)
- ipi_handle[idx] = NULL;
-
- if (rc == 0)
- return (0);
-
- /* Either all are successfully mapped, or none at all. */
- for (idx = 0; idx < nitems(xen_ipis); idx++) {
- if (ipi_handle[idx] == NULL)
- continue;
-
- xen_intr_unbind(ipi_handle[idx]);
- ipi_handle[idx] = NULL;
- }
-
- return (rc);
-}
-
-static void
-xen_smp_intr_init_cpus(void *unused)
-{
- int i;
-
- for (i = 0; i < mp_ncpus; i++)
- xen_smp_cpu_init(i);
-}
-
-static void
-xen_smp_intr_setup_cpus(void *unused)
-{
- int i;
-
- for (i = 0; i < mp_ncpus; i++)
- DPCPU_ID_SET(i, vcpu_info,
- &HYPERVISOR_shared_info->vcpu_info[i]);
-}
-
-#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
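-/* With i386's PAGE_SHIFT of 12, MTOPSIZE is 1 << 26 bytes, i.e. 64 MB. */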
-
-/*
- * AP CPUs call this to initialize themselves.
- */
-void
-init_secondary(void)
-{
- vm_offset_t addr;
- u_int cpuid;
- int gsel_tss;
-
- /* bootAP is set in start_ap() to our ID. */
- PCPU_SET(currentldt, _default_ldt);
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
-#if 0
- gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
-#endif
- PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
-#if 0
- PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);
-
- PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
-#endif
- PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
-
- /*
- * Set to a known state:
- * Set by mpboot.s: CR0_PG, CR0_PE
- * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
- */
- /*
-	 * Signal our startup to the BSP.
- */
- mp_naps++;
-
-	/* Spin until the BSP releases the APs. */
- while (!aps_ready)
- ia32_pause();
-
- /* BSP may have changed PTD while we were waiting */
- invltlb();
- for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
- invlpg(addr);
-
-#if 0
- /* set up SSE/NX */
- initializecpu();
-#endif
-
- /* set up FPU state on the AP */
- npxinit(false);
-#if 0
- /* A quick check from sanity claus */
- if (PCPU_GET(apic_id) != lapic_id()) {
- printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
- printf("SMP: actual apic_id = %d\n", lapic_id());
- printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
- panic("cpuid mismatch! boom!!");
- }
-#endif
-
- /* Initialize curthread. */
- KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- mtx_lock_spin(&ap_boot_mtx);
-#if 0
-
- /* Init local apic for irq's */
- lapic_setup(1);
-#endif
- smp_cpus++;
-
- cpuid = PCPU_GET(cpuid);
- CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
- printf("SMP: AP CPU #%d Launched!\n", cpuid);
-
- /* Determine if we are a logical CPU. */
- if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
- CPU_SET(cpuid, &logical_cpus_mask);
-
- /* Determine if we are a hyperthread. */
- if (hyperthreading_cpus > 1 &&
- PCPU_GET(apic_id) % hyperthreading_cpus != 0)
- CPU_SET(cpuid, &hyperthreading_cpus_mask);
-#if 0
- if (bootverbose)
- lapic_dump("AP");
-#endif
- if (smp_cpus == mp_ncpus) {
-		/* Enable IPIs, TLB shootdown, freezes, etc. */
- atomic_store_rel_int(&smp_started, 1);
- }
-
- mtx_unlock_spin(&ap_boot_mtx);
-
-	/* Wait until all the APs are up. */
- while (smp_started == 0)
- ia32_pause();
-
- PCPU_SET(curthread, PCPU_GET(idlethread));
-
- /* Start per-CPU event timers. */
- cpu_initclocks_ap();
-
- /* enter the scheduler */
- sched_throw(NULL);
-
- panic("scheduler returned us to %s", __func__);
- /* NOTREACHED */
-}
-
-/*******************************************************************
- * local functions and data
- */
-
-/*
- * We tell the I/O APIC code about all the CPUs we want to receive
- * interrupts. If we don't want certain CPUs to receive IRQs we
- * can simply not tell the I/O APIC code about them in this function.
- * We also do not tell it about the BSP since it tells itself about
- * the BSP internally to work with UP kernels and on UP machines.
- */
-static void
-set_interrupt_apic_ids(void)
-{
- u_int i, apic_id;
-
- for (i = 0; i < MAXCPU; i++) {
- apic_id = cpu_apic_ids[i];
- if (apic_id == -1)
- continue;
- if (cpu_info[apic_id].cpu_bsp)
- continue;
- if (cpu_info[apic_id].cpu_disabled)
- continue;
-
- /* Don't let hyperthreads service interrupts. */
- if (hyperthreading_cpus > 1 &&
- apic_id % hyperthreading_cpus != 0)
- continue;
-
- intr_add_cpu(i);
- }
-}
-
-/*
- * Assign logical CPU IDs to local APICs.
- */
-static void
-assign_cpu_ids(void)
-{
- u_int i;
-
- /* Check for explicitly disabled CPUs. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
- continue;
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", i)) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- /*
- * Assign CPU IDs to local APIC IDs and disable any CPUs
- * beyond MAXCPU. CPU 0 has already been assigned to the BSP,
- * so we only have to assign IDs for APs.
- */
- mp_ncpus = 1;
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
- cpu_info[i].cpu_disabled)
- continue;
-
- if (mp_ncpus < MAXCPU) {
- cpu_apic_ids[mp_ncpus] = i;
- apic_cpuids[i] = mp_ncpus;
- mp_ncpus++;
- } else
- cpu_info[i].cpu_disabled = 1;
- }
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
- mp_ncpus));
-}
-
-/*
- * start each AP in our list
- */
-/* Lowest 1MB is already mapped: don't touch. */
-#define TMPMAP_START 1
-int
-start_all_aps(void)
-{
-	int x, apic_id, cpu;
- struct pcpu *pc;
-
- mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
-
- /* set up temporary P==V mapping for AP boot */
- /* XXX this is a hack, we should boot the AP on its own stack/PTD */
-
- /* start each AP */
- for (cpu = 1; cpu < mp_ncpus; cpu++) {
- apic_id = cpu_apic_ids[cpu];
-
- bootAP = cpu;
- bootAPgdt = gdt + (512*cpu);
-
- /* Get per-cpu data */
- pc = &__pcpu[bootAP];
- pcpu_init(pc, bootAP, sizeof(struct pcpu));
- dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
- M_WAITOK | M_ZERO), bootAP);
- pc->pc_apic_id = cpu_apic_ids[bootAP];
- pc->pc_vcpu_id = cpu_apic_ids[bootAP];
- pc->pc_prvspace = pc;
- pc->pc_curthread = 0;
-
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
-
- PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW);
- bzero(bootAPgdt, PAGE_SIZE);
- for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
- PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
-#ifdef notyet
-
- if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
- apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
- acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
-#ifdef CONFIG_ACPI
- if (acpiid != 0xff)
- x86_acpiid_to_apicid[acpiid] = apicid;
-#endif
- }
-#endif
-
- /* attempt to start the Application Processor */
- if (!start_ap(cpu)) {
- printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
- /* better panic as the AP may be running loose */
- printf("panic y/n? [y] ");
- if (cngetc() != 'n')
- panic("bye-bye");
- }
-
- CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
- }
-
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
-
- /* number of APs actually started */
- return (mp_naps);
-}
-
-extern uint8_t *pcpu_boot_stack;
-extern trap_info_t trap_table[];
-
-static void
-smp_trap_init(trap_info_t *trap_ctxt)
-{
-	const trap_info_t *t;
-
- for (t = trap_table; t->address; t++) {
- trap_ctxt[t->vector].flags = t->flags;
- trap_ctxt[t->vector].cs = t->cs;
- trap_ctxt[t->vector].address = t->address;
- }
-}
-
-extern struct rwlock pvh_global_lock;
-extern int nkpt;
-static void
-cpu_initialize_context(unsigned int cpu)
-{
-	/*
-	 * vcpu_guest_context_t is too large to allocate on the stack.
-	 * Hence we allocate it statically and protect it with a lock.
-	 */
- vm_page_t m[NPGPTD + 2];
- static vcpu_guest_context_t ctxt;
- vm_offset_t boot_stack;
- vm_offset_t newPTD;
- vm_paddr_t ma[NPGPTD];
- int i;
-
- /*
-	 * Pages m[0] .. m[NPGPTD - 1]: new page directory (PTD)
-	 * Page m[NPGPTD]:              boot stack
-	 * Page m[NPGPTD + 1]:          PDPT
- */
- for (i = 0; i < NPGPTD + 2; i++) {
- m[i] = vm_page_alloc(NULL, 0,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
-
- pmap_zero_page(m[i]);
-
- }
- boot_stack = kva_alloc(PAGE_SIZE);
- newPTD = kva_alloc(NPGPTD * PAGE_SIZE);
- ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V;
-
-#ifdef PAE
- pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
- for (i = 0; i < NPGPTD; i++) {
- ((vm_paddr_t *)boot_stack)[i] =
- ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V;
- }
-#endif
-
- /*
- * Copy cpu0 IdlePTD to new IdlePTD - copying only
- * kernel mappings
- */
- pmap_qenter(newPTD, m, 4);
-
- memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
- (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
- nkpt*sizeof(vm_paddr_t));
-
- pmap_qremove(newPTD, 4);
- kva_free(newPTD, 4 * PAGE_SIZE);
- /*
- * map actual idle stack to boot_stack
- */
- pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));
-
- xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1]));
- rw_wlock(&pvh_global_lock);
- for (i = 0; i < 4; i++) {
- int pdir = (PTDPTDI + i) / NPDEPG;
- int curoffset = (PTDPTDI + i) % NPDEPG;
-
- xen_queue_pt_update((vm_paddr_t)
- ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
- ma[i]);
- }
- PT_UPDATES_FLUSH();
- rw_wunlock(&pvh_global_lock);
-
- memset(&ctxt, 0, sizeof(ctxt));
- ctxt.flags = VGCF_IN_KERNEL;
- ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
- ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.user_regs.eip = (unsigned long)init_secondary;
- ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */
-
- memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- smp_trap_init(ctxt.trap_ctxt);
-
- ctxt.ldt_ents = 0;
- ctxt.gdt_frames[0] =
- (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
- ctxt.gdt_ents = 512;
-
-#ifdef __i386__
- ctxt.user_regs.esp = boot_stack + PAGE_SIZE;
-
- ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.kernel_sp = boot_stack + PAGE_SIZE;
-
- ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
- ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
- ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]);
-#else /* __x86_64__ */
- ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
- ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
- ctxt.kernel_sp = idle->thread.rsp0;
-
- ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
- ctxt.syscall_callback_eip = (unsigned long)system_call;
-
- ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
-
- ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
-#endif
-
- printf("gdtpfn=%lx pdptpfn=%lx\n",
- ctxt.gdt_frames[0],
- ctxt.ctrlreg[3] >> PAGE_SHIFT);
-
- PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
- DELAY(3000);
- PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
-}
-
-/*
- * This function starts the AP (application processor) identified
- * by the APIC ID 'apic_id'.  It does quite a "song and dance"
- * to accomplish this. This is necessary because of the nuances
- * of the different hardware we might encounter. It isn't pretty,
- * but it seems to work.
- */
-
-int cpus;
-static int
-start_ap(int apic_id)
-{
- int ms;
-
- /* used as a watchpoint to signal AP startup */
- cpus = mp_naps;
-
- cpu_initialize_context(apic_id);
-
- /* Wait up to 5 seconds for it to start. */
- for (ms = 0; ms < 5000; ms++) {
- if (mp_naps > cpus)
- return (1); /* return SUCCESS */
- DELAY(1000);
- }
- return (0); /* return FAILURE */
-}
-
-static void
-ipi_pcpu(int cpu, u_int ipi)
-{
-	KASSERT(ipi < nitems(xen_ipis), ("invalid IPI"));
- xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi]));
-}
-
-/*
- * Send an IPI to a specific CPU.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
- u_int bitmap, old_pending, new_pending;
-
- if (IPI_IS_BITMAPED(ipi)) {
- bitmap = 1 << ipi;
- ipi = IPI_BITMAP_VECTOR;
- do {
- old_pending = cpu_ipi_pending[cpu];
- new_pending = old_pending | bitmap;
- } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
- old_pending, new_pending));
- if (!old_pending)
- ipi_pcpu(cpu, RESCHEDULE_VECTOR);
- } else {
- KASSERT(call_data != NULL, ("call_data not set"));
- ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
- }
-}
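-
-/*
- * Illustration of the bitmapped path above: if IPI_PREEMPT and IPI_AST
- * are both posted to a CPU before its filter runs, the cmpset loop leaves
- * both bits set in cpu_ipi_pending[cpu], but only the first poster (the
- * one that saw old_pending == 0) signals the event channel; the target's
- * smp_reschedule_interrupt() then consumes both bits in one pass via
- * atomic_readandclear_int().
- */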
-
-/*
- * Flush the TLB on all other CPUs.
- */
-static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
-{
- u_int ncpu;
- struct _call_data data;
-
-	ncpu = mp_ncpus - 1;	/* does not shoot down self */
- if (ncpu < 1)
- return; /* no other cpus */
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- KASSERT(call_data == NULL, ("call_data isn't null?!"));
- call_data = &data;
- call_data->func_id = vector;
- call_data->arg1 = addr1;
- call_data->arg2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- ipi_all_but_self(vector);
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- call_data = NULL;
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-static void
-smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1,
- vm_offset_t addr2)
-{
- int cpu, ncpu, othercpus;
- struct _call_data data;
-
- othercpus = mp_ncpus - 1;
- if (CPU_ISFULLSET(&mask)) {
- if (othercpus < 1)
- return;
- } else {
- CPU_CLR(PCPU_GET(cpuid), &mask);
- if (CPU_EMPTY(&mask))
- return;
- }
- if (!(read_eflags() & PSL_I))
- panic("%s: interrupts disabled", __func__);
- mtx_lock_spin(&smp_ipi_mtx);
- KASSERT(call_data == NULL, ("call_data isn't null?!"));
- call_data = &data;
- call_data->func_id = vector;
- call_data->arg1 = addr1;
- call_data->arg2 = addr2;
- atomic_store_rel_int(&smp_tlb_wait, 0);
- if (CPU_ISFULLSET(&mask)) {
- ncpu = othercpus;
- ipi_all_but_self(vector);
- } else {
- ncpu = 0;
- while ((cpu = CPU_FFS(&mask)) != 0) {
- cpu--;
- CPU_CLR(cpu, &mask);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
- vector);
- ipi_send_cpu(cpu, vector);
- ncpu++;
- }
- }
- while (smp_tlb_wait < ncpu)
- ia32_pause();
- call_data = NULL;
- mtx_unlock_spin(&smp_ipi_mtx);
-}
-
-void
-smp_cache_flush(void)
-{
-
- if (smp_started)
- smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
-}
-
-void
-smp_invltlb(void)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
- }
-}
-
-void
-smp_invlpg(vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLPG, addr, 0);
- }
-}
-
-void
-smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
- }
-}
-
-void
-smp_masked_invltlb(cpuset_t mask)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
- }
-}
-
-void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
- }
-}
-
-void
-smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
-{
-
- if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
- }
-}
-
-/*
- * Send an IPI to a set of CPUs.
- */
-void
-ipi_selected(cpuset_t cpus, u_int ipi)
-{
- int cpu;
-
- /*
-	 * IPI_STOP_HARD maps to an NMI, and the trap handler needs a bit
-	 * of help to identify the source.  Set the mask of receiving
-	 * CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
-
- while ((cpu = CPU_FFS(&cpus)) != 0) {
- cpu--;
- CPU_CLR(cpu, &cpus);
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
- }
-}
-
-/*
- * Send an IPI to a specific CPU.
- */
-void
-ipi_cpu(int cpu, u_int ipi)
-{
-
- /*
-	 * IPI_STOP_HARD maps to an NMI, and the trap handler needs a bit
-	 * of help to identify the source.  Set the mask of receiving
-	 * CPUs for this purpose.
- */
- if (ipi == IPI_STOP_HARD)
- CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
-
- CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
- ipi_send_cpu(cpu, ipi);
-}
-
-/*
- * Send an IPI to all CPUs except myself.
- */
-void
-ipi_all_but_self(u_int ipi)
-{
- cpuset_t other_cpus;
-
- /*
-	 * IPI_STOP_HARD maps to an NMI, and the trap handler needs a bit
-	 * of help to identify the source.  Set the mask of receiving
-	 * CPUs for this purpose.
- */
- other_cpus = all_cpus;
- CPU_CLR(PCPU_GET(cpuid), &other_cpus);
- if (ipi == IPI_STOP_HARD)
- CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
-
- CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
- ipi_selected(other_cpus, ipi);
-}
-
-int
-ipi_nmi_handler(void)
-{
- u_int cpuid;
-
- /*
-	 * As long as there is no simple way to know an NMI's source, if
-	 * the bit for the current CPU is set in the global pending
-	 * bitword, an IPI_STOP_HARD has been issued and should be
-	 * handled.
- */
- cpuid = PCPU_GET(cpuid);
- if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
- return (1);
-
- CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
- cpustop_handler();
- return (0);
-}
-
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpustop_handler(void)
-{
- int cpu;
-
- cpu = PCPU_GET(cpuid);
-
- savectx(&stoppcbs[cpu]);
-
- /* Indicate that we are stopped */
- CPU_SET_ATOMIC(cpu, &stopped_cpus);
-
- /* Wait for restart */
- while (!CPU_ISSET(cpu, &started_cpus))
- ia32_pause();
-
- CPU_CLR_ATOMIC(cpu, &started_cpus);
- CPU_CLR_ATOMIC(cpu, &stopped_cpus);
-
- if (cpu == 0 && cpustop_restartfunc != NULL) {
- cpustop_restartfunc();
- cpustop_restartfunc = NULL;
- }
-}
-
-/*
- * Handlers for TLB-related IPIs.
- *
- * On i386 Xen PV these are no-ops, since this port doesn't support SMP.
- */
-void
-invltlb_handler(void)
-{
-}
-
-void
-invlpg_handler(void)
-{
-}
-
-void
-invlrng_handler(void)
-{
-}
-
-void
-invlcache_handler(void)
-{
-}
-
-/*
- * This is called once the rest of the system is up and running and we're
- * ready to let the APs out of the pen.
- */
-static void
-release_aps(void *dummy __unused)
-{
-
- if (mp_ncpus == 1)
- return;
- atomic_store_rel_int(&aps_ready, 1);
- while (smp_started == 0)
- ia32_pause();
-}
-SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
-SYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);
-SYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL);
diff --git a/sys/i386/xen/mptable.c b/sys/i386/xen/mptable.c
deleted file mode 100644
index 81d7c1b..0000000
--- a/sys/i386/xen/mptable.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*-
- * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
- * Copyright (c) 1996, by Steve Passe
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the developer may NOT be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/bus.h>
-#include <sys/kernel.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-
-#include <machine/frame.h>
-#include <machine/intr_machdep.h>
-#include <x86/apicvar.h>
-
-#include <xen/hypervisor.h>
-#include <xen/xen-os.h>
-#include <machine/smp.h>
-#include <xen/interface/vcpu.h>
-
-
-static int mptable_probe(void);
-static int mptable_probe_cpus(void);
-static void mptable_register(void *dummy);
-static int mptable_setup_local(void);
-static int mptable_setup_io(void);
-
-static struct apic_enumerator mptable_enumerator = {
- "MPTable",
- mptable_probe,
- mptable_probe_cpus,
- mptable_setup_local,
- mptable_setup_io
-};
-
-static int
-mptable_probe(void)
-{
-
- return (-100);
-}
-
-static int
-mptable_probe_cpus(void)
-{
- int i, rc;
-
- for (i = 0; i < MAXCPU; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0)
- cpu_add(i, (i == 0));
- }
-
- return (0);
-}
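-
-/*
- * Note on the probe above: VCPUOP_is_up returns a negative errno for a
- * vCPU that does not exist, so "rc >= 0" tests for presence (up or down
- * alike) and every present vCPU is registered, with vCPU 0 as the BSP.
- */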
-
-/*
- * Initialize the local APIC on the BSP.
- */
-static int
-mptable_setup_local(void)
-{
-
- PCPU_SET(apic_id, 0);
- PCPU_SET(vcpu_id, 0);
- return (0);
-}
-
-static int
-mptable_setup_io(void)
-{
-
- return (0);
-}
-
-static void
-mptable_register(void *dummy __unused)
-{
-
- apic_register_enumerator(&mptable_enumerator);
-}
-SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register,
- NULL);
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
deleted file mode 100644
index 757fc36..0000000
--- a/sys/i386/xen/pmap.c
+++ /dev/null
@@ -1,4420 +0,0 @@
-/*-
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- * Copyright (c) 1994 John S. Dyson
- * All rights reserved.
- * Copyright (c) 1994 David Greenman
- * All rights reserved.
- * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department and William Jolitz of UUNET Technologies Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
- */
-/*-
- * Copyright (c) 2003 Networks Associates Technology, Inc.
- * All rights reserved.
- *
- * This software was developed for the FreeBSD Project by Jake Burkholder,
- * Safeport Network Services, and Network Associates Laboratories, the
- * Security Research Division of Network Associates, Inc. under
- * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
- * CHATS research program.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * Manages physical address maps.
- *
- * Since the information managed by this module is
- * also stored by the logical address mapping module,
- * this module may throw away valid virtual-to-physical
- * mappings at almost any time. However, invalidations
- * of virtual-to-physical mappings must be done as
- * requested.
- *
- * In order to cope with hardware architectures which
- * make virtual-to-physical map invalidates expensive,
- * this module may delay invalidation or reduced-protection
- * operations until such time as they are actually
- * necessary. This module is given full information as
- * to which processors are currently using which maps,
- * and when physical maps must be made correct.
- */
-
-#include "opt_cpu.h"
-#include "opt_pmap.h"
-#include "opt_smp.h"
-#include "opt_xbox.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mman.h>
-#include <sys/msgbuf.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/sf_buf.h>
-#include <sys/sx.h>
-#include <sys/vmmeter.h>
-#include <sys/sched.h>
-#include <sys/sysctl.h>
-#ifdef SMP
-#include <sys/smp.h>
-#else
-#include <sys/cpuset.h>
-#endif
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_object.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
-#include <vm/uma.h>
-
-#include <machine/cpu.h>
-#include <machine/cputypes.h>
-#include <machine/md_var.h>
-#include <machine/pcb.h>
-#include <machine/specialreg.h>
-#ifdef SMP
-#include <machine/smp.h>
-#endif
-
-#ifdef XBOX
-#include <machine/xbox.h>
-#endif
-
-#include <xen/interface/xen.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/hypercall.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-
-#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
-#define CPU_ENABLE_SSE
-#endif
-
-#ifndef PMAP_SHPGPERPROC
-#define PMAP_SHPGPERPROC 200
-#endif
-
-#define DIAGNOSTIC
-
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINE extern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
-#ifdef PV_STATS
-#define PV_STAT(x) do { x ; } while (0)
-#else
-#define PV_STAT(x) do { } while (0)
-#endif
-
-/*
- * Get PDEs and PTEs for user/kernel address space
- */
-#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
-#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
-
-#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
-#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
-#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
-#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
-#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
-
-#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
-
-#define HAMFISTED_LOCKING
-#ifdef HAMFISTED_LOCKING
-static struct mtx createdelete_lock;
-#endif
-
-struct pmap kernel_pmap_store;
-LIST_HEAD(pmaplist, pmap);
-static struct pmaplist allpmaps;
-static struct mtx allpmaps_lock;
-
-vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
-vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
-int pgeflag = 0; /* PG_G or-in */
-int pseflag = 0; /* PG_PS or-in */
-
-int nkpt;
-vm_offset_t kernel_vm_end;
-extern u_int32_t KERNend;
-
-#ifdef PAE
-pt_entry_t pg_nx;
-#endif
-
-static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-
-static int pat_works; /* Is page attribute table sane? */
-
-/*
- * This lock is defined as static in other pmap implementations. It cannot,
- * however, be defined as static here, because it is (ab)used to serialize
- * queued page table changes in other source files.
- */
-struct rwlock pvh_global_lock;
-
-/*
- * Data for the pv entry allocation mechanism
- */
-static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
-static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-static int shpgperproc = PMAP_SHPGPERPROC;
-
-struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
-int pv_maxchunks; /* How many chunks we have KVA for */
-vm_offset_t pv_vafree; /* freelist stored in the PTE */
-
-/*
- * All those kernel PT submaps that BSD is so fond of
- */
-struct sysmaps {
- struct mtx lock;
- pt_entry_t *CMAP1;
- pt_entry_t *CMAP2;
- caddr_t CADDR1;
- caddr_t CADDR2;
-};
-static struct sysmaps sysmaps_pcpu[MAXCPU];
-pt_entry_t *CMAP3;
-caddr_t ptvmmap = 0;
-caddr_t CADDR3;
-struct msgbuf *msgbufp = 0;
-
-/*
- * Crashdump maps.
- */
-static caddr_t crashdumpmap;
-
-static pt_entry_t *PMAP1 = 0, *PMAP2;
-static pt_entry_t *PADDR1 = 0, *PADDR2;
-#ifdef SMP
-static int PMAP1cpu;
-static int PMAP1changedcpu;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
- &PMAP1changedcpu, 0,
- "Number of times pmap_pte_quick changed CPU with same PMAP1");
-#endif
-static int PMAP1changed;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
- &PMAP1changed, 0,
- "Number of times pmap_pte_quick changed PMAP1");
-static int PMAP1unchanged;
-SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
- &PMAP1unchanged, 0,
- "Number of times pmap_pte_quick didn't change PMAP1");
-static struct mtx PMAP2mutex;
-
-static void free_pv_chunk(struct pv_chunk *pc);
-static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
-static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
-static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
- vm_offset_t va);
-
-static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
- vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static void pmap_flush_page(vm_page_t m);
-static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
-static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
- vm_page_t *free);
-static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
- vm_page_t *free);
-static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
- vm_offset_t va);
-static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
- vm_page_t m);
-
-static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
-
-static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags);
-static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free);
-static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
-static void pmap_pte_release(pt_entry_t *pte);
-static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
-
-static __inline void pagezero(void *page);
-
-CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
-CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
-
-/*
- * If you get an error here, then you set KVA_PAGES wrong! See the
- * description of KVA_PAGES in sys/i386/include/pmap.h. It must be
- * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE.
- */
-CTASSERT(KERNBASE % (1 << 24) == 0);
-
-void
-pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
-{
- vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);
-
- switch (type) {
- case SH_PD_SET_VA:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma,
- xpmap_ptom(val & ~(PG_RW)));
-#endif
- xen_queue_pt_update(pdir_ma,
- xpmap_ptom(val));
- break;
- case SH_PD_SET_VA_MA:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma,
- val & ~(PG_RW));
-#endif
- xen_queue_pt_update(pdir_ma, val);
- break;
- case SH_PD_SET_VA_CLEAR:
-#if 0
- xen_queue_pt_update(shadow_pdir_ma, 0);
-#endif
- xen_queue_pt_update(pdir_ma, 0);
- break;
- }
-}
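-
-/*
- * Note that pd_set() never writes the page directory directly: under Xen
- * PV the PD is read-only to the guest, so the new value is queued against
- * the entry's machine address and takes effect when the queue is flushed.
- * A hypothetical caller (names as in this file) would look like:
- *
- *	pd_set(pmap, ptepindex, pa | PG_V | PG_RW, SH_PD_SET_VA);
- *	PT_UPDATES_FLUSH();
- */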
-
-/*
- * Bootstrap the system enough to run with virtual memory.
- *
- * On the i386 this is called after mapping has already been enabled
- * and just syncs the pmap module with what has already been done.
- * [We can't call it easily with mapping off since the kernel is not
- * mapped with PA == VA, hence we would have to relocate every address
- * from the linked base (virtual) address "KERNBASE" to the actual
- * (physical) address starting relative to 0]
- */
-void
-pmap_bootstrap(vm_paddr_t firstaddr)
-{
- vm_offset_t va;
- pt_entry_t *pte, *unused;
- struct sysmaps *sysmaps;
- int i;
-
- /*
- * Initialize the first available kernel virtual address. However,
- * using "firstaddr" may waste a few pages of the kernel virtual
- * address space, because locore may not have mapped every physical
- * page that it allocated. Preferably, locore would provide a first
- * unused virtual address in addition to "firstaddr".
- */
- virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
-
- virtual_end = VM_MAX_KERNEL_ADDRESS;
-
- /*
- * Initialize the kernel pmap (which is statically allocated).
- */
- PMAP_LOCK_INIT(kernel_pmap);
- kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
-#ifdef PAE
- kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
-#endif
- CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
- TAILQ_INIT(&kernel_pmap->pm_pvchunk);
-
- /*
- * Initialize the global pv list lock.
- */
- rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE);
-
- LIST_INIT(&allpmaps);
- mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
- if (nkpt == 0)
- nkpt = NKPT;
-
- /*
- * Reserve some special page table entries/VA space for temporary
- * mapping of pages.
- */
-#define SYSMAP(c, p, v, n) \
- v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
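-
-/*
- * For illustration, the invocation
- *	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
- * used below expands to
- *	CADDR3 = (caddr_t)va; va += ((1)*PAGE_SIZE); CMAP3 = pte; pte += (1);
- * i.e. it hands out one page of KVA at 'va' and remembers the PTE slot
- * that maps it.
- */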
-
- va = virtual_avail;
- pte = vtopte(va);
-
- /*
- * CMAP1/CMAP2 are used for zeroing and copying pages.
- * CMAP3 is used for the idle process page zeroing.
- */
- for (i = 0; i < MAXCPU; i++) {
- sysmaps = &sysmaps_pcpu[i];
- mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
- SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
- SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- }
- SYSMAP(caddr_t, CMAP3, CADDR3, 1)
- PT_SET_MA(CADDR3, 0);
-
- /*
- * Crashdump maps.
- */
- SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
-
- /*
- * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
- */
- SYSMAP(caddr_t, unused, ptvmmap, 1)
-
- /*
- * msgbufp is used to map the system message buffer.
- */
- SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))
-
- /*
- * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(),
- * respectively.
- */
- SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
- SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
-
- mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
-
- virtual_avail = va;
-
- /*
- * Leave in place an identity mapping (virt == phys) for the low 1 MB
- * physical memory region that is used by the ACPI wakeup code. This
- * mapping must not have PG_G set.
- */
-#ifndef XEN
- /*
- * leave here deliberately to show that this is not supported
- */
-#ifdef XBOX
-	/* FIXME: This is gross, but needed for the XBOX.  Since we are at
-	 * such an early stage, we cannot yet neatly map video memory ... :-(
- * Better fixes are very welcome! */
- if (!arch_i386_is_xbox)
-#endif
- for (i = 1; i < NKPT; i++)
- PTD[i] = 0;
-
- /* Initialize the PAT MSR if present. */
- pmap_init_pat();
-
- /* Turn on PG_G on kernel page(s) */
- pmap_set_pg();
-#endif
-
-#ifdef HAMFISTED_LOCKING
- mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF);
-#endif
-}
-
-/*
- * Setup the PAT MSR.
- */
-void
-pmap_init_pat(void)
-{
- uint64_t pat_msr;
-
- /* Bail if this CPU doesn't implement PAT. */
- if (!(cpu_feature & CPUID_PAT))
- return;
-
- if (cpu_vendor_id != CPU_VENDOR_INTEL ||
- (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
- /*
- * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
- * Program 4 and 5 as WP and WC.
- * Leave 6 and 7 as UC and UC-.
- */
- pat_msr = rdmsr(MSR_PAT);
- pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
- pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
- PAT_VALUE(5, PAT_WRITE_COMBINING);
- pat_works = 1;
- } else {
- /*
- * Due to some Intel errata, we can only safely use the lower 4
- * PAT entries. Thus, just replace PAT Index 2 with WC instead
- * of UC-.
- *
- * Intel Pentium III Processor Specification Update
- * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
- * or Mode C Paging)
- *
- * Intel Pentium IV Processor Specification Update
- * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
- */
- pat_msr = rdmsr(MSR_PAT);
- pat_msr &= ~PAT_MASK(2);
- pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
- pat_works = 0;
- }
- wrmsr(MSR_PAT, pat_msr);
-}
-
-/*
- * Initialize a vm_page's machine-dependent fields.
- */
-void
-pmap_page_init(vm_page_t m)
-{
-
- TAILQ_INIT(&m->md.pv_list);
- m->md.pat_mode = PAT_WRITE_BACK;
-}
-
-/*
- * Abuse the pte nodes for unmapped KVA to thread a KVA freelist through.
- * Requirements:
- * - Must deal with pages in order to ensure that none of the PG_* bits
- * are ever set, PG_V in particular.
- * - Assumes we can write to ptes without pte_store() atomic ops, even
- * on PAE systems. This should be ok.
- * - Assumes nothing will ever test these addresses for 0 to indicate
- * no mapping instead of correctly checking PG_V.
- * - Assumes a vm_offset_t will fit in a pte (true for i386).
- * Because PG_V is never set, there can be no mappings to invalidate.
- */
-static int ptelist_count = 0;
-static vm_offset_t
-pmap_ptelist_alloc(vm_offset_t *head)
-{
- vm_offset_t va;
- vm_offset_t *phead = (vm_offset_t *)*head;
-
- if (ptelist_count == 0) {
- printf("out of memory!!!!!!\n");
- return (0); /* Out of memory */
- }
- ptelist_count--;
- va = phead[ptelist_count];
- return (va);
-}
-
-static void
-pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
-{
- vm_offset_t *phead = (vm_offset_t *)*head;
-
- phead[ptelist_count++] = va;
-}
-
-static void
-pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
-{
- int i, nstackpages;
- vm_offset_t va;
- vm_page_t m;
-
-	nstackpages = (npages + PAGE_SIZE / sizeof(vm_offset_t) - 1) /
-	    (PAGE_SIZE / sizeof(vm_offset_t));
- for (i = 0; i < nstackpages; i++) {
- va = (vm_offset_t)base + i * PAGE_SIZE;
- m = vm_page_alloc(NULL, i,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- pmap_qenter(va, &m, 1);
- }
-
- *head = (vm_offset_t)base;
- for (i = npages - 1; i >= nstackpages; i--) {
- va = (vm_offset_t)base + i * PAGE_SIZE;
- pmap_ptelist_free(head, va);
- }
-}
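-
-/*
- * Worked example (sizes assumed for illustration): with npages == 1024
- * and PAGE_SIZE / sizeof(vm_offset_t) == 1024 entries per page, the
- * function maps nstackpages == 1 page at 'base' as the array holding the
- * free VAs and pushes the remaining 1023 page-sized VAs onto it from the
- * top down, so pmap_ptelist_alloc() pops base + PAGE_SIZE first, then
- * base + 2 * PAGE_SIZE, and so on.
- */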
-
-/*
- * Initialize the pmap module.
- * Called by vm_init, to initialize any structures that the pmap
- * system needs to map virtual memory.
- */
-void
-pmap_init(void)
-{
-
- /*
- * Initialize the address space (zone) for the pv entries. Set a
- * high water mark so that the system can recover from excessive
- * numbers of pv entries.
- */
- TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
- pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
- TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
- pv_entry_max = roundup(pv_entry_max, _NPCPV);
- pv_entry_high_water = 9 * (pv_entry_max / 10);
-
- pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
- pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
- if (pv_chunkbase == NULL)
- panic("pmap_init: not enough kvm for pv chunks");
- pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
-}
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
- "Max number of PV entries");
-SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
- "Page share factor per proc");
-
-static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
- "2/4MB page mapping counters");
-
-static u_long pmap_pde_mappings;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
- &pmap_pde_mappings, 0, "2/4MB page mappings");
-
-/***************************************************
- * Low level helper routines.....
- ***************************************************/
-
-/*
- * Determine the appropriate bits to set in a PTE or PDE for a specified
- * caching mode.
- */
-int
-pmap_cache_bits(int mode, boolean_t is_pde)
-{
- int pat_flag, pat_index, cache_bits;
-
-	/* The PAT bit is different for PTEs and PDEs. */
- pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
-
- /* If we don't support PAT, map extended modes to older ones. */
- if (!(cpu_feature & CPUID_PAT)) {
- switch (mode) {
- case PAT_UNCACHEABLE:
- case PAT_WRITE_THROUGH:
- case PAT_WRITE_BACK:
- break;
- case PAT_UNCACHED:
- case PAT_WRITE_COMBINING:
- case PAT_WRITE_PROTECTED:
- mode = PAT_UNCACHEABLE;
- break;
- }
- }
-
- /* Map the caching mode to a PAT index. */
- if (pat_works) {
- switch (mode) {
- case PAT_UNCACHEABLE:
- pat_index = 3;
- break;
- case PAT_WRITE_THROUGH:
- pat_index = 1;
- break;
- case PAT_WRITE_BACK:
- pat_index = 0;
- break;
- case PAT_UNCACHED:
- pat_index = 2;
- break;
- case PAT_WRITE_COMBINING:
- pat_index = 5;
- break;
- case PAT_WRITE_PROTECTED:
- pat_index = 4;
- break;
- default:
- panic("Unknown caching mode %d\n", mode);
- }
- } else {
- switch (mode) {
- case PAT_UNCACHED:
- case PAT_UNCACHEABLE:
- case PAT_WRITE_PROTECTED:
- pat_index = 3;
- break;
- case PAT_WRITE_THROUGH:
- pat_index = 1;
- break;
- case PAT_WRITE_BACK:
- pat_index = 0;
- break;
- case PAT_WRITE_COMBINING:
- pat_index = 2;
- break;
- default:
- panic("Unknown caching mode %d\n", mode);
- }
- }
-
- /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
- cache_bits = 0;
- if (pat_index & 0x4)
- cache_bits |= pat_flag;
- if (pat_index & 0x2)
- cache_bits |= PG_NC_PCD;
- if (pat_index & 0x1)
- cache_bits |= PG_NC_PWT;
- return (cache_bits);
-}
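-
-/*
- * Worked example: when pat_works is set, PAT_WRITE_COMBINING maps to
- * pat_index 5 (binary 101), so the function returns pat_flag | PG_NC_PWT,
- * which selects the PAT entry that pmap_init_pat() programmed as WC.
- */
-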
-#ifdef SMP
-/*
- * For SMP, these functions have to use the IPI mechanism for coherence.
- *
- * N.B.: Before calling any of the following TLB invalidation functions,
- * the calling processor must ensure that all stores updating a non-
- * kernel page table are globally performed. Otherwise, another
- * processor could cache an old, pre-update entry without being
- * invalidated. This can happen one of two ways: (1) The pmap becomes
- * active on another processor after its pm_active field is checked by
- * one of the following functions but before a store updating the page
- * table is globally performed. (2) The pmap becomes active on another
- * processor before its pm_active field is checked but due to
- * speculative loads one of the following functions still reads the
- * pmap as inactive on the other processor.
- *
- * The kernel page table is exempt because its pm_active field is
- * immutable. The kernel page table is always active on every
- * processor.
- */
-void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
- cpuset_t other_cpus;
- u_int cpuid;
-
- CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
- pmap, va);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invlpg(va);
- smp_invlpg(va);
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invlpg(va);
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg(other_cpus, va);
- }
- sched_unpin();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- cpuset_t other_cpus;
- vm_offset_t addr;
- u_int cpuid;
-
-	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
- pmap, sva, eva);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- smp_invlpg_range(sva, eva);
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invlpg_range(other_cpus, sva, eva);
- }
- sched_unpin();
- PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_all(pmap_t pmap)
-{
- cpuset_t other_cpus;
- u_int cpuid;
-
-	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
-
- sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invltlb();
- smp_invltlb();
- } else {
- cpuid = PCPU_GET(cpuid);
- other_cpus = all_cpus;
- CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invltlb();
- CPU_AND(&other_cpus, &pmap->pm_active);
- if (!CPU_EMPTY(&other_cpus))
- smp_masked_invltlb(other_cpus);
- }
- sched_unpin();
-}
-
-void
-pmap_invalidate_cache(void)
-{
-
- sched_pin();
- wbinvd();
- smp_cache_flush();
- sched_unpin();
-}
-#else /* !SMP */
-/*
- * Normal, non-SMP, 486+ invalidation functions.
- * We inline these within pmap.c for speed.
- */
-PMAP_INLINE void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
- CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
- pmap, va);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invlpg(va);
- PT_UPDATES_FLUSH();
-}
-
-PMAP_INLINE void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t addr;
-
- if (eva - sva > PAGE_SIZE)
- CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
- pmap, sva, eva);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
- PT_UPDATES_FLUSH();
-}
-
-PMAP_INLINE void
-pmap_invalidate_all(pmap_t pmap)
-{
-
- CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
-
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invltlb();
-}
-
-PMAP_INLINE void
-pmap_invalidate_cache(void)
-{
-
- wbinvd();
-}
-#endif /* !SMP */
-
-#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
-
-void
-pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
-{
-
- if (force) {
-		sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- } else {
- KASSERT((sva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: sva not page-aligned"));
- KASSERT((eva & PAGE_MASK) == 0,
- ("pmap_invalidate_cache_range: eva not page-aligned"));
- }
-
- if ((cpu_feature & CPUID_SS) != 0 && !force)
- ; /* If "Self Snoop" is supported, do nothing. */
- else if ((cpu_feature & CPUID_CLFSH) != 0 &&
- eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
- /*
- * Otherwise, do per-cache line flush. Use the mfence
-		 * instruction to ensure that previous stores are
- * included in the write-back. The processor
- * propagates flush to other processors in the cache
- * coherence domain.
- */
- mfence();
- for (; sva < eva; sva += cpu_clflush_line_size)
- clflush(sva);
- mfence();
- } else {
-
- /*
-		 * No targeted cache flush methods are supported by the CPU,
- * or the supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
- }
-}
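-
-/*
- * Example of the decision above (a 64-byte cpu_clflush_line_size is an
- * assumption): flushing one 4 KB page on a CLFLUSH-capable CPU without
- * self-snoop issues an mfence, 4096 / 64 == 64 clflush instructions, and
- * a closing mfence, while a range of 2 MB or more falls through to the
- * global pmap_invalidate_cache().
- */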
-
-void
-pmap_invalidate_cache_pages(vm_page_t *pages, int count)
-{
- int i;
-
- if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
- (cpu_feature & CPUID_CLFSH) == 0) {
- pmap_invalidate_cache();
- } else {
- for (i = 0; i < count; i++)
- pmap_flush_page(pages[i]);
- }
-}
-
-/*
- * Are we current address space or kernel? N.B. We return FALSE when
- * a pmap's page table is in use because a kernel thread is borrowing
- * it. The borrowed page table can change spontaneously, making any
- * dependence on its continued use subject to a race condition.
- */
-static __inline int
-pmap_is_current(pmap_t pmap)
-{
-
- return (pmap == kernel_pmap ||
- (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
- (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
-}
-
-/*
- * If the given pmap is not the current or kernel pmap, the returned pte must
- * be released by passing it to pmap_pte_release().
- */
-pt_entry_t *
-pmap_pte(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- mtx_lock(&PMAP2mutex);
- newpf = *pde & PG_FRAME;
- if ((*PMAP2 & PG_FRAME) != newpf) {
- PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
- CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
- pmap, va, (*PMAP2 & 0xffffffff));
- }
- return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (NULL);
-}
-
-/*
- * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte
- * being NULL.
- */
-static __inline void
-pmap_pte_release(pt_entry_t *pte)
-{
-
- if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
- CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
- *PMAP2);
- rw_wlock(&pvh_global_lock);
- PT_SET_VA(PMAP2, 0, TRUE);
- rw_wunlock(&pvh_global_lock);
- mtx_unlock(&PMAP2mutex);
- }
-}
-
-static __inline void
-invlcaddr(void *caddr)
-{
-
- invlpg((u_int)caddr);
- PT_UPDATES_FLUSH();
-}
-
-/*
- * Super fast pmap_pte routine best used when scanning
- * the pv lists. This eliminates many coarse-grained
- * invltlb calls. Note that many of the pv list
- * scans are across different pmaps. It is very wasteful
- * to do an entire invltlb for checking a single mapping.
- *
- * If the given pmap is not the current pmap, pvh_global_lock
- * must be held and curthread pinned to a CPU.
- */
-static pt_entry_t *
-pmap_pte_quick(pmap_t pmap, vm_offset_t va)
-{
- pd_entry_t newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return (vtopte(va));
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- newpf = *pde & PG_FRAME;
- if ((*PMAP1 & PG_FRAME) != newpf) {
- PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
- CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
- pmap, va, (u_long)*PMAP1);
-
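- /*
- * On SMP, record which CPU rewrote PMAP1 so that another
- * CPU reusing the mapping can flush its own stale TLB
- * entry for PADDR1.
- */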
-#ifdef SMP
- PMAP1cpu = PCPU_GET(cpuid);
-#endif
- PMAP1changed++;
- } else
-#ifdef SMP
- if (PMAP1cpu != PCPU_GET(cpuid)) {
- PMAP1cpu = PCPU_GET(cpuid);
- invlcaddr(PADDR1);
- PMAP1changedcpu++;
- } else
-#endif
- PMAP1unchanged++;
- return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
- }
- return (NULL);
-}
-
-/*
- * Routine: pmap_extract
- * Function:
- * Extract the physical page address associated
- * with the given map/virtual_address pair.
- */
-vm_paddr_t
-pmap_extract(pmap_t pmap, vm_offset_t va)
-{
- vm_paddr_t rtval;
- pt_entry_t *pte;
- pd_entry_t pde;
- pt_entry_t pteval;
-
- rtval = 0;
- PMAP_LOCK(pmap);
- pde = pmap->pm_pdir[va >> PDRSHIFT];
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
- PMAP_UNLOCK(pmap);
- return rtval;
- }
- pte = pmap_pte(pmap, va);
- pteval = *pte ? xpmap_mtop(*pte) : 0;
- rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
- }
- PMAP_UNLOCK(pmap);
- return (rtval);
-}
-
-/*
- * Routine: pmap_extract_ma
- * Function:
- * Like pmap_extract, but returns machine address
- */
-vm_paddr_t
-pmap_extract_ma(pmap_t pmap, vm_offset_t va)
-{
- vm_paddr_t rtval;
- pt_entry_t *pte;
- pd_entry_t pde;
-
- rtval = 0;
- PMAP_LOCK(pmap);
- pde = pmap->pm_pdir[va >> PDRSHIFT];
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = (pde & ~PDRMASK) | (va & PDRMASK);
- PMAP_UNLOCK(pmap);
- return rtval;
- }
- pte = pmap_pte(pmap, va);
- rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
- pmap_pte_release(pte);
- }
- PMAP_UNLOCK(pmap);
- return (rtval);
-}
-
-/*
- * Routine: pmap_extract_and_hold
- * Function:
- * Atomically extract and hold the physical page
- * with the given pmap and virtual address pair
- * if that mapping permits the given protection.
- */
-vm_page_t
-pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
-{
- pd_entry_t pde;
- pt_entry_t pte, *ptep;
- vm_page_t m;
- vm_paddr_t pa;
-
- pa = 0;
- m = NULL;
- PMAP_LOCK(pmap);
-retry:
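- /*
- * vm_page_pa_tryrelock() may drop the pmap lock while
- * switching page locks; if it does, the PDE/PTE must be
- * re-read, hence the retry.
- */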
- pde = PT_GET(pmap_pde(pmap, va));
- if (pde != 0) {
- if (pde & PG_PS) {
- if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde &
- PG_PS_FRAME) | (va & PDRMASK), &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
- (va & PDRMASK));
- vm_page_hold(m);
- }
- } else {
- ptep = pmap_pte(pmap, va);
- pte = PT_GET(ptep);
- pmap_pte_release(ptep);
- if (pte != 0 &&
- ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
- &pa))
- goto retry;
- m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
- vm_page_hold(m);
- }
- }
- }
- PA_UNLOCK_COND(pa);
- PMAP_UNLOCK(pmap);
- return (m);
-}
-
-/***************************************************
- * Low level mapping routines.....
- ***************************************************/
-
-/*
- * Add a wired page to the kva.
- * Note: not SMP coherent.
- *
- * This function may be used before pmap_bootstrap() is called.
- */
-void
-pmap_kenter(vm_offset_t va, vm_paddr_t pa)
-{
-
- PT_SET_MA(va, xpmap_ptom(pa) | PG_RW | PG_V | pgeflag);
-}
-
-void
-pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
-}
-
-static __inline void
-pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
-{
-
- PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
-}
-
-/*
- * Remove a page from the kernel pagetables.
- * Note: not SMP coherent.
- *
- * This function may be used before pmap_bootstrap() is called.
- */
-PMAP_INLINE void
-pmap_kremove(vm_offset_t va)
-{
- pt_entry_t *pte;
-
- pte = vtopte(va);
- PT_CLEAR_VA(pte, FALSE);
-}
-
-/*
- * Used to map a range of physical addresses into kernel
- * virtual address space.
- *
- * The value passed in '*virt' is a suggested virtual address for
- * the mapping. Architectures which can support a direct-mapped
- * physical to virtual region can return the appropriate address
- * within that region, leaving '*virt' unchanged. Other
- * architectures should map the pages starting at '*virt' and
- * update '*virt' with the first usable address after the mapped
- * region.
- */
-vm_offset_t
-pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
-{
- vm_offset_t va, sva;
-
- va = sva = *virt;
- CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
- va, start, end, prot);
- while (start < end) {
- pmap_kenter(va, start);
- va += PAGE_SIZE;
- start += PAGE_SIZE;
- }
- pmap_invalidate_range(kernel_pmap, sva, va);
- *virt = va;
- return (sva);
-}
-
-
-/*
- * Add a list of wired pages to the kva.  This routine is only used
- * for temporary kernel mappings that do not need to have page
- * modification or references recorded.  Note that old mappings are
- * simply written over.  The page *must* be wired.
- * Note: SMP coherent.  Uses a ranged shootdown IPI.
- */
-void
-pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
-{
- pt_entry_t *endpte, *pte;
- vm_paddr_t pa;
- vm_offset_t va = sva;
- int mclcount = 0;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
- int error;
-
- CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
- pte = vtopte(sva);
- endpte = pte + count;
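- /*
- * Under Xen PV, PTEs cannot be written directly; each update is
- * an update_va_mapping hypercall, queued here and issued in
- * batches of 16 via HYPERVISOR_multicall() to reduce the number
- * of hypervisor transitions.
- */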
- while (pte < endpte) {
- pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;
-
- mclp->op = __HYPERVISOR_update_va_mapping;
- mclp->args[0] = va;
- mclp->args[1] = (uint32_t)(pa & 0xffffffff);
- mclp->args[2] = (uint32_t)(pa >> 32);
- mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;
-
- va += PAGE_SIZE;
- pte++;
- ma++;
- mclp++;
- mclcount++;
- if (mclcount == 16) {
- error = HYPERVISOR_multicall(mcl, mclcount);
- mclp = mcl;
- mclcount = 0;
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- }
- if (mclcount) {
- error = HYPERVISOR_multicall(mcl, mclcount);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
-
-#ifdef INVARIANTS
- for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++)
- KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE));
-#endif
-}
-
-/*
- * This routine tears out page mappings from the
- * kernel -- it is meant only for temporary mappings.
- * Note: SMP coherent. Uses a ranged shootdown IPI.
- */
-void
-pmap_qremove(vm_offset_t sva, int count)
-{
- vm_offset_t va;
-
- CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
- va = sva;
- rw_wlock(&pvh_global_lock);
- critical_enter();
- while (count-- > 0) {
- pmap_kremove(va);
- va += PAGE_SIZE;
- }
- PT_UPDATES_FLUSH();
- pmap_invalidate_range(kernel_pmap, sva, va);
- critical_exit();
- rw_wunlock(&pvh_global_lock);
-}
-
-/***************************************************
- * Page table page management routines.....
- ***************************************************/
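-/*
- * The "free" page lists used below are singly linked through the
- * page's object pointer, which is otherwise unused while a page
- * table page is wired.
- */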
-static __inline void
-pmap_free_zero_pages(vm_page_t free)
-{
- vm_page_t m;
-
- while (free != NULL) {
- m = free;
- free = (void *)m->object;
- m->object = NULL;
- vm_page_free_zero(m);
- }
-}
-
-/*
- * Decrements a page table page's wire count, which is used to record the
- * number of valid page table entries within the page. If the wire count
- * drops to zero, then the page table page is unmapped. Returns TRUE if the
- * page table page was unmapped and FALSE otherwise.
- */
-static inline boolean_t
-pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
-{
-
- --m->wire_count;
- if (m->wire_count == 0) {
- _pmap_unwire_ptp(pmap, m, free);
- return (TRUE);
- } else
- return (FALSE);
-}
-
-static void
-_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
-{
- vm_offset_t pteva;
-
- PT_UPDATES_FLUSH();
- /*
- * unmap the page table page
- */
- xen_pt_unpin(pmap->pm_pdir[m->pindex]);
- /*
- * The page might still contain a residual mapping.
- */
- PD_CLEAR_VA(pmap, m->pindex, TRUE);
- pmap_zero_page(m);
- --pmap->pm_stats.resident_count;
-
- /*
- * This is a release store so that the ordinary store unmapping
- * the page table page is globally performed before TLB shoot-
- * down is begun.
- */
- atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
-
- /*
- * Do an invltlb to make the invalidated mapping
- * take effect immediately.
- */
- pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
- pmap_invalidate_page(pmap, pteva);
-
- /*
- * Put page on a list so that it is released after
- * *ALL* TLB shootdown is done
- */
- m->object = (void *)*free;
- *free = m;
-}
-
-/*
- * After removing a page table entry, this routine is used to
- * conditionally free the page, and manage the hold/wire counts.
- */
-static int
-pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
-{
- pd_entry_t ptepde;
- vm_page_t mpte;
-
- if (va >= VM_MAXUSER_ADDRESS)
- return (0);
- ptepde = PT_GET(pmap_pde(pmap, va));
- mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
- return (pmap_unwire_ptp(pmap, mpte, free));
-}
-
-/*
- * Initialize the pmap for the swapper process.
- */
-void
-pmap_pinit0(pmap_t pmap)
-{
-
- PMAP_LOCK_INIT(pmap);
- /*
- * Since the page table directory is shared with the kernel pmap,
- * which is already included in the list "allpmaps", this pmap does
- * not need to be inserted into that list.
- */
- pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
-#ifdef PAE
- pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
-#endif
- CPU_ZERO(&pmap->pm_active);
- PCPU_SET(curpmap, pmap);
- TAILQ_INIT(&pmap->pm_pvchunk);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-}
-
-/*
- * Initialize a preallocated and zeroed pmap structure,
- * such as one in a vmspace structure.
- */
-int
-pmap_pinit(pmap_t pmap)
-{
- vm_page_t m, ptdpg[NPGPTD + 1];
- int npgptd = NPGPTD + 1;
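- /* The extra page, ptdpg[NPGPTD], holds the top-level directory
- * (the PAE PDPT) that is pinned by Xen below. */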
- int i;
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- /*
- * No need to allocate page table space yet but we do need a valid
- * page directory table.
- */
- if (pmap->pm_pdir == NULL) {
- pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD);
- if (pmap->pm_pdir == NULL) {
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
- return (0);
- }
-#ifdef PAE
- pmap->pm_pdpt = (pdpt_entry_t *)kva_alloc(1);
-#endif
- }
-
- /*
- * allocate the page directory page(s)
- */
- for (i = 0; i < npgptd;) {
- m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO);
- if (m == NULL)
- VM_WAIT;
- else {
- ptdpg[i++] = m;
- }
- }
-
- pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
-
- for (i = 0; i < NPGPTD; i++)
- if ((ptdpg[i]->flags & PG_ZERO) == 0)
- pagezero(pmap->pm_pdir + (i * NPDEPG));
-
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
- /* Copy the kernel page table directory entries. */
- bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
- mtx_unlock_spin(&allpmaps_lock);
-
-#ifdef PAE
- pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
- if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdpt, PAGE_SIZE);
- for (i = 0; i < NPGPTD; i++) {
- vm_paddr_t ma;
-
- ma = VM_PAGE_TO_MACH(ptdpg[i]);
- pmap->pm_pdpt[i] = ma | PG_V;
-
- }
-#endif
- for (i = 0; i < NPGPTD; i++) {
- pt_entry_t *pd;
- vm_paddr_t ma;
-
- ma = VM_PAGE_TO_MACH(ptdpg[i]);
- pd = pmap->pm_pdir + (i * NPDEPG);
- PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
-#if 0
- xen_pgd_pin(ma);
-#endif
- }
-
-#ifdef PAE
- PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
-#endif
- rw_wlock(&pvh_global_lock);
- xen_flush_queue();
- xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
- for (i = 0; i < NPGPTD; i++) {
- vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);
- PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
- }
- xen_flush_queue();
- rw_wunlock(&pvh_global_lock);
- CPU_ZERO(&pmap->pm_active);
- TAILQ_INIT(&pmap->pm_pvchunk);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
- return (1);
-}
-
-/*
- * this routine is called if the page table page is not
- * mapped correctly.
- */
-static vm_page_t
-_pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags)
-{
- vm_paddr_t ptema;
- vm_page_t m;
-
- /*
- * Allocate a page table page.
- */
- if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
- VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
- if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
- PMAP_UNLOCK(pmap);
- rw_wunlock(&pvh_global_lock);
- VM_WAIT;
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- }
-
- /*
- * Indicate the need to retry. While waiting, the page table
- * page may have been allocated.
- */
- return (NULL);
- }
- if ((m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
-
- /*
- * Map the pagetable page into the process address space, if
- * it isn't already there.
- */
-
- pmap->pm_stats.resident_count++;
-
- ptema = VM_PAGE_TO_MACH(m);
- xen_pt_pin(ptema);
- PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
- (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
-
- KASSERT(pmap->pm_pdir[ptepindex],
- ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
- return (m);
-}
-
-static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
-{
- u_int ptepindex;
- pd_entry_t ptema;
- vm_page_t m;
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
-retry:
- /*
- * Get the page directory entry
- */
- ptema = pmap->pm_pdir[ptepindex];
-
- /*
- * This supports switching from a 4MB page to a
- * normal 4K page.
- */
- if (ptema & PG_PS) {
- /*
- * The 4MB mapping is simply destroyed here rather
- * than demoted to 4K pages.
- */
- pmap->pm_pdir[ptepindex] = 0;
- ptema = 0;
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- pmap_invalidate_all(kernel_pmap);
- }
-
- /*
- * If the page table page is mapped, we just increment the
- * hold count, and activate it.
- */
- if (ptema & PG_V) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
- m->wire_count++;
- } else {
- /*
- * Here if the pte page isn't mapped, or if it has
- * been deallocated.
- */
- CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
- pmap, va, flags);
- m = _pmap_allocpte(pmap, ptepindex, flags);
- if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
- goto retry;
-
- KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex));
- }
- return (m);
-}
-
-
-/***************************************************
- * Pmap allocation/deallocation routines.
- ***************************************************/
-
-
-/*
- * Release any resources held by the given physical map.
- * Called when a pmap initialized by pmap_pinit is being released.
- * Should only be called if the map contains no valid mappings.
- */
-void
-pmap_release(pmap_t pmap)
-{
- vm_page_t m, ptdpg[2*NPGPTD+1];
- vm_paddr_t ma;
- int i;
-#ifdef PAE
- int npgptd = NPGPTD + 1;
-#else
- int npgptd = NPGPTD;
-#endif
-
- KASSERT(pmap->pm_stats.resident_count == 0,
- ("pmap_release: pmap resident count %ld != 0",
- pmap->pm_stats.resident_count));
- PT_UPDATES_FLUSH();
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- KASSERT(CPU_EMPTY(&pmap->pm_active),
- ("releasing active pmap %p", pmap));
- mtx_lock_spin(&allpmaps_lock);
- LIST_REMOVE(pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
-
- for (i = 0; i < NPGPTD; i++)
- ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME);
- pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
-#ifdef PAE
- ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
-#endif
-
- for (i = 0; i < npgptd; i++) {
- m = ptdpg[i];
- ma = VM_PAGE_TO_MACH(m);
- /*
- * Only the PDPT page was pinned (by xen_pgdpt_pin() in
- * pmap_pinit()), so it is the only page unpinned here.
- */
- if (i == NPGPTD)
- xen_pgd_unpin(ma);
-#ifdef PAE
- if (i < NPGPTD)
- KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
- ("pmap_release: got wrong ptd page"));
-#endif
- m->wire_count--;
- atomic_subtract_int(&vm_cnt.v_wire_count, 1);
- vm_page_free(m);
- }
-#ifdef PAE
- pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
-#endif
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
-}
-
-static int
-kvm_size(SYSCTL_HANDLER_ARGS)
-{
- unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
-
- return (sysctl_handle_long(oidp, &ksize, 0, req));
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_size, "IU", "Size of KVM");
-
-static int
-kvm_free(SYSCTL_HANDLER_ARGS)
-{
- unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
-
- return (sysctl_handle_long(oidp, &kfree, 0, req));
-}
-SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
- 0, 0, kvm_free, "IU", "Amount of KVM free");
-
-/*
- * Grow the number of kernel page table entries, if needed.  KVA is
- * extended one page directory entry (NBPDR bytes) at a time, and each
- * new PDE is installed into every pmap on the "allpmaps" list.
- */
-void
-pmap_growkernel(vm_offset_t addr)
-{
- struct pmap *pmap;
- vm_paddr_t ptppaddr;
- vm_page_t nkpg;
- pd_entry_t newpdir;
-
- mtx_assert(&kernel_map->system_mtx, MA_OWNED);
- if (kernel_vm_end == 0) {
- kernel_vm_end = KERNBASE;
- nkpt = 0;
- while (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
- nkpt++;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- }
- }
- addr = roundup2(addr, NBPDR);
- if (addr - 1 >= kernel_map->max_offset)
- addr = kernel_map->max_offset;
- while (kernel_vm_end < addr) {
- if (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- continue;
- }
-
- nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
- VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- if (nkpg == NULL)
- panic("pmap_growkernel: no memory to grow kernel");
-
- nkpt++;
-
- if ((nkpg->flags & PG_ZERO) == 0)
- pmap_zero_page(nkpg);
- ptppaddr = VM_PAGE_TO_PHYS(nkpg);
- newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- rw_wlock(&pvh_global_lock);
- PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list)
- PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
-
- mtx_unlock_spin(&allpmaps_lock);
- rw_wunlock(&pvh_global_lock);
-
- kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
- if (kernel_vm_end - 1 >= kernel_map->max_offset) {
- kernel_vm_end = kernel_map->max_offset;
- break;
- }
- }
-}
-
-
-/***************************************************
- * page management routines.
- ***************************************************/
-
-CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
-CTASSERT(_NPCM == 11);
-CTASSERT(_NPCPV == 336);
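-/*
- * Each pv_chunk occupies a single page and holds _NPCPV (336) pv
- * entries, tracked by an _NPCM (11) word free bitmap: ten full
- * 32-bit words plus 16 bits in the last word (10 * 32 + 16 = 336).
- */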
-
-static __inline struct pv_chunk *
-pv_to_chunk(pv_entry_t pv)
-{
-
- return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
-}
-
-#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
-
-#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
-#define PC_FREE10 0x0000fffful /* Free values for index 10 */
-
-static const uint32_t pc_freemask[_NPCM] = {
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
- PC_FREE0_9, PC_FREE10
-};
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
- "Current number of pv entries");
-
-#ifdef PV_STATS
-static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
- "Current number of pv entry chunks");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
- "Current number of pv entry chunks allocated");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
- "Current number of pv entry chunks frees");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
- "Number of times tried to get a chunk page but failed.");
-
-static long pv_entry_frees, pv_entry_allocs;
-static int pv_entry_spare;
-
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
- "Current number of pv entry frees");
-SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
- "Current number of pv entry allocs");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
- "Current number of spare pv entries");
-#endif
-
-/*
- * We are in a serious low memory condition. Resort to
- * drastic measures to free some pages so we can allocate
- * another pv entry chunk.
- */
-static vm_page_t
-pmap_pv_reclaim(pmap_t locked_pmap)
-{
- struct pch newtail;
- struct pv_chunk *pc;
- pmap_t pmap;
- pt_entry_t *pte, tpte;
- pv_entry_t pv;
- vm_offset_t va;
- vm_page_t free, m, m_pc;
- uint32_t inuse;
- int bit, field, freed;
-
- PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
- pmap = NULL;
- free = m_pc = NULL;
- TAILQ_INIT(&newtail);
- while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
- free == NULL)) {
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- if (pmap != pc->pc_pmap) {
- if (pmap != NULL) {
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- }
- pmap = pc->pc_pmap;
- /*
- * Avoid deadlock and lock recursion: pmap locks
- * are always acquired in ascending address order.
- */
- if (pmap > locked_pmap)
- PMAP_LOCK(pmap);
- else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
- pmap = NULL;
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
- continue;
- }
- }
-
- /*
- * Destroy every non-wired, 4 KB page mapping in the chunk.
- */
- freed = 0;
- for (field = 0; field < _NPCM; field++) {
- for (inuse = ~pc->pc_map[field] & pc_freemask[field];
- inuse != 0; inuse &= ~(1UL << bit)) {
- bit = bsfl(inuse);
- pv = &pc->pc_pventry[field * 32 + bit];
- va = pv->pv_va;
- pte = pmap_pte(pmap, va);
- tpte = *pte;
- if ((tpte & PG_W) == 0)
- tpte = pte_load_clear(pte);
- pmap_pte_release(pte);
- if ((tpte & PG_W) != 0)
- continue;
- KASSERT(tpte != 0,
- ("pmap_pv_reclaim: pmap %p va %x zero pte",
- pmap, va));
- if ((tpte & PG_G) != 0)
- pmap_invalidate_page(pmap, va);
- m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- if ((tpte & PG_A) != 0)
- vm_page_aflag_set(m, PGA_REFERENCED);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- pc->pc_map[field] |= 1UL << bit;
- pmap_unuse_pt(pmap, va, &free);
- freed++;
- }
- }
- if (freed == 0) {
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
- continue;
- }
- /* Every freed mapping is for a 4 KB page. */
- pmap->pm_stats.resident_count -= freed;
- PV_STAT(pv_entry_frees += freed);
- PV_STAT(pv_entry_spare += freed);
- pv_entry_count -= freed;
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- for (field = 0; field < _NPCM; field++)
- if (pc->pc_map[field] != pc_freemask[field]) {
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
- pc_list);
- TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
-
- /*
- * One freed pv entry in locked_pmap is
- * sufficient.
- */
- if (pmap == locked_pmap)
- goto out;
- break;
- }
- if (field == _NPCM) {
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
- /* Entire chunk is free; return it. */
- m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
- break;
- }
- }
-out:
- TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
- if (pmap != NULL) {
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- }
- if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
- m_pc = free;
- free = (void *)m_pc->object;
- /* Recycle a freed page table page. */
- m_pc->wire_count = 1;
- atomic_add_int(&vm_cnt.v_wire_count, 1);
- }
- pmap_free_zero_pages(free);
- return (m_pc);
-}
-
-/*
- * free the pv_entry back to the free list
- */
-static void
-free_pv_entry(pmap_t pmap, pv_entry_t pv)
-{
- struct pv_chunk *pc;
- int idx, field, bit;
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
- pc = pv_to_chunk(pv);
- idx = pv - &pc->pc_pventry[0];
- field = idx / 32;
- bit = idx % 32;
- pc->pc_map[field] |= 1ul << bit;
- for (idx = 0; idx < _NPCM; idx++)
- if (pc->pc_map[idx] != pc_freemask[idx]) {
- /*
- * 98% of the time, pc is already at the head of the
- * list. If it isn't already, move it to the head.
- */
- if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
- pc)) {
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
- pc_list);
- }
- return;
- }
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- free_pv_chunk(pc);
-}
-
-static void
-free_pv_chunk(struct pv_chunk *pc)
-{
- vm_page_t m;
-
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- PV_STAT(pv_entry_spare -= _NPCPV);
- PV_STAT(pc_chunk_count--);
- PV_STAT(pc_chunk_frees++);
- /* entire chunk is free, return it */
- m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
- pmap_qremove((vm_offset_t)pc, 1);
- vm_page_unwire(m, PQ_INACTIVE);
- vm_page_free(m);
- pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
-}
-
-/*
- * get a new pv_entry, allocating a block from the system
- * when needed.
- */
-static pv_entry_t
-get_pv_entry(pmap_t pmap, boolean_t try)
-{
- static const struct timeval printinterval = { 60, 0 };
- static struct timeval lastprint;
- int bit, field;
- pv_entry_t pv;
- struct pv_chunk *pc;
- vm_page_t m;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PV_STAT(pv_entry_allocs++);
- pv_entry_count++;
- if (pv_entry_count > pv_entry_high_water)
- if (ratecheck(&lastprint, &printinterval))
- printf("Approaching the limit on PV entries, consider "
- "increasing either the vm.pmap.shpgperproc or the "
- "vm.pmap.pv_entry_max tunable.\n");
-retry:
- pc = TAILQ_FIRST(&pmap->pm_pvchunk);
- if (pc != NULL) {
- for (field = 0; field < _NPCM; field++) {
- if (pc->pc_map[field]) {
- bit = bsfl(pc->pc_map[field]);
- break;
- }
- }
- if (field < _NPCM) {
- pv = &pc->pc_pventry[field * 32 + bit];
- pc->pc_map[field] &= ~(1ul << bit);
- /* If this was the last item, move it to tail */
- for (field = 0; field < _NPCM; field++)
- if (pc->pc_map[field] != 0) {
- PV_STAT(pv_entry_spare--);
- return (pv); /* not full, return */
- }
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
- PV_STAT(pv_entry_spare--);
- return (pv);
- }
- }
- /*
- * Access to the ptelist "pv_vafree" is synchronized by the pvh
- * global lock.  If "pv_vafree" is currently non-empty, it will
- * remain non-empty until pmap_ptelist_alloc() completes.
- */
- if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
- VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
- if (try) {
- pv_entry_count--;
- PV_STAT(pc_chunk_tryfail++);
- return (NULL);
- }
- m = pmap_pv_reclaim(pmap);
- if (m == NULL)
- goto retry;
- }
- PV_STAT(pc_chunk_count++);
- PV_STAT(pc_chunk_allocs++);
- pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
- pmap_qenter((vm_offset_t)pc, &m, 1);
- if ((m->flags & PG_ZERO) == 0)
- pagezero(pc);
- pc->pc_pmap = pmap;
- pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
- for (field = 1; field < _NPCM; field++)
- pc->pc_map[field] = pc_freemask[field];
- TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
- pv = &pc->pc_pventry[0];
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- PV_STAT(pv_entry_spare += _NPCPV - 1);
- return (pv);
-}
-
-static __inline pv_entry_t
-pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
-{
- pv_entry_t pv;
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
- if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
- TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
- break;
- }
- }
- return (pv);
-}
-
-static void
-pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
-{
- pv_entry_t pv;
-
- pv = pmap_pvh_remove(pvh, pmap, va);
- KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
- free_pv_entry(pmap, pv);
-}
-
-static void
-pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
-{
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- pmap_pvh_free(&m->md, pmap, va);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
-}
-
-/*
- * Conditionally create a pv entry.
- */
-static boolean_t
-pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
-{
- pv_entry_t pv;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- if (pv_entry_count < pv_entry_high_water &&
- (pv = get_pv_entry(pmap, TRUE)) != NULL) {
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- return (TRUE);
- } else
- return (FALSE);
-}
-
-/*
- * pmap_remove_pte: do the things to unmap a page in a process
- */
-static int
-pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
-{
- pt_entry_t oldpte;
- vm_page_t m;
-
- CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
- pmap, (u_long)*ptq, va);
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- oldpte = *ptq;
- PT_SET_VA_MA(ptq, 0, TRUE);
- KASSERT(oldpte != 0,
- ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va));
- if (oldpte & PG_W)
- pmap->pm_stats.wired_count -= 1;
- /*
- * Machines that don't support invlpg, also don't support
- * PG_G.
- */
- if (oldpte & PG_G)
- pmap_invalidate_page(kernel_pmap, va);
- pmap->pm_stats.resident_count -= 1;
- if (oldpte & PG_MANAGED) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);
- if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- if (oldpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
- pmap_remove_entry(pmap, m, va);
- }
- return (pmap_unuse_pt(pmap, va, free));
-}
-
-/*
- * Remove a single page from a process address space
- */
-static void
-pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
-{
- pt_entry_t *pte;
-
- CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
- pmap, va);
-
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0)
- return;
- pmap_remove_pte(pmap, pte, va, free);
- pmap_invalidate_page(pmap, va);
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
-
-}
-
-/*
- * Remove the given range of addresses from the specified map.
- *
- * It is assumed that the start and end are properly
- * rounded to the page size.
- */
-void
-pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- vm_page_t free = NULL;
- int anyvalid;
-
- CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
- pmap, sva, eva);
-
- /*
- * Perform an unsynchronized read. This is, however, safe.
- */
- if (pmap->pm_stats.resident_count == 0)
- return;
-
- anyvalid = 0;
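- /*
- * "anyvalid" records whether any non-global mapping was
- * removed, so a single pmap_invalidate_all() at the end can
- * replace per-page invalidations.
- */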
-
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
-
- /*
- * Special handling for removing a single page: a very
- * common operation that is easy to short circuit.
- */
- if ((sva + PAGE_SIZE == eva) &&
- ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
- pmap_remove_page(pmap, sva, &free);
- goto out;
- }
-
- for (; sva < eva; sva = pdnxt) {
- u_int pdirindex;
-
- /*
- * Calculate index for next page table.
- */
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- if (pmap->pm_stats.resident_count == 0)
- break;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- PD_CLEAR_VA(pmap, pdirindex, TRUE);
- pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
- anyvalid = 1;
- continue;
- }
-
- /*
- * Limit our scan to either the end of the va represented
- * by the current page table page, or to the end of the
- * range being removed.
- */
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & PG_V) == 0)
- continue;
-
- /*
- * The TLB entry for a PG_G mapping is invalidated
- * by pmap_remove_pte().
- */
- if ((*pte & PG_G) == 0)
- anyvalid = 1;
- if (pmap_remove_pte(pmap, pte, sva, &free))
- break;
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
-out:
- if (anyvalid)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
-}
-
-/*
- * Routine: pmap_remove_all
- * Function:
- * Removes this physical page from
- * all physical maps in which it resides.
- * Reflects back modify bits to the pager.
- *
- * Notes:
- * Original versions of this routine were very
- * inefficient because they iteratively called
- * pmap_remove (slow...)
- */
-
-void
-pmap_remove_all(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t *pte, tpte;
- vm_page_t free;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_remove_all: page %p is not managed", m));
- free = NULL;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pmap->pm_stats.resident_count--;
- pte = pmap_pte_quick(pmap, pv->pv_va);
- tpte = *pte;
- PT_SET_VA_MA(pte, 0, TRUE);
- KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte",
- pmap, pv->pv_va));
- if (tpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (tpte & PG_A)
- vm_page_aflag_set(m, PGA_REFERENCED);
-
- /*
- * Update the vm_page_t clean and reference bits.
- */
- if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- vm_page_dirty(m);
- pmap_unuse_pt(pmap, pv->pv_va, &free);
- pmap_invalidate_page(pmap, pv->pv_va);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- free_pv_entry(pmap, pv);
- PMAP_UNLOCK(pmap);
- }
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- pmap_free_zero_pages(free);
-}
-
-/*
- * Set the physical protection on the
- * specified range of this map as requested.
- */
-void
-pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
-{
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- int anychanged;
-
- CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
- pmap, sva, eva, prot);
-
- if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
- pmap_remove(pmap, sva, eva);
- return;
- }
-
-#ifdef PAE
- if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
- (VM_PROT_WRITE|VM_PROT_EXECUTE))
- return;
-#else
- if (prot & VM_PROT_WRITE)
- return;
-#endif
-
- anychanged = 0;
-
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pt_entry_t obits, pbits;
- u_int pdirindex;
-
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Check for large page.
- */
- if ((ptpaddr & PG_PS) != 0) {
- if ((prot & VM_PROT_WRITE) == 0)
- pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pmap->pm_pdir[pdirindex] |= pg_nx;
-#endif
- anychanged = 1;
- continue;
- }
-
- if (pdnxt > eva)
- pdnxt = eva;
-
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- vm_page_t m;
-
-retry:
- /*
- * Regardless of whether a pte is 32 or 64 bits in
- * size, PG_RW, PG_A, and PG_M are among the least
- * significant 32 bits.
- */
- obits = pbits = *pte;
- if ((pbits & PG_V) == 0)
- continue;
-
- if ((prot & VM_PROT_WRITE) == 0) {
- if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
- (PG_MANAGED | PG_M | PG_RW)) {
- m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
- PG_FRAME);
- vm_page_dirty(m);
- }
- pbits &= ~(PG_RW | PG_M);
- }
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pbits |= pg_nx;
-#endif
-
- if (pbits != obits) {
- obits = *pte;
- PT_SET_VA_MA(pte, pbits, TRUE);
- if (*pte != pbits)
- goto retry;
- if (obits & PG_G)
- pmap_invalidate_page(pmap, sva);
- else
- anychanged = 1;
- }
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- if (anychanged)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte can not be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
-int
-pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- u_int flags, int8_t psind __unused)
-{
- pd_entry_t *pde;
- pt_entry_t *pte;
- pt_entry_t newpte, origpte;
- pv_entry_t pv;
- vm_paddr_t opa, pa;
- vm_page_t mpte, om;
- boolean_t invlva, wired;
-
- CTR5(KTR_PMAP,
- "pmap_enter: pmap=%08p va=0x%08x ma=0x%08x prot=0x%x flags=0x%x",
- pmap, va, VM_PAGE_TO_MACH(m), prot, flags);
- va = trunc_page(va);
- KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
- KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
- ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
- va));
- if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
- VM_OBJECT_ASSERT_LOCKED(m->object);
-
- mpte = NULL;
- wired = (flags & PMAP_ENTER_WIRED) != 0;
-
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- sched_pin();
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- mpte = pmap_allocpte(pmap, va, flags);
- if (mpte == NULL) {
- KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
- ("pmap_allocpte failed with sleep allowed"));
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- return (KERN_RESOURCE_SHORTAGE);
- }
- }
-
- pde = pmap_pde(pmap, va);
- if ((*pde & PG_PS) != 0)
- panic("pmap_enter: attempted pmap_enter on 4MB page");
- pte = pmap_pte_quick(pmap, va);
-
- /*
- * Page Directory table entry not valid, we need a new PT page
- */
- if (pte == NULL) {
- panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
- (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
- }
-
- pa = VM_PAGE_TO_PHYS(m);
- om = NULL;
- opa = origpte = 0;
-
-#if 0
- KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
- pte, *pte));
-#endif
- origpte = *pte;
- if (origpte)
- origpte = xpmap_mtop(origpte);
- opa = origpte & PG_FRAME;
-
- /*
- * Mapping has not changed, must be protection or wiring change.
- */
- if (origpte && (opa == pa)) {
- /*
- * Wiring change, just update stats. We don't worry about
- * wiring PT pages as they remain resident as long as there
- * are valid mappings in them. Hence, if a user page is wired,
- * the PT page will be also.
- */
- if (wired && ((origpte & PG_W) == 0))
- pmap->pm_stats.wired_count++;
- else if (!wired && (origpte & PG_W))
- pmap->pm_stats.wired_count--;
-
- /*
- * Remove extra pte reference
- */
- if (mpte)
- mpte->wire_count--;
-
- if (origpte & PG_MANAGED) {
- om = m;
- pa |= PG_MANAGED;
- }
- goto validate;
- }
-
- pv = NULL;
-
- /*
- * Mapping has changed, invalidate old range and fall through to
- * handle validating new mapping.
- */
- if (opa) {
- if (origpte & PG_W)
- pmap->pm_stats.wired_count--;
- if (origpte & PG_MANAGED) {
- om = PHYS_TO_VM_PAGE(opa);
- pv = pmap_pvh_remove(&om->md, pmap, va);
- } else if (va < VM_MAXUSER_ADDRESS)
- printf("va=0x%x is unmanaged :-( \n", va);
-
- if (mpte != NULL) {
- mpte->wire_count--;
- KASSERT(mpte->wire_count > 0,
- ("pmap_enter: missing reference to page table page,"
- " va: 0x%x", va));
- }
- } else
- pmap->pm_stats.resident_count++;
-
- /*
- * Enter on the PV list if part of our managed memory.
- */
- if ((m->oflags & VPO_UNMANAGED) == 0) {
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
- ("pmap_enter: managed mapping within the clean submap"));
- if (pv == NULL)
- pv = get_pv_entry(pmap, FALSE);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- pa |= PG_MANAGED;
- } else if (pv != NULL)
- free_pv_entry(pmap, pv);
-
- /*
- * Increment counters
- */
- if (wired)
- pmap->pm_stats.wired_count++;
-
-validate:
- /*
- * Now validate mapping with desired protection/wiring.
- */
- newpte = (pt_entry_t)(pa | PG_V);
- if ((prot & VM_PROT_WRITE) != 0) {
- newpte |= PG_RW;
- if ((newpte & PG_MANAGED) != 0)
- vm_page_aflag_set(m, PGA_WRITEABLE);
- }
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- newpte |= pg_nx;
-#endif
- if (wired)
- newpte |= PG_W;
- if (va < VM_MAXUSER_ADDRESS)
- newpte |= PG_U;
- if (pmap == kernel_pmap)
- newpte |= pgeflag;
-
- critical_enter();
- /*
- * if the mapping or permission bits are different, we need
- * to update the pte.
- */
- if ((origpte & ~(PG_M|PG_A)) != newpte) {
- if (origpte) {
- invlva = FALSE;
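- /*
- * "invlva" is set when the old mapping may remain
- * cached in the TLB with a stale frame or stale
- * permissions and must be explicitly invalidated.
- */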
- origpte = *pte;
- PT_SET_VA(pte, newpte | PG_A, FALSE);
- if (origpte & PG_A) {
- if (origpte & PG_MANAGED)
- vm_page_aflag_set(om, PGA_REFERENCED);
- if (opa != VM_PAGE_TO_PHYS(m))
- invlva = TRUE;
-#ifdef PAE
- if ((origpte & PG_NX) == 0 &&
- (newpte & PG_NX) != 0)
- invlva = TRUE;
-#endif
- }
- if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if ((origpte & PG_MANAGED) != 0)
- vm_page_dirty(om);
- if ((prot & VM_PROT_WRITE) == 0)
- invlva = TRUE;
- }
- if ((origpte & PG_MANAGED) != 0 &&
- TAILQ_EMPTY(&om->md.pv_list))
- vm_page_aflag_clear(om, PGA_WRITEABLE);
- if (invlva)
- pmap_invalidate_page(pmap, va);
- } else {
- PT_SET_VA(pte, newpte | PG_A, FALSE);
- }
- }
- PT_UPDATES_FLUSH();
- critical_exit();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- return (KERN_SUCCESS);
-}
-
-/*
- * Maps a sequence of resident pages belonging to the same object.
- * The sequence begins with the given page m_start. This page is
- * mapped at the given virtual address start. Each subsequent page is
- * mapped at a virtual address that is offset from start by the same
- * amount as the page is offset from m_start within the object. The
- * last page in the sequence is the page with the largest offset from
- * m_start that can be mapped at a virtual address less than the given
- * virtual address end. Not every virtual page between start and end
- * is mapped; only those for which a resident page exists with the
- * corresponding offset from m_start are mapped.
- */
-void
-pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
- vm_page_t m_start, vm_prot_t prot)
-{
- vm_page_t m, mpte;
- vm_pindex_t diff, psize;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
- int error, count = 0;
-
- VM_OBJECT_ASSERT_LOCKED(m_start->object);
-
- psize = atop(end - start);
- mpte = NULL;
- m = m_start;
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
- mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m,
- prot, mpte);
- m = TAILQ_NEXT(m, listq);
- if (count == 16) {
- error = HYPERVISOR_multicall(mcl, count);
- KASSERT(error == 0, ("bad multicall %d", error));
- mclp = mcl;
- count = 0;
- }
- }
- if (count) {
- error = HYPERVISOR_multicall(mcl, count);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * This code makes some *MAJOR* assumptions:
- * 1. The mapping is entered into the current pmap, which exists.
- * 2. Not wired.
- * 3. Read access.
- * 4. No page table pages.
- * It is *MUCH* faster than pmap_enter...
- */
-
-void
-pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
-{
- multicall_entry_t mcl, *mclp;
- int count = 0;
- mclp = &mcl;
-
- CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
- pmap, va, m, prot);
-
- rw_wlock(&pvh_global_lock);
- PMAP_LOCK(pmap);
- (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL);
- if (count)
- HYPERVISOR_multicall(&mcl, count);
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-#ifdef notyet
-void
-pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count)
-{
- int i, error, index = 0;
- multicall_entry_t mcl[16];
- multicall_entry_t *mclp = mcl;
-
- PMAP_LOCK(pmap);
- for (i = 0; i < count; i++, addrs++, pages++, prots++) {
- if (!pmap_is_prefaultable_locked(pmap, *addrs))
- continue;
-
- (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL);
- if (index == 16) {
- error = HYPERVISOR_multicall(mcl, index);
- mclp = mcl;
- index = 0;
- KASSERT(error == 0, ("bad multicall %d", error));
- }
- }
- if (index) {
- error = HYPERVISOR_multicall(mcl, index);
- KASSERT(error == 0, ("bad multicall %d", error));
- }
-
- PMAP_UNLOCK(pmap);
-}
-#endif
-
-static vm_page_t
-pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, vm_page_t mpte)
-{
- pt_entry_t *pte;
- vm_paddr_t pa;
- vm_page_t free;
- multicall_entry_t *mcl = *mclpp;
-
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
- (m->oflags & VPO_UNMANAGED) != 0,
- ("pmap_enter_quick_locked: managed mapping within the clean submap"));
- rw_assert(&pvh_global_lock, RA_WLOCKED);
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
- /*
- * In the case that a page table page is not
- * resident, we are creating it here.
- */
- if (va < VM_MAXUSER_ADDRESS) {
- u_int ptepindex;
- pd_entry_t ptema;
-
- /*
- * Calculate pagetable page index
- */
- ptepindex = va >> PDRSHIFT;
- if (mpte && (mpte->pindex == ptepindex)) {
- mpte->wire_count++;
- } else {
- /*
- * Get the page directory entry
- */
- ptema = pmap->pm_pdir[ptepindex];
-
- /*
- * If the page table page is mapped, we just increment
- * the hold count, and activate it.
- */
- if (ptema & PG_V) {
- if (ptema & PG_PS)
- panic("pmap_enter_quick: unexpected mapping into 4MB page");
- mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
- mpte->wire_count++;
- } else {
- mpte = _pmap_allocpte(pmap, ptepindex,
- PMAP_ENTER_NOSLEEP);
- if (mpte == NULL)
- return (mpte);
- }
- }
- } else {
- mpte = NULL;
- }
-
- /*
- * This call to vtopte makes the assumption that we are
- * entering the page into the current pmap. In order to support
- * quick entry into any pmap, one would likely use pmap_pte_quick.
- * But that isn't as quick as vtopte.
- */
- KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap"));
- pte = vtopte(va);
- if (*pte & PG_V) {
- if (mpte != NULL) {
- mpte->wire_count--;
- mpte = NULL;
- }
- return (mpte);
- }
-
- /*
- * Enter on the PV list if part of our managed memory.
- */
- if ((m->oflags & VPO_UNMANAGED) == 0 &&
- !pmap_try_insert_pv_entry(pmap, va, m)) {
- if (mpte != NULL) {
- free = NULL;
- if (pmap_unwire_ptp(pmap, mpte, &free)) {
- pmap_invalidate_page(pmap, va);
- pmap_free_zero_pages(free);
- }
-
- mpte = NULL;
- }
- return (mpte);
- }
-
- /*
- * Increment counters
- */
- pmap->pm_stats.resident_count++;
-
- pa = VM_PAGE_TO_PHYS(m);
-#ifdef PAE
- if ((prot & VM_PROT_EXECUTE) == 0)
- pa |= pg_nx;
-#endif
-
-#if 0
- /*
- * Now validate mapping with RO protection
- */
- if ((m->oflags & VPO_UNMANAGED) != 0)
- pte_store(pte, pa | PG_V | PG_U);
- else
- pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
-#else
- /*
- * Now validate mapping with RO protection
- */
- if ((m->oflags & VPO_UNMANAGED) != 0)
- pa = xpmap_ptom(pa | PG_V | PG_U);
- else
- pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED);
-
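- /*
- * Queue the PTE write as an update_va_mapping multicall
- * entry; the caller flushes the batch with
- * HYPERVISOR_multicall().
- */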
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = va;
- mcl->args[1] = (uint32_t)(pa & 0xffffffff);
- mcl->args[2] = (uint32_t)(pa >> 32);
- mcl->args[3] = 0;
- *mclpp = mcl + 1;
- *count = *count + 1;
-#endif
- return (mpte);
-}
-
-/*
- * Make a temporary mapping for a physical address. This is only intended
- * to be used for panic dumps.
- */
-void *
-pmap_kenter_temporary(vm_paddr_t pa, int i)
-{
- vm_offset_t va;
- vm_paddr_t ma = xpmap_ptom(pa);
-
- va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
- PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag);
- invlpg(va);
- return ((void *)crashdumpmap);
-}
-
-/*
- * This code maps large physical mmap regions into the
- * processor address space. Note that some shortcuts
- * are taken, but the code works.
- */
-void
-pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
- vm_pindex_t pindex, vm_size_t size)
-{
- pd_entry_t *pde;
- vm_paddr_t pa, ptepa;
- vm_page_t p;
- int pat_mode;
-
- VM_OBJECT_ASSERT_WLOCKED(object);
- KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
- ("pmap_object_init_pt: non-device object"));
- if (pseflag &&
- (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
- if (!vm_object_populate(object, pindex, pindex + atop(size)))
- return;
- p = vm_page_lookup(object, pindex);
- KASSERT(p->valid == VM_PAGE_BITS_ALL,
- ("pmap_object_init_pt: invalid page %p", p));
- pat_mode = p->md.pat_mode;
-
- /*
- * Abort the mapping if the first page is not physically
- * aligned to a 2/4MB page boundary.
- */
- ptepa = VM_PAGE_TO_PHYS(p);
- if (ptepa & (NBPDR - 1))
- return;
-
- /*
- * Skip the first page. Abort the mapping if the rest of
- * the pages are not physically contiguous or have differing
- * memory attributes.
- */
- p = TAILQ_NEXT(p, listq);
- for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
- pa += PAGE_SIZE) {
- KASSERT(p->valid == VM_PAGE_BITS_ALL,
- ("pmap_object_init_pt: invalid page %p", p));
- if (pa != VM_PAGE_TO_PHYS(p) ||
- pat_mode != p->md.pat_mode)
- return;
- p = TAILQ_NEXT(p, listq);
- }
-
- /*
- * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and
- * "size" is a multiple of 2/4M, adding the PAT setting to
- * "pa" will not affect the termination of this loop.
- */
- PMAP_LOCK(pmap);
- for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
- size; pa += NBPDR) {
- pde = pmap_pde(pmap, addr);
- if (*pde == 0) {
- pde_store(pde, pa | PG_PS | PG_M | PG_A |
- PG_U | PG_RW | PG_V);
- pmap->pm_stats.resident_count += NBPDR /
- PAGE_SIZE;
- pmap_pde_mappings++;
- }
- /* Else continue on if the PDE is already valid. */
- addr += NBPDR;
- }
- PMAP_UNLOCK(pmap);
- }
-}
-
-/*
- * Clear the wired attribute from the mappings for the specified range of
- * addresses in the given pmap. Every valid mapping within that range
- * must have the wired attribute set. In contrast, invalid mappings
- * cannot have the wired attribute set, so they are ignored.
- *
- * The wired attribute of the page table entry is not a hardware feature,
- * so there is no need to invalidate any TLB entries.
- */
-void
-pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
- vm_offset_t pdnxt;
- pd_entry_t *pde;
- pt_entry_t *pte;
-
- CTR3(KTR_PMAP, "pmap_unwire: pmap=%p sva=0x%x eva=0x%x", pmap, sva,
- eva);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- pde = pmap_pde(pmap, sva);
- if ((*pde & PG_V) == 0)
- continue;
- if ((*pde & PG_PS) != 0)
- panic("pmap_unwire: unexpected PG_PS in pde %#jx",
- (uintmax_t)*pde);
- if (pdnxt > eva)
- pdnxt = eva;
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & PG_V) == 0)
- continue;
- if ((*pte & PG_W) == 0)
- panic("pmap_unwire: pte %#jx is missing PG_W",
- (uintmax_t)*pte);
- PT_SET_VA_MA(pte, *pte & ~PG_W, FALSE);
- pmap->pm_stats.wired_count--;
- }
- }
- if (*PMAP1)
- PT_CLEAR_VA(PMAP1, FALSE);
- PT_UPDATES_FLUSH();
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-
-/*
- * Copy the range specified by src_addr/len
- * from the source map to the range dst_addr/len
- * in the destination map.
- *
- * This routine is only advisory and need not do anything.
- */
-
-void
-pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
- vm_offset_t src_addr)
-{
- vm_page_t free;
- vm_offset_t addr;
- vm_offset_t end_addr = src_addr + len;
- vm_offset_t pdnxt;
-
- if (dst_addr != src_addr)
- return;
-
- if (!pmap_is_current(src_pmap)) {
- CTR2(KTR_PMAP,
- "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx",
- (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME));
-
- return;
- }
- CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
- dst_pmap, src_pmap, dst_addr, len, src_addr);
-
-#ifdef HAMFISTED_LOCKING
- mtx_lock(&createdelete_lock);
-#endif
-
- rw_wlock(&pvh_global_lock);
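- /* Lock the two pmaps in ascending address order to avoid deadlock. */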
- if (dst_pmap < src_pmap) {
- PMAP_LOCK(dst_pmap);
- PMAP_LOCK(src_pmap);
- } else {
- PMAP_LOCK(src_pmap);
- PMAP_LOCK(dst_pmap);
- }
- sched_pin();
- for (addr = src_addr; addr < end_addr; addr = pdnxt) {
- pt_entry_t *src_pte, *dst_pte;
- vm_page_t dstmpte, srcmpte;
- pd_entry_t srcptepaddr;
- u_int ptepindex;
-
- KASSERT(addr < UPT_MIN_ADDRESS,
- ("pmap_copy: invalid to pmap_copy page tables"));
-
- pdnxt = (addr + NBPDR) & ~PDRMASK;
- if (pdnxt < addr)
- pdnxt = end_addr;
- ptepindex = addr >> PDRSHIFT;
-
- srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
- if (srcptepaddr == 0)
- continue;
-
- if (srcptepaddr & PG_PS) {
- if (dst_pmap->pm_pdir[ptepindex] == 0) {
- PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE);
- dst_pmap->pm_stats.resident_count +=
- NBPDR / PAGE_SIZE;
- }
- continue;
- }
-
- srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
- KASSERT(srcmpte->wire_count > 0,
- ("pmap_copy: source page table page is unused"));
-
- if (pdnxt > end_addr)
- pdnxt = end_addr;
-
- src_pte = vtopte(addr);
- while (addr < pdnxt) {
- pt_entry_t ptetemp;
- ptetemp = *src_pte;
- /*
- * we only virtual copy managed pages
- */
- if ((ptetemp & PG_MANAGED) != 0) {
- dstmpte = pmap_allocpte(dst_pmap, addr,
- PMAP_ENTER_NOSLEEP);
- if (dstmpte == NULL)
- goto out;
- dst_pte = pmap_pte_quick(dst_pmap, addr);
- if (*dst_pte == 0 &&
- pmap_try_insert_pv_entry(dst_pmap, addr,
- PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) {
- /*
- * Clear the wired, modified, and
- * accessed (referenced) bits
- * during the copy.
- */
- KASSERT(ptetemp != 0, ("src_pte not set"));
- PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */);
- KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)),
- ("no pmap copy expected: 0x%jx saw: 0x%jx",
- ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte));
- dst_pmap->pm_stats.resident_count++;
- } else {
- free = NULL;
- if (pmap_unwire_ptp(dst_pmap, dstmpte,
- &free)) {
- pmap_invalidate_page(dst_pmap,
- addr);
- pmap_free_zero_pages(free);
- }
- goto out;
- }
- if (dstmpte->wire_count >= srcmpte->wire_count)
- break;
- }
- addr += PAGE_SIZE;
- src_pte++;
- }
- }
-out:
- PT_UPDATES_FLUSH();
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(src_pmap);
- PMAP_UNLOCK(dst_pmap);
-
-#ifdef HAMFISTED_LOCKING
- mtx_unlock(&createdelete_lock);
-#endif
-}
-
-static __inline void
-pagezero(void *page)
-{
-#if defined(I686_CPU)
- if (cpu_class == CPUCLASS_686) {
-#if defined(CPU_ENABLE_SSE)
- if (cpu_feature & CPUID_SSE2)
- sse2_pagezero(page);
- else
-#endif
- i686_pagezero(page);
- } else
-#endif
- bzero(page, PAGE_SIZE);
-}
-
-/*
- * pmap_zero_page zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- */
-void
-pmap_zero_page(vm_page_t m)
-{
- struct sysmaps *sysmaps;
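- /*
- * CADDR2 is a per-CPU transient kernel mapping window; the
- * sysmaps lock serializes its use on this CPU.
- */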
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
- pagezero(sysmaps->CADDR2);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
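
pmap_zero_page() above and the zero/copy helpers that follow all share one
per-CPU transient-mapping discipline: take the CPU's sysmaps lock, pin the
thread, install a machine-address mapping with PT_SET_MA(), do the work
through the window, and tear the mapping down. A minimal sketch of that
pattern, with a hypothetical callback parameter (the other names come from
the surrounding code):

    static void
    with_transient_mapping(vm_page_t m, void (*cb)(void *))
    {
            struct sysmaps *sysmaps;

            sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
            mtx_lock(&sysmaps->lock);
            if (*sysmaps->CMAP2)
                    panic("transient mapping busy");
            sched_pin();            /* stay on this CPU's window */
            PT_SET_MA(sysmaps->CADDR2,
                PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
            cb(sysmaps->CADDR2);    /* operate on the page via the window */
            PT_SET_MA(sysmaps->CADDR2, 0);
            sched_unpin();
            mtx_unlock(&sysmaps->lock);
    }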
-
-/*
- * pmap_zero_page_area zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents.
- *
- * off and size may not cover an area beyond a single hardware page.
- */
-void
-pmap_zero_page_area(vm_page_t m, int off, int size)
-{
- struct sysmaps *sysmaps;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_zero_page_area: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
-
- if (off == 0 && size == PAGE_SIZE)
- pagezero(sysmaps->CADDR2);
- else
- bzero((char *)sysmaps->CADDR2 + off, size);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-/*
- * pmap_zero_page_idle zeros the specified hardware page by mapping
- * the page into KVM and using bzero to clear its contents. This
- * is intended to be called from the vm_pagezero process only and
- * outside of Giant.
- */
-void
-pmap_zero_page_idle(vm_page_t m)
-{
-
- if (*CMAP3)
- panic("pmap_zero_page_idle: CMAP3 busy");
- sched_pin();
- PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
- pagezero(CADDR3);
- PT_SET_MA(CADDR3, 0);
- sched_unpin();
-}
-
-/*
- * pmap_copy_page copies the specified (machine-independent) page by
- * mapping the page into virtual memory and using bcopy to copy the
- * page, one machine-dependent page at a time.
- */
-void
-pmap_copy_page(vm_page_t src, vm_page_t dst)
-{
- struct sysmaps *sysmaps;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP1)
- panic("pmap_copy_page: CMAP1 busy");
- if (*sysmaps->CMAP2)
- panic("pmap_copy_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A);
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M);
- bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
-
-int unmapped_buf_allowed = 1;
-
-void
-pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
- vm_offset_t b_offset, int xfersize)
-{
- struct sysmaps *sysmaps;
- vm_page_t a_pg, b_pg;
- char *a_cp, *b_cp;
- vm_offset_t a_pg_offset, b_pg_offset;
- int cnt;
-
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP1 != 0)
- panic("pmap_copy_pages: CMAP1 busy");
- if (*sysmaps->CMAP2 != 0)
- panic("pmap_copy_pages: CMAP2 busy");
- sched_pin();
- while (xfersize > 0) {
- a_pg = ma[a_offset >> PAGE_SHIFT];
- a_pg_offset = a_offset & PAGE_MASK;
- cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
- b_pg = mb[b_offset >> PAGE_SHIFT];
- b_pg_offset = b_offset & PAGE_MASK;
- cnt = min(cnt, PAGE_SIZE - b_pg_offset);
- PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(a_pg) | PG_A);
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
- VM_PAGE_TO_MACH(b_pg) | PG_A | PG_M);
- a_cp = sysmaps->CADDR1 + a_pg_offset;
- b_cp = sysmaps->CADDR2 + b_pg_offset;
- bcopy(a_cp, b_cp, cnt);
- a_offset += cnt;
- b_offset += cnt;
- xfersize -= cnt;
- }
- PT_SET_MA(sysmaps->CADDR1, 0);
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
-}
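
The per-iteration count above is the transfer size clamped by the bytes left
in both the source and the destination page, so a single bcopy() never
crosses a page boundary through either mapping window. A standalone
illustration of that arithmetic (the offsets are made-up values):

    #include <stdio.h>

    #define PAGE_SIZE   4096
    #define PAGE_MASK   (PAGE_SIZE - 1)
    #define MIN(a, b)   ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
            unsigned a_off = 0x0ff0, b_off = 0x0010, xfersize = 0x40, cnt;

            cnt = MIN(xfersize, PAGE_SIZE - (a_off & PAGE_MASK));
            cnt = MIN(cnt, PAGE_SIZE - (b_off & PAGE_MASK));
            /* Only 16 bytes remain in the source page, so cnt is 16. */
            printf("cnt = %u\n", cnt);
            return (0);
    }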
-
-/*
- * Returns true if the pmap's pv is one of the first
- * 16 pvs linked to from this page. This count may
- * be changed upwards or downwards in the future; it
- * is only necessary that true be returned for a small
- * subset of pmaps for proper page aging.
- */
-boolean_t
-pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
-{
- pv_entry_t pv;
- int loops = 0;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_page_exists_quick: page %p is not managed", m));
- rv = FALSE;
- rw_wlock(&pvh_global_lock);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- if (PV_PMAP(pv) == pmap) {
- rv = TRUE;
- break;
- }
- loops++;
- if (loops >= 16)
- break;
- }
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-/*
- * pmap_page_wired_mappings:
- *
- * Return the number of managed mappings to the given physical page
- * that are wired.
- */
-int
-pmap_page_wired_mappings(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- int count;
-
- count = 0;
- if ((m->oflags & VPO_UNMANAGED) != 0)
- return (count);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_W) != 0)
- count++;
- PMAP_UNLOCK(pmap);
- }
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (count);
-}
-
-/*
- * Returns TRUE if the given page is mapped. Otherwise, returns FALSE.
- */
-boolean_t
-pmap_page_is_mapped(vm_page_t m)
-{
-
- if ((m->oflags & VPO_UNMANAGED) != 0)
- return (FALSE);
- return (!TAILQ_EMPTY(&m->md.pv_list));
-}
-
-/*
- * Remove all pages from the specified address space; this aids process
- * exit speeds.  This code is special-cased for the current process only,
- * but can have the more generic (and slightly slower) mode enabled.  It
- * is much faster than pmap_remove when running down an entire address
- * space.
- */
-void
-pmap_remove_pages(pmap_t pmap)
-{
- pt_entry_t *pte, tpte;
- vm_page_t m, free = NULL;
- pv_entry_t pv;
- struct pv_chunk *pc, *npc;
- int field, idx;
- int32_t bit;
- uint32_t inuse, bitmask;
- int allfree;
-
- CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);
-
- if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
- printf("warning: pmap_remove_pages called with non-current pmap\n");
- return;
- }
- rw_wlock(&pvh_global_lock);
- KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
- PMAP_LOCK(pmap);
- sched_pin();
- TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
- KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap,
- pc->pc_pmap));
- allfree = 1;
- for (field = 0; field < _NPCM; field++) {
- inuse = ~pc->pc_map[field] & pc_freemask[field];
- while (inuse != 0) {
- bit = bsfl(inuse);
- bitmask = 1UL << bit;
- idx = field * 32 + bit;
- pv = &pc->pc_pventry[idx];
- inuse &= ~bitmask;
-
- pte = vtopte(pv->pv_va);
- tpte = *pte ? xpmap_mtop(*pte) : 0;
-
- if (tpte == 0) {
- printf(
- "TPTE at %p IS ZERO @ VA %08x\n",
- pte, pv->pv_va);
- panic("bad pte");
- }
-
- /*
- * We cannot remove wired pages from a process' mapping at this time.
- */
- if (tpte & PG_W) {
- allfree = 0;
- continue;
- }
-
- m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
- KASSERT(m->phys_addr == (tpte & PG_FRAME),
- ("vm_page_t %p phys_addr mismatch %016jx %016jx",
- m, (uintmax_t)m->phys_addr,
- (uintmax_t)tpte));
-
- KASSERT(m < &vm_page_array[vm_page_array_size],
- ("pmap_remove_pages: bad tpte %#jx",
- (uintmax_t)tpte));
-
-
- PT_CLEAR_VA(pte, FALSE);
-
- /*
- * Update the vm_page_t clean/reference bits.
- */
- if (tpte & PG_M)
- vm_page_dirty(m);
-
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
-
- pmap_unuse_pt(pmap, pv->pv_va, &free);
-
- /* Mark free */
- PV_STAT(pv_entry_frees++);
- PV_STAT(pv_entry_spare++);
- pv_entry_count--;
- pc->pc_map[field] |= bitmask;
- pmap->pm_stats.resident_count--;
- }
- }
- PT_UPDATES_FLUSH();
- if (allfree) {
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- free_pv_chunk(pc);
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
-
- sched_unpin();
- pmap_invalidate_all(pmap);
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
- pmap_free_zero_pages(free);
-}
-
-/*
- * pmap_is_modified:
- *
- * Return whether or not the specified physical page was modified
- * in any physical maps.
- */
-boolean_t
-pmap_is_modified(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_is_modified: page %p is not managed", m));
- rv = FALSE;
-
- /*
- * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
- * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
- * is clear, no PTEs can have PG_M set.
- */
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
- return (rv);
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- rv = (*pte & PG_M) != 0;
- PMAP_UNLOCK(pmap);
- if (rv)
- break;
- }
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-/*
- * pmap_is_prefaultable:
- *
- * Return whether or not the specified virtual address is eligible
- * for prefault.
- */
-static boolean_t
-pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr)
-{
- pt_entry_t *pte;
- boolean_t rv = FALSE;
-
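- /*
- * The early return below disables prefaulting entirely; the code after
- * it is unreachable.
- */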
- return (rv);
-
- if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) {
- pte = vtopte(addr);
- rv = (*pte == 0);
- }
- return (rv);
-}
-
-boolean_t
-pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
-{
- boolean_t rv;
-
- PMAP_LOCK(pmap);
- rv = pmap_is_prefaultable_locked(pmap, addr);
- PMAP_UNLOCK(pmap);
- return (rv);
-}
-
-boolean_t
-pmap_is_referenced(vm_page_t m)
-{
- pv_entry_t pv;
- pt_entry_t *pte;
- pmap_t pmap;
- boolean_t rv;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_is_referenced: page %p is not managed", m));
- rv = FALSE;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
- PMAP_UNLOCK(pmap);
- if (rv)
- break;
- }
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rv);
-}
-
-void
-pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
-{
- int i, npages = round_page(len) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- pt_entry_t *pte;
- pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
- rw_wlock(&pvh_global_lock);
- pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
- rw_wunlock(&pvh_global_lock);
- PMAP_MARK_PRIV(xpmap_mtop(*pte));
- pmap_pte_release(pte);
- }
-}
-
-void
-pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
-{
- int i, npages = round_page(len) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- pt_entry_t *pte;
- pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
- PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
- rw_wlock(&pvh_global_lock);
- pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
- rw_wunlock(&pvh_global_lock);
- pmap_pte_release(pte);
- }
-}
-
-/*
- * Clear the write and modified bits in each of the given page's mappings.
- */
-void
-pmap_remove_write(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t oldpte, *pte;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_remove_write: page %p is not managed", m));
-
- /*
- * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
- * set by another thread while the object is locked. Thus,
- * if PGA_WRITEABLE is clear, no page table entries need updating.
- */
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
- return;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
-retry:
- oldpte = *pte;
- if ((oldpte & PG_RW) != 0) {
- vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M);
-
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_RW and PG_M are among the least
- * significant 32 bits.
- */
- PT_SET_VA_MA(pte, newpte, TRUE);
- if (*pte != newpte)
- goto retry;
-
- if ((oldpte & PG_M) != 0)
- vm_page_dirty(m);
- pmap_invalidate_page(pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pmap);
- }
- vm_page_aflag_clear(m, PGA_WRITEABLE);
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
-}
-
-/*
- * pmap_ts_referenced:
- *
- * Return a count of reference bits for a page, clearing those bits.
- * It is not necessary for every reference bit to be cleared, but it
- * is necessary that 0 only be returned when there are truly no
- * reference bits set.
- *
- * XXX: The exact number of bits to check and clear is a matter that
- * should be tested and standardized at some point in the future for
- * optimal aging of shared pages.
- */
-int
-pmap_ts_referenced(vm_page_t m)
-{
- pv_entry_t pv, pvf, pvn;
- pmap_t pmap;
- pt_entry_t *pte;
- int rtval = 0;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_ts_referenced: page %p is not managed", m));
- rw_wlock(&pvh_global_lock);
- sched_pin();
- if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
- pvf = pv;
- do {
- pvn = TAILQ_NEXT(pv, pv_next);
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_A) != 0) {
- PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
- pmap_invalidate_page(pmap, pv->pv_va);
- rtval++;
- if (rtval > 4)
- pvn = NULL;
- }
- PMAP_UNLOCK(pmap);
- } while ((pv = pvn) != NULL && pv != pvf);
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- return (rtval);
-}
-
-/*
- * Apply the given advice to the specified range of addresses within the
- * given pmap. Depending on the advice, clear the referenced and/or
- * modified flags in each mapping and set the mapped page's dirty field.
- */
-void
-pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
-{
- pd_entry_t oldpde;
- pt_entry_t *pte;
- vm_offset_t pdnxt;
- vm_page_t m;
- boolean_t anychanged;
-
- if (advice != MADV_DONTNEED && advice != MADV_FREE)
- return;
- anychanged = FALSE;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = pdnxt) {
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pdnxt < sva)
- pdnxt = eva;
- oldpde = pmap->pm_pdir[sva >> PDRSHIFT];
- if ((oldpde & (PG_PS | PG_V)) != PG_V)
- continue;
- if (pdnxt > eva)
- pdnxt = eva;
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
- PG_V))
- continue;
- else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- if (advice == MADV_DONTNEED) {
- /*
- * Future calls to pmap_is_modified()
- * can be avoided by making the page
- * dirty now.
- */
- m = PHYS_TO_VM_PAGE(xpmap_mtop(*pte) &
- PG_FRAME);
- vm_page_dirty(m);
- }
- PT_SET_VA_MA(pte, *pte & ~(PG_M | PG_A), TRUE);
- } else if ((*pte & PG_A) != 0)
- PT_SET_VA_MA(pte, *pte & ~PG_A, TRUE);
- else
- continue;
- if ((*pte & PG_G) != 0)
- pmap_invalidate_page(pmap, sva);
- else
- anychanged = TRUE;
- }
- }
- PT_UPDATES_FLUSH();
- if (*PMAP1)
- PT_SET_VA_MA(PMAP1, 0, TRUE);
- if (anychanged)
- pmap_invalidate_all(pmap);
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(pmap);
-}
-
-/*
- * Clear the modify bits on the specified physical page.
- */
-void
-pmap_clear_modify(vm_page_t m)
-{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t *pte;
-
- KASSERT((m->oflags & VPO_UNMANAGED) == 0,
- ("pmap_clear_modify: page %p is not managed", m));
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT(!vm_page_xbusied(m),
- ("pmap_clear_modify: page %p is exclusive busied", m));
-
- /*
- * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
- * If the object containing the page is locked and the page is not
- * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
- */
- if ((m->aflags & PGA_WRITEABLE) == 0)
- return;
- rw_wlock(&pvh_global_lock);
- sched_pin();
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
- pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
- /*
- * Regardless of whether a pte is 32 or 64 bits
- * in size, PG_M is among the least significant
- * 32 bits.
- */
- PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE);
- pmap_invalidate_page(pmap, pv->pv_va);
- }
- PMAP_UNLOCK(pmap);
- }
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
-}
-
-/*
- * Miscellaneous support routines follow
- */
-
-/*
- * Map a set of physical memory pages into the kernel virtual
- * address space. Return a pointer to where it is mapped. This
- * routine is intended to be used for mapping device memory,
- * NOT real memory.
- */
-void *
-pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
-{
- vm_offset_t va, offset;
- vm_size_t tmpsize;
-
- offset = pa & PAGE_MASK;
- size = round_page(offset + size);
- pa = pa & PG_FRAME;
-
- if (pa < KERNLOAD && pa + size <= KERNLOAD)
- va = KERNBASE + pa;
- else
- va = kva_alloc(size);
- if (!va)
- panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
-
- for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
- pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
- pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
- pmap_invalidate_cache_range(va, va + size, FALSE);
- return ((void *)(va + offset));
-}
-
-void *
-pmap_mapdev(vm_paddr_t pa, vm_size_t size)
-{
-
- return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
-}
-
-void *
-pmap_mapbios(vm_paddr_t pa, vm_size_t size)
-{
-
- return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
-}
-
-void
-pmap_unmapdev(vm_offset_t va, vm_size_t size)
-{
- vm_offset_t base, offset;
-
- if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
- return;
- base = trunc_page(va);
- offset = va & PAGE_MASK;
- size = round_page(offset + size);
- kva_free(base, size);
-}
-
-/*
- * Sets the memory attribute for the specified page.
- */
-void
-pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
-{
-
- m->md.pat_mode = ma;
- if ((m->flags & PG_FICTITIOUS) != 0)
- return;
-
- /*
- * If "m" is a normal page, flush it from the cache.
- * See pmap_invalidate_cache_range().
- *
- * First, try to find an existing mapping of the page by sf
- * buffer. sf_buf_invalidate_cache() modifies mapping and
- * flushes the cache.
- */
- if (sf_buf_invalidate_cache(m))
- return;
-
- /*
- * If the page is not mapped by an sf buffer and the CPU does not
- * support self-snoop, map the page transiently and do the
- * invalidation.  In the worst case, the whole cache is flushed by
- * pmap_invalidate_cache_range().
- */
- if ((cpu_feature & CPUID_SS) == 0)
- pmap_flush_page(m);
-}
-
-static void
-pmap_flush_page(vm_page_t m)
-{
- struct sysmaps *sysmaps;
- vm_offset_t sva, eva;
-
- if ((cpu_feature & CPUID_CLFSH) != 0) {
- sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
- mtx_lock(&sysmaps->lock);
- if (*sysmaps->CMAP2)
- panic("pmap_flush_page: CMAP2 busy");
- sched_pin();
- PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
- VM_PAGE_TO_MACH(m) | PG_A | PG_M |
- pmap_cache_bits(m->md.pat_mode, 0));
- invlcaddr(sysmaps->CADDR2);
- sva = (vm_offset_t)sysmaps->CADDR2;
- eva = sva + PAGE_SIZE;
-
- /*
- * Use mfence despite the ordering implied by
- * mtx_{un,}lock() because clflush is not guaranteed
- * to be ordered by any other instruction.
- */
- mfence();
- for (; sva < eva; sva += cpu_clflush_line_size)
- clflush(sva);
- mfence();
- PT_SET_MA(sysmaps->CADDR2, 0);
- sched_unpin();
- mtx_unlock(&sysmaps->lock);
- } else
- pmap_invalidate_cache();
-}
-
-/*
- * Changes the specified virtual address range's memory type to that given by
- * the parameter "mode".  The specified virtual address range must be
- * completely contained within the kernel map.
- *
- * Returns zero if the change completed successfully, and either EINVAL or
- * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
- * of the virtual address range was not mapped, and ENOMEM is returned if
- * there was insufficient memory available to complete the change.
- */
-int
-pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
-{
- vm_offset_t base, offset, tmpva;
- pt_entry_t *pte;
- u_int opte, npte;
- pd_entry_t *pde;
- boolean_t changed;
-
- base = trunc_page(va);
- offset = va & PAGE_MASK;
- size = round_page(offset + size);
-
- /* Only supported on kernel virtual addresses. */
- if (base <= VM_MAXUSER_ADDRESS)
- return (EINVAL);
-
- /* 4MB pages and pages that aren't mapped aren't supported. */
- for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
- pde = pmap_pde(kernel_pmap, tmpva);
- if (*pde & PG_PS)
- return (EINVAL);
- if ((*pde & PG_V) == 0)
- return (EINVAL);
- pte = vtopte(tmpva);
- if ((*pte & PG_V) == 0)
- return (EINVAL);
- }
-
- changed = FALSE;
-
- /*
- * Ok, all the pages exist and are 4k, so run through them updating
- * their cache mode.
- */
- for (tmpva = base; size > 0; ) {
- pte = vtopte(tmpva);
-
- /*
- * The cache mode bits are all in the low 32-bits of the
- * PTE, so we can just spin on updating the low 32-bits.
- */
- do {
- opte = *(u_int *)pte;
- npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
- npte |= pmap_cache_bits(mode, 0);
- PT_SET_VA_MA(pte, npte, TRUE);
- } while (npte != opte && (*pte != npte));
- if (npte != opte)
- changed = TRUE;
- tmpva += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
-
- /*
- * Flush CPU caches to make sure any data isn't cached that
- * shouldn't be, etc.
- */
- if (changed) {
- pmap_invalidate_range(kernel_pmap, base, tmpva);
- pmap_invalidate_cache_range(base, tmpva, FALSE);
- }
- return (0);
-}
-
-/*
- * perform the pmap work for mincore
- */
-int
-pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
-{
- pt_entry_t *ptep, pte;
- vm_paddr_t pa;
- int val;
-
- PMAP_LOCK(pmap);
-retry:
- ptep = pmap_pte(pmap, addr);
- pte = (ptep != NULL) ? PT_GET(ptep) : 0;
- pmap_pte_release(ptep);
- val = 0;
- if ((pte & PG_V) != 0) {
- val |= MINCORE_INCORE;
- if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
- val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
- if ((pte & PG_A) != 0)
- val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
- }
- if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
- (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
- (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
- pa = pte & PG_FRAME;
- /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
- if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
- goto retry;
- } else
- PA_UNLOCK_COND(*locked_pa);
- PMAP_UNLOCK(pmap);
- return (val);
-}
-
-void
-pmap_activate(struct thread *td)
-{
- pmap_t pmap, oldpmap;
- u_int cpuid;
- u_int32_t cr3;
-
- critical_enter();
- pmap = vmspace_pmap(td->td_proc->p_vmspace);
- oldpmap = PCPU_GET(curpmap);
- cpuid = PCPU_GET(cpuid);
-#if defined(SMP)
- CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
- CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
-#else
- CPU_CLR(cpuid, &oldpmap->pm_active);
- CPU_SET(cpuid, &pmap->pm_active);
-#endif
-#ifdef PAE
- cr3 = vtophys(pmap->pm_pdpt);
-#else
- cr3 = vtophys(pmap->pm_pdir);
-#endif
- /*
- * pmap_activate is for the current thread on the current cpu
- */
- td->td_pcb->pcb_cr3 = cr3;
- PT_UPDATES_FLUSH();
- load_cr3(cr3);
- PCPU_SET(curpmap, pmap);
- critical_exit();
-}
-
-void
-pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
-{
-}
-
-/*
- * Increase the starting virtual address of the given mapping if a
- * different alignment might result in more superpage mappings.
- */
-void
-pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
- vm_offset_t *addr, vm_size_t size)
-{
- vm_offset_t superpage_offset;
-
- if (size < NBPDR)
- return;
- if (object != NULL && (object->flags & OBJ_COLORED) != 0)
- offset += ptoa(object->pg_color);
- superpage_offset = offset & PDRMASK;
- if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
- (*addr & PDRMASK) == superpage_offset)
- return;
- if ((*addr & PDRMASK) < superpage_offset)
- *addr = (*addr & ~PDRMASK) + superpage_offset;
- else
- *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
-}
-
-void
-pmap_suspend()
-{
- pmap_t pmap;
- int i, pdir, offset;
- vm_paddr_t pdirma;
- mmu_update_t mu[4];
-
- /*
- * We need to remove the recursive mapping structure from all
- * our pmaps so that Xen doesn't get confused when it restores
- * the page tables. The recursive map lives at page directory
- * index PTDPTDI. We assume that the suspend code has stopped
- * the other vcpus (if any).
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- for (i = 0; i < 4; i++) {
- /*
- * Figure out which page directory (L2) page
- * contains this bit of the recursive map and
- * the offset within that page of the map
- * entry
- */
- pdir = (PTDPTDI + i) / NPDEPG;
- offset = (PTDPTDI + i) % NPDEPG;
- pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
- mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
- mu[i].val = 0;
- }
- HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
- }
-}
-
-void
-pmap_resume()
-{
- pmap_t pmap;
- int i, pdir, offset;
- vm_paddr_t pdirma;
- mmu_update_t mu[4];
-
- /*
- * Restore the recursive map that we removed on suspend.
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- for (i = 0; i < 4; i++) {
- /*
- * Figure out which page directory (L2) page
- * contains this bit of the recursive map and
- * the offset within that page of the map
- * entry
- */
- pdir = (PTDPTDI + i) / NPDEPG;
- offset = (PTDPTDI + i) % NPDEPG;
- pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
- mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
- mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
- }
- HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
- }
-}
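
To make the recursive-map index arithmetic in pmap_suspend() and
pmap_resume() concrete, here is a standalone sketch; the NPDEPG and PTDPTDI
values are illustrative assumptions, not taken from this file:

    #include <stdio.h>

    #define NPDEPG  512     /* assumed: PDEs per page directory page */
    #define PTDPTDI 1020    /* assumed: first recursive-map PDE index */

    int
    main(void)
    {
            int i;

            for (i = 0; i < 4; i++)
                    printf("slot %d: pdir %d, offset %d\n",
                        i, (PTDPTDI + i) / NPDEPG, (PTDPTDI + i) % NPDEPG);
            /* For these values: pdir 1, offsets 508 through 511. */
            return (0);
    }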
-
-#if defined(PMAP_DEBUG)
-int
-pmap_pid_dump(int pid)
-{
- pmap_t pmap;
- struct proc *p;
- int npte = 0;
- int index;
-
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- if (p->p_pid != pid)
- continue;
-
- if (p->p_vmspace) {
- int i,j;
- index = 0;
- pmap = vmspace_pmap(p->p_vmspace);
- for (i = 0; i < NPDEPTD; i++) {
- pd_entry_t *pde;
- pt_entry_t *pte;
- vm_offset_t base = i << PDRSHIFT;
-
- pde = &pmap->pm_pdir[i];
- if (pde && pmap_pde_v(pde)) {
- for (j = 0; j < NPTEPG; j++) {
- vm_offset_t va = base + (j << PAGE_SHIFT);
- if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
- if (index) {
- index = 0;
- printf("\n");
- }
- sx_sunlock(&allproc_lock);
- return (npte);
- }
- pte = pmap_pte(pmap, va);
- if (pte && pmap_pte_v(pte)) {
- pt_entry_t pa;
- vm_page_t m;
- pa = PT_GET(pte);
- m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
- printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
- va, pa, m->hold_count, m->wire_count, m->flags);
- npte++;
- index++;
- if (index >= 2) {
- index = 0;
- printf("\n");
- } else {
- printf(" ");
- }
- }
- }
- }
- }
- }
- }
- sx_sunlock(&allproc_lock);
- return (npte);
-}
-#endif
-
-#if defined(DEBUG)
-
-static void pads(pmap_t pm);
-void pmap_pvdump(vm_paddr_t pa);
-
-/* Print the address space of a pmap. */
-static void
-pads(pmap_t pm)
-{
- int i, j;
- vm_paddr_t va;
- pt_entry_t *ptep;
-
- if (pm == kernel_pmap)
- return;
- for (i = 0; i < NPDEPTD; i++)
- if (pm->pm_pdir[i])
- for (j = 0; j < NPTEPG; j++) {
- va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
- if (pm == kernel_pmap && va < KERNBASE)
- continue;
- if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
- continue;
- ptep = pmap_pte(pm, va);
- if (pmap_pte_v(ptep))
- printf("%x:%x ", va, *ptep);
- }
-
-}
-
-void
-pmap_pvdump(vm_paddr_t pa)
-{
- pv_entry_t pv;
- pmap_t pmap;
- vm_page_t m;
-
- printf("pa %x", pa);
- m = PHYS_TO_VM_PAGE(pa);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
- pmap = PV_PMAP(pv);
- printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
- pads(pmap);
- }
- printf(" ");
-}
-#endif
diff --git a/sys/i386/xen/xen_machdep.c b/sys/i386/xen/xen_machdep.c
deleted file mode 100644
index dbaa7ad..0000000
--- a/sys/i386/xen/xen_machdep.c
+++ /dev/null
@@ -1,1236 +0,0 @@
-/*
- *
- * Copyright (c) 2004 Christian Limpach.
- * Copyright (c) 2004-2006,2008 Kip Macy
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christian Limpach.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/bus.h>
-#include <sys/ktr.h>
-#include <sys/lock.h>
-#include <sys/mount.h>
-#include <sys/malloc.h>
-#include <sys/mutex.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/reboot.h>
-#include <sys/rwlock.h>
-#include <sys/sysproto.h>
-#include <sys/boot.h>
-
-#include <xen/xen-os.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/segments.h>
-#include <machine/pcb.h>
-#include <machine/stdarg.h>
-#include <machine/vmparam.h>
-#include <machine/cpu.h>
-#include <machine/intr_machdep.h>
-#include <machine/md_var.h>
-#include <machine/asmacros.h>
-
-#include <xen/hypervisor.h>
-#include <xen/xenstore/xenstorevar.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <machine/xen/xenpmap.h>
-#include <xen/interface/memory.h>
-#include <machine/xen/features.h>
-#ifdef SMP
-#include <machine/privatespace.h>
-#endif
-
-
-#include <vm/vm_page.h>
-
-
-#define IDTVEC(name) __CONCAT(X,name)
-
-extern inthand_t
-IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
- IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
- IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
- IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
-
-
-int xendebug_flags;
-start_info_t *xen_start_info;
-start_info_t *HYPERVISOR_start_info;
-shared_info_t *HYPERVISOR_shared_info;
-xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
-xen_pfn_t *xen_phys_machine;
-xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
-xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
-int preemptable, init_first;
-extern unsigned int avail_space;
-int xen_vector_callback_enabled = 0;
-enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;
-
-void ni_cli(void);
-void ni_sti(void);
-
-
-void
-ni_cli(void)
-{
- CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
- __asm__("pushl %edx;"
- "pushl %eax;"
- );
- __cli();
- __asm__("popl %eax;"
- "popl %edx;"
- );
-}
-
-
-void
-ni_sti(void)
-{
- __asm__("pushl %edx;"
- "pushl %esi;"
- "pushl %eax;"
- );
- __sti();
- __asm__("popl %eax;"
- "popl %esi;"
- "popl %edx;"
- );
-}
-
-void
-force_evtchn_callback(void)
-{
- (void)HYPERVISOR_xen_version(0, NULL);
-}
-
-/*
- * Modify the cmd_line by converting each ',' to a NUL so that it is in a
- * format suitable for the static environment variables.
- */
-char *
-xen_setbootenv(char *cmd_line)
-{
- char *cmd_line_next;
-
- /* Skip leading spaces */
- for (; *cmd_line == ' '; cmd_line++);
-
- xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
-
- for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
- return cmd_line;
-}
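
A userland sketch of what that strsep() pass does: each ',' becomes a NUL,
leaving the back-to-back strings the static environment expects (the sample
command line is invented):

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
            char cmd[] = "boot_verbose=1,kern.hz=100,vfs.root.mountfrom=ufs:ad0";
            char *next, *p;

            for (next = cmd; strsep(&next, ",") != NULL; )
                    ;
            /* Walk the resulting NUL-separated entries. */
            for (p = cmd; p < cmd + sizeof(cmd) - 1; p += strlen(p) + 1)
                    printf("env: %s\n", p);
            return (0);
    }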
-
-int
-xen_boothowto(char *envp)
-{
- int i, howto = 0;
-
- /* get equivalents from the environment */
- for (i = 0; howto_names[i].ev != NULL; i++)
- if (kern_getenv(howto_names[i].ev) != NULL)
- howto |= howto_names[i].mask;
- return howto;
-}
-
-
-#define XPQUEUE_SIZE 128
-
-struct mmu_log {
- char *file;
- int line;
-};
-
-#ifdef SMP
-/* per-cpu queues and indices */
-#ifdef INVARIANTS
-static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
-#endif
-
-static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
-static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
-
-#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
-#define XPQ_QUEUE xpq_queue[vcpu]
-#define XPQ_IDX xpq_idx[vcpu]
-#define SET_VCPU() int vcpu = smp_processor_id()
-#else
-
-static mmu_update_t xpq_queue[XPQUEUE_SIZE];
-#ifdef INVARIANTS
-static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
-#endif
-static int xpq_idx = 0;
-
-#define XPQ_QUEUE_LOG xpq_queue_log
-#define XPQ_QUEUE xpq_queue
-#define XPQ_IDX xpq_idx
-#define SET_VCPU()
-#endif /* !SMP */
-
-#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
-
-#if 0
-static void
-xen_dump_queue(void)
-{
- int _xpq_idx = XPQ_IDX;
- int i;
-
- if (_xpq_idx <= 1)
- return;
-
- xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx);
- for (i = 0; i < _xpq_idx; i++) {
- xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val,
- XPQ_QUEUE[i].ptr);
- }
-}
-#endif
-
-
-static __inline void
-_xen_flush_queue(void)
-{
- SET_VCPU();
- int _xpq_idx = XPQ_IDX;
- int error, i;
-
-#ifdef INVARIANTS
- if (__predict_true(gdtset))
- CRITICAL_ASSERT(curthread);
-#endif
-
- XPQ_IDX = 0;
- /* Make sure index is cleared first to avoid double updates. */
- error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
- _xpq_idx, NULL, DOMID_SELF);
-
-#if 0
- if (__predict_true(gdtset))
- for (i = _xpq_idx; i > 0;) {
- if (i >= 3) {
- CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
- "ptr: %lx val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff),
- (XPQ_QUEUE[i-2].val & 0xffffffff),
- (XPQ_QUEUE[i-2].ptr & 0xffffffff),
- (XPQ_QUEUE[i-3].val & 0xffffffff),
- (XPQ_QUEUE[i-3].ptr & 0xffffffff));
- i -= 3;
- } else if (i == 2) {
- CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff),
- (XPQ_QUEUE[i-2].val & 0xffffffff),
- (XPQ_QUEUE[i-2].ptr & 0xffffffff));
- i = 0;
- } else {
- CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
- (XPQ_QUEUE[i-1].val & 0xffffffff),
- (XPQ_QUEUE[i-1].ptr & 0xffffffff));
- i = 0;
- }
- }
-#endif
- if (__predict_false(error < 0)) {
- for (i = 0; i < _xpq_idx; i++)
- printf("val: %llx ptr: %llx\n",
- XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
- panic("Failed to execute MMU updates: %d", error);
- }
-
-}
-
-void
-xen_flush_queue(void)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- critical_enter();
- if (XPQ_IDX != 0) _xen_flush_queue();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-static __inline void
-xen_increment_idx(void)
-{
- SET_VCPU();
-
- XPQ_IDX++;
- if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
- xen_flush_queue();
-}
-
-void
-xen_check_queue(void)
-{
-#ifdef INVARIANTS
- SET_VCPU();
-
- KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
-#endif
-}
-
-void
-xen_invlpg(vm_offset_t va)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_INVLPG_ALL;
- op.arg1.linear_addr = va & ~PAGE_MASK;
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_load_cr3(u_int val)
-{
- struct mmuext_op op;
-#ifdef INVARIANTS
- SET_VCPU();
-
- KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
-#endif
- op.cmd = MMUEXT_NEW_BASEPTR;
- op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-#ifdef KTR
-static __inline u_int
-rebp(void)
-{
- u_int data;
-
- __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
- return (data);
-}
-#endif
-
-u_int
-read_eflags(void)
-{
- vcpu_info_t *_vcpu;
- u_int eflags;
-
- eflags = _read_eflags();
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
- if (_vcpu->evtchn_upcall_mask)
- eflags &= ~PSL_I;
-
- return (eflags);
-}
-
-void
-write_eflags(u_int eflags)
-{
- u_int intr;
-
- CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
- intr = ((eflags & PSL_I) == 0);
- __restore_flags(intr);
- _write_eflags(eflags);
-}
-
-void
-xen_cli(void)
-{
- CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
- __cli();
-}
-
-void
-xen_sti(void)
-{
- CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
- __sti();
-}
-
-u_int
-xen_rcr2(void)
-{
-
- return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
-}
-
-void
-_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- critical_enter();
- XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- XPQ_QUEUE[XPQ_IDX].val = pfn;
-#ifdef INVARIANTS
- XPQ_QUEUE_LOG[XPQ_IDX].file = file;
- XPQ_QUEUE_LOG[XPQ_IDX].line = line;
-#endif
- xen_increment_idx();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-extern struct rwlock pvh_global_lock;
-
-void
-_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
-{
- SET_VCPU();
-
- if (__predict_true(gdtset))
- rw_assert(&pvh_global_lock, RA_WLOCKED);
-
- KASSERT((ptr & 7) == 0, ("misaligned update"));
-
- if (__predict_true(gdtset))
- critical_enter();
-
- XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
- XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
-#ifdef INVARIANTS
- XPQ_QUEUE_LOG[XPQ_IDX].file = file;
- XPQ_QUEUE_LOG[XPQ_IDX].line = line;
-#endif
- xen_increment_idx();
- if (__predict_true(gdtset))
- critical_exit();
-}
-
-void
-xen_pgdpt_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L3_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pgd_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L2_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pgd_unpin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_pin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_PIN_L1_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_pt_unpin(vm_paddr_t ma)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_UNPIN_TABLE;
- op.arg1.mfn = ma >> PAGE_SHIFT;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_set_ldt(vm_paddr_t ptr, unsigned long len)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_SET_LDT;
- op.arg1.linear_addr = ptr;
- op.arg2.nr_ents = len;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void xen_tlb_flush(void)
-{
- struct mmuext_op op;
- op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
- xen_flush_queue();
- PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
-}
-
-void
-xen_update_descriptor(union descriptor *table, union descriptor *entry)
-{
- vm_paddr_t pa;
- pt_entry_t *ptp;
-
- ptp = vtopte((vm_offset_t)table);
- pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
- if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
- panic("HYPERVISOR_update_descriptor failed\n");
-}
-
-
-#if 0
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-static void
-contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] |=
- ((1UL<<end_off)-1) & -(1UL<<start_off);
- } else {
- contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
- while ( ++curr_idx < end_idx )
- contiguous_bitmap[curr_idx] = ~0UL;
- contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
- }
-}
-
-static void
-contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
-{
- unsigned long start_off, end_off, curr_idx, end_idx;
-
- curr_idx = first_page / BITS_PER_LONG;
- start_off = first_page & (BITS_PER_LONG-1);
- end_idx = (first_page + nr_pages) / BITS_PER_LONG;
- end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
- if (curr_idx == end_idx) {
- contiguous_bitmap[curr_idx] &=
- -(1UL<<end_off) | ((1UL<<start_off)-1);
- } else {
- contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
- while ( ++curr_idx != end_idx )
- contiguous_bitmap[curr_idx] = 0;
- contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
- }
-}
-#endif
-
-/* Ensure multi-page extents are contiguous in machine memory. */
-int
-xen_create_contiguous_region(vm_page_t pages, int npages)
-{
- unsigned long mfn, i, flags;
- int order;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &mfn);
-
- balloon_lock(flags);
-
- /* can currently only handle power of two allocation */
- PANIC_IF(ffs(npages) != fls(npages));
-
- /* 0. determine order */
- order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
-
- /* 1. give away machine pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- mfn = PFNTOMFN(pfn);
- PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
- PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
- }
-
-
- /* 2. Get a new contiguous memory extent. */
- reservation.extent_order = order;
- /*
- * xenlinux hardcodes this because of aacraid; maybe set it to 0 if we
- * are not running with a broken driver.  XXXEN
- */
- reservation.address_bits = 31;
- if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
- goto fail;
-
- /* 3. Map the new extent in place of old pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- xen_machphys_update(mfn+i, pfn);
- PFNTOMFN(pfn) = mfn+i;
- }
-
- xen_tlb_flush();
-
-#if 0
- contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
-#endif
-
- balloon_unlock(flags);
-
- return 0;
-
- fail:
- reservation.extent_order = 0;
- reservation.address_bits = 0;
-
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation) != 1);
- xen_machphys_update(mfn, pfn);
- PFNTOMFN(pfn) = mfn;
- }
-
- xen_tlb_flush();
-
- balloon_unlock(flags);
-
- return ENOMEM;
-}
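
Once the power-of-two assertion has passed, the order computation above is
just log2(npages), because ffs() and fls() agree exactly when a single bit
is set. A quick standalone check:

    #include <stdio.h>
    #include <strings.h>

    int
    main(void)
    {
            int npages, order;

            for (npages = 1; npages <= 8; npages <<= 1) {
                    order = (ffs(npages) == fls(npages)) ?
                        fls(npages) - 1 : fls(npages);
                    printf("npages %d -> order %d (%d pages)\n",
                        npages, order, 1 << order);
            }
            return (0);
    }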
-
-void
-xen_destroy_contiguous_region(void *addr, int npages)
-{
- unsigned long mfn, i, flags, order, pfn0;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &mfn);
-
- pfn0 = vtophys(addr) >> PAGE_SHIFT;
-#if 0
- scrub_pages(vstart, 1 << order);
-#endif
- /* can currently only handle power of two allocation */
- PANIC_IF(ffs(npages) != fls(npages));
-
- /* 0. determine order */
- order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
-
- balloon_lock(flags);
-
-#if 0
- contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
-#endif
-
- /* 1. Zap current PTEs, giving away the underlying pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- uint64_t new_val = 0;
- pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
-
- PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
- PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
- PANIC_IF(HYPERVISOR_memory_op(
- XENMEM_decrease_reservation, &reservation) != 1);
- }
-
- /* 2. Map new pages in place of old pages. */
- for (i = 0; i < (1 << order); i++) {
- int pfn;
- uint64_t new_val;
- pfn = pfn0 + i;
- PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
-
- new_val = mfn << PAGE_SHIFT;
- PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
- new_val, PG_KERNEL));
- xen_machphys_update(mfn, pfn);
- PFNTOMFN(pfn) = mfn;
- }
-
- xen_tlb_flush();
-
- balloon_unlock(flags);
-}
-
-extern vm_offset_t proc0kstack;
-extern int vm86paddr, vm86phystk;
-char *bootmem_start, *bootmem_current, *bootmem_end;
-
-pteinfo_t *pteinfo_list;
-void initvalues(start_info_t *startinfo);
-
-void *
-bootmem_alloc(unsigned int size)
-{
- char *retptr;
-
- retptr = bootmem_current;
- PANIC_IF(retptr + size > bootmem_end);
- bootmem_current += size;
-
- return retptr;
-}
-
-void
-bootmem_free(void *ptr, unsigned int size)
-{
- char *tptr;
-
- tptr = ptr;
- PANIC_IF(tptr != bootmem_current - size ||
- bootmem_current - size < bootmem_start);
-
- bootmem_current -= size;
-}
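
bootmem_alloc() and bootmem_free() form a simple bump allocator: allocation
only advances bootmem_current, so frees are legal only in strict LIFO order,
which the PANIC_IF in bootmem_free() enforces. A usage sketch (the sizes are
arbitrary):

    char *a, *b;

    a = bootmem_alloc(PAGE_SIZE);   /* bootmem_current += PAGE_SIZE */
    b = bootmem_alloc(64);          /* bootmem_current += 64 */
    bootmem_free(b, 64);            /* OK: most recent allocation */
    bootmem_free(a, PAGE_SIZE);     /* OK: now the most recent */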
-
-#if 0
-static vm_paddr_t
-xpmap_mtop2(vm_paddr_t mpa)
-{
- return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
- ) | (mpa & ~PG_FRAME);
-}
-
-static pd_entry_t
-xpmap_get_bootpde(vm_paddr_t va)
-{
-
- return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
-}
-
-static pd_entry_t
-xpmap_get_vbootpde(vm_paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_bootpde(va);
- if ((pde & PG_V) == 0)
- return (pde & ~PG_FRAME);
- return (pde & ~PG_FRAME) |
- (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
-}
-
-static pt_entry_t *
-xpmap_get_bootptep(vm_paddr_t va)
-{
- pd_entry_t pde;
-
- pde = xpmap_get_vbootpde(va);
- if ((pde & PG_V) == 0)
- return (void *)-1;
-#define PT_MASK 0x003ff000 /* page table address bits */
- return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
-}
-
-static pt_entry_t
-xpmap_get_bootpte(vm_paddr_t va)
-{
-
- return xpmap_get_bootptep(va)[0];
-}
-#endif
-
-
-#ifdef ADD_ISA_HOLE
-static void
-shift_phys_machine(unsigned long *phys_machine, int nr_pages)
-{
-
- unsigned long *tmp_page, *current_page, *next_page;
- int i;
-
- tmp_page = bootmem_alloc(PAGE_SIZE);
- current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
- next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
- bcopy(phys_machine, tmp_page, PAGE_SIZE);
-
- while (current_page > phys_machine) {
- /* save next page */
- bcopy(next_page, tmp_page, PAGE_SIZE);
- /* shift down page */
- bcopy(current_page, next_page, PAGE_SIZE);
- /* finish swap */
- bcopy(tmp_page, current_page, PAGE_SIZE);
-
- current_page -= (PAGE_SIZE/sizeof(unsigned long));
- next_page -= (PAGE_SIZE/sizeof(unsigned long));
- }
- bootmem_free(tmp_page, PAGE_SIZE);
-
- for (i = 0; i < nr_pages; i++) {
- xen_machphys_update(phys_machine[i], i);
- }
- memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
-
-}
-#endif /* ADD_ISA_HOLE */
-
-/*
- * Build a directory of the pages that make up our Physical to Machine
- * mapping table. The Xen suspend/restore code uses this to find our
- * mapping table.
- */
-static void
-init_frame_list_list(void *arg)
-{
- unsigned long nr_pages = xen_start_info->nr_pages;
-#define FPP (PAGE_SIZE/sizeof(xen_pfn_t))
- int i, j, k;
-
- xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- for (i = 0, j = 0, k = -1; i < nr_pages;
- i += FPP, j++) {
- if ((j & (FPP - 1)) == 0) {
- k++;
- xen_pfn_to_mfn_frame_list[k] =
- malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- xen_pfn_to_mfn_frame_list_list[k] =
- VTOMFN(xen_pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- xen_pfn_to_mfn_frame_list[k][j] =
- VTOMFN(&xen_phys_machine[i]);
- }
-
- HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
- = VTOMFN(xen_pfn_to_mfn_frame_list_list);
-}
-SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
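
With 4 KiB pages and a 4-byte xen_pfn_t (plausible values for this 32-bit
port, stated here as assumptions), FPP is 1024, so one frame-list page
indexes 1024 P2M pages holding 1024 PFNs each, i.e. 4 GiB of guest memory
per list page, and the 16-entry frame-list array tops out at 64 GiB:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned long page_size = 4096, pfn_size = 4;
            unsigned long fpp = page_size / pfn_size;       /* 1024 */

            printf("PFNs per list page: %lu\n", fpp * fpp);
            printf("GiB per list page:  %lu\n", (fpp * fpp) >> (30 - 12));
            printf("GiB for 16 pages:   %lu\n",
                16 * ((fpp * fpp) >> (30 - 12)));
            return (0);
    }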
-
-extern unsigned long physfree;
-
-int pdir, curoffset;
-extern int nkpt;
-
-extern uint32_t kernbase;
-
-void
-initvalues(start_info_t *startinfo)
-{
- vm_offset_t cur_space, cur_space_pt;
- struct physdev_set_iopl set_iopl;
-
- int l3_pages, l2_pages, l1_pages, offset;
- vm_paddr_t console_page_ma, xen_store_ma;
- vm_offset_t tmpva;
- vm_paddr_t shinfo;
-#ifdef PAE
- vm_paddr_t IdlePDPTma, IdlePDPTnewma;
- vm_paddr_t IdlePTDnewma[4];
- pd_entry_t *IdlePDPTnew, *IdlePTDnew;
- vm_paddr_t IdlePTDma[4];
-#else
- vm_paddr_t IdlePTDma[1];
-#endif
- unsigned long i;
- int ncpus = MAXCPU;
-
- nkpt = min(
- min(
- max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
- NPGPTD*NPDEPG - KPTDI),
- (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
-
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
-#ifdef notyet
- /*
- * need to install handler
- */
- HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
-#endif
- xen_start_info = startinfo;
- HYPERVISOR_start_info = startinfo;
- xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
-
- IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
- l1_pages = 0;
-
-#ifdef PAE
- l3_pages = 1;
- l2_pages = 0;
- IdlePDPT = (pd_entry_t *)startinfo->pt_base;
- IdlePDPTma = VTOM(startinfo->pt_base);
- for (i = (KERNBASE >> 30);
- (i < 4) && (IdlePDPT[i] != 0); i++)
- l2_pages++;
- /*
- * Note that only one page directory has been allocated at this point.
- * Thus, if KERNBASE falls in a slot that has not yet been populated,
- * the loop above counts no L2 pages and l2_pages is clamped to 1 below.
- */
- for (i = 0; i < l2_pages; i++)
- IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);
-
- l2_pages = (l2_pages == 0) ? 1 : l2_pages;
-#else
- l3_pages = 0;
- l2_pages = 1;
-#endif
- for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
- (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
-
- if (IdlePTD[i] == 0)
- break;
- l1_pages++;
- }
-
- /* Number of pages allocated after the page tables, plus one. */
- cur_space = xen_start_info->pt_base +
- (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;
-
- xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space,
- cur_space);
-
- xc_printf("KERNBASE=%x,pt_base=%lx, VTOPFN(base)=%x, nr_pt_frames=%lx\n",
- KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
- xen_start_info->nr_pt_frames);
- xendebug_flags = 0; /* 0xffffffff; */
-
-#ifdef ADD_ISA_HOLE
- shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
-#endif
- XENPRINTF("IdlePTD %p\n", IdlePTD);
- XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%x pt_base: 0x%lx "
- "mod_start: 0x%lx mod_len: 0x%lx\n",
- xen_start_info->nr_pages, xen_start_info->shared_info,
- xen_start_info->flags, xen_start_info->pt_base,
- xen_start_info->mod_start, xen_start_info->mod_len);
-
-#ifdef PAE
- IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
- bzero(IdlePDPTnew, PAGE_SIZE);
-
- IdlePDPTnewma = VTOM(IdlePDPTnew);
- IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
- bzero(IdlePTDnew, 4*PAGE_SIZE);
-
- for (i = 0; i < 4; i++)
- IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
- /*
- * L3: copy the four machine addresses of the new PTDs into the PDPT.
- */
- for (i = 0; i < 4; i++)
- IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
-
- __asm__("nop;");
- /*
- * Re-map the new PDPT read-only.
- */
- PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
- /*
- * Unpin the current PDPT.
- */
- xen_pt_unpin(IdlePDPTma);
-
-#endif /* PAE */
-
- /* Map proc0's KSTACK */
- proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
- xc_printf("proc0kstack=%u\n", proc0kstack);
-
- /* vm86/bios stack */
- cur_space += PAGE_SIZE;
-
- /* Map space for the vm86 region */
- vm86paddr = (vm_offset_t)cur_space;
- cur_space += (PAGE_SIZE * 3);
-
- /* allocate 4 pages for bootmem allocator */
- bootmem_start = bootmem_current = (char *)cur_space;
- cur_space += (4 * PAGE_SIZE);
- bootmem_end = (char *)cur_space;
-
- /* allocate pages for gdt */
- gdt = (union descriptor *)cur_space;
- cur_space += PAGE_SIZE*ncpus;
-
- /* allocate page for ldt */
- ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
- cur_space += PAGE_SIZE;
-
- /*
- * Unmap the remaining pages from the initial chunk.
- */
- for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
- tmpva += PAGE_SIZE) {
- bzero((char *)tmpva, PAGE_SIZE);
- PT_SET_MA(tmpva, (vm_paddr_t)0);
- }
-
- PT_UPDATES_FLUSH();
-
- memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
- ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
- l1_pages*sizeof(pt_entry_t));
-
- for (i = 0; i < 4; i++) {
- PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
- IdlePTDnewma[i] | PG_V);
- }
- xen_load_cr3(VTOP(IdlePDPTnew));
- xen_pgdpt_pin(VTOM(IdlePDPTnew));
-
- /* allocate remainder of nkpt pages */
- cur_space_pt = cur_space;
- for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
- i++, cur_space += PAGE_SIZE) {
- pdir = (offset + i) / NPDEPG;
- curoffset = ((offset + i) % NPDEPG);
- if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
- break;
-
- /*
- * make sure that all the initial page table pages
- * have been zeroed
- */
- PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
- bzero((char *)cur_space, PAGE_SIZE);
- PT_SET_MA(cur_space, (vm_paddr_t)0);
- xen_pt_pin(VTOM(cur_space));
- xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
- curoffset*sizeof(vm_paddr_t)),
- VTOM(cur_space) | PG_KERNEL);
- PT_UPDATES_FLUSH();
- }
-
- for (i = 0; i < 4; i++) {
- pdir = (PTDPTDI + i) / NPDEPG;
- curoffset = (PTDPTDI + i) % NPDEPG;
-
- xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
- curoffset*sizeof(vm_paddr_t)),
- IdlePTDnewma[i] | PG_V);
- }
-
- PT_UPDATES_FLUSH();
-
- IdlePTD = IdlePTDnew;
- IdlePDPT = IdlePDPTnew;
- IdlePDPTma = IdlePDPTnewma;
-
- HYPERVISOR_shared_info = (shared_info_t *)cur_space;
- cur_space += PAGE_SIZE;
-
- xen_store = (struct xenstore_domain_interface *)cur_space;
- cur_space += PAGE_SIZE;
-
- console_page = (char *)cur_space;
- cur_space += PAGE_SIZE;
-
- /*
- * shared_info is an unsigned long so this will randomly break if
- * it is allocated above 4GB - I guess people are used to that
- * sort of thing with Xen ... sigh
- */
- shinfo = xen_start_info->shared_info;
- PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
-
- xc_printf("#4\n");
-
- xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
- PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
- console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
- PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
-
- xc_printf("#5\n");
-
- set_iopl.iopl = 1;
- PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
- xc_printf("#6\n");
-#if 0
- /* add page table for KERNBASE */
- xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_KERNEL);
- xen_flush_queue();
-#ifdef PAE
- xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_V | PG_A);
-#else
- xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
- VTOM(cur_space) | PG_V | PG_A);
-#endif
- xen_flush_queue();
- cur_space += PAGE_SIZE;
- xc_printf("#6\n");
-#endif /* 0 */
-#ifdef notyet
- if (xen_start_info->flags & SIF_INITDOMAIN) {
- /* Map first megabyte */
- for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
- PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
- xen_flush_queue();
- }
-#endif
- /*
- * re-map kernel text read-only
- *
- */
- for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
- i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
- PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
-
- xc_printf("#7\n");
- physfree = VTOP(cur_space);
- init_first = physfree >> PAGE_SHIFT;
- IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
- IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
- setup_xen_features();
- xc_printf("#8, proc0kstack=%u\n", proc0kstack);
-}
-
-
-trap_info_t trap_table[] = {
- { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
- { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
- { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
- { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
- /* This is UPL on Linux and KPL on BSD */
- { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
- { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
- { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
- /*
- * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
- * no handler for double fault
- */
- { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
- {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
- {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
- {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
- {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
- {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
- {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
- {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
- {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
- {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
- {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
- {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
- { 0, 0, 0, 0 }
-};
-
-/* Perform a multicall and check that individual calls succeeded. */
-int
-HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
-{
- int ret = 0;
- int i;
-
- /* Perform the multicall. */
- PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));
-
- /* Check the results of individual hypercalls. */
- for (i = 0; i < nr_calls; i++)
- if (__predict_false(call_list[i].result < 0))
- ret++;
- if (__predict_false(ret > 0))
- panic("%d multicall(s) failed: cpu %d\n",
- ret, smp_processor_id());
-
- /* If we didn't panic already, everything succeeded. */
- return (0);
-}
-
-/********** CODE WORTH KEEPING ABOVE HERE *****************/
-
-void xen_failsafe_handler(void);
-
-void
-xen_failsafe_handler(void)
-{
-
- panic("xen_failsafe_handler called!\n");
-}
-
-void xen_handle_thread_switch(struct pcb *pcb);
-
-/* This is called by cpu_switch() when switching threads. */
-/* The pcb arg refers to the process control block of the */
-/* next thread which is to run */
-void
-xen_handle_thread_switch(struct pcb *pcb)
-{
- uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
- uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
- multicall_entry_t mcl[3];
- int i = 0;
-
- /* Notify Xen of task switch */
- mcl[i].op = __HYPERVISOR_stack_switch;
- mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
- mcl[i++].args[1] = (unsigned long)pcb;
-
- /* Check for update of fsd */
- if (*a != *b || *(a+1) != *(b+1)) {
- mcl[i].op = __HYPERVISOR_update_descriptor;
- *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
- *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
- }
-
- a += 2;
- b += 2;
-
- /* Check for update of gsd */
- if (*a != *b || *(a+1) != *(b+1)) {
- mcl[i].op = __HYPERVISOR_update_descriptor;
- *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
- *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
- }
-
- (void)HYPERVISOR_multicall(mcl, i);
-}
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 39e4df3..3ff3440 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1238,12 +1238,14 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
coresize = round_page(hdrsize + notesz) + seginfo.size;
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- error = racct_add(td->td_proc, RACCT_CORE, coresize);
- PROC_UNLOCK(td->td_proc);
- if (error != 0) {
- error = EFAULT;
- goto done;
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ error = racct_add(td->td_proc, RACCT_CORE, coresize);
+ PROC_UNLOCK(td->td_proc);
+ if (error != 0) {
+ error = EFAULT;
+ goto done;
+ }
}
#endif
if (coresize >= limit) {
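Note: the hunk above is the template for most of this commit: each RACCT call site gains a runtime racct_enable check inside the existing compile-time #ifdef RACCT, so a kernel built with accounting support can still boot with it switched off. A minimal sketch of the pattern as a hypothetical new call site would use it (resource constant and error value are illustrative, borrowed from the coredump hunk above); the same wrapper recurs below in kern_descrip.c, kern_exit.c, and kern_thr.c:

    #ifdef RACCT
    	if (racct_enable) {
    		PROC_LOCK(p);
    		/* Charge the resource only when accounting is live. */
    		error = racct_add(p, RACCT_CORE, coresize);
    		PROC_UNLOCK(p);
    		if (error != 0)
    			return (EFAULT);	/* over the configured limit */
    	}
    #endif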
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 5290957..329f418 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -857,13 +857,15 @@ do_dup(struct thread *td, int flags, int old, int new)
* the limit on the size of the file descriptor table.
*/
#ifdef RACCT
- PROC_LOCK(p);
- error = racct_set(p, RACCT_NOFILE, new + 1);
- PROC_UNLOCK(p);
- if (error != 0) {
- FILEDESC_XUNLOCK(fdp);
- fdrop(fp, td);
- return (EMFILE);
+ if (racct_enable) {
+ PROC_LOCK(p);
+ error = racct_set(p, RACCT_NOFILE, new + 1);
+ PROC_UNLOCK(p);
+ if (error != 0) {
+ FILEDESC_XUNLOCK(fdp);
+ fdrop(fp, td);
+ return (EMFILE);
+ }
}
#endif
fdgrowtable_exp(fdp, new + 1);
@@ -1609,7 +1611,7 @@ fdalloc(struct thread *td, int minfd, int *result)
{
struct proc *p = td->td_proc;
struct filedesc *fdp = p->p_fd;
- int fd = -1, maxfd, allocfd;
+ int fd, maxfd, allocfd;
#ifdef RACCT
int error;
#endif
@@ -1631,11 +1633,13 @@ fdalloc(struct thread *td, int minfd, int *result)
if (fd >= fdp->fd_nfiles) {
allocfd = min(fd * 2, maxfd);
#ifdef RACCT
- PROC_LOCK(p);
- error = racct_set(p, RACCT_NOFILE, allocfd);
- PROC_UNLOCK(p);
- if (error != 0)
- return (EMFILE);
+ if (racct_enable) {
+ PROC_LOCK(p);
+ error = racct_set(p, RACCT_NOFILE, allocfd);
+ PROC_UNLOCK(p);
+ if (error != 0)
+ return (EMFILE);
+ }
#endif
/*
* fd is already equal to first free descriptor >= minfd, so
@@ -2042,9 +2046,11 @@ fdescfree(struct thread *td)
MPASS(fdp != NULL);
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- racct_set(td->td_proc, RACCT_NOFILE, 0);
- PROC_UNLOCK(td->td_proc);
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ racct_set(td->td_proc, RACCT_NOFILE, 0);
+ PROC_UNLOCK(td->td_proc);
+ }
#endif
if (td->td_proc->p_fdtol != NULL)
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index ecc2651..9d893f8 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1060,7 +1060,7 @@ exec_new_vmspace(imgp, sv)
/* Allocate a new stack */
if (imgp->stack_sz != 0) {
- ssiz = imgp->stack_sz;
+ ssiz = trunc_page(imgp->stack_sz);
PROC_LOCK(p);
lim_rlimit(p, RLIMIT_STACK, &rlim_stack);
PROC_UNLOCK(p);
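Note: trunc_page() rounds a byte count down to a page boundary, so a caller-supplied stack size can no longer leave a partial page at the top of the new stack. A quick worked example, assuming the common 4 KB pages (PAGE_SIZE = 4096, PAGE_MASK = 0xfff):

    /* trunc_page(x) is essentially (x & ~PAGE_MASK): */
    ssiz = trunc_page(0x1234);	/* 0x1234 & ~0xfff == 0x1000, one full page */
    ssiz = trunc_page(0x2000);	/* already page-aligned: stays 0x2000 */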
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 4d23fc0..0a601a1 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -907,9 +907,11 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options)
* Destroy resource accounting information associated with the process.
*/
#ifdef RACCT
- PROC_LOCK(p);
- racct_sub(p, RACCT_NPROC, 1);
- PROC_UNLOCK(p);
+ if (racct_enable) {
+ PROC_LOCK(p);
+ racct_sub(p, RACCT_NPROC, 1);
+ PROC_UNLOCK(p);
+ }
#endif
racct_proc_exit(p);
diff --git a/sys/kern/kern_gzio.c b/sys/kern/kern_gzio.c
index a4974a7..cee21f0 100644
--- a/sys/kern/kern_gzio.c
+++ b/sys/kern/kern_gzio.c
@@ -32,8 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/gzio.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
-
-#include <net/zutil.h>
+#include <sys/zutil.h>
#define KERN_GZ_HDRLEN 10 /* gzip header length */
#define KERN_GZ_TRAILERLEN 8 /* gzip trailer length */
diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c
index 7a5d936..a84019a 100644
--- a/sys/kern/kern_intr.c
+++ b/sys/kern/kern_intr.c
@@ -1455,12 +1455,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
/* Schedule the ithread if needed. */
if (thread) {
error = intr_event_schedule_thread(ie);
-#ifndef XEN
KASSERT(error == 0, ("bad stray interrupt"));
-#else
- if (error != 0)
- log(LOG_WARNING, "bad stray interrupt");
-#endif
}
critical_exit();
td->td_intr_nesting_level--;
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 5b23129..c87c4e2 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -1778,7 +1778,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
mtx_unlock(&pr->pr_mtx);
#ifdef RACCT
- if (created)
+ if (racct_enable && created)
prison_racct_attach(pr);
#endif
@@ -1862,7 +1862,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
#ifdef RACCT
- if (!created) {
+ if (racct_enable && !created) {
if (!(flags & JAIL_ATTACH))
sx_sunlock(&allprison_lock);
prison_racct_modify(pr);
@@ -2652,7 +2652,8 @@ prison_deref(struct prison *pr, int flags)
cpuset_rel(pr->pr_cpuset);
osd_jail_exit(pr);
#ifdef RACCT
- prison_racct_detach(pr);
+ if (racct_enable)
+ prison_racct_detach(pr);
#endif
free(pr, M_PRISON);
@@ -4460,12 +4461,15 @@ SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW,
SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW,
"B", "Jail may mount the zfs file system");
+#ifdef RACCT
void
prison_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void *arg2, void *arg3)
{
struct prison_racct *prr;
+ ASSERT_RACCT_ENABLED();
+
sx_slock(&allprison_lock);
LIST_FOREACH(prr, &allprison_racct, prr_next)
(callback)(prr->prr_racct, arg2, arg3);
@@ -4477,6 +4481,7 @@ prison_racct_find_locked(const char *name)
{
struct prison_racct *prr;
+ ASSERT_RACCT_ENABLED();
sx_assert(&allprison_lock, SA_XLOCKED);
if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN)
@@ -4507,6 +4512,8 @@ prison_racct_find(const char *name)
{
struct prison_racct *prr;
+ ASSERT_RACCT_ENABLED();
+
sx_xlock(&allprison_lock);
prr = prison_racct_find_locked(name);
sx_xunlock(&allprison_lock);
@@ -4517,6 +4524,8 @@ void
prison_racct_hold(struct prison_racct *prr)
{
+ ASSERT_RACCT_ENABLED();
+
refcount_acquire(&prr->prr_refcount);
}
@@ -4524,6 +4533,7 @@ static void
prison_racct_free_locked(struct prison_racct *prr)
{
+ ASSERT_RACCT_ENABLED();
sx_assert(&allprison_lock, SA_XLOCKED);
if (refcount_release(&prr->prr_refcount)) {
@@ -4538,6 +4548,7 @@ prison_racct_free(struct prison_racct *prr)
{
int old;
+ ASSERT_RACCT_ENABLED();
sx_assert(&allprison_lock, SA_UNLOCKED);
old = prr->prr_refcount;
@@ -4549,12 +4560,12 @@ prison_racct_free(struct prison_racct *prr)
sx_xunlock(&allprison_lock);
}
-#ifdef RACCT
static void
prison_racct_attach(struct prison *pr)
{
struct prison_racct *prr;
+ ASSERT_RACCT_ENABLED();
sx_assert(&allprison_lock, SA_XLOCKED);
prr = prison_racct_find_locked(pr->pr_name);
@@ -4574,6 +4585,8 @@ prison_racct_modify(struct prison *pr)
struct ucred *cred;
struct prison_racct *oldprr;
+ ASSERT_RACCT_ENABLED();
+
sx_slock(&allproc_lock);
sx_xlock(&allprison_lock);
@@ -4613,6 +4626,7 @@ static void
prison_racct_detach(struct prison *pr)
{
+ ASSERT_RACCT_ENABLED();
sx_assert(&allprison_lock, SA_UNLOCKED);
if (pr->pr_prison_racct == NULL)
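Note: the prison_racct_* helpers now assert that accounting is active instead of silently running with it disabled, and the whole group moves under #ifdef RACCT. ASSERT_RACCT_ENABLED() itself is not shown in this diff; it presumably lives in sys/racct.h as a thin KASSERT wrapper, along these lines:

    /* Sketch only -- the real definition is in sys/racct.h. */
    #define ASSERT_RACCT_ENABLED()	KASSERT(racct_enable,	\
        ("%s called with RACCT disabled", __func__))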
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 505521d..6618c08 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -2822,7 +2822,7 @@ static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE
sysctl_kern_proc, "Return process table, no threads");
static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
- CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
+ CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
sysctl_kern_proc_args, "Process argument list");
static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
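Note: adding CTLFLAG_CAPWR lets a process write kern.proc.args for itself even inside a Capsicum capability-mode sandbox; the usual consumer is the setproctitle(3) path, which replaces the argument list via this sysctl. A hedged userland sketch (title string and error handling are illustrative):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <err.h>
    #include <unistd.h>

    static void
    set_title(void)
    {
    	char newargs[] = "myprog: worker";	/* hypothetical new title */
    	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_ARGS, getpid() };

    	/* A write through the node replaces this process's argv string. */
    	if (sysctl(mib, 4, NULL, NULL, newargs, sizeof(newargs)) == -1)
    		warn("sysctl KERN_PROC_ARGS");
    }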
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
index 05becea..5cee140 100644
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -70,8 +70,15 @@ FEATURE(racct, "Resource Accounting");
* Do not block processes that have their %cpu usage <= pcpu_threshold.
*/
static int pcpu_threshold = 1;
+#ifdef RACCT_DISABLED
+int racct_enable = 0;
+#else
+int racct_enable = 1;
+#endif
SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting");
+SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable,
+ 0, "Enable RACCT/RCTL");
SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
0, "Processes with higher %cpu usage than this value can be throttled.");
@@ -313,6 +320,8 @@ racct_getpcpu(struct proc *p, u_int pcpu)
fixpt_t p_pctcpu;
struct thread *td;
+ ASSERT_RACCT_ENABLED();
+
/*
* If the process is swapped out, we count its %cpu usage as zero.
* This behaviour is consistent with the userland ps(1) tool.
@@ -377,6 +386,7 @@ racct_add_racct(struct racct *dest, const struct racct *src)
{
int i;
+ ASSERT_RACCT_ENABLED();
mtx_assert(&racct_lock, MA_OWNED);
/*
@@ -398,6 +408,7 @@ racct_sub_racct(struct racct *dest, const struct racct *src)
{
int i;
+ ASSERT_RACCT_ENABLED();
mtx_assert(&racct_lock, MA_OWNED);
/*
@@ -431,6 +442,9 @@ void
racct_create(struct racct **racctp)
{
+ if (!racct_enable)
+ return;
+
SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
KASSERT(*racctp == NULL, ("racct already allocated"));
@@ -444,6 +458,8 @@ racct_destroy_locked(struct racct **racctp)
int i;
struct racct *racct;
+ ASSERT_RACCT_ENABLED();
+
SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
mtx_assert(&racct_lock, MA_OWNED);
@@ -470,6 +486,9 @@ void
racct_destroy(struct racct **racct)
{
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_destroy_locked(racct);
mtx_unlock(&racct_lock);
@@ -485,6 +504,7 @@ racct_alloc_resource(struct racct *racct, int resource,
uint64_t amount)
{
+ ASSERT_RACCT_ENABLED();
mtx_assert(&racct_lock, MA_OWNED);
KASSERT(racct != NULL, ("NULL racct"));
@@ -516,6 +536,8 @@ racct_add_locked(struct proc *p, int resource, uint64_t amount)
int error;
#endif
+ ASSERT_RACCT_ENABLED();
+
SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
/*
@@ -546,6 +568,9 @@ racct_add(struct proc *p, int resource, uint64_t amount)
{
int error;
+ if (!racct_enable)
+ return (0);
+
mtx_lock(&racct_lock);
error = racct_add_locked(p, resource, amount);
mtx_unlock(&racct_lock);
@@ -557,6 +582,8 @@ racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
struct prison *pr;
+ ASSERT_RACCT_ENABLED();
+
SDT_PROBE(racct, kernel, rusage, add__cred, cred, resource, amount,
0, 0);
@@ -577,6 +604,9 @@ void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_add_cred_locked(cred, resource, amount);
mtx_unlock(&racct_lock);
@@ -590,6 +620,9 @@ void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{
+ if (!racct_enable)
+ return;
+
SDT_PROBE(racct, kernel, rusage, add__force, p, resource, amount, 0, 0);
/*
@@ -612,6 +645,8 @@ racct_set_locked(struct proc *p, int resource, uint64_t amount)
int error;
#endif
+ ASSERT_RACCT_ENABLED();
+
SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
/*
@@ -671,6 +706,9 @@ racct_set(struct proc *p, int resource, uint64_t amount)
{
int error;
+ if (!racct_enable)
+ return (0);
+
mtx_lock(&racct_lock);
error = racct_set_locked(p, resource, amount);
mtx_unlock(&racct_lock);
@@ -683,6 +721,8 @@ racct_set_force_locked(struct proc *p, int resource, uint64_t amount)
int64_t old_amount, decayed_amount;
int64_t diff_proc, diff_cred;
+ ASSERT_RACCT_ENABLED();
+
SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
/*
@@ -717,6 +757,10 @@ racct_set_force_locked(struct proc *p, int resource, uint64_t amount)
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{
+
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_set_force_locked(p, resource, amount);
mtx_unlock(&racct_lock);
@@ -732,6 +776,9 @@ uint64_t
racct_get_limit(struct proc *p, int resource)
{
+ if (!racct_enable)
+ return (UINT64_MAX);
+
#ifdef RCTL
return (rctl_get_limit(p, resource));
#else
@@ -749,6 +796,9 @@ uint64_t
racct_get_available(struct proc *p, int resource)
{
+ if (!racct_enable)
+ return (UINT64_MAX);
+
#ifdef RCTL
return (rctl_get_available(p, resource));
#else
@@ -765,6 +815,8 @@ static int64_t
racct_pcpu_available(struct proc *p)
{
+ ASSERT_RACCT_ENABLED();
+
#ifdef RCTL
return (rctl_pcpu_available(p));
#else
@@ -779,6 +831,9 @@ void
racct_sub(struct proc *p, int resource, uint64_t amount)
{
+ if (!racct_enable)
+ return;
+
SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
/*
@@ -804,6 +859,8 @@ racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
struct prison *pr;
+ ASSERT_RACCT_ENABLED();
+
SDT_PROBE(racct, kernel, rusage, sub__cred, cred, resource, amount,
0, 0);
@@ -827,6 +884,9 @@ void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_sub_cred_locked(cred, resource, amount);
mtx_unlock(&racct_lock);
@@ -840,6 +900,9 @@ racct_proc_fork(struct proc *parent, struct proc *child)
{
int i, error = 0;
+ if (!racct_enable)
+ return (0);
+
/*
* Create racct for the child process.
*/
@@ -896,6 +959,9 @@ racct_proc_fork_done(struct proc *child)
{
#ifdef RCTL
+ if (!racct_enable)
+ return;
+
PROC_LOCK(child);
mtx_lock(&racct_lock);
rctl_enforce(child, RACCT_NPROC, 0);
@@ -913,6 +979,9 @@ racct_proc_exit(struct proc *p)
struct timeval wallclock;
uint64_t pct_estimate, pct;
+ if (!racct_enable)
+ return;
+
PROC_LOCK(p);
/*
* We don't need to calculate rux, proc_reap() has already done this.
@@ -967,6 +1036,9 @@ racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
struct loginclass *oldlc, *newlc;
struct prison *oldpr, *newpr, *pr;
+ if (!racct_enable)
+ return;
+
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
newuip = newcred->cr_ruidinfo;
@@ -1004,6 +1076,8 @@ void
racct_move(struct racct *dest, struct racct *src)
{
+ ASSERT_RACCT_ENABLED();
+
mtx_lock(&racct_lock);
racct_add_racct(dest, src);
@@ -1020,6 +1094,7 @@ racct_proc_throttle(struct proc *p)
int cpuid;
#endif
+ ASSERT_RACCT_ENABLED();
PROC_LOCK_ASSERT(p, MA_OWNED);
/*
@@ -1065,6 +1140,9 @@ racct_proc_throttle(struct proc *p)
static void
racct_proc_wakeup(struct proc *p)
{
+
+ ASSERT_RACCT_ENABLED();
+
PROC_LOCK_ASSERT(p, MA_OWNED);
if (p->p_throttled) {
@@ -1079,6 +1157,8 @@ racct_decay_resource(struct racct *racct, void * res, void* dummy)
int resource;
int64_t r_old, r_new;
+ ASSERT_RACCT_ENABLED();
+
resource = *(int *)res;
r_old = racct->r_resources[resource];
@@ -1095,6 +1175,9 @@ racct_decay_resource(struct racct *racct, void * res, void* dummy)
static void
racct_decay(int resource)
{
+
+ ASSERT_RACCT_ENABLED();
+
ui_racct_foreach(racct_decay_resource, &resource, NULL);
loginclass_racct_foreach(racct_decay_resource, &resource, NULL);
prison_racct_foreach(racct_decay_resource, &resource, NULL);
@@ -1109,6 +1192,8 @@ racctd(void)
uint64_t runtime;
uint64_t pct, pct_estimate;
+ ASSERT_RACCT_ENABLED();
+
for (;;) {
racct_decay(RACCT_PCTCPU);
@@ -1188,11 +1273,22 @@ static struct kproc_desc racctd_kp = {
racctd,
NULL
};
-SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
+
+static void
+racctd_init(void)
+{
+ if (!racct_enable)
+ return;
+
+ kproc_start(&racctd_kp);
+}
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);
static void
racct_init(void)
{
+ if (!racct_enable)
+ return;
racct_zone = uma_zcreate("racct", sizeof(struct racct),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
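Note: CTLFLAG_RDTUN makes kern.racct.enable a read-only sysctl whose initial value is taken from the loader environment, so the framework can be toggled per boot without a rebuild; the new RACCT_DISABLED kernel option only changes the compiled-in default. The flag is roughly shorthand for an explicit tunable fetch at boot; a kernel-context sketch of the equivalent manual wiring (the SYSINIT name and function are hypothetical):

    /* Sketch: approximately what CTLFLAG_RDTUN arranges behind the scenes. */
    static void
    racct_enable_fetch(void *dummy __unused)
    {
    	TUNABLE_INT_FETCH("kern.racct.enable", &racct_enable);
    }
    SYSINIT(racct_enable_tun, SI_SUB_TUNABLES, SI_ORDER_ANY,
        racct_enable_fetch, NULL);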
diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c
index 934327a..c43b83d 100644
--- a/sys/kern/kern_rctl.c
+++ b/sys/kern/kern_rctl.c
@@ -225,6 +225,7 @@ rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
int64_t available = INT64_MAX;
struct ucred *cred = p->p_ucred;
+ ASSERT_RACCT_ENABLED();
rw_assert(&rctl_lock, RA_LOCKED);
resource = rule->rr_resource;
@@ -264,6 +265,8 @@ rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
{
int64_t available;
+ ASSERT_RACCT_ENABLED();
+
rw_assert(&rctl_lock, RA_LOCKED);
available = rctl_available_resource(p, rule);
@@ -283,6 +286,8 @@ rctl_pcpu_available(const struct proc *p) {
struct rctl_rule_link *link;
int64_t available, minavailable, limit;
+ ASSERT_RACCT_ENABLED();
+
minavailable = INT64_MAX;
limit = 0;
@@ -334,6 +339,8 @@ rctl_enforce(struct proc *p, int resource, uint64_t amount)
static int curtime = 0;
static struct timeval lasttime;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
/*
@@ -457,6 +464,8 @@ rctl_get_limit(struct proc *p, int resource)
struct rctl_rule_link *link;
uint64_t amount = UINT64_MAX;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
/*
@@ -487,6 +496,8 @@ rctl_get_available(struct proc *p, int resource)
minavailable = INT64_MAX;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
/*
@@ -521,6 +532,8 @@ static int
rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
{
+ ASSERT_RACCT_ENABLED();
+
if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
if (rule->rr_subject_type != filter->rr_subject_type)
return (0);
@@ -635,6 +648,7 @@ rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
{
struct rctl_rule_link *link;
+ ASSERT_RACCT_ENABLED();
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
rctl_rule_acquire(rule);
@@ -652,6 +666,7 @@ rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
{
struct rctl_rule_link *link;
+ ASSERT_RACCT_ENABLED();
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
rw_assert(&rctl_lock, RA_WLOCKED);
@@ -678,6 +693,7 @@ rctl_racct_remove_rules(struct racct *racct,
int removed = 0;
struct rctl_rule_link *link, *linktmp;
+ ASSERT_RACCT_ENABLED();
rw_assert(&rctl_lock, RA_WLOCKED);
LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
@@ -696,6 +712,8 @@ static void
rctl_rule_acquire_subject(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
+
switch (rule->rr_subject_type) {
case RCTL_SUBJECT_TYPE_UNDEFINED:
case RCTL_SUBJECT_TYPE_PROCESS:
@@ -722,6 +740,8 @@ static void
rctl_rule_release_subject(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
+
switch (rule->rr_subject_type) {
case RCTL_SUBJECT_TYPE_UNDEFINED:
case RCTL_SUBJECT_TYPE_PROCESS:
@@ -749,6 +769,8 @@ rctl_rule_alloc(int flags)
{
struct rctl_rule *rule;
+ ASSERT_RACCT_ENABLED();
+
rule = uma_zalloc(rctl_rule_zone, flags);
if (rule == NULL)
return (NULL);
@@ -771,6 +793,8 @@ rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
{
struct rctl_rule *copy;
+ ASSERT_RACCT_ENABLED();
+
copy = uma_zalloc(rctl_rule_zone, flags);
if (copy == NULL)
return (NULL);
@@ -793,6 +817,7 @@ void
rctl_rule_acquire(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
refcount_acquire(&rule->rr_refcount);
@@ -805,6 +830,7 @@ rctl_rule_free(void *context, int pending)
rule = (struct rctl_rule *)context;
+ ASSERT_RACCT_ENABLED();
KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
/*
@@ -819,6 +845,7 @@ void
rctl_rule_release(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
if (refcount_release(&rule->rr_refcount)) {
@@ -838,6 +865,8 @@ static int
rctl_rule_fully_specified(const struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
+
switch (rule->rr_subject_type) {
case RCTL_SUBJECT_TYPE_UNDEFINED:
return (0);
@@ -882,6 +911,8 @@ rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
struct rctl_rule *rule;
id_t id;
+ ASSERT_RACCT_ENABLED();
+
rule = rctl_rule_alloc(M_WAITOK);
subjectstr = strsep(&rulestr, ":");
@@ -1008,6 +1039,7 @@ rctl_rule_add(struct rctl_rule *rule)
struct rctl_rule *rule2;
int match;
+ ASSERT_RACCT_ENABLED();
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
/*
@@ -1118,6 +1150,8 @@ rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
struct rctl_rule *filter = (struct rctl_rule *)arg2;
int found = 0;
+ ASSERT_RACCT_ENABLED();
+
rw_wlock(&rctl_lock);
found += rctl_racct_remove_rules(racct, filter);
rw_wunlock(&rctl_lock);
@@ -1134,6 +1168,8 @@ rctl_rule_remove(struct rctl_rule *filter)
int found = 0;
struct proc *p;
+ ASSERT_RACCT_ENABLED();
+
if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
filter->rr_subject.rs_proc != NULL) {
p = filter->rr_subject.rs_proc;
@@ -1172,6 +1208,8 @@ rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
{
int64_t amount;
+ ASSERT_RACCT_ENABLED();
+
sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
switch (rule->rr_subject_type) {
@@ -1231,6 +1269,8 @@ rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
int error;
char *str;
+ ASSERT_RACCT_ENABLED();
+
if (inbuflen <= 0)
return (EINVAL);
if (inbuflen > RCTL_MAX_INBUFLEN)
@@ -1256,6 +1296,8 @@ rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
int error;
+ ASSERT_RACCT_ENABLED();
+
if (outputsbuf == NULL)
return (0);
@@ -1277,6 +1319,8 @@ rctl_racct_to_sbuf(struct racct *racct, int sloppy)
int64_t amount;
struct sbuf *sb;
+ ASSERT_RACCT_ENABLED();
+
sb = sbuf_new_auto();
for (i = 0; i <= RACCT_MAX; i++) {
if (sloppy == 0 && RACCT_IS_SLOPPY(i))
@@ -1302,6 +1346,9 @@ sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
struct loginclass *lc;
struct prison_racct *prr;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_GET_RACCT);
if (error != 0)
return (error);
@@ -1372,6 +1419,8 @@ rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
struct rctl_rule_link *link;
struct sbuf *sb = (struct sbuf *)arg3;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
if (!rctl_rule_matches(link->rrl_rule, filter))
@@ -1393,6 +1442,9 @@ sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
struct rctl_rule_link *link;
struct proc *p;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_GET_RULES);
if (error != 0)
return (error);
@@ -1467,6 +1519,9 @@ sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
struct rctl_rule *filter;
struct rctl_rule_link *link;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_GET_LIMITS);
if (error != 0)
return (error);
@@ -1538,6 +1593,9 @@ sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
struct rctl_rule *rule;
char *inputstr;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_ADD_RULE);
if (error != 0)
return (error);
@@ -1580,6 +1638,9 @@ sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
struct rctl_rule *filter;
char *inputstr;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
if (error != 0)
return (error);
@@ -1616,6 +1677,8 @@ rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
struct prison_racct *newprr;
LIST_HEAD(, rctl_rule_link) newrules;
+ ASSERT_RACCT_ENABLED();
+
newuip = newcred->cr_ruidinfo;
newlc = newcred->cr_loginclass;
newprr = newcred->cr_prison->pr_prison_racct;
@@ -1756,6 +1819,7 @@ rctl_proc_fork(struct proc *parent, struct proc *child)
LIST_INIT(&child->p_racct->r_rule_links);
+ ASSERT_RACCT_ENABLED();
KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
rw_wlock(&rctl_lock);
@@ -1809,6 +1873,8 @@ rctl_racct_release(struct racct *racct)
{
struct rctl_rule_link *link;
+ ASSERT_RACCT_ENABLED();
+
rw_wlock(&rctl_lock);
while (!LIST_EMPTY(&racct->r_rule_links)) {
link = LIST_FIRST(&racct->r_rule_links);
@@ -1823,6 +1889,9 @@ static void
rctl_init(void)
{
+ if (!racct_enable)
+ return;
+
rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
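Note: each rctl syscall above now fails fast with ENOSYS when accounting is compiled in but disabled at boot, matching what userland already expects from a kernel built without the RACCT/RCTL options. A hedged userland probe; rctl_get_racct(2) is the real syscall, but the filter string and messages here are illustrative:

    #include <sys/types.h>
    #include <sys/rctl.h>
    #include <err.h>
    #include <errno.h>

    static void
    probe_rctl(void)
    {
    	char filter[] = "process:";	/* illustrative filter */
    	char out[1024];

    	if (rctl_get_racct(filter, sizeof(filter), out, sizeof(out)) == -1 &&
    	    errno == ENOSYS)
    		warnx("RACCT/RCTL not enabled; see kern.racct.enable");
    }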
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index e547c5f..998ee15 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -154,7 +154,6 @@ static void poweroff_wait(void *, int);
static void shutdown_halt(void *junk, int howto);
static void shutdown_panic(void *junk, int howto);
static void shutdown_reset(void *junk, int howto);
-static void vpanic(const char *fmt, va_list ap) __dead2;
/* register various local shutdown events */
static void
@@ -676,7 +675,7 @@ panic(const char *fmt, ...)
vpanic(fmt, ap);
}
-static void
+void
vpanic(const char *fmt, va_list ap)
{
#ifdef SMP
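Note: vpanic() is promoted here from a file-local helper to a public symbol (the static __dead2 forward declaration is dropped and the definition loses its static), so other code can layer its own variadic panic-style entry points on top. A minimal kernel-context sketch, assuming the public prototype keeps the __dead2 attribute; the wrapper itself is hypothetical:

    #include <sys/cdefs.h>
    #include <sys/systm.h>
    #include <machine/stdarg.h>

    /* Hypothetical subsystem wrapper around the now-public vpanic(9). */
    static void __dead2
    subsys_panic(const char *fmt, ...)
    {
    	va_list ap;

    	va_start(ap, fmt);
    	vpanic(fmt, ap);
    	/* NOTREACHED: vpanic() does not return. */
    }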
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index ad10dc7..a238a09 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -66,12 +66,6 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
-#ifdef XEN
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#endif
-
#define KTDSTATE(td) \
(((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \
((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \
@@ -475,9 +469,6 @@ mi_switch(int flags, struct thread *newtd)
"lockname:\"%s\"", td->td_lockname);
#endif
SDT_PROBE0(sched, , , preempt);
-#ifdef XEN
- PT_UPDATES_FLUSH();
-#endif
sched_switch(td, newtd, flags);
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
"prio:%d", td->td_priority);
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index 280bc0b..6911bb97 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -187,11 +187,13 @@ create_thread(struct thread *td, mcontext_t *ctx,
}
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- error = racct_add(p, RACCT_NTHR, 1);
- PROC_UNLOCK(td->td_proc);
- if (error != 0)
- return (EPROCLIM);
+ if (racct_enable) {
+ PROC_LOCK(p);
+ error = racct_add(p, RACCT_NTHR, 1);
+ PROC_UNLOCK(p);
+ if (error != 0)
+ return (EPROCLIM);
+ }
#endif
/* Initialize our td */
@@ -250,9 +252,9 @@ create_thread(struct thread *td, mcontext_t *ctx,
}
}
- PROC_LOCK(td->td_proc);
- td->td_proc->p_flag |= P_HADTHREADS;
- thread_link(newtd, p);
+ PROC_LOCK(p);
+ p->p_flag |= P_HADTHREADS;
+ thread_link(newtd, p);
bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
thread_lock(td);
/* let the scheduler know about these things. */
@@ -280,9 +282,11 @@ create_thread(struct thread *td, mcontext_t *ctx,
fail:
#ifdef RACCT
- PROC_LOCK(p);
- racct_sub(p, RACCT_NTHR, 1);
- PROC_UNLOCK(p);
+ if (racct_enable) {
+ PROC_LOCK(p);
+ racct_sub(p, RACCT_NTHR, 1);
+ PROC_UNLOCK(p);
+ }
#endif
return (error);
}
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 65c8276..26be035 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$");
#include <sys/link_elf.h>
#ifdef DDB_CTF
-#include <net/zlib.h>
+#include <sys/zlib.h>
#endif
#include "linker_if.h"
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 5c107fe..021381d 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -60,7 +60,7 @@ __FBSDID("$FreeBSD$");
#include <sys/link_elf.h>
#ifdef DDB_CTF
-#include <net/zlib.h>
+#include <sys/zlib.h>
#endif
#include "linker_if.h"
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 3e39d55..59bd387 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -1585,7 +1585,7 @@ sched_pctcpu(struct thread *td)
return (ts->ts_pctcpu);
}
-#ifdef RACCT
+#ifdef RACCT
/*
* Calculates the contribution to the thread cpu usage for the latest
* (unfinished) second.
diff --git a/sys/kern/subr_dnvlist.c b/sys/kern/subr_dnvlist.c
index fe17684..9058520 100644
--- a/sys/kern/subr_dnvlist.c
+++ b/sys/kern/subr_dnvlist.c
@@ -89,89 +89,6 @@ dnvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep,
return (value);
}
-#ifndef _KERNEL
-#define DNVLIST_GETF(ftype, type) \
-ftype \
-dnvlist_getf_##type(const nvlist_t *nvl, ftype defval, \
- const char *namefmt, ...) \
-{ \
- va_list nameap; \
- ftype value; \
- \
- va_start(nameap, namefmt); \
- value = dnvlist_getv_##type(nvl, defval, namefmt, nameap); \
- va_end(nameap); \
- \
- return (value); \
-}
-
-DNVLIST_GETF(bool, bool)
-DNVLIST_GETF(uint64_t, number)
-DNVLIST_GETF(const char *, string)
-DNVLIST_GETF(const nvlist_t *, nvlist)
-DNVLIST_GETF(int, descriptor)
-
-#undef DNVLIST_GETF
-
-const void *
-dnvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const void *defval,
- size_t defsize, const char *namefmt, ...)
-{
- va_list nameap;
- const void *value;
-
- va_start(nameap, namefmt);
- value = dnvlist_getv_binary(nvl, sizep, defval, defsize, namefmt,
- nameap);
- va_end(nameap);
-
- return (value);
-}
-
-#define DNVLIST_GETV(ftype, type) \
-ftype \
-dnvlist_getv_##type(const nvlist_t *nvl, ftype defval, \
- const char *namefmt, va_list nameap) \
-{ \
- char *name; \
- ftype value; \
- \
- vasprintf(&name, namefmt, nameap); \
- if (name == NULL) \
- return (defval); \
- value = dnvlist_get_##type(nvl, name, defval); \
- free(name); \
- return (value); \
-}
-
-DNVLIST_GETV(bool, bool)
-DNVLIST_GETV(uint64_t, number)
-DNVLIST_GETV(const char *, string)
-DNVLIST_GETV(const nvlist_t *, nvlist)
-DNVLIST_GETV(int, descriptor)
-
-#undef DNVLIST_GETV
-
-const void *
-dnvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const void *defval,
- size_t defsize, const char *namefmt, va_list nameap)
-{
- char *name;
- const void *value;
-
- nv_vasprintf(&name, namefmt, nameap);
- if (name != NULL) {
- value = dnvlist_get_binary(nvl, name, sizep, defval, defsize);
- nv_free(name);
- } else {
- if (sizep != NULL)
- *sizep = defsize;
- value = defval;
- }
- return (value);
-}
-#endif
-
#define DNVLIST_TAKE(ftype, type) \
ftype \
dnvlist_take_##type(nvlist_t *nvl, const char *name, ftype defval) \
@@ -209,86 +126,3 @@ dnvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep,
return (value);
}
-#ifndef _KERNEL
-#define DNVLIST_TAKEF(ftype, type) \
-ftype \
-dnvlist_takef_##type(nvlist_t *nvl, ftype defval, \
- const char *namefmt, ...) \
-{ \
- va_list nameap; \
- ftype value; \
- \
- va_start(nameap, namefmt); \
- value = dnvlist_takev_##type(nvl, defval, namefmt, nameap); \
- va_end(nameap); \
- \
- return (value); \
-}
-
-DNVLIST_TAKEF(bool, bool)
-DNVLIST_TAKEF(uint64_t, number)
-DNVLIST_TAKEF(char *, string)
-DNVLIST_TAKEF(nvlist_t *, nvlist)
-DNVLIST_TAKEF(int, descriptor)
-
-#undef DNVLIST_TAKEF
-
-void *
-dnvlist_takef_binary(nvlist_t *nvl, size_t *sizep, void *defval,
- size_t defsize, const char *namefmt, ...)
-{
- va_list nameap;
- void *value;
-
- va_start(nameap, namefmt);
- value = dnvlist_takev_binary(nvl, sizep, defval, defsize, namefmt,
- nameap);
- va_end(nameap);
-
- return (value);
-}
-
-#define DNVLIST_TAKEV(ftype, type) \
-ftype \
-dnvlist_takev_##type(nvlist_t *nvl, ftype defval, const char *namefmt, \
- va_list nameap) \
-{ \
- char *name; \
- ftype value; \
- \
- vasprintf(&name, namefmt, nameap); \
- if (name == NULL) \
- return (defval); \
- value = dnvlist_take_##type(nvl, name, defval); \
- free(name); \
- return (value); \
-}
-
-DNVLIST_TAKEV(bool, bool)
-DNVLIST_TAKEV(uint64_t, number)
-DNVLIST_TAKEV(char *, string)
-DNVLIST_TAKEV(nvlist_t *, nvlist)
-DNVLIST_TAKEV(int, descriptor)
-
-#undef DNVLIST_TAKEV
-
-void *
-dnvlist_takev_binary(nvlist_t *nvl, size_t *sizep, void *defval,
- size_t defsize, const char *namefmt, va_list nameap)
-{
- char *name;
- void *value;
-
- nv_vasprintf(&name, namefmt, nameap);
- if (name != NULL) {
- value = dnvlist_take_binary(nvl, name, sizep, defval, defsize);
- nv_free(name);
- } else {
- if (sizep != NULL)
- *sizep = defsize;
- value = defval;
- }
-
- return (value);
-}
-#endif
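Note: the two hunks above delete the printf-style dnvlist_getf_*/getv_* and takef_*/takev_* name-formatting variants from userland builds; callers are expected to format the name themselves and use the plain-name accessors that remain. A sketch of the replacement idiom (the "child%d" key is illustrative):

    #include <sys/nv.h>
    #include <sys/dnv.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Was: value = dnvlist_getf_number(nvl, 0, "child%d", i); */
    static uint64_t
    get_child_number(const nvlist_t *nvl, int i)
    {
    	char name[32];

    	snprintf(name, sizeof(name), "child%d", i);
    	return (dnvlist_get_number(nvl, name, 0));
    }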
diff --git a/sys/kern/subr_nvlist.c b/sys/kern/subr_nvlist.c
index f352c62..f96b890 100644
--- a/sys/kern/subr_nvlist.c
+++ b/sys/kern/subr_nvlist.c
@@ -142,12 +142,11 @@ void
nvlist_destroy(nvlist_t *nvl)
{
nvpair_t *nvp;
- int serrno;
if (nvl == NULL)
return;
- SAVE_ERRNO(serrno);
+ ERRNO_SAVE();
NVLIST_ASSERT(nvl);
@@ -158,7 +157,7 @@ nvlist_destroy(nvlist_t *nvl)
nvl->nvl_magic = 0;
nv_free(nvl);
- RESTORE_ERRNO(serrno);
+ ERRNO_RESTORE();
}
void
@@ -231,6 +230,17 @@ nvlist_empty(const nvlist_t *nvl)
return (nvlist_first_nvpair(nvl) == NULL);
}
+int
+nvlist_flags(const nvlist_t *nvl)
+{
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+ PJDLOG_ASSERT((nvl->nvl_flags & ~(NV_FLAG_PUBLIC_MASK)) == 0);
+
+ return (nvl->nvl_flags);
+}
+
static void
nvlist_report_missing(int type, const char *name)
{
@@ -264,7 +274,7 @@ nvlist_find(const nvlist_t *nvl, int type, const char *name)
}
if (nvp == NULL)
- RESTORE_ERRNO(ENOENT);
+ ERRNO_SET(ENOENT);
return (nvp);
}
@@ -281,37 +291,6 @@ nvlist_exists_type(const nvlist_t *nvl, const char *name, int type)
return (nvlist_find(nvl, type, name) != NULL);
}
-#ifndef _KERNEL
-bool
-nvlist_existsf_type(const nvlist_t *nvl, int type, const char *namefmt, ...)
-{
- va_list nameap;
- bool ret;
-
- va_start(nameap, namefmt);
- ret = nvlist_existsv_type(nvl, type, namefmt, nameap);
- va_end(nameap);
-
- return (ret);
-}
-
-bool
-nvlist_existsv_type(const nvlist_t *nvl, int type, const char *namefmt,
- va_list nameap)
-{
- char *name;
- bool exists;
-
- nv_vasprintf(&name, namefmt, nameap);
- if (name == NULL)
- return (false);
-
- exists = nvlist_exists_type(nvl, name, type);
- nv_free(name);
- return (exists);
-}
-#endif
-
void
nvlist_free_type(nvlist_t *nvl, const char *name, int type)
{
@@ -329,30 +308,6 @@ nvlist_free_type(nvlist_t *nvl, const char *name, int type)
nvlist_report_missing(type, name);
}
-#ifndef _KERNEL
-void
-nvlist_freef_type(nvlist_t *nvl, int type, const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_freev_type(nvl, type, namefmt, nameap);
- va_end(nameap);
-}
-
-void
-nvlist_freev_type(nvlist_t *nvl, int type, const char *namefmt, va_list nameap)
-{
- char *name;
-
- nv_vasprintf(&name, namefmt, nameap);
- if (name == NULL)
- nvlist_report_missing(type, "<unknown>");
- nvlist_free_type(nvl, name, type);
- nv_free(name);
-}
-#endif
-
nvlist_t *
nvlist_clone(const nvlist_t *nvl)
{
@@ -362,7 +317,7 @@ nvlist_clone(const nvlist_t *nvl)
NVLIST_ASSERT(nvl);
if (nvl->nvl_error != 0) {
- RESTORE_ERRNO(nvl->nvl_error);
+ ERRNO_SET(nvl->nvl_error);
return (NULL);
}
@@ -533,27 +488,30 @@ out:
#ifndef _KERNEL
static int *
-nvlist_xdescriptors(const nvlist_t *nvl, int *descs, int level)
+nvlist_xdescriptors(const nvlist_t *nvl, int *descs)
{
- const nvpair_t *nvp;
+ nvpair_t *nvp;
+ const char *name;
+ int type;
NVLIST_ASSERT(nvl);
PJDLOG_ASSERT(nvl->nvl_error == 0);
- PJDLOG_ASSERT(level < 3);
- for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
- nvp = nvlist_next_nvpair(nvl, nvp)) {
- switch (nvpair_type(nvp)) {
- case NV_TYPE_DESCRIPTOR:
- *descs = nvpair_get_descriptor(nvp);
- descs++;
- break;
- case NV_TYPE_NVLIST:
- descs = nvlist_xdescriptors(nvpair_get_nvlist(nvp),
- descs, level + 1);
- break;
+ nvp = NULL;
+ do {
+ while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) {
+ switch (type) {
+ case NV_TYPE_DESCRIPTOR:
+ *descs = nvpair_get_descriptor(nvp);
+ descs++;
+ break;
+ case NV_TYPE_NVLIST:
+ nvl = nvpair_get_nvlist(nvp);
+ nvp = NULL;
+ break;
+ }
}
- }
+ } while ((nvl = nvlist_get_parent(nvl, (void**)&nvp)) != NULL);
return (descs);
}
@@ -571,7 +529,7 @@ nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp)
if (fds == NULL)
return (NULL);
if (nitems > 0)
- nvlist_xdescriptors(nvl, fds, 0);
+ nvlist_xdescriptors(nvl, fds);
fds[nitems] = -1;
if (nitemsp != NULL)
*nitemsp = nitems;
@@ -579,30 +537,33 @@ nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp)
}
#endif
-static size_t
-nvlist_xndescriptors(const nvlist_t *nvl, int level)
+size_t
+nvlist_ndescriptors(const nvlist_t *nvl)
{
#ifndef _KERNEL
- const nvpair_t *nvp;
+ nvpair_t *nvp;
+ const char *name;
size_t ndescs;
+ int type;
NVLIST_ASSERT(nvl);
PJDLOG_ASSERT(nvl->nvl_error == 0);
- PJDLOG_ASSERT(level < 3);
ndescs = 0;
- for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
- nvp = nvlist_next_nvpair(nvl, nvp)) {
- switch (nvpair_type(nvp)) {
- case NV_TYPE_DESCRIPTOR:
- ndescs++;
- break;
- case NV_TYPE_NVLIST:
- ndescs += nvlist_xndescriptors(nvpair_get_nvlist(nvp),
- level + 1);
- break;
+ nvp = NULL;
+ do {
+ while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) {
+ switch (type) {
+ case NV_TYPE_DESCRIPTOR:
+ ndescs++;
+ break;
+ case NV_TYPE_NVLIST:
+ nvl = nvpair_get_nvlist(nvp);
+ nvp = NULL;
+ break;
+ }
}
- }
+ } while ((nvl = nvlist_get_parent(nvl, (void**)&nvp)) != NULL);
return (ndescs);
#else
@@ -610,13 +571,6 @@ nvlist_xndescriptors(const nvlist_t *nvl, int level)
#endif
}
-size_t
-nvlist_ndescriptors(const nvlist_t *nvl)
-{
-
- return (nvlist_xndescriptors(nvl, 0));
-}
-
static unsigned char *
nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp)
{
@@ -640,7 +594,7 @@ nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp)
return (ptr);
}
-void *
+static void *
nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
{
unsigned char *buf, *ptr;
@@ -652,7 +606,7 @@ nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
NVLIST_ASSERT(nvl);
if (nvl->nvl_error != 0) {
- RESTORE_ERRNO(nvl->nvl_error);
+ ERRNO_SET(nvl->nvl_error);
return (NULL);
}
@@ -742,12 +696,12 @@ nvlist_pack(const nvlist_t *nvl, size_t *sizep)
NVLIST_ASSERT(nvl);
if (nvl->nvl_error != 0) {
- RESTORE_ERRNO(nvl->nvl_error);
+ ERRNO_SET(nvl->nvl_error);
return (NULL);
}
if (nvlist_ndescriptors(nvl) > 0) {
- RESTORE_ERRNO(EOPNOTSUPP);
+ ERRNO_SET(EOPNOTSUPP);
return (NULL);
}
@@ -759,11 +713,11 @@ nvlist_check_header(struct nvlist_header *nvlhdrp)
{
if (nvlhdrp->nvlh_magic != NVLIST_HEADER_MAGIC) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (false);
}
if ((nvlhdrp->nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (false);
}
#if BYTE_ORDER == BIG_ENDIAN
@@ -815,11 +769,11 @@ nvlist_unpack_header(nvlist_t *nvl, const unsigned char *ptr, size_t nfds,
return (ptr);
failed:
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
-nvlist_t *
+static nvlist_t *
nvlist_xunpack(const void *buf, size_t size, const int *fds, size_t nfds)
{
const unsigned char *ptr;
@@ -909,10 +863,10 @@ nvlist_send(int sock, const nvlist_t *nvl)
int *fds;
void *data;
int64_t fdidx;
- int serrno, ret;
+ int ret;
if (nvlist_error(nvl) != 0) {
- errno = nvlist_error(nvl);
+ ERRNO_SET(nvlist_error(nvl));
return (-1);
}
@@ -938,10 +892,10 @@ nvlist_send(int sock, const nvlist_t *nvl)
ret = 0;
out:
- serrno = errno;
+ ERRNO_SAVE();
free(fds);
free(data);
- errno = serrno;
+ ERRNO_RESTORE();
return (ret);
}
@@ -952,7 +906,7 @@ nvlist_recv(int sock)
nvlist_t *nvl, *ret;
unsigned char *buf;
size_t nfds, size, i;
- int serrno, *fds;
+ int *fds;
if (buf_recv(sock, &nvlhdr, sizeof(nvlhdr)) == -1)
return (NULL);
@@ -963,7 +917,7 @@ nvlist_recv(int sock)
nfds = (size_t)nvlhdr.nvlh_descriptors;
size = sizeof(nvlhdr) + (size_t)nvlhdr.nvlh_size;
- buf = malloc(size);
+ buf = nv_malloc(size);
if (buf == NULL)
return (NULL);
@@ -976,7 +930,7 @@ nvlist_recv(int sock)
goto out;
if (nfds > 0) {
- fds = malloc(nfds * sizeof(fds[0]));
+ fds = nv_malloc(nfds * sizeof(fds[0]));
if (fds == NULL)
goto out;
if (fd_recv(sock, fds, nfds) == -1)
@@ -985,17 +939,19 @@ nvlist_recv(int sock)
nvl = nvlist_xunpack(buf, size, fds, nfds);
if (nvl == NULL) {
+ ERRNO_SAVE();
for (i = 0; i < nfds; i++)
close(fds[i]);
+ ERRNO_RESTORE();
goto out;
}
ret = nvl;
out:
- serrno = errno;
+ ERRNO_SAVE();
free(buf);
free(fds);
- errno = serrno;
+ ERRNO_RESTORE();
return (ret);
}
@@ -1100,86 +1056,6 @@ NVLIST_EXISTS(binary, BINARY)
#undef NVLIST_EXISTS
-#ifndef _KERNEL
-bool
-nvlist_existsf(const nvlist_t *nvl, const char *namefmt, ...)
-{
- va_list nameap;
- bool ret;
-
- va_start(nameap, namefmt);
- ret = nvlist_existsv(nvl, namefmt, nameap);
- va_end(nameap);
- return (ret);
-}
-
-#define NVLIST_EXISTSF(type) \
-bool \
-nvlist_existsf_##type(const nvlist_t *nvl, const char *namefmt, ...) \
-{ \
- va_list nameap; \
- bool ret; \
- \
- va_start(nameap, namefmt); \
- ret = nvlist_existsv_##type(nvl, namefmt, nameap); \
- va_end(nameap); \
- return (ret); \
-}
-
-NVLIST_EXISTSF(null)
-NVLIST_EXISTSF(bool)
-NVLIST_EXISTSF(number)
-NVLIST_EXISTSF(string)
-NVLIST_EXISTSF(nvlist)
-#ifndef _KERNEL
-NVLIST_EXISTSF(descriptor)
-#endif
-NVLIST_EXISTSF(binary)
-
-#undef NVLIST_EXISTSF
-
-bool
-nvlist_existsv(const nvlist_t *nvl, const char *namefmt, va_list nameap)
-{
- char *name;
- bool exists;
-
- nv_vasprintf(&name, namefmt, nameap);
- if (name == NULL)
- return (false);
-
- exists = nvlist_exists(nvl, name);
- nv_free(name);
- return (exists);
-}
-
-#define NVLIST_EXISTSV(type) \
-bool \
-nvlist_existsv_##type(const nvlist_t *nvl, const char *namefmt, \
- va_list nameap) \
-{ \
- char *name; \
- bool exists; \
- \
- vasprintf(&name, namefmt, nameap); \
- if (name == NULL) \
- return (false); \
- exists = nvlist_exists_##type(nvl, name); \
- free(name); \
- return (exists); \
-}
-
-NVLIST_EXISTSV(null)
-NVLIST_EXISTSV(bool)
-NVLIST_EXISTSV(number)
-NVLIST_EXISTSV(string)
-NVLIST_EXISTSV(nvlist)
-NVLIST_EXISTSV(descriptor)
-NVLIST_EXISTSV(binary)
-
-#undef NVLIST_EXISTSV
-#endif
-
void
nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp)
{
@@ -1188,19 +1064,19 @@ nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp)
NVPAIR_ASSERT(nvp);
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
if (nvlist_exists(nvl, nvpair_name(nvp))) {
nvl->nvl_error = EEXIST;
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
newnvp = nvpair_clone(nvp);
if (newnvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
@@ -1208,34 +1084,6 @@ nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp)
}
void
-nvlist_add_null(nvlist_t *nvl, const char *name)
-{
-
- nvlist_addf_null(nvl, "%s", name);
-}
-
-void
-nvlist_add_bool(nvlist_t *nvl, const char *name, bool value)
-{
-
- nvlist_addf_bool(nvl, value, "%s", name);
-}
-
-void
-nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value)
-{
-
- nvlist_addf_number(nvl, value, "%s", name);
-}
-
-void
-nvlist_add_string(nvlist_t *nvl, const char *name, const char *value)
-{
-
- nvlist_addf_string(nvl, value, "%s", name);
-}
-
-void
nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...)
{
va_list valueap;
@@ -1252,213 +1100,117 @@ nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt,
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
nvp = nvpair_create_stringv(name, valuefmt, valueap);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
void
-nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value)
-{
-
- nvlist_addf_nvlist(nvl, value, "%s", name);
-}
-
-#ifndef _KERNEL
-void
-nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value)
-{
-
- nvlist_addf_descriptor(nvl, value, "%s", name);
-}
-#endif
-
-void
-nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value,
- size_t size)
-{
-
- nvlist_addf_binary(nvl, value, size, "%s", name);
-}
-
-void
-nvlist_addf_null(nvlist_t *nvl, const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_addv_null(nvl, namefmt, nameap);
- va_end(nameap);
-}
-
-void
-nvlist_addf_bool(nvlist_t *nvl, bool value, const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_addv_bool(nvl, value, namefmt, nameap);
- va_end(nameap);
-}
-
-void
-nvlist_addf_number(nvlist_t *nvl, uint64_t value, const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_addv_number(nvl, value, namefmt, nameap);
- va_end(nameap);
-}
-
-void
-nvlist_addf_string(nvlist_t *nvl, const char *value, const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_addv_string(nvl, value, namefmt, nameap);
- va_end(nameap);
-}
-
-void
-nvlist_addf_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt,
- ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_addv_nvlist(nvl, value, namefmt, nameap);
- va_end(nameap);
-}
-
-#ifndef _KERNEL
-void
-nvlist_addf_descriptor(nvlist_t *nvl, int value, const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_addv_descriptor(nvl, value, namefmt, nameap);
- va_end(nameap);
-}
-#endif
-
-void
-nvlist_addf_binary(nvlist_t *nvl, const void *value, size_t size,
- const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_addv_binary(nvl, value, size, namefmt, nameap);
- va_end(nameap);
-}
-
-void
-nvlist_addv_null(nvlist_t *nvl, const char *namefmt, va_list nameap)
+nvlist_add_null(nvlist_t *nvl, const char *name)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_createv_null(namefmt, nameap);
+ nvp = nvpair_create_null(name);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
void
-nvlist_addv_bool(nvlist_t *nvl, bool value, const char *namefmt, va_list nameap)
+nvlist_add_bool(nvlist_t *nvl, const char *name, bool value)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_createv_bool(value, namefmt, nameap);
+ nvp = nvpair_create_bool(name, value);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
void
-nvlist_addv_number(nvlist_t *nvl, uint64_t value, const char *namefmt,
- va_list nameap)
+nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_createv_number(value, namefmt, nameap);
+ nvp = nvpair_create_number(name, value);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
void
-nvlist_addv_string(nvlist_t *nvl, const char *value, const char *namefmt,
- va_list nameap)
+nvlist_add_string(nvlist_t *nvl, const char *name, const char *value)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_createv_string(value, namefmt, nameap);
+ nvp = nvpair_create_string(name, value);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
void
-nvlist_addv_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt,
- va_list nameap)
+nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_createv_nvlist(value, namefmt, nameap);
+ nvp = nvpair_create_nvlist(name, value);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
#ifndef _KERNEL
void
-nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt,
- va_list nameap)
+nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value)
{
nvpair_t *nvp;
@@ -1467,7 +1219,7 @@ nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt,
return;
}
- nvp = nvpair_createv_descriptor(value, namefmt, nameap);
+ nvp = nvpair_create_descriptor(name, value);
if (nvp == NULL)
nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM);
else
@@ -1476,22 +1228,23 @@ nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt,
#endif
void
-nvlist_addv_binary(nvlist_t *nvl, const void *value, size_t size,
- const char *namefmt, va_list nameap)
+nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value,
+ size_t size)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_createv_binary(value, size, namefmt, nameap);
+ nvp = nvpair_create_binary(name, value, size);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
void
@@ -1503,153 +1256,100 @@ nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp)
if (nvlist_error(nvl) != 0) {
nvpair_free(nvp);
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
if (nvlist_exists(nvl, nvpair_name(nvp))) {
nvpair_free(nvp);
nvl->nvl_error = EEXIST;
- RESTORE_ERRNO(nvl->nvl_error);
+ ERRNO_SET(nvl->nvl_error);
return;
}
nvpair_insert(&nvl->nvl_head, nvp, nvl);
}
-#define NVLIST_MOVE(vtype, type) \
-void \
-nvlist_move_##type(nvlist_t *nvl, const char *name, vtype value) \
-{ \
- \
- nvlist_movef_##type(nvl, value, "%s", name); \
-}
-
-NVLIST_MOVE(char *, string)
-NVLIST_MOVE(nvlist_t *, nvlist)
-#ifndef _KERNEL
-NVLIST_MOVE(int, descriptor)
-#endif
-
-#undef NVLIST_MOVE
-
-void
-nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size)
-{
-
- nvlist_movef_binary(nvl, value, size, "%s", name);
-}
-
-#define NVLIST_MOVEF(vtype, type) \
-void \
-nvlist_movef_##type(nvlist_t *nvl, vtype value, const char *namefmt, \
- ...) \
-{ \
- va_list nameap; \
- \
- va_start(nameap, namefmt); \
- nvlist_movev_##type(nvl, value, namefmt, nameap); \
- va_end(nameap); \
-}
-
-NVLIST_MOVEF(char *, string)
-NVLIST_MOVEF(nvlist_t *, nvlist)
-#ifndef _KERNEL
-NVLIST_MOVEF(int, descriptor)
-#endif
-
-#undef NVLIST_MOVEF
-
-void
-nvlist_movef_binary(nvlist_t *nvl, void *value, size_t size,
- const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_movev_binary(nvl, value, size, namefmt, nameap);
- va_end(nameap);
-}
-
void
-nvlist_movev_string(nvlist_t *nvl, char *value, const char *namefmt,
- va_list nameap)
+nvlist_move_string(nvlist_t *nvl, const char *name, char *value)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
nv_free(value);
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_movev_string(value, namefmt, nameap);
+ nvp = nvpair_move_string(name, value);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
void
-nvlist_movev_nvlist(nvlist_t *nvl, nvlist_t *value, const char *namefmt,
- va_list nameap)
+nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
if (value != NULL && nvlist_get_nvpair_parent(value) != NULL)
nvlist_destroy(value);
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_movev_nvlist(value, namefmt, nameap);
+ nvp = nvpair_move_nvlist(name, value);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
#ifndef _KERNEL
void
-nvlist_movev_descriptor(nvlist_t *nvl, int value, const char *namefmt,
- va_list nameap)
+nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
close(value);
- errno = nvlist_error(nvl);
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_movev_descriptor(value, namefmt, nameap);
- if (nvp == NULL)
- nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM);
- else
+ nvp = nvpair_move_descriptor(name, value);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
#endif
void
-nvlist_movev_binary(nvlist_t *nvl, void *value, size_t size,
- const char *namefmt, va_list nameap)
+nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size)
{
nvpair_t *nvp;
if (nvlist_error(nvl) != 0) {
nv_free(value);
- RESTORE_ERRNO(nvlist_error(nvl));
+ ERRNO_SET(nvlist_error(nvl));
return;
}
- nvp = nvpair_movev_binary(value, size, namefmt, nameap);
+ nvp = nvpair_move_binary(name, value, size);
if (nvp == NULL) {
nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
- RESTORE_ERRNO(nvl->nvl_error);
- } else
+ ERRNO_SET(nvl->nvl_error);
+ } else {
nvlist_move_nvpair(nvl, nvp);
+ }
}
const nvpair_t *
@@ -1693,84 +1393,6 @@ nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep)
return (nvpair_get_binary(nvp, sizep));
}
-#define NVLIST_GETF(ftype, type) \
-ftype \
-nvlist_getf_##type(const nvlist_t *nvl, const char *namefmt, ...) \
-{ \
- va_list nameap; \
- ftype value; \
- \
- va_start(nameap, namefmt); \
- value = nvlist_getv_##type(nvl, namefmt, nameap); \
- va_end(nameap); \
- \
- return (value); \
-}
-
-#ifndef _KERNEL
-NVLIST_GETF(bool, bool)
-NVLIST_GETF(uint64_t, number)
-NVLIST_GETF(const char *, string)
-NVLIST_GETF(const nvlist_t *, nvlist)
-NVLIST_GETF(int, descriptor)
-
-#undef NVLIST_GETF
-
-const void *
-nvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, ...)
-{
- va_list nameap;
- const void *value;
-
- va_start(nameap, namefmt);
- value = nvlist_getv_binary(nvl, sizep, namefmt, nameap);
- va_end(nameap);
-
- return (value);
-}
-
-#define NVLIST_GETV(ftype, type, TYPE) \
-ftype \
-nvlist_getv_##type(const nvlist_t *nvl, const char *namefmt, \
- va_list nameap) \
-{ \
- char *name; \
- ftype value; \
- \
- vasprintf(&name, namefmt, nameap); \
- if (name == NULL) \
- nvlist_report_missing(NV_TYPE_##TYPE, "<unknown>"); \
- value = nvlist_get_##type(nvl, name); \
- free(name); \
- \
- return (value); \
-}
-
-NVLIST_GETV(bool, bool, BOOL)
-NVLIST_GETV(uint64_t, number, NUMBER)
-NVLIST_GETV(const char *, string, STRING)
-NVLIST_GETV(const nvlist_t *, nvlist, NVLIST)
-NVLIST_GETV(int, descriptor, DESCRIPTOR)
-
-#undef NVLIST_GETV
-
-const void *
-nvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt,
- va_list nameap)
-{
- char *name;
- const void *binary;
-
- nv_vasprintf(&name, namefmt, nameap);
- if (name == NULL)
- nvlist_report_missing(NV_TYPE_BINARY, "<unknown>");
-
- binary = nvlist_get_binary(nvl, name, sizep);
- nv_free(name);
- return (binary);
-}
-#endif
-
#define NVLIST_TAKE(ftype, type, TYPE) \
ftype \
nvlist_take_##type(nvlist_t *nvl, const char *name) \
@@ -1813,82 +1435,6 @@ nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep)
return (value);
}
-#define NVLIST_TAKEF(ftype, type) \
-ftype \
-nvlist_takef_##type(nvlist_t *nvl, const char *namefmt, ...) \
-{ \
- va_list nameap; \
- ftype value; \
- \
- va_start(nameap, namefmt); \
- value = nvlist_takev_##type(nvl, namefmt, nameap); \
- va_end(nameap); \
- \
- return (value); \
-}
-
-#ifndef _KERNEL
-NVLIST_TAKEF(bool, bool)
-NVLIST_TAKEF(uint64_t, number)
-NVLIST_TAKEF(char *, string)
-NVLIST_TAKEF(nvlist_t *, nvlist)
-NVLIST_TAKEF(int, descriptor)
-
-#undef NVLIST_TAKEF
-
-void *
-nvlist_takef_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, ...)
-{
- va_list nameap;
- void *value;
-
- va_start(nameap, namefmt);
- value = nvlist_takev_binary(nvl, sizep, namefmt, nameap);
- va_end(nameap);
-
- return (value);
-}
-
-#define NVLIST_TAKEV(ftype, type, TYPE) \
-ftype \
-nvlist_takev_##type(nvlist_t *nvl, const char *namefmt, va_list nameap) \
-{ \
- char *name; \
- ftype value; \
- \
- vasprintf(&name, namefmt, nameap); \
- if (name == NULL) \
- nvlist_report_missing(NV_TYPE_##TYPE, "<unknown>"); \
- value = nvlist_take_##type(nvl, name); \
- free(name); \
- return (value); \
-}
-
-NVLIST_TAKEV(bool, bool, BOOL)
-NVLIST_TAKEV(uint64_t, number, NUMBER)
-NVLIST_TAKEV(char *, string, STRING)
-NVLIST_TAKEV(nvlist_t *, nvlist, NVLIST)
-NVLIST_TAKEV(int, descriptor, DESCRIPTOR)
-
-#undef NVLIST_TAKEV
-
-void *
-nvlist_takev_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt,
- va_list nameap)
-{
- char *name;
- void *binary;
-
- nv_vasprintf(&name, namefmt, nameap);
- if (name == NULL)
- nvlist_report_missing(NV_TYPE_BINARY, "<unknown>");
-
- binary = nvlist_take_binary(nvl, name, sizep);
- nv_free(name);
- return (binary);
-}
-#endif
-
void
nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
{
@@ -1927,68 +1473,6 @@ NVLIST_FREE(binary, BINARY)
#undef NVLIST_FREE
-#ifndef _KERNEL
-void
-nvlist_freef(nvlist_t *nvl, const char *namefmt, ...)
-{
- va_list nameap;
-
- va_start(nameap, namefmt);
- nvlist_freev(nvl, namefmt, nameap);
- va_end(nameap);
-}
-
-#define NVLIST_FREEF(type) \
-void \
-nvlist_freef_##type(nvlist_t *nvl, const char *namefmt, ...) \
-{ \
- va_list nameap; \
- \
- va_start(nameap, namefmt); \
- nvlist_freev_##type(nvl, namefmt, nameap); \
- va_end(nameap); \
-}
-
-NVLIST_FREEF(null)
-NVLIST_FREEF(bool)
-NVLIST_FREEF(number)
-NVLIST_FREEF(string)
-NVLIST_FREEF(nvlist)
-NVLIST_FREEF(descriptor)
-NVLIST_FREEF(binary)
-
-#undef NVLIST_FREEF
-
-void
-nvlist_freev(nvlist_t *nvl, const char *namefmt, va_list nameap)
-{
-
- nvlist_freev_type(nvl, NV_TYPE_NONE, namefmt, nameap);
-}
-
-#define NVLIST_FREEV(type, TYPE) \
-void \
-nvlist_freev_##type(nvlist_t *nvl, const char *namefmt, va_list nameap) \
-{ \
- char *name; \
- \
- vasprintf(&name, namefmt, nameap); \
- if (name == NULL) \
- nvlist_report_missing(NV_TYPE_##TYPE, "<unknown>"); \
- nvlist_free_##type(nvl, name); \
- free(name); \
-}
-
-NVLIST_FREEV(null, NULL)
-NVLIST_FREEV(bool, BOOL)
-NVLIST_FREEV(number, NUMBER)
-NVLIST_FREEV(string, STRING)
-NVLIST_FREEV(nvlist, NVLIST)
-NVLIST_FREEV(descriptor, DESCRIPTOR)
-NVLIST_FREEV(binary, BINARY)
-#undef NVLIST_FREEV
-#endif
-
void
nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp)
{
@@ -2000,3 +1484,4 @@ nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp)
nvlist_remove_nvpair(nvl, nvp);
nvpair_free(nvp);
}
+
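
The hunks above drop the printf-style name interface (the *f_*/*v_* variants taking namefmt/va_list) in favor of functions that take the final name as a plain string. A minimal migration sketch for a caller that relied on formatted names; add_child_fd() and its arguments are illustrative only, not part of libnv:

    #include <stdio.h>
    #include <sys/nv.h>

    static void
    add_child_fd(nvlist_t *nvl, int i, int fd)
    {
        char name[32];

        /* Before: nvlist_movef_descriptor(nvl, fd, "child%d.fd", i); */
        /* After: the caller formats the name itself. */
        snprintf(name, sizeof(name), "child%d.fd", i);
        nvlist_move_descriptor(nvl, name, fd);
    }
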
diff --git a/sys/kern/subr_nvpair.c b/sys/kern/subr_nvpair.c
index 7b88e42..b0fc322 100644
--- a/sys/kern/subr_nvpair.c
+++ b/sys/kern/subr_nvpair.c
@@ -468,7 +468,7 @@ nvpair_unpack_header(bool isbe, nvpair_t *nvp, const unsigned char *ptr,
return (ptr);
failed:
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -480,7 +480,7 @@ nvpair_unpack_null(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr,
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NULL);
if (nvp->nvp_datasize != 0) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -496,11 +496,11 @@ nvpair_unpack_bool(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr,
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BOOL);
if (nvp->nvp_datasize != sizeof(value)) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
if (*leftp < sizeof(value)) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -509,7 +509,7 @@ nvpair_unpack_bool(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr,
*leftp -= sizeof(value);
if (value != 0 && value != 1) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -526,11 +526,11 @@ nvpair_unpack_number(bool isbe, nvpair_t *nvp, const unsigned char *ptr,
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NUMBER);
if (nvp->nvp_datasize != sizeof(uint64_t)) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
if (*leftp < sizeof(uint64_t)) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -552,13 +552,13 @@ nvpair_unpack_string(bool isbe __unused, nvpair_t *nvp,
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING);
if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
if (strnlen((const char *)ptr, nvp->nvp_datasize) !=
nvp->nvp_datasize - 1) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -581,7 +581,7 @@ nvpair_unpack_nvlist(bool isbe __unused, nvpair_t *nvp,
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NVLIST);
if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -609,11 +609,11 @@ nvpair_unpack_descriptor(bool isbe, nvpair_t *nvp, const unsigned char *ptr,
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR);
if (nvp->nvp_datasize != sizeof(idx)) {
- errno = EINVAL;
+ ERRNO_SET(EINVAL);
return (NULL);
}
if (*leftp < sizeof(idx)) {
- errno = EINVAL;
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -623,12 +623,12 @@ nvpair_unpack_descriptor(bool isbe, nvpair_t *nvp, const unsigned char *ptr,
idx = le64dec(ptr);
if (idx < 0) {
- errno = EINVAL;
+ ERRNO_SET(EINVAL);
return (NULL);
}
if ((size_t)idx >= nfds) {
- errno = EINVAL;
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -650,7 +650,7 @@ nvpair_unpack_binary(bool isbe __unused, nvpair_t *nvp,
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY);
if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -716,69 +716,34 @@ nvpair_name(const nvpair_t *nvp)
}
static nvpair_t *
-nvpair_allocv(int type, uint64_t data, size_t datasize, const char *namefmt,
- va_list nameap)
+nvpair_allocv(const char *name, int type, uint64_t data, size_t datasize)
{
nvpair_t *nvp;
- char *name;
- int namelen;
+ size_t namelen;
PJDLOG_ASSERT(type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST);
- namelen = nv_vasprintf(&name, namefmt, nameap);
- if (namelen < 0)
- return (NULL);
-
- PJDLOG_ASSERT(namelen > 0);
+ namelen = strlen(name);
if (namelen >= NV_NAME_MAX) {
- nv_free(name);
- RESTORE_ERRNO(ENAMETOOLONG);
+ ERRNO_SET(ENAMETOOLONG);
return (NULL);
}
nvp = nv_calloc(1, sizeof(*nvp) + namelen + 1);
if (nvp != NULL) {
nvp->nvp_name = (char *)(nvp + 1);
- memcpy(nvp->nvp_name, name, namelen + 1);
+ memcpy(nvp->nvp_name, name, namelen);
+ nvp->nvp_name[namelen] = '\0';
nvp->nvp_type = type;
nvp->nvp_data = data;
nvp->nvp_datasize = datasize;
nvp->nvp_magic = NVPAIR_MAGIC;
}
- nv_free(name);
return (nvp);
};
nvpair_t *
-nvpair_create_null(const char *name)
-{
-
- return (nvpair_createf_null("%s", name));
-}
-
-nvpair_t *
-nvpair_create_bool(const char *name, bool value)
-{
-
- return (nvpair_createf_bool(value, "%s", name));
-}
-
-nvpair_t *
-nvpair_create_number(const char *name, uint64_t value)
-{
-
- return (nvpair_createf_number(value, "%s", name));
-}
-
-nvpair_t *
-nvpair_create_string(const char *name, const char *value)
-{
-
- return (nvpair_createf_string(value, "%s", name));
-}
-
-nvpair_t *
nvpair_create_stringf(const char *name, const char *valuefmt, ...)
{
va_list valueap;
@@ -808,153 +773,36 @@ nvpair_create_stringv(const char *name, const char *valuefmt, va_list valueap)
}
nvpair_t *
-nvpair_create_nvlist(const char *name, const nvlist_t *value)
-{
-
- return (nvpair_createf_nvlist(value, "%s", name));
-}
-
-#ifndef _KERNEL
-nvpair_t *
-nvpair_create_descriptor(const char *name, int value)
-{
-
- return (nvpair_createf_descriptor(value, "%s", name));
-}
-#endif
-
-nvpair_t *
-nvpair_create_binary(const char *name, const void *value, size_t size)
-{
-
- return (nvpair_createf_binary(value, size, "%s", name));
-}
-
-nvpair_t *
-nvpair_createf_null(const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_createv_null(namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-nvpair_t *
-nvpair_createf_bool(bool value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_createv_bool(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-nvpair_t *
-nvpair_createf_number(uint64_t value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_createv_number(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-nvpair_t *
-nvpair_createf_string(const char *value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_createv_string(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-nvpair_t *
-nvpair_createf_nvlist(const nvlist_t *value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_createv_nvlist(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-#ifndef _KERNEL
-nvpair_t *
-nvpair_createf_descriptor(int value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_createv_descriptor(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-#endif
-
-nvpair_t *
-nvpair_createf_binary(const void *value, size_t size, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_createv_binary(value, size, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-nvpair_t *
-nvpair_createv_null(const char *namefmt, va_list nameap)
+nvpair_create_null(const char *name)
{
- return (nvpair_allocv(NV_TYPE_NULL, 0, 0, namefmt, nameap));
+ return (nvpair_allocv(name, NV_TYPE_NULL, 0, 0));
}
nvpair_t *
-nvpair_createv_bool(bool value, const char *namefmt, va_list nameap)
+nvpair_create_bool(const char *name, bool value)
{
- return (nvpair_allocv(NV_TYPE_BOOL, value ? 1 : 0, sizeof(uint8_t),
- namefmt, nameap));
+ return (nvpair_allocv(name, NV_TYPE_BOOL, value ? 1 : 0,
+ sizeof(uint8_t)));
}
nvpair_t *
-nvpair_createv_number(uint64_t value, const char *namefmt, va_list nameap)
+nvpair_create_number(const char *name, uint64_t value)
{
- return (nvpair_allocv(NV_TYPE_NUMBER, value, sizeof(value), namefmt,
- nameap));
+ return (nvpair_allocv(name, NV_TYPE_NUMBER, value, sizeof(value)));
}
nvpair_t *
-nvpair_createv_string(const char *value, const char *namefmt, va_list nameap)
+nvpair_create_string(const char *name, const char *value)
{
nvpair_t *nvp;
size_t size;
char *data;
if (value == NULL) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -963,8 +811,8 @@ nvpair_createv_string(const char *value, const char *namefmt, va_list nameap)
return (NULL);
size = strlen(value) + 1;
- nvp = nvpair_allocv(NV_TYPE_STRING, (uint64_t)(uintptr_t)data, size,
- namefmt, nameap);
+ nvp = nvpair_allocv(name, NV_TYPE_STRING, (uint64_t)(uintptr_t)data,
+ size);
if (nvp == NULL)
nv_free(data);
@@ -972,14 +820,13 @@ nvpair_createv_string(const char *value, const char *namefmt, va_list nameap)
}
nvpair_t *
-nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt,
- va_list nameap)
+nvpair_create_nvlist(const char *name, const nvlist_t *value)
{
nvlist_t *nvl;
nvpair_t *nvp;
if (value == NULL) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -987,8 +834,7 @@ nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt,
if (nvl == NULL)
return (NULL);
- nvp = nvpair_allocv(NV_TYPE_NVLIST, (uint64_t)(uintptr_t)nvl, 0,
- namefmt, nameap);
+ nvp = nvpair_allocv(name, NV_TYPE_NVLIST, (uint64_t)(uintptr_t)nvl, 0);
if (nvp == NULL)
nvlist_destroy(nvl);
else
@@ -999,12 +845,12 @@ nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt,
#ifndef _KERNEL
nvpair_t *
-nvpair_createv_descriptor(int value, const char *namefmt, va_list nameap)
+nvpair_create_descriptor(const char *name, int value)
{
nvpair_t *nvp;
if (value < 0 || !fd_is_valid(value)) {
- errno = EBADF;
+ ERRNO_SET(EBADF);
return (NULL);
}
@@ -1012,24 +858,26 @@ nvpair_createv_descriptor(int value, const char *namefmt, va_list nameap)
if (value < 0)
return (NULL);
- nvp = nvpair_allocv(NV_TYPE_DESCRIPTOR, (uint64_t)value,
- sizeof(int64_t), namefmt, nameap);
- if (nvp == NULL)
+ nvp = nvpair_allocv(name, NV_TYPE_DESCRIPTOR, (uint64_t)value,
+ sizeof(int64_t));
+ if (nvp == NULL) {
+ ERRNO_SAVE();
close(value);
+ ERRNO_RESTORE();
+ }
return (nvp);
}
#endif
nvpair_t *
-nvpair_createv_binary(const void *value, size_t size, const char *namefmt,
- va_list nameap)
+nvpair_create_binary(const char *name, const void *value, size_t size)
{
nvpair_t *nvp;
void *data;
if (value == NULL || size == 0) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
@@ -1038,8 +886,8 @@ nvpair_createv_binary(const void *value, size_t size, const char *namefmt,
return (NULL);
memcpy(data, value, size);
- nvp = nvpair_allocv(NV_TYPE_BINARY, (uint64_t)(uintptr_t)data, size,
- namefmt, nameap);
+ nvp = nvpair_allocv(name, NV_TYPE_BINARY, (uint64_t)(uintptr_t)data,
+ size);
if (nvp == NULL)
nv_free(data);
@@ -1049,127 +897,42 @@ nvpair_createv_binary(const void *value, size_t size, const char *namefmt,
nvpair_t *
nvpair_move_string(const char *name, char *value)
{
-
- return (nvpair_movef_string(value, "%s", name));
-}
-
-nvpair_t *
-nvpair_move_nvlist(const char *name, nvlist_t *value)
-{
-
- return (nvpair_movef_nvlist(value, "%s", name));
-}
-
-#ifndef _KERNEL
-nvpair_t *
-nvpair_move_descriptor(const char *name, int value)
-{
-
- return (nvpair_movef_descriptor(value, "%s", name));
-}
-#endif
-
-nvpair_t *
-nvpair_move_binary(const char *name, void *value, size_t size)
-{
-
- return (nvpair_movef_binary(value, size, "%s", name));
-}
-
-nvpair_t *
-nvpair_movef_string(char *value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_movev_string(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-nvpair_t *
-nvpair_movef_nvlist(nvlist_t *value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_movev_nvlist(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-#ifndef _KERNEL
-nvpair_t *
-nvpair_movef_descriptor(int value, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_movev_descriptor(value, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-#endif
-
-nvpair_t *
-nvpair_movef_binary(void *value, size_t size, const char *namefmt, ...)
-{
- va_list nameap;
- nvpair_t *nvp;
-
- va_start(nameap, namefmt);
- nvp = nvpair_movev_binary(value, size, namefmt, nameap);
- va_end(nameap);
-
- return (nvp);
-}
-
-nvpair_t *
-nvpair_movev_string(char *value, const char *namefmt, va_list nameap)
-{
nvpair_t *nvp;
- int serrno;
if (value == NULL) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
- nvp = nvpair_allocv(NV_TYPE_STRING, (uint64_t)(uintptr_t)value,
- strlen(value) + 1, namefmt, nameap);
+ nvp = nvpair_allocv(name, NV_TYPE_STRING, (uint64_t)(uintptr_t)value,
+ strlen(value) + 1);
if (nvp == NULL) {
- SAVE_ERRNO(serrno);
+ ERRNO_SAVE();
nv_free(value);
- RESTORE_ERRNO(serrno);
+ ERRNO_RESTORE();
}
return (nvp);
}
nvpair_t *
-nvpair_movev_nvlist(nvlist_t *value, const char *namefmt, va_list nameap)
+nvpair_move_nvlist(const char *name, nvlist_t *value)
{
nvpair_t *nvp;
if (value == NULL || nvlist_get_nvpair_parent(value) != NULL) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
if (nvlist_error(value) != 0) {
- RESTORE_ERRNO(nvlist_error(value));
+ ERRNO_SET(nvlist_error(value));
nvlist_destroy(value);
return (NULL);
}
- nvp = nvpair_allocv(NV_TYPE_NVLIST, (uint64_t)(uintptr_t)value, 0,
- namefmt, nameap);
+ nvp = nvpair_allocv(name, NV_TYPE_NVLIST, (uint64_t)(uintptr_t)value,
+ 0);
if (nvp == NULL)
nvlist_destroy(value);
else
@@ -1180,22 +943,21 @@ nvpair_movev_nvlist(nvlist_t *value, const char *namefmt, va_list nameap)
#ifndef _KERNEL
nvpair_t *
-nvpair_movev_descriptor(int value, const char *namefmt, va_list nameap)
+nvpair_move_descriptor(const char *name, int value)
{
nvpair_t *nvp;
- int serrno;
if (value < 0 || !fd_is_valid(value)) {
- errno = EBADF;
+ ERRNO_SET(EBADF);
return (NULL);
}
- nvp = nvpair_allocv(NV_TYPE_DESCRIPTOR, (uint64_t)value,
- sizeof(int64_t), namefmt, nameap);
+ nvp = nvpair_allocv(name, NV_TYPE_DESCRIPTOR, (uint64_t)value,
+ sizeof(int64_t));
if (nvp == NULL) {
- serrno = errno;
+ ERRNO_SAVE();
close(value);
- errno = serrno;
+ ERRNO_RESTORE();
}
return (nvp);
@@ -1203,23 +965,21 @@ nvpair_movev_descriptor(int value, const char *namefmt, va_list nameap)
#endif
nvpair_t *
-nvpair_movev_binary(void *value, size_t size, const char *namefmt,
- va_list nameap)
+nvpair_move_binary(const char *name, void *value, size_t size)
{
nvpair_t *nvp;
- int serrno;
if (value == NULL || size == 0) {
- RESTORE_ERRNO(EINVAL);
+ ERRNO_SET(EINVAL);
return (NULL);
}
- nvp = nvpair_allocv(NV_TYPE_BINARY, (uint64_t)(uintptr_t)value, size,
- namefmt, nameap);
+ nvp = nvpair_allocv(name, NV_TYPE_BINARY, (uint64_t)(uintptr_t)value,
+ size);
if (nvp == NULL) {
- SAVE_ERRNO(serrno);
+ ERRNO_SAVE();
nv_free(value);
- RESTORE_ERRNO(serrno);
+ ERRNO_RESTORE();
}
return (nvp);
@@ -1348,3 +1108,4 @@ nvpair_type_string(int type)
return ("<UNKNOWN>");
}
}
+
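
The ERRNO_SET()/ERRNO_SAVE()/ERRNO_RESTORE() macros that replace the old SAVE_ERRNO()/RESTORE_ERRNO() pair let the same source build in both kernel and userland, where only the latter has errno. A plausible sketch of their definitions, assuming the userland half keeps the old save/restore idiom (the authoritative spelling lives in the nv implementation header):

    #ifdef _KERNEL
    /* The kernel has no errno; the macros compile away. */
    #define ERRNO_SET(var)     do { } while (0)
    #define ERRNO_SAVE()       do { } while (0)
    #define ERRNO_RESTORE()    do { } while (0)
    #else
    #define ERRNO_SET(var)     do { errno = (var); } while (0)
    /* ERRNO_SAVE() opens a scope holding the saved errno... */
    #define ERRNO_SAVE()       do { int serrno; serrno = errno
    /* ...and ERRNO_RESTORE() restores it and closes the scope. */
    #define ERRNO_RESTORE()    errno = serrno; } while (0)
    #endif
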
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index f662ec2..cba656a 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -99,11 +99,7 @@ pid_t pid_max = PID_MAX;
long maxswzone; /* max swmeta KVA storage */
long maxbcache; /* max buffer cache KVA storage */
long maxpipekva; /* Limit on pipe KVA */
-#ifdef XEN
-int vm_guest = VM_GUEST_XEN;
-#else
int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? */
-#endif
u_long maxtsiz; /* max text size */
u_long dfldsiz; /* initial data size limit */
u_long maxdsiz; /* max data size */
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index cfc3ed7..93f7557 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ktr.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
+#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
@@ -79,12 +80,6 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#endif
-#ifdef XEN
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
-#endif
-
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
@@ -135,9 +130,6 @@ userret(struct thread *td, struct trapframe *frame)
* Let the scheduler adjust our priority etc.
*/
sched_userret(td);
-#ifdef XEN
- PT_UPDATES_FLUSH();
-#endif
/*
* Check for misbehavior.
@@ -172,11 +164,13 @@ userret(struct thread *td, struct trapframe *frame)
__func__, td, p->p_pid, td->td_name, curvnet,
(td->td_vnet_lpush != NULL) ? td->td_vnet_lpush : "N/A"));
#endif
-#ifdef RACCT
- PROC_LOCK(p);
- while (p->p_throttled == 1)
- msleep(p->p_racct, &p->p_mtx, 0, "racct", 0);
- PROC_UNLOCK(p);
+#ifdef RACCT
+ if (racct_enable) {
+ PROC_LOCK(p);
+ while (p->p_throttled == 1)
+ msleep(p->p_racct, &p->p_mtx, 0, "racct", 0);
+ PROC_UNLOCK(p);
+ }
#endif
}
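
The racct_enable guard introduced here recurs mechanically in the sysv_msg.c, sysv_sem.c and sysv_shm.c hunks that follow: resource accounting stays compiled in under options RACCT but is skipped at runtime unless enabled at boot. The guarded shape as a minimal template (p and RACCT_NMSGQ stand in for the specific call sites):

    #ifdef RACCT
        if (racct_enable) {
            PROC_LOCK(p);
            error = racct_add(p, RACCT_NMSGQ, 1);
            PROC_UNLOCK(p);
            if (error != 0)
                return (ENOSPC);
        }
    #endif
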
diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
index acc44710..59357c4 100644
--- a/sys/kern/sysv_msg.c
+++ b/sys/kern/sysv_msg.c
@@ -617,12 +617,14 @@ sys_msgget(td, uap)
goto done2;
}
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
- PROC_UNLOCK(td->td_proc);
- if (error != 0) {
- error = ENOSPC;
- goto done2;
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
+ PROC_UNLOCK(td->td_proc);
+ if (error != 0) {
+ error = ENOSPC;
+ goto done2;
+ }
}
#endif
DPRINTF(("msqid %d is available\n", msqid));
@@ -724,20 +726,22 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
#endif
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
- PROC_UNLOCK(td->td_proc);
- error = EAGAIN;
- goto done2;
- }
- saved_msgsz = msgsz;
- if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
- racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
+ PROC_UNLOCK(td->td_proc);
+ error = EAGAIN;
+ goto done2;
+ }
+ saved_msgsz = msgsz;
+ if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
+ racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
+ PROC_UNLOCK(td->td_proc);
+ error = EAGAIN;
+ goto done2;
+ }
PROC_UNLOCK(td->td_proc);
- error = EAGAIN;
- goto done2;
}
- PROC_UNLOCK(td->td_proc);
#endif
segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
@@ -994,7 +998,7 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
td->td_retval[0] = 0;
done3:
#ifdef RACCT
- if (error != 0) {
+ if (racct_enable && error != 0) {
PROC_LOCK(td->td_proc);
racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
index 6ff5789..d9324f4 100644
--- a/sys/kern/sysv_sem.c
+++ b/sys/kern/sysv_sem.c
@@ -915,12 +915,14 @@ sys_semget(struct thread *td, struct semget_args *uap)
goto done2;
}
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- error = racct_add(td->td_proc, RACCT_NSEM, nsems);
- PROC_UNLOCK(td->td_proc);
- if (error != 0) {
- error = ENOSPC;
- goto done2;
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ error = racct_add(td->td_proc, RACCT_NSEM, nsems);
+ PROC_UNLOCK(td->td_proc);
+ if (error != 0) {
+ error = ENOSPC;
+ goto done2;
+ }
}
#endif
DPRINTF(("semid %d is available\n", semid));
@@ -1009,12 +1011,15 @@ sys_semop(struct thread *td, struct semop_args *uap)
return (E2BIG);
} else {
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- if (nsops > racct_get_available(td->td_proc, RACCT_NSEMOP)) {
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ if (nsops >
+ racct_get_available(td->td_proc, RACCT_NSEMOP)) {
+ PROC_UNLOCK(td->td_proc);
+ return (E2BIG);
+ }
PROC_UNLOCK(td->td_proc);
- return (E2BIG);
}
- PROC_UNLOCK(td->td_proc);
#endif
sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK);
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index 274deda..3240a5f 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -651,17 +651,19 @@ shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode)
("segnum %d shmalloced %d", segnum, shmalloced));
shmseg = &shmsegs[segnum];
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
- PROC_UNLOCK(td->td_proc);
- return (ENOSPC);
- }
- if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
- racct_sub(td->td_proc, RACCT_NSHM, 1);
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
+ PROC_UNLOCK(td->td_proc);
+ return (ENOSPC);
+ }
+ if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
+ racct_sub(td->td_proc, RACCT_NSHM, 1);
+ PROC_UNLOCK(td->td_proc);
+ return (ENOMEM);
+ }
PROC_UNLOCK(td->td_proc);
- return (ENOMEM);
}
- PROC_UNLOCK(td->td_proc);
#endif
/*
@@ -672,10 +674,12 @@ shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode)
0, size, VM_PROT_DEFAULT, 0, cred);
if (shm_object == NULL) {
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- racct_sub(td->td_proc, RACCT_NSHM, 1);
- racct_sub(td->td_proc, RACCT_SHMSIZE, size);
- PROC_UNLOCK(td->td_proc);
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ racct_sub(td->td_proc, RACCT_NSHM, 1);
+ racct_sub(td->td_proc, RACCT_SHMSIZE, size);
+ PROC_UNLOCK(td->td_proc);
+ }
#endif
return (ENOMEM);
}
@@ -961,39 +965,39 @@ oshmctl(struct thread *td, struct oshmctl_args *uap)
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
+ if (uap->cmd != IPC_STAT) {
+ return (freebsd7_shmctl(td,
+ (struct freebsd7_shmctl_args *)uap));
+ }
SYSVSHM_LOCK();
shmseg = shm_find_segment(uap->shmid, true);
if (shmseg == NULL) {
SYSVSHM_UNLOCK();
return (EINVAL);
}
- switch (uap->cmd) {
- case IPC_STAT:
- error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
- if (error != 0)
- break;
+ error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
+ if (error != 0) {
+ SYSVSHM_UNLOCK();
+ return (error);
+ }
#ifdef MAC
- error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg,
- uap->cmd);
- if (error != 0)
- break;
-#endif
- ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
- outbuf.shm_segsz = shmseg->u.shm_segsz;
- outbuf.shm_cpid = shmseg->u.shm_cpid;
- outbuf.shm_lpid = shmseg->u.shm_lpid;
- outbuf.shm_nattch = shmseg->u.shm_nattch;
- outbuf.shm_atime = shmseg->u.shm_atime;
- outbuf.shm_dtime = shmseg->u.shm_dtime;
- outbuf.shm_ctime = shmseg->u.shm_ctime;
- outbuf.shm_handle = shmseg->object;
- error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
- break;
- default:
- error = freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap);
- break;
+ error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
+ if (error != 0) {
+ SYSVSHM_UNLOCK();
+ return (error);
}
+#endif
+ ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
+ outbuf.shm_segsz = shmseg->u.shm_segsz;
+ outbuf.shm_cpid = shmseg->u.shm_cpid;
+ outbuf.shm_lpid = shmseg->u.shm_lpid;
+ outbuf.shm_nattch = shmseg->u.shm_nattch;
+ outbuf.shm_atime = shmseg->u.shm_atime;
+ outbuf.shm_dtime = shmseg->u.shm_dtime;
+ outbuf.shm_ctime = shmseg->u.shm_ctime;
+ outbuf.shm_handle = shmseg->object;
SYSVSHM_UNLOCK();
+ error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
return (error);
#else
return (EINVAL);
@@ -1025,9 +1029,7 @@ sys_shmsys(struct thread *td, struct shmsys_args *uap)
return (ENOSYS);
if (uap->which < 0 || uap->which >= nitems(shmcalls))
return (EINVAL);
- SYSVSHM_LOCK();
error = (*shmcalls[uap->which])(td, &uap->a2);
- SYSVSHM_UNLOCK();
return (error);
}
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 8410ed9..93c7ed1 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -170,7 +170,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
if (uio->uio_rw == UIO_READ && vm_page_lookup(obj, idx) == NULL &&
!vm_pager_has_page(obj, idx, NULL, NULL)) {
VM_OBJECT_WUNLOCK(obj);
- return (uiomove(__DECONST(void *, zero_region), len, uio));
+ return (uiomove(__DECONST(void *, zero_region), tlen, uio));
}
/*
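
The len-to-tlen fix above matters because uiomove_object_page() handles one page per call: when a read hits a hole, only the within-page remainder may be zero-filled from zero_region, otherwise a multi-page read would copy past the current page. A sketch of the assumed surrounding arithmetic, computed earlier in the function:

    idx = OFF_TO_IDX(uio->uio_offset);      /* page index */
    offset = uio->uio_offset & PAGE_MASK;   /* offset within the page */
    tlen = MIN(PAGE_SIZE - offset, len);    /* per-page remainder */
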
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index c7e602e..0bfcf2d 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -59,10 +59,12 @@ __FBSDID("$FreeBSD$");
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/mount.h>
+#include <geom/geom.h>
#include <machine/atomic.h>
#include <vm/vm.h>
+#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
@@ -232,9 +234,10 @@ struct aiocblist {
int jobstate; /* (b) job state */
int inputcharge; /* (*) input blocks */
int outputcharge; /* (*) output blocks */
- struct buf *bp; /* (*) private to BIO backend,
- * buffer pointer
- */
+ struct bio *bp; /* (*) BIO backend BIO pointer */
+ struct buf *pbuf; /* (*) BIO backend buffer pointer */
+ struct vm_page *pages[btoc(MAXPHYS)+1]; /* BIO backend pages */
+ int npages; /* BIO backend number of pages */
struct proc *userproc; /* (*) user process */
struct ucred *cred; /* (*) active credential when created */
struct file *fd_file; /* (*) pointer to file structure */
@@ -243,7 +246,6 @@ struct aiocblist {
struct knlist klist; /* (a) list of knotes */
struct aiocb uaiocb; /* (*) kernel I/O control block */
ksiginfo_t ksi; /* (a) realtime signal info */
- struct task biotask; /* (*) private to BIO backend */
uint64_t seqno; /* (*) job number */
int pending; /* (a) number of pending I/O, aio_fsync only */
};
@@ -344,11 +346,10 @@ static void aio_process_mlock(struct aiocblist *aiocbe);
static int aio_newproc(int *);
int aio_aqueue(struct thread *td, struct aiocb *job,
struct aioliojob *lio, int type, struct aiocb_ops *ops);
-static void aio_physwakeup(struct buf *bp);
+static void aio_physwakeup(struct bio *bp);
static void aio_proc_rundown(void *arg, struct proc *p);
static void aio_proc_rundown_exec(void *arg, struct proc *p, struct image_params *imgp);
static int aio_qphysio(struct proc *p, struct aiocblist *iocb);
-static void biohelper(void *, int);
static void aio_daemon(void *param);
static void aio_swake_cb(struct socket *, struct sockbuf *);
static int aio_unload(void);
@@ -1294,13 +1295,15 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe)
{
struct aiocb *cb;
struct file *fp;
- struct buf *bp;
+ struct bio *bp;
+ struct buf *pbuf;
struct vnode *vp;
struct cdevsw *csw;
struct cdev *dev;
struct kaioinfo *ki;
struct aioliojob *lj;
- int error, ref;
+ int error, ref, unmap, poff;
+ vm_prot_t prot;
cb = &aiocbe->uaiocb;
fp = aiocbe->fd_file;
@@ -1309,107 +1312,121 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe)
return (-1);
vp = fp->f_vnode;
-
- /*
- * If its not a disk, we don't want to return a positive error.
- * It causes the aio code to not fall through to try the thread
- * way when you're talking to a regular file.
- */
- if (!vn_isdisk(vp, &error)) {
- if (error == ENOTBLK)
- return (-1);
- else
- return (error);
- }
-
- if (vp->v_bufobj.bo_bsize == 0)
- return (-1);
-
- if (cb->aio_nbytes % vp->v_bufobj.bo_bsize)
+ if (vp->v_type != VCHR)
return (-1);
-
- if (cb->aio_nbytes >
- MAXPHYS - (((vm_offset_t) cb->aio_buf) & PAGE_MASK))
+ if (vp->v_bufobj.bo_bsize == 0)
return (-1);
-
- ki = p->p_aioinfo;
- if (ki->kaio_buffer_count >= ki->kaio_ballowed_count)
+ if (cb->aio_nbytes % vp->v_bufobj.bo_bsize)
return (-1);
ref = 0;
csw = devvn_refthread(vp, &dev, &ref);
if (csw == NULL)
return (ENXIO);
+
+ if ((csw->d_flags & D_DISK) == 0) {
+ error = -1;
+ goto unref;
+ }
if (cb->aio_nbytes > dev->si_iosize_max) {
error = -1;
goto unref;
}
- /* Create and build a buffer header for a transfer. */
- bp = (struct buf *)getpbuf(NULL);
- BUF_KERNPROC(bp);
+ ki = p->p_aioinfo;
+ poff = (vm_offset_t)cb->aio_buf & PAGE_MASK;
+ unmap = ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed);
+ if (unmap) {
+ if (cb->aio_nbytes > MAXPHYS) {
+ error = -1;
+ goto unref;
+ }
+ pbuf = NULL;
+ } else {
+ if (cb->aio_nbytes > MAXPHYS - poff) {
+ error = -1;
+ goto unref;
+ }
+ if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) {
+ error = -1;
+ goto unref;
+ }
+ }
+ aiocbe->bp = bp = g_alloc_bio();
+ if (!unmap) {
+ aiocbe->pbuf = pbuf = (struct buf *)getpbuf(NULL);
+ BUF_KERNPROC(pbuf);
+ }
AIO_LOCK(ki);
ki->kaio_count++;
- ki->kaio_buffer_count++;
+ if (!unmap)
+ ki->kaio_buffer_count++;
lj = aiocbe->lio;
if (lj)
lj->lioj_count++;
- AIO_UNLOCK(ki);
-
- /*
- * Get a copy of the kva from the physical buffer.
- */
- error = 0;
-
- bp->b_bcount = cb->aio_nbytes;
- bp->b_bufsize = cb->aio_nbytes;
- bp->b_iodone = aio_physwakeup;
- bp->b_saveaddr = bp->b_data;
- bp->b_data = (void *)(uintptr_t)cb->aio_buf;
- bp->b_offset = cb->aio_offset;
- bp->b_iooffset = cb->aio_offset;
- bp->b_blkno = btodb(cb->aio_offset);
- bp->b_iocmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ;
-
- /*
- * Bring buffer into kernel space.
- */
- if (vmapbuf(bp, (dev->si_flags & SI_UNMAPPED) == 0) < 0) {
- error = EFAULT;
- goto doerror;
- }
-
- AIO_LOCK(ki);
- aiocbe->bp = bp;
- bp->b_caller1 = (void *)aiocbe;
TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist);
TAILQ_INSERT_TAIL(&ki->kaio_all, aiocbe, allist);
aiocbe->jobstate = JOBST_JOBQBUF;
cb->_aiocb_private.status = cb->aio_nbytes;
AIO_UNLOCK(ki);
- atomic_add_int(&num_queue_count, 1);
- atomic_add_int(&num_buf_aio, 1);
-
- bp->b_error = 0;
+ bp->bio_length = cb->aio_nbytes;
+ bp->bio_bcount = cb->aio_nbytes;
+ bp->bio_done = aio_physwakeup;
+ bp->bio_data = (void *)(uintptr_t)cb->aio_buf;
+ bp->bio_offset = cb->aio_offset;
+ bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ;
+ bp->bio_dev = dev;
+ bp->bio_caller1 = (void *)aiocbe;
+
+ prot = VM_PROT_READ;
+ if (cb->aio_lio_opcode == LIO_READ)
+ prot |= VM_PROT_WRITE; /* Less backwards than it looks */
+ if ((aiocbe->npages = vm_fault_quick_hold_pages(
+ &curproc->p_vmspace->vm_map,
+ (vm_offset_t)bp->bio_data, bp->bio_length, prot, aiocbe->pages,
+ sizeof(aiocbe->pages)/sizeof(aiocbe->pages[0]))) < 0) {
+ error = EFAULT;
+ goto doerror;
+ }
+ if (!unmap) {
+ pmap_qenter((vm_offset_t)pbuf->b_data,
+ aiocbe->pages, aiocbe->npages);
+ bp->bio_data = pbuf->b_data + poff;
+ } else {
+ bp->bio_ma = aiocbe->pages;
+ bp->bio_ma_n = aiocbe->npages;
+ bp->bio_ma_offset = poff;
+ bp->bio_data = unmapped_buf;
+ bp->bio_flags |= BIO_UNMAPPED;
+ }
- TASK_INIT(&aiocbe->biotask, 0, biohelper, aiocbe);
+ atomic_add_int(&num_queue_count, 1);
+ if (!unmap)
+ atomic_add_int(&num_buf_aio, 1);
/* Perform transfer. */
- dev_strategy_csw(dev, csw, bp);
+ csw->d_strategy(bp);
dev_relthread(dev, ref);
return (0);
doerror:
AIO_LOCK(ki);
+ aiocbe->jobstate = JOBST_NULL;
+ TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
+ TAILQ_REMOVE(&ki->kaio_all, aiocbe, allist);
ki->kaio_count--;
- ki->kaio_buffer_count--;
+ if (!unmap)
+ ki->kaio_buffer_count--;
if (lj)
lj->lioj_count--;
- aiocbe->bp = NULL;
AIO_UNLOCK(ki);
- relpbuf(bp, NULL);
+ if (pbuf) {
+ relpbuf(pbuf, NULL);
+ aiocbe->pbuf = NULL;
+ }
+ g_destroy_bio(bp);
+ aiocbe->bp = NULL;
unref:
dev_relthread(dev, ref);
return (error);
@@ -1787,8 +1804,6 @@ no_kqueue:
}
#endif
queueit:
- /* No buffer for daemon I/O. */
- aiocbe->bp = NULL;
atomic_add_int(&num_queue_count, 1);
AIO_LOCK(ki);
@@ -2425,54 +2440,43 @@ sys_lio_listio(struct thread *td, struct lio_listio_args *uap)
return (error);
}
-/*
- * Called from interrupt thread for physio, we should return as fast
- * as possible, so we schedule a biohelper task.
- */
static void
-aio_physwakeup(struct buf *bp)
+aio_physwakeup(struct bio *bp)
{
- struct aiocblist *aiocbe;
-
- aiocbe = (struct aiocblist *)bp->b_caller1;
- taskqueue_enqueue(taskqueue_aiod_bio, &aiocbe->biotask);
-}
-
-/*
- * Task routine to perform heavy tasks, process wakeup, and signals.
- */
-static void
-biohelper(void *context, int pending)
-{
- struct aiocblist *aiocbe = context;
- struct buf *bp;
+ struct aiocblist *aiocbe = (struct aiocblist *)bp->bio_caller1;
struct proc *userp;
struct kaioinfo *ki;
int nblks;
+ /* Release mapping into kernel space. */
+ if (aiocbe->pbuf) {
+ pmap_qremove((vm_offset_t)aiocbe->pbuf->b_data, aiocbe->npages);
+ relpbuf(aiocbe->pbuf, NULL);
+ aiocbe->pbuf = NULL;
+ atomic_subtract_int(&num_buf_aio, 1);
+ }
+ vm_page_unhold_pages(aiocbe->pages, aiocbe->npages);
+
bp = aiocbe->bp;
+ aiocbe->bp = NULL;
userp = aiocbe->userproc;
ki = userp->p_aioinfo;
AIO_LOCK(ki);
- aiocbe->uaiocb._aiocb_private.status -= bp->b_resid;
+ aiocbe->uaiocb._aiocb_private.status -= bp->bio_resid;
aiocbe->uaiocb._aiocb_private.error = 0;
- if (bp->b_ioflags & BIO_ERROR)
- aiocbe->uaiocb._aiocb_private.error = bp->b_error;
+ if (bp->bio_flags & BIO_ERROR)
+ aiocbe->uaiocb._aiocb_private.error = bp->bio_error;
nblks = btodb(aiocbe->uaiocb.aio_nbytes);
if (aiocbe->uaiocb.aio_lio_opcode == LIO_WRITE)
aiocbe->outputcharge += nblks;
else
aiocbe->inputcharge += nblks;
- aiocbe->bp = NULL;
TAILQ_REMOVE(&userp->p_aioinfo->kaio_bufqueue, aiocbe, plist);
ki->kaio_buffer_count--;
aio_bio_done_notify(userp, aiocbe, DONE_BUF);
AIO_UNLOCK(ki);
- /* Release mapping into kernel space. */
- vunmapbuf(bp);
- relpbuf(bp, NULL);
- atomic_subtract_int(&num_buf_aio, 1);
+ g_destroy_bio(bp);
}
/* syscall - wait for the next completion of an aio request */
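
Condensed to its skeleton, the reworked AIO submission path makes one mapped-versus-unmapped decision up front and everything else follows from it; a schematic only, with error handling, locking and accounting omitted (names as in the hunks above):

    unmap = (dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed;
    npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
        (vm_offset_t)cb->aio_buf, cb->aio_nbytes, prot, pages, nitems(pages));
    if (unmap) {
        /* Hand the held pages to the driver directly. */
        bp->bio_ma = pages;
        bp->bio_ma_n = npages;
        bp->bio_ma_offset = poff;
        bp->bio_flags |= BIO_UNMAPPED;
    } else {
        /* Map the held pages into a pbuf and use its KVA. */
        pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages);
        bp->bio_data = pbuf->b_data + poff;
    }
    csw->d_strategy(bp);
    /* aio_physwakeup() later undoes the mapping and unholds the pages. */
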
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index dfe2997..5ac04ac 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -113,8 +113,8 @@ static void vfs_setdirty_locked_object(struct buf *bp);
static void vfs_vmio_release(struct buf *bp);
static int vfs_bio_clcheck(struct vnode *vp, int size,
daddr_t lblkno, daddr_t blkno);
-static int buf_flush(int);
-static int flushbufqueues(int, int);
+static int buf_flush(struct vnode *vp, int);
+static int flushbufqueues(struct vnode *, int, int);
static void buf_daemon(void);
static void bremfreel(struct buf *bp);
static __inline void bd_wakeup(void);
@@ -805,6 +805,7 @@ bufinit(void)
struct buf *bp;
int i;
+ CTASSERT(MAXBCACHEBUF >= MAXBSIZE);
mtx_init(&bqclean, "bufq clean lock", NULL, MTX_DEF);
mtx_init(&bqdirty, "bufq dirty lock", NULL, MTX_DEF);
mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF);
@@ -846,8 +847,8 @@ bufinit(void)
* by the system.
*/
maxbufspace = (long)nbuf * BKVASIZE;
- hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
- lobufspace = hibufspace - MAXBSIZE;
+ hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10);
+ lobufspace = hibufspace - MAXBCACHEBUF;
/*
* Note: The 16 MiB upper limit for hirunningspace was chosen
@@ -857,9 +858,9 @@ bufinit(void)
* The lower 1 MiB limit is the historical upper limit for
* hirunningspace.
*/
- hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBSIZE),
+ hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF),
16 * 1024 * 1024), 1024 * 1024);
- lorunningspace = roundup((hirunningspace * 2) / 3, MAXBSIZE);
+ lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF);
/*
* Limit the amount of malloc memory since it is wired permanently into
@@ -2096,7 +2097,7 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo,
{
struct thread *td;
char *waitmsg;
- int cnt, error, flags, norunbuf, wait;
+ int error, fl, flags, norunbuf;
mtx_assert(&bqclean, MA_OWNED);
@@ -2118,8 +2119,6 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo,
return;
td = curthread;
- cnt = 0;
- wait = MNT_NOWAIT;
rw_wlock(&nblock);
while ((needsbuffer & flags) != 0) {
if (vp != NULL && vp->v_type != VCHR &&
@@ -2133,20 +2132,23 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo,
* cannot be achieved by the buf_daemon, that
* cannot lock the vnode.
*/
- if (cnt++ > 2)
- wait = MNT_WAIT;
- ASSERT_VOP_LOCKED(vp, "bufd_helper");
- error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 :
- vn_lock(vp, LK_TRYUPGRADE);
- if (error == 0) {
- /* play bufdaemon */
- norunbuf = curthread_pflags_set(TDP_BUFNEED |
- TDP_NORUNNINGBUF);
- VOP_FSYNC(vp, wait, td);
- atomic_add_long(&notbufdflushes, 1);
- curthread_pflags_restore(norunbuf);
- }
+ norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
+ (td->td_pflags & TDP_NORUNNINGBUF);
+
+ /*
+ * Play bufdaemon. The getnewbuf() function
+ * may be called while the thread owns lock
+ * for another dirty buffer for the same
+ * vnode, which makes it impossible to use
+ * VOP_FSYNC() there, due to the buffer lock
+ * recursion.
+ */
+ td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
+ fl = buf_flush(vp, flushbufqtarget);
+ td->td_pflags &= norunbuf;
rw_wlock(&nblock);
+ if (fl != 0)
+ continue;
if ((needsbuffer & flags) == 0)
break;
}
@@ -2565,18 +2567,20 @@ static struct kproc_desc buf_kp = {
SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
static int
-buf_flush(int target)
+buf_flush(struct vnode *vp, int target)
{
int flushed;
- flushed = flushbufqueues(target, 0);
+ flushed = flushbufqueues(vp, target, 0);
if (flushed == 0) {
/*
* Could not find any buffers without rollback
* dependencies, so just write the first one
* in the hopes of eventually making progress.
*/
- flushed = flushbufqueues(target, 1);
+ if (vp != NULL && target > 2)
+ target /= 2;
+ flushbufqueues(vp, target, 1);
}
return (flushed);
}
@@ -2613,7 +2617,7 @@ buf_daemon()
* the I/O system.
*/
while (numdirtybuffers > lodirty) {
- if (buf_flush(numdirtybuffers - lodirty) == 0)
+ if (buf_flush(NULL, numdirtybuffers - lodirty) == 0)
break;
kern_yield(PRI_USER);
}
@@ -2668,7 +2672,7 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps,
0, "Number of buffers flushed with dependecies that require rollbacks");
static int
-flushbufqueues(int target, int flushdeps)
+flushbufqueues(struct vnode *lvp, int target, int flushdeps)
{
struct buf *sentinel;
struct vnode *vp;
@@ -2678,6 +2682,7 @@ flushbufqueues(int target, int flushdeps)
int flushed;
int queue;
int error;
+ bool unlock;
flushed = 0;
queue = QUEUE_DIRTY;
@@ -2699,8 +2704,18 @@ flushbufqueues(int target, int flushdeps)
mtx_unlock(&bqdirty);
break;
}
- KASSERT(bp->b_qindex != QUEUE_SENTINEL,
- ("parallel calls to flushbufqueues() bp %p", bp));
+ /*
+ * Skip sentinels inserted by other invocations of
+ * flushbufqueues(), taking care not to reorder them.
+ *
+ * Only flush the buffers that belong to the
+ * vnode locked by the curthread.
+ */
+ if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL &&
+ bp->b_vp != lvp)) {
+ mtx_unlock(&bqdirty);
+ continue;
+ }
error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL);
mtx_unlock(&bqdirty);
if (error != 0)
@@ -2748,16 +2763,37 @@ flushbufqueues(int target, int flushdeps)
BUF_UNLOCK(bp);
continue;
}
- error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ if (lvp == NULL) {
+ unlock = true;
+ error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ } else {
+ ASSERT_VOP_LOCKED(vp, "getbuf");
+ unlock = false;
+ error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 :
+ vn_lock(vp, LK_TRYUPGRADE);
+ }
if (error == 0) {
CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X",
bp, bp->b_vp, bp->b_flags);
- vfs_bio_awrite(bp);
+ if (curproc == bufdaemonproc) {
+ vfs_bio_awrite(bp);
+ } else {
+ bremfree(bp);
+ bwrite(bp);
+ notbufdflushes++;
+ }
vn_finished_write(mp);
- VOP_UNLOCK(vp, 0);
+ if (unlock)
+ VOP_UNLOCK(vp, 0);
flushwithdeps += hasdeps;
flushed++;
- if (runningbufspace > hirunningspace)
+
+ /*
+ * Sleeping on runningbufspace while holding
+ * vnode lock leads to deadlock.
+ */
+ if (curproc == bufdaemonproc &&
+ runningbufspace > hirunningspace)
waitrunningbufspace();
continue;
}
@@ -3073,8 +3109,9 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
("GB_KVAALLOC only makes sense with GB_UNMAPPED"));
ASSERT_VOP_LOCKED(vp, "getblk");
- if (size > MAXBSIZE)
- panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
+ if (size > MAXBCACHEBUF)
+ panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size,
+ MAXBCACHEBUF);
if (!unmapped_buf_allowed)
flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
diff --git a/sys/net/zlib.c b/sys/libkern/zlib.c
index b348248..f5c3854 100644
--- a/sys/net/zlib.c
+++ b/sys/libkern/zlib.c
@@ -54,7 +54,7 @@
#define _Z_UTIL_H
#ifdef _KERNEL
-#include <net/zlib.h>
+#include <sys/zlib.h>
#else
#include "zlib.h"
#endif
diff --git a/sys/mips/mips/busdma_machdep.c b/sys/mips/mips/busdma_machdep.c
index 083e8f5..90ec399 100644
--- a/sys/mips/mips/busdma_machdep.c
+++ b/sys/mips/mips/busdma_machdep.c
@@ -1359,8 +1359,8 @@ add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
/* Page offset needs to be preserved. */
- bpage->vaddr |= vaddr & PAGE_MASK;
- bpage->busaddr |= vaddr & PAGE_MASK;
+ bpage->vaddr |= addr & PAGE_MASK;
+ bpage->busaddr |= addr & PAGE_MASK;
}
bpage->datavaddr = vaddr;
bpage->dataaddr = addr;
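
The bounce-page change is subtler than it looks: with BUS_DMA_KEEP_PG_OFFSET the bounce page must reproduce the offset-within-page of the data being bounced, and that offset has to come from the data address addr, since vaddr can be zero for loads described by physical address (presumably the motivating case). In context:

    if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
        /* Page offset of the data, not of the (possibly absent) KVA. */
        bpage->vaddr |= addr & PAGE_MASK;
        bpage->busaddr |= addr & PAGE_MASK;
    }
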
diff --git a/sys/modules/ix/Makefile b/sys/modules/ix/Makefile
index 5a5485d..1f30cb0 100644
--- a/sys/modules/ix/Makefile
+++ b/sys/modules/ix/Makefile
@@ -9,7 +9,7 @@ SRCS += if_ix.c ix_txrx.c
# Shared source
SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c
SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c
-SRCS += ixgbe_82599.c ixgbe_82598.c ixgbe_x540.c
+SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c
CFLAGS+= -I${.CURDIR}/../../dev/ixgbe -DSMP
.include <bsd.kmod.mk>
diff --git a/sys/modules/ixv/Makefile b/sys/modules/ixv/Makefile
index 20ecaf1..f8ce347 100644
--- a/sys/modules/ixv/Makefile
+++ b/sys/modules/ixv/Makefile
@@ -9,7 +9,7 @@ SRCS += if_ixv.c ix_txrx.c
# Shared source
SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c
SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c
-SRCS += ixgbe_82599.c ixgbe_82598.c ixgbe_x540.c
+SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c
CFLAGS+= -I${.CURDIR}/../../dev/ixgbe -DSMP
.include <bsd.kmod.mk>
diff --git a/sys/modules/oce/Makefile b/sys/modules/oce/Makefile
index 4821533..95e828d 100644
--- a/sys/modules/oce/Makefile
+++ b/sys/modules/oce/Makefile
@@ -3,7 +3,7 @@
#
.PATH: ${.CURDIR}/../../dev/oce
-KMOD = oce
+KMOD = if_oce
SRCS = oce_if.c oce_hw.c oce_mbox.c oce_util.c oce_queue.c oce_sysctl.c
SRCS += bus_if.h device_if.h pci_if.h opt_inet.h opt_inet6.h
diff --git a/sys/modules/zlib/Makefile b/sys/modules/zlib/Makefile
index 0a475b5..235d1c7 100644
--- a/sys/modules/zlib/Makefile
+++ b/sys/modules/zlib/Makefile
@@ -1,6 +1,6 @@
# $FreeBSD$
-.PATH: ${.CURDIR}/../../net
+.PATH: ${.CURDIR}/../../libkern
KMOD= zlib
SRCS= zlib.c
diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c
index 0753e0a..462c907 100644
--- a/sys/net/if_vlan.c
+++ b/sys/net/if_vlan.c
@@ -1705,27 +1705,6 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
break;
- case SIOCSIFCAP:
- VLAN_LOCK();
- if (TRUNK(ifv) != NULL) {
- p = PARENT(ifv);
- VLAN_UNLOCK();
- if ((p->if_type != IFT_ETHER) &&
- (ifr->ifr_reqcap & IFCAP_VLAN_HWTAGGING) == 0) {
- error = EINVAL;
- break;
- }
- error = (*p->if_ioctl)(p, cmd, data);
- if (error)
- break;
- /* Propogate vlan interface capabilities */
- vlan_trunk_capabilities(p);
- } else {
- VLAN_UNLOCK();
- error = EINVAL;
- }
- break;
-
default:
error = EINVAL;
break;
diff --git a/sys/net/netisr.c b/sys/net/netisr.c
index 178c3cb..4b3576d 100644
--- a/sys/net/netisr.c
+++ b/sys/net/netisr.c
@@ -156,10 +156,13 @@ SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
/*
* Allow the administrator to limit the number of threads (CPUs) to use for
* netisr. We don't check netisr_maxthreads before creating the thread for
- * CPU 0, so in practice we ignore values <= 1. This must be set at boot.
- * We will create at most one thread per CPU.
+ * CPU 0. This must be set at boot. We will create at most one thread per CPU.
+ * By default we initialize this to 1, which assigns just one CPU (CPU 0)
+ * and therefore only one workstream. If set to -1, netisr uses all CPUs
+ * (mp_ncpus) and has that many workstreams, one workstream per
+ * thread (CPU).
*/
-static int netisr_maxthreads = -1; /* Max number of threads. */
+static int netisr_maxthreads = 1; /* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
&netisr_maxthreads, 0,
"Use at most this many CPUs for netisr processing");
@@ -1120,8 +1123,10 @@ netisr_init(void *arg)
KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));
NETISR_LOCK_INIT();
- if (netisr_maxthreads < 1)
- netisr_maxthreads = 1;
+ if (netisr_maxthreads == 0 || netisr_maxthreads < -1)
+ netisr_maxthreads = 1; /* default behavior */
+ else if (netisr_maxthreads == -1)
+ netisr_maxthreads = mp_ncpus; /* use max cpus */
if (netisr_maxthreads > mp_ncpus) {
printf("netisr_init: forcing maxthreads from %d to %d\n",
netisr_maxthreads, mp_ncpus);
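
With the new default, netisr runs a single workstream on CPU 0, and the old one-thread-per-CPU behavior must be requested explicitly at boot, e.g. in loader.conf (tunable name from the SYSCTL_INT above):

    net.isr.maxthreads="-1"    # one netisr workstream per CPU
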
diff --git a/sys/netgraph/ng_deflate.c b/sys/netgraph/ng_deflate.c
index da68e49..be52942 100644
--- a/sys/netgraph/ng_deflate.c
+++ b/sys/netgraph/ng_deflate.c
@@ -39,8 +39,7 @@
#include <sys/endian.h>
#include <sys/errno.h>
#include <sys/syslog.h>
-
-#include <net/zlib.h>
+#include <sys/zlib.h>
#include <netgraph/ng_message.h>
#include <netgraph/netgraph.h>
diff --git a/sys/netinet/in_kdtrace.c b/sys/netinet/in_kdtrace.c
index d37e3c0..edcc853 100644
--- a/sys/netinet/in_kdtrace.c
+++ b/sys/netinet/in_kdtrace.c
@@ -102,6 +102,9 @@ SDT_PROBE_DEFINE5_XLATE(tcp, , , send,
"struct tcpcb *", "tcpsinfo_t *" ,
"struct tcphdr *", "tcpinfo_t *");
+SDT_PROBE_DEFINE1_XLATE(tcp, , , siftr,
+ "struct pkt_node *", "siftrinfo_t *");
+
SDT_PROBE_DEFINE6_XLATE(tcp, , , state__change,
"void *", "void *",
"struct tcpcb *", "csinfo_t *",
diff --git a/sys/netinet/in_kdtrace.h b/sys/netinet/in_kdtrace.h
index 07b3e3a..c0511d0 100644
--- a/sys/netinet/in_kdtrace.h
+++ b/sys/netinet/in_kdtrace.h
@@ -32,6 +32,8 @@
SDT_PROBE6(ip, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
#define UDP_PROBE(probe, arg0, arg1, arg2, arg3, arg4) \
SDT_PROBE5(udp, , , probe, arg0, arg1, arg2, arg3, arg4)
+#define TCP_PROBE1(probe, arg0) \
+ SDT_PROBE1(tcp, , , probe, arg0)
#define TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \
SDT_PROBE5(tcp, , , probe, arg0, arg1, arg2, arg3, arg4)
#define TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5) \
@@ -51,6 +53,7 @@ SDT_PROBE_DECLARE(tcp, , , connect__refused);
SDT_PROBE_DECLARE(tcp, , , connect__request);
SDT_PROBE_DECLARE(tcp, , , receive);
SDT_PROBE_DECLARE(tcp, , , send);
+SDT_PROBE_DECLARE(tcp, , , siftr);
SDT_PROBE_DECLARE(tcp, , , state__change);
SDT_PROBE_DECLARE(udp, , , receive);
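
Once siftr.ko is loaded, the new probe is visible to userland as tcp:::siftr; a smoke test that makes no assumptions about the members of the siftrinfo_t translator:

    # dtrace -n 'tcp:::siftr { @fires = count(); }'
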
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index 9c53793..d1764bb 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -40,10 +40,12 @@
#define IPFW_MAX_SETS 32 /* Number of sets supported by ipfw*/
/*
- * Default number of ipfw tables.
+ * Compat values for old clients
*/
+#ifndef _KERNEL
#define IPFW_TABLES_MAX 65535
#define IPFW_TABLES_DEFAULT 128
+#endif
/*
* Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit
@@ -963,7 +965,6 @@ typedef struct _ipfw_ta_info {
uint64_t spare1;
} ipfw_ta_info;
-#define IPFW_OBJTYPE_TABLE 1
typedef struct _ipfw_obj_header {
ip_fw3_opheader opheader; /* IP_FW3 opcode */
uint32_t spare;
diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c
index 1a643fb..47f683d 100644
--- a/sys/netinet/ip_ipsec.c
+++ b/sys/netinet/ip_ipsec.c
@@ -199,6 +199,9 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error)
/* NB: callee frees mbuf */
*error = ipsec4_process_packet(*m, sp->req);
+ /* Release SP if an error occurred */
+ if (*error != 0)
+ KEY_FREESP(&sp);
if (*error == EJUSTRETURN) {
/*
* We had a SP with a level of 'use' and no SA. We
@@ -234,13 +237,9 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error)
/* No IPsec processing for this packet. */
}
done:
- if (sp != NULL)
- KEY_FREESP(&sp);
- return 0;
+ return (0);
reinjected:
- if (sp != NULL)
- KEY_FREESP(&sp);
- return -1;
+ return (-1);
bad:
if (sp != NULL)
KEY_FREESP(&sp);
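
The reference-counting contract changes here and in the ip6_forward.c and ip6_ipsec.c hunks below: on success ipsec[46]_process_packet() now consumes the caller's SP reference (the xform callback releases it), so the caller frees the SP only on failure. Reduced to its shape:

    /* Sketch of the new ownership rule; surrounding logic elided. */
    *error = ipsec4_process_packet(*m, sp->req);    /* consumes mbuf */
    if (*error != 0)
        KEY_FREESP(&sp);    /* failure: the SP reference is still ours */
    /* success: the reference travels with the queued request */
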
diff --git a/sys/netinet/libalias/libalias.3 b/sys/netinet/libalias/libalias.3
index 45e7fa4..543420b 100644
--- a/sys/netinet/libalias/libalias.3
+++ b/sys/netinet/libalias/libalias.3
@@ -173,10 +173,12 @@ Mainly useful for debugging when the log file is viewed continuously with
.It Dv PKT_ALIAS_DENY_INCOMING
If this mode bit is set, all incoming packets associated with new TCP
connections or new UDP transactions will be marked for being ignored
-.Fn ( LibAliasIn
+.Po
+.Fn LibAliasIn
returns
.Dv PKT_ALIAS_IGNORED
-code)
+code
+.Pc
by the calling program.
Response packets to connections or transactions initiated from the packet
aliasing host or local network will be unaffected.
@@ -1001,7 +1003,7 @@ If this results in a conflict, then port numbers are randomly chosen until
a unique aliasing link can be established.
In an alternate operating mode, the first choice of an aliasing port is also
random and unrelated to the local port number.
-.Sh MODULAR ARCHITECTURE (AND Xr ipfw 4 Sh SUPPORT)
+.Sh MODULAR ARCHITECTURE Po AND Xr ipfw 4 SUPPORT Pc
One of the latest improvements to
.Nm
was to make its support
diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c
index ea3d3e7..d3b4855 100644
--- a/sys/netinet/sctp_indata.c
+++ b/sys/netinet/sctp_indata.c
@@ -3614,24 +3614,17 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
send_s = asoc->sending_seq;
}
if (SCTP_TSN_GE(cumack, send_s)) {
-#ifndef INVARIANTS
struct mbuf *op_err;
char msg[SCTP_DIAG_INFO_LEN];
-#endif
-#ifdef INVARIANTS
- panic("Impossible sack 1");
-#else
-
*abort_now = 1;
/* XXX */
- snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal then TSN %8.8x",
+ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
cumack, send_s);
op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
return;
-#endif
}
}
asoc->this_sack_highest_gap = cumack;
@@ -4195,7 +4188,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup,
hopeless_peer:
*abort_now = 1;
/* XXX */
- snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal then TSN %8.8x",
+ snprintf(msg, sizeof(msg), "Cum ack %8.8x greater or equal than TSN %8.8x",
cum_ack, send_s);
op_err = sctp_generate_cause(SCTP_CAUSE_PROTOCOL_VIOLATION, msg);
stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c
index 2cd58ad..6b31d7b 100644
--- a/sys/netinet/siftr.c
+++ b/sys/netinet/siftr.c
@@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
+#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -86,6 +87,7 @@ __FBSDID("$FreeBSD$");
#include <net/pfil.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -811,6 +813,8 @@ siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp,
* maximum pps throughput processing when SIFTR is loaded and enabled.
*/
microtime(&pn->tval);
+ TCP_PROBE1(siftr, &pn);
+
}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index d7da6c6..d75221c 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2159,6 +2159,7 @@ tcp_signature_do_compute(struct mbuf *m, int len, int optlen,
break;
#endif
default:
+ KEY_FREESAV(&sav);
return (-1);
/* NOTREACHED */
break;
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index da48615..1ba74d8 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -251,6 +251,13 @@ tcp_twstart(struct tcpcb *tp)
}
}
+
+ /*
+ * For use only by DTrace. We do not reference the state
+ * after this point so modifying it in place is not a problem.
+ */
+ tcp_state_change(tp, TCPS_TIME_WAIT);
+
tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
if (tw == NULL) {
/*
diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c
index 08234d5..83e58c1 100644
--- a/sys/netinet6/ip6_forward.c
+++ b/sys/netinet6/ip6_forward.c
@@ -248,8 +248,7 @@ ip6_forward(struct mbuf *m, int srcrt)
/*
* when the kernel forwards a packet, it is not proper to apply
- * IPsec transport mode to the packet is not proper. this check
- * avoid from this.
+ * IPsec transport mode to the packet. This check avoids that.
* at present, if there is even a transport mode SA request in the
* security policy, the kernel does not apply IPsec to the packet.
* this check is not enough because the following case is valid.
@@ -283,9 +282,9 @@ ip6_forward(struct mbuf *m, int srcrt)
* ipsec6_proces_packet will send the packet using ip6_output
*/
error = ipsec6_process_packet(m, sp->req);
-
- KEY_FREESP(&sp);
-
+ /* Release SP if an error occurred */
+ if (error != 0)
+ KEY_FREESP(&sp);
if (error == EJUSTRETURN) {
/*
* We had a SP with a level of 'use' and no SA. We
diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c
index 0a416cd..e6e16ed 100644
--- a/sys/netinet6/ip6_ipsec.c
+++ b/sys/netinet6/ip6_ipsec.c
@@ -213,7 +213,9 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error)
/* NB: callee frees mbuf */
*error = ipsec6_process_packet(*m, sp->req);
-
+ /* Release SP if an error occurred */
+ if (*error != 0)
+ KEY_FREESP(&sp);
if (*error == EJUSTRETURN) {
/*
* We had a SP with a level of 'use' and no SA. We
@@ -249,13 +251,9 @@ ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error)
/* No IPsec processing for this packet. */
}
done:
- if (sp != NULL)
- KEY_FREESP(&sp);
- return 0;
+ return (0);
reinjected:
- if (sp != NULL)
- KEY_FREESP(&sp);
- return -1;
+ return (-1);
bad:
if (sp != NULL)
KEY_FREESP(&sp);
diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c
index abad3d2..266f6d0 100644
--- a/sys/netipsec/ipsec.c
+++ b/sys/netipsec/ipsec.c
@@ -238,6 +238,7 @@ SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats,
struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics.");
#endif /* INET6 */
+static int ipsec_in_reject(struct secpolicy *, struct mbuf *);
static int ipsec_setspidx_inpcb(struct mbuf *, struct inpcb *);
static int ipsec_setspidx(struct mbuf *, struct secpolicyindex *, int);
static void ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *, int);
@@ -1191,7 +1192,7 @@ ipsec_get_reqlevel(struct ipsecrequest *isr)
* 0: valid
* 1: invalid
*/
-int
+static int
ipsec_in_reject(struct secpolicy *sp, struct mbuf *m)
{
struct ipsecrequest *isr;
diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h
index 7f4c25e..442387a 100644
--- a/sys/netipsec/ipsec.h
+++ b/sys/netipsec/ipsec.h
@@ -309,7 +309,6 @@ struct inpcb;
extern int ipsec_init_policy(struct socket *so, struct inpcbpolicy **);
extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *);
extern u_int ipsec_get_reqlevel(struct ipsecrequest *);
-extern int ipsec_in_reject(struct secpolicy *, struct mbuf *);
extern int ipsec_set_policy(struct inpcb *inp, int optname,
caddr_t request, size_t len, struct ucred *cred);
diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c
index 6c52781..5837699 100644
--- a/sys/netipsec/ipsec_input.c
+++ b/sys/netipsec/ipsec_input.c
@@ -391,6 +391,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip,
ipsec_bpf(m, sav, AF_INET, ENC_IN|ENC_BEFORE);
if ((error = ipsec_filter(&m, PFIL_IN, ENC_IN|ENC_BEFORE)) != 0)
return (error);
+ ip = mtod(m, struct ip *);
#endif /* DEV_ENC */
/* IP-in-IP encapsulation */
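
The added mtod() re-fetch matters because ipsec_filter() may pass the packet through pfil hooks that prepend, trim, or replace the mbuf, leaving the previously computed header pointer stale. A userland sketch of the hazard, with a plain realloc()ed buffer standing in for the mbuf chain:

#include <stdlib.h>

struct buf_model { char *data; size_t len; };

static int
filter(struct buf_model *b)	/* may reallocate, like ipsec_filter() */
{
	char *p;

	p = realloc(b->data, b->len + 8);
	if (p == NULL)
		return (-1);
	b->data = p;
	b->len += 8;
	return (0);
}

static int
input_path(struct buf_model *b)
{
	char *hdr = b->data;

	if (filter(b) != 0)
		return (-1);
	hdr = b->data;	/* must re-fetch: the old pointer may be stale */
	(void)hdr;
	return (0);
}

int
main(void)
{
	struct buf_model b = { malloc(8), 8 };
	int rc;

	rc = (b.data != NULL) ? input_path(&b) : -1;
	free(b.data);
	return (rc == 0 ? 0 : 1);
}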
diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c
index 583f4b0..ae36070 100644
--- a/sys/netipsec/ipsec_output.c
+++ b/sys/netipsec/ipsec_output.c
@@ -104,6 +104,7 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr)
IPSEC_ASSERT(m != NULL, ("null mbuf"));
IPSEC_ASSERT(isr != NULL, ("null ISR"));
+ IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp"));
sav = isr->sav;
IPSEC_ASSERT(sav != NULL, ("null SA"));
IPSEC_ASSERT(sav->sah != NULL, ("null SAH"));
@@ -163,6 +164,10 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr)
* If this is a problem we'll need to introduce a queue
* to set the packet on so we can unwind the stack before
* doing further processing.
+ *
+ * If ipsec[46]_process_packet() successfully queues the request,
+ * we need to take an additional reference to the SP, because the
+ * xform callback will release that reference.
*/
if (isr->next) {
/* XXX-BZ currently only support same AF bundles. */
@@ -170,7 +175,11 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr)
#ifdef INET
case AF_INET:
IPSECSTAT_INC(ips_out_bundlesa);
- return ipsec4_process_packet(m, isr->next);
+ key_addref(isr->sp);
+ error = ipsec4_process_packet(m, isr->next);
+ if (error != 0)
+ KEY_FREESP(&isr->sp);
+ return (error);
/* NOTREACHED */
#endif
#ifdef notyet
@@ -178,7 +187,11 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr)
case AF_INET6:
/* XXX */
IPSEC6STAT_INC(ips_out_bundlesa);
- return ipsec6_process_packet(m, isr->next);
+ key_addref(isr->sp);
+ error = ipsec6_process_packet(m, isr->next);
+ if (error != 0)
+ KEY_FREESP(&isr->sp);
+ return (error);
/* NOTREACHED */
#endif /* INET6 */
#endif
@@ -193,8 +206,7 @@ ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr)
/*
* We're done with IPsec processing, transmit the packet using the
- * appropriate network protocol (IP or IPv6). SPD lookup will be
- * performed again there.
+ * appropriate network protocol (IP or IPv6).
*/
switch (saidx->dst.sa.sa_family) {
#ifdef INET
@@ -565,6 +577,7 @@ ipsec4_process_packet(struct mbuf *m, struct ipsecrequest *isr)
/* pass the mbuf to enc0 for packet filtering */
if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_BEFORE)) != 0)
goto bad;
+ ip = mtod(m, struct ip *);
#endif
/* Do the appropriate encapsulation, if necessary */
if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */
@@ -686,6 +699,7 @@ ipsec6_process_packet(struct mbuf *m, struct ipsecrequest *isr)
/* pass the mbuf to enc0 for packet filtering */
if ((error = ipsec_filter(&m, PFIL_OUT, ENC_OUT|ENC_BEFORE)) != 0)
goto bad;
+ ip6 = mtod(m, struct ip6_hdr *);
#endif /* DEV_ENC */
/* Do the appropriate encapsulation, if necessary */
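
The bundle case above is the other half of the new reference protocol: re-queueing the next request hands one SP reference to the xform callback, so the caller takes an extra reference first and takes it back only if queueing fails. A compact model under the same assumptions as the earlier sketch (hypothetical names):

#include <stdlib.h>

struct sp_model { int refcnt; };

static void
sp_rel(struct sp_model *sp)
{
	if (--sp->refcnt == 0)
		free(sp);
}

static int
requeue(struct sp_model *sp)
{
	sp_rel(sp);	/* pretend the queued work ran and dropped its ref */
	return (0);
}

static int
process_next_in_bundle(struct sp_model *sp)
{
	int error;

	sp->refcnt++;		/* cf. key_addref(): one ref for the callback */
	error = requeue(sp);
	if (error != 0)
		sp_rel(sp);	/* never queued: take the extra reference back */
	return (error);
}

int
main(void)
{
	struct sp_model *sp = malloc(sizeof(*sp));

	if (sp == NULL)
		return (1);
	sp->refcnt = 1;
	process_next_in_bundle(sp);
	sp_rel(sp);	/* drop our own original reference */
	return (0);
}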
diff --git a/sys/netipsec/xform_ah.c b/sys/netipsec/xform_ah.c
index cb257a5..8f791db 100644
--- a/sys/netipsec/xform_ah.c
+++ b/sys/netipsec/xform_ah.c
@@ -1091,6 +1091,7 @@ ah_output_cb(struct cryptop *crp)
m = (struct mbuf *) crp->crp_buf;
isr = tc->tc_isr;
+ IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp"));
IPSECREQUEST_LOCK(isr);
sav = tc->tc_sav;
/* With the isr lock released SA pointer can be updated. */
@@ -1154,16 +1155,18 @@ ah_output_cb(struct cryptop *crp)
error = ipsec_process_done(m, isr);
KEY_FREESAV(&sav);
IPSECREQUEST_UNLOCK(isr);
- return error;
+ KEY_FREESP(&isr->sp);
+ return (error);
bad:
if (sav)
KEY_FREESAV(&sav);
IPSECREQUEST_UNLOCK(isr);
+ KEY_FREESP(&isr->sp);
if (m)
m_freem(m);
free(tc, M_XDATA);
crypto_freereq(crp);
- return error;
+ return (error);
}
static struct xformsw ah_xformsw = {
diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c
index 2fbdb81..003c514 100644
--- a/sys/netipsec/xform_esp.c
+++ b/sys/netipsec/xform_esp.c
@@ -888,6 +888,7 @@ esp_output_cb(struct cryptop *crp)
m = (struct mbuf *) crp->crp_buf;
isr = tc->tc_isr;
+ IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp"));
IPSECREQUEST_LOCK(isr);
sav = tc->tc_sav;
/* With the isr lock released SA pointer can be updated. */
@@ -966,16 +967,18 @@ esp_output_cb(struct cryptop *crp)
error = ipsec_process_done(m, isr);
KEY_FREESAV(&sav);
IPSECREQUEST_UNLOCK(isr);
- return error;
+ KEY_FREESP(&isr->sp);
+ return (error);
bad:
if (sav)
KEY_FREESAV(&sav);
IPSECREQUEST_UNLOCK(isr);
+ KEY_FREESP(&isr->sp);
if (m)
m_freem(m);
free(tc, M_XDATA);
crypto_freereq(crp);
- return error;
+ return (error);
}
static struct xformsw esp_xformsw = {
diff --git a/sys/netipsec/xform_ipcomp.c b/sys/netipsec/xform_ipcomp.c
index 5f3afd9..ef460cd 100644
--- a/sys/netipsec/xform_ipcomp.c
+++ b/sys/netipsec/xform_ipcomp.c
@@ -492,6 +492,7 @@ ipcomp_output_cb(struct cryptop *crp)
skip = tc->tc_skip;
isr = tc->tc_isr;
+ IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp"));
IPSECREQUEST_LOCK(isr);
sav = tc->tc_sav;
/* With the isr lock released SA pointer can be updated. */
@@ -606,16 +607,18 @@ ipcomp_output_cb(struct cryptop *crp)
error = ipsec_process_done(m, isr);
KEY_FREESAV(&sav);
IPSECREQUEST_UNLOCK(isr);
- return error;
+ KEY_FREESP(&isr->sp);
+ return (error);
bad:
if (sav)
KEY_FREESAV(&sav);
IPSECREQUEST_UNLOCK(isr);
+ KEY_FREESP(&isr->sp);
if (m)
m_freem(m);
free(tc, M_XDATA);
crypto_freereq(crp);
- return error;
+ return (error);
}
static struct xformsw ipcomp_xformsw = {
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index a30f6bf..4da0ee0 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -2762,6 +2762,10 @@ vnet_ipfw_init(const void *unused)
LIST_INIT(&chain->nat);
#endif
+ /* Init shared services hash table */
+ ipfw_init_srv(chain);
+
+ ipfw_init_obj_rewriter();
ipfw_init_counters();
/* insert the default rule and create the initial map */
chain->n_rules = 1;
@@ -2860,9 +2864,11 @@ vnet_ipfw_uninit(const void *unused)
if (reap != NULL)
ipfw_reap_rules(reap);
vnet_ipfw_iface_destroy(chain);
+ ipfw_destroy_srv(chain);
IPFW_LOCK_DESTROY(chain);
ipfw_dyn_uninit(1); /* free the remaining parts */
ipfw_destroy_counters();
+ ipfw_destroy_obj_rewriter();
return (0);
}
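
The vnet init path now builds the shared-service hash and the object rewriter alongside the counters, and vnet_ipfw_uninit() tears both down again. A tiny model of that paired setup/teardown discipline, with hypothetical names and the teardown mirroring the setup:

#include <stdlib.h>

struct chain_model { void *srv; void *rewriter; };

static int
model_init(struct chain_model *ch)
{
	ch->srv = calloc(1, 16);
	ch->rewriter = calloc(1, 16);
	return (ch->srv != NULL && ch->rewriter != NULL ? 0 : -1);
}

static void
model_uninit(struct chain_model *ch)	/* safe even after a failed init */
{
	free(ch->rewriter);
	free(ch->srv);
}

int
main(void)
{
	struct chain_model ch;
	int rc;

	rc = model_init(&ch);
	model_uninit(&ch);
	return (rc == 0 ? 0 : 1);
}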
diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c
index 201be2f..df40398 100644
--- a/sys/netpfil/ipfw/ip_fw_nat.c
+++ b/sys/netpfil/ipfw/ip_fw_nat.c
@@ -242,6 +242,8 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
}
if (r->alink[0] == NULL) {
printf("LibAliasRedirect* returned NULL\n");
+ free(r->alink, M_IPFW);
+ free(r, M_IPFW);
return (EINVAL);
}
/* LSNAT handling. */
@@ -263,6 +265,16 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
return (0);
}
+static void
+free_nat_instance(struct cfg_nat *ptr)
+{
+
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+}
+
+
/*
* ipfw_nat - perform mbuf header translation.
*
@@ -536,7 +548,7 @@ nat44_config(struct ip_fw_chain *chain, struct nat44_cfg_nat *ucfg)
IPFW_UH_WUNLOCK(chain);
if (tcfg != NULL)
- free(tcfg, M_IPFW);
+ free_nat_instance(ptr);
}
/*
@@ -626,9 +638,7 @@ nat44_destroy(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
IPFW_WUNLOCK(chain);
IPFW_UH_WUNLOCK(chain);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
+ free_nat_instance(ptr);
return (0);
}
@@ -994,9 +1004,7 @@ ipfw_nat_del(struct sockopt *sopt)
flush_nat_ptrs(chain, i);
IPFW_WUNLOCK(chain);
IPFW_UH_WUNLOCK(chain);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
+ free_nat_instance(ptr);
return (0);
}
@@ -1139,9 +1147,7 @@ vnet_ipfw_nat_uninit(const void *arg __unused)
IPFW_WLOCK(chain);
LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
LIST_REMOVE(ptr, _next);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
+ free_nat_instance(ptr);
}
flush_nat_ptrs(chain, -1 /* flush all */);
V_ipfw_nat_ready = 0;
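
Folding the del_redir_spool_cfg()/LibAliasUninit()/free() triple into free_nat_instance() means every deletion path above now releases a NAT instance the same way. A generic sketch of the "one destroy helper, many call sites" cleanup, with hypothetical resources:

#include <stdlib.h>

struct nat_model { void *lib; void *redirs; };

static void
nat_model_free(struct nat_model *n)
{
	free(n->redirs);	/* every caller now frees in the same order */
	free(n->lib);
	free(n);
}

int
main(void)
{
	struct nat_model *n = calloc(1, sizeof(*n));

	if (n == NULL)
		return (1);
	n->lib = malloc(4);
	n->redirs = malloc(4);
	nat_model_free(n);
	return (0);
}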
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index bb5b3ba..504093b 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -264,10 +264,10 @@ struct ip_fw_chain {
struct ip_fw **map; /* array of rule ptrs to ease lookup */
uint32_t id; /* ruleset id */
int n_rules; /* number of static rules */
- LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
void *tablestate; /* runtime table info */
void *valuestate; /* runtime table value info */
int *idxmap; /* skipto array of rules */
+ void **srvstate; /* runtime service mappings */
#if defined( __linux__ ) || defined( _WIN32 )
spinlock_t rwmtx;
#else
@@ -275,10 +275,12 @@ struct ip_fw_chain {
#endif
int static_len; /* total len of static rules (v0) */
uint32_t gencnt; /* NAT generation count */
+ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
struct ip_fw *default_rule;
struct tables_config *tblcfg; /* tables module data */
void *ifcfg; /* interface module data */
int *idxmap_back; /* standby skipto array of rules */
+ struct namedobj_instance *srvmap; /* cfg name->number mappings */
#if defined( __linux__ ) || defined( _WIN32 )
spinlock_t uh_lock;
#else
@@ -306,16 +308,15 @@ struct table_value {
uint64_t refcnt; /* Number of references */
};
-struct namedobj_instance;
struct named_object {
TAILQ_ENTRY(named_object) nn_next; /* namehash */
TAILQ_ENTRY(named_object) nv_next; /* valuehash */
char *name; /* object name */
- uint8_t type; /* object type */
- uint8_t compat; /* Object name is number */
+ uint8_t subtype; /* object subtype within class */
+ uint8_t etlv; /* Export TLV id */
+ uint16_t spare[2];
uint16_t kidx; /* object kernel index */
- uint16_t uidx; /* userland idx for compat records */
uint32_t set; /* set object belongs to */
uint32_t refcnt; /* number of references */
};
@@ -450,7 +451,7 @@ struct obj_idx {
struct rule_check_info {
uint16_t flags; /* rule-specific check flags */
- uint16_t table_opcodes; /* count of opcodes referencing table */
+ uint16_t object_opcodes; /* num of opcodes referencing objects */
uint16_t urule_numoff; /* offset of rulenum in bytes */
uint8_t version; /* rule version */
uint8_t spare;
@@ -507,6 +508,84 @@ struct ip_fw_bcounter0 {
(r)->cmd_len * 4 - 4, 8))
#define RULEKSIZE1(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8)
+/*
+ * Tables/Objects index rewriting code
+ */
+
+/* Default and maximum number of ipfw tables/objects. */
+#define IPFW_TABLES_MAX 65536
+#define IPFW_TABLES_DEFAULT 128
+#define IPFW_OBJECTS_MAX 65536
+#define IPFW_OBJECTS_DEFAULT 128
+
+#define CHAIN_TO_SRV(ch) ((ch)->srvmap)
+
+struct tid_info {
+ uint32_t set; /* table set */
+ uint16_t uidx; /* table index */
+ uint8_t type; /* table type */
+ uint8_t atype;
+ uint8_t spare;
+ int tlen; /* Total TLV size block */
+ void *tlvs; /* Pointer to first TLV */
+};
+
+/*
+ * Classifier callback. Checks if @cmd opcode contains kernel object reference.
+ * If true, returns its index and type.
+ * Returns 0 if a match is found, 1 otherwise.
+ */
+typedef int (ipfw_obj_rw_cl)(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype);
+/*
+ * Updater callback. Sets kernel object reference index to @puidx
+ */
+typedef void (ipfw_obj_rw_upd)(ipfw_insn *cmd, uint16_t puidx);
+/*
+ * Finder callback. Tries to find named object by name (specified via @ti).
+ * Stores found named object pointer in @pno.
+ * If object was not found, NULL is stored.
+ *
+ * Return 0 if input data was valid.
+ */
+typedef int (ipfw_obj_fname_cb)(struct ip_fw_chain *ch,
+ struct tid_info *ti, struct named_object **pno);
+/*
+ * Another finder callback. Tries to find a named object by kernel index.
+ *
+ * Returns pointer to named object or NULL.
+ */
+typedef struct named_object *(ipfw_obj_fidx_cb)(struct ip_fw_chain *ch,
+ uint16_t kidx);
+/*
+ * Object creator callback. Tries to create object specified by @ti.
+ * Stores newly-allocated object index in @pkidx.
+ *
+ * Returns 0 on success.
+ */
+typedef int (ipfw_obj_create_cb)(struct ip_fw_chain *ch, struct tid_info *ti,
+ uint16_t *pkidx);
+
+
+struct opcode_obj_rewrite {
+ uint32_t opcode; /* Opcode to act upon */
+ uint32_t etlv; /* Relevant export TLV id */
+ ipfw_obj_rw_cl *classifier; /* Check if rewrite is needed */
+ ipfw_obj_rw_upd *update; /* update cmd with new value */
+ ipfw_obj_fname_cb *find_byname; /* Find named object by name */
+ ipfw_obj_fidx_cb *find_bykidx; /* Find named object by kidx */
+ ipfw_obj_create_cb *create_object; /* Create named object */
+};
+
+#define IPFW_ADD_OBJ_REWRITER(f, c) do { \
+ if ((f) != 0) \
+ ipfw_add_obj_rewriter(c, \
+ sizeof(c) / sizeof(c[0])); \
+ } while(0)
+#define IPFW_DEL_OBJ_REWRITER(l, c) do { \
+ if ((l) != 0) \
+ ipfw_del_obj_rewriter(c, \
+ sizeof(c) / sizeof(c[0])); \
+ } while(0)
/* In ip_fw_iface.c */
int ipfw_iface_init(void);
@@ -562,6 +641,7 @@ caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed);
sizeof(c) / sizeof(c[0])); \
} while(0)
+struct namedobj_instance;
typedef void (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *,
void *arg);
typedef uint32_t (objhash_hash_f)(struct namedobj_instance *ni, void *key,
@@ -578,6 +658,8 @@ void ipfw_objhash_bitmap_free(void *idx, int blocks);
void ipfw_objhash_set_hashf(struct namedobj_instance *ni, objhash_hash_f *f);
struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni,
uint32_t set, char *name);
+struct named_object *ipfw_objhash_lookup_name_type(struct namedobj_instance *ni,
+ uint32_t set, uint32_t type, char *name);
struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni,
uint16_t idx);
int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a,
@@ -591,6 +673,25 @@ int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx);
int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx);
void ipfw_objhash_set_funcs(struct namedobj_instance *ni,
objhash_hash_f *hash_f, objhash_cmp_f *cmp_f);
+void ipfw_init_obj_rewriter(void);
+void ipfw_destroy_obj_rewriter(void);
+void ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
+int ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count);
+
+int ipfw_rewrite_rule_uidx(struct ip_fw_chain *chain,
+ struct rule_check_info *ci);
+int ipfw_mark_object_kidx(struct ip_fw_chain *chain, struct ip_fw *rule,
+ uint32_t *bmask);
+int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti,
+ struct obj_idx *pidx, int *found, int *unresolved);
+void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd,
+ struct obj_idx *oib, struct obj_idx *end);
+int create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
+ struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti);
+void update_opcode_kidx(ipfw_insn *cmd, uint16_t idx);
+int classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx);
+void ipfw_init_srv(struct ip_fw_chain *ch);
+void ipfw_destroy_srv(struct ip_fw_chain *ch);
/* In ip_fw_table.c */
struct table_info;
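
Given the struct and registration macros introduced above, a module that owns a new kind of named object would plug into the rewriter roughly as below. This is a non-compilable sketch against the declarations in this header: the O_MY_LOOKUP opcode, the IPFW_TLV_MYOBJ_NAME TLV id, and every my_* callback are hypothetical.

static struct opcode_obj_rewrite my_opcodes[] = {
	{
		O_MY_LOOKUP, IPFW_TLV_MYOBJ_NAME,
		my_classify, my_update,
		my_findbyname, my_findbykidx, my_create
	},
};

void
my_module_init(int first)
{

	/* sorts the global array and makes the opcode resolvable */
	IPFW_ADD_OBJ_REWRITER(first, my_opcodes);
}

void
my_module_uninit(int last)
{

	IPFW_DEL_OBJ_REWRITER(last, my_opcodes);
}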
diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c
index ef1ff6c..d618a6c 100644
--- a/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -148,6 +148,21 @@ static struct ipfw_sopt_handler scodes[] = {
{ IP_FW_DUMP_SOPTCODES, 0, HDIR_GET, dump_soptcodes },
};
+static int
+set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule);
+struct opcode_obj_rewrite *ipfw_find_op_rw(uint16_t opcode);
+static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule,
+ uint32_t *bmask);
+static void unref_rule_objects(struct ip_fw_chain *chain, struct ip_fw *rule);
+static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx,
+ struct sockopt_data *sd);
+
+/*
+ * Opcode object rewriter variables
+ */
+struct opcode_obj_rewrite *ctl3_rewriters;
+static size_t ctl3_rsize;
+
/*
* static variables followed by global ones
*/
@@ -646,17 +661,18 @@ commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count)
struct ip_fw *krule;
struct ip_fw **map; /* the new array of pointers */
- /* Check if we need to do table remap */
+ /* Check if we need to do table/obj index remap */
tcount = 0;
for (ci = rci, i = 0; i < count; ci++, i++) {
- if (ci->table_opcodes == 0)
+ if (ci->object_opcodes == 0)
continue;
/*
- * Rule has some table opcodes.
- * Reference & allocate needed tables/
+ * Rule has some object opcodes.
+ * We need to find (and create non-existing)
+ * kernel objects, and reference existing ones.
*/
- error = ipfw_rewrite_table_uidx(chain, ci);
+ error = ipfw_rewrite_rule_uidx(chain, ci);
if (error != 0) {
/*
@@ -674,9 +690,9 @@ commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count)
IPFW_UH_WLOCK(chain);
while (ci != rci) {
ci--;
- if (ci->table_opcodes == 0)
+ if (ci->object_opcodes == 0)
continue;
- ipfw_unref_rule_tables(chain,ci->krule);
+ unref_rule_objects(chain,ci->krule);
}
IPFW_UH_WUNLOCK(chain);
@@ -696,10 +712,10 @@ commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count)
/* Unbind tables */
IPFW_UH_WLOCK(chain);
for (ci = rci, i = 0; i < count; ci++, i++) {
- if (ci->table_opcodes == 0)
+ if (ci->object_opcodes == 0)
continue;
- ipfw_unref_rule_tables(chain, ci->krule);
+ unref_rule_objects(chain, ci->krule);
}
IPFW_UH_WUNLOCK(chain);
}
@@ -759,7 +775,7 @@ ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head,
IPFW_UH_WLOCK_ASSERT(chain);
/* Unlink rule from everywhere */
- ipfw_unref_rule_tables(chain, rule);
+ unref_rule_objects(chain, rule);
*((struct ip_fw **)rule) = *head;
*head = rule;
@@ -1542,7 +1558,7 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
cmdlen != F_INSN_SIZE(ipfw_insn_u32))
goto bad_size;
- ci->table_opcodes++;
+ ci->object_opcodes++;
break;
case O_IP_FLOW_LOOKUP:
if (cmd->arg1 >= V_fw_tables_max) {
@@ -1553,7 +1569,7 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
cmdlen != F_INSN_SIZE(ipfw_insn_u32))
goto bad_size;
- ci->table_opcodes++;
+ ci->object_opcodes++;
break;
case O_MACADDR2:
if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
@@ -1587,7 +1603,7 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
case O_XMIT:
case O_VIA:
if (((ipfw_insn_if *)cmd)->name[0] == '\1')
- ci->table_opcodes++;
+ ci->object_opcodes++;
if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
goto bad_size;
break;
@@ -1788,7 +1804,7 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
l = RULESIZE7(rule);
if (bp + l + sizeof(uint32_t) <= ep) {
bcopy(rule, bp, l + sizeof(uint32_t));
- error = ipfw_rewrite_table_kidx(chain,
+ error = set_legacy_obj_kidx(chain,
(struct ip_fw_rule0 *)bp);
if (error != 0)
return (0);
@@ -1817,7 +1833,7 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
}
dst = (struct ip_fw_rule0 *)bp;
export_rule0(rule, dst, l);
- error = ipfw_rewrite_table_kidx(chain, dst);
+ error = set_legacy_obj_kidx(chain, dst);
/*
* XXX HACK. Store the disable mask in the "next"
@@ -1861,6 +1877,34 @@ struct dump_args {
};
/*
+ * Export named object info in instance @ni, identified by @kidx
+ * to ipfw_obj_ntlv. TLV is allocated from @sd space.
+ *
+ * Returns 0 on success.
+ */
+static int
+export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx,
+ struct sockopt_data *sd)
+{
+ struct named_object *no;
+ ipfw_obj_ntlv *ntlv;
+
+ no = ipfw_objhash_lookup_kidx(ni, kidx);
+ KASSERT(no != NULL, ("invalid object kernel index passed"));
+
+ ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
+ if (ntlv == NULL)
+ return (ENOMEM);
+
+ ntlv->head.type = no->etlv;
+ ntlv->head.length = sizeof(*ntlv);
+ ntlv->idx = no->kidx;
+ strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
+
+ return (0);
+}
+
+/*
* Dumps static rules with table TLVs in buffer @sd.
*
* Returns 0 on success.
@@ -1874,6 +1918,7 @@ dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da,
uint32_t tcount;
ipfw_obj_ctlv *ctlv;
struct ip_fw *krule;
+ struct namedobj_instance *ni;
caddr_t dst;
/* Dump table names first (if any) */
@@ -1891,13 +1936,21 @@ dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da,
i = 0;
tcount = da->tcount;
+ ni = ipfw_get_table_objhash(chain);
while (tcount > 0) {
if ((bmask[i / 32] & (1 << (i % 32))) == 0) {
i++;
continue;
}
- if ((error = ipfw_export_table_ntlv(chain, i, sd)) != 0)
+ /* Jump to shared named object bitmask */
+ if (i >= IPFW_TABLES_MAX) {
+ ni = CHAIN_TO_SRV(chain);
+ i -= IPFW_TABLES_MAX;
+ bmask += IPFW_TABLES_MAX / 32;
+ }
+
+ if ((error = export_objhash_ntlv(ni, i, sd)) != 0)
return (error);
i++;
@@ -1929,6 +1982,52 @@ dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da,
}
/*
+ * Marks every object index used in @rule with bit in @bmask.
+ * Used to generate bitmask of referenced tables/objects for given ruleset
+ * or its part.
+ *
+ * Returns number of newly-referenced objects.
+ */
+static int
+mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule,
+ uint32_t *bmask)
+{
+ int cmdlen, l, count;
+ ipfw_insn *cmd;
+ uint16_t kidx;
+ struct opcode_obj_rewrite *rw;
+ int bidx;
+ uint8_t subtype;
+
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ count = 0;
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ rw = ipfw_find_op_rw(cmd->opcode);
+ if (rw == NULL)
+ continue;
+
+ if (rw->classifier(cmd, &kidx, &subtype) != 0)
+ continue;
+
+ bidx = kidx / 32;
+ /* Maintain separate bitmasks for table and non-table objects */
+ if (rw->etlv != IPFW_TLV_TBL_NAME)
+ bidx += IPFW_TABLES_MAX / 32;
+
+ if ((bmask[bidx] & (1 << (kidx % 32))) == 0)
+ count++;
+
+ bmask[bidx] |= 1 << (kidx % 32);
+ }
+
+ return (count);
+}
+
+/*
* Dumps requested objects data
* Data layout (version 0)(current):
* Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags
@@ -1963,9 +2062,9 @@ dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
error = 0;
bmask = NULL;
- /* Allocate needed state */
+ /* Allocate needed state. Note the mask is 2x size, for tables & srv */
if (hdr->flags & IPFW_CFG_GET_STATIC)
- bmask = malloc(IPFW_TABLES_MAX / 8, M_TEMP, M_WAITOK | M_ZERO);
+ bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO);
IPFW_UH_RLOCK(chain);
@@ -1994,7 +2093,8 @@ dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
rule = chain->map[i];
da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv);
da.rcount++;
- da.tcount += ipfw_mark_table_kidx(chain, rule, bmask);
+ /* Update bitmask of used objects for given range */
+ da.tcount += mark_object_kidx(chain, rule, bmask);
}
/* Add counters if requested */
if (hdr->flags & IPFW_CFG_GET_COUNTERS) {
@@ -2064,6 +2164,241 @@ check_object_name(ipfw_obj_ntlv *ntlv)
}
/*
+ * Creates non-existent objects referenced by rule.
+ *
+ * Return 0 on success.
+ */
+int
+create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd,
+ struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti)
+{
+ struct opcode_obj_rewrite *rw;
+ struct obj_idx *p;
+ uint16_t kidx;
+ int error;
+
+ /*
+ * Compatibility stuff: do actual creation for non-existing,
+ * but referenced objects.
+ */
+ for (p = oib; p < pidx; p++) {
+ if (p->kidx != 0)
+ continue;
+
+ ti->uidx = p->uidx;
+ ti->type = p->type;
+ ti->atype = 0;
+
+ rw = ipfw_find_op_rw((cmd + p->off)->opcode);
+ KASSERT(rw != NULL, ("Unable to find handler for op %d",
+ (cmd + p->off)->opcode));
+
+ error = rw->create_object(ch, ti, &kidx);
+ if (error == 0) {
+ p->kidx = kidx;
+ continue;
+ }
+
+ /*
+ * Error happened. We have to rollback everything.
+ * Drop all already acquired references.
+ */
+ IPFW_UH_WLOCK(ch);
+ unref_oib_objects(ch, cmd, oib, pidx);
+ IPFW_UH_WUNLOCK(ch);
+
+ return (error);
+ }
+
+ return (0);
+}
+
+/*
+ * Compatibility function for old ipfw(8) binaries.
+ * Rewrites table/nat kernel indices with userland ones.
+ * Convert tables matching '/^\d+$/' to their atoi() value.
+ * Use number 65535 for other tables.
+ *
+ * Returns 0 on success.
+ */
+static int
+set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule)
+{
+ int cmdlen, error, l;
+ ipfw_insn *cmd;
+ uint16_t kidx, uidx;
+ struct named_object *no;
+ struct opcode_obj_rewrite *rw;
+ uint8_t subtype;
+ char *end;
+ long val;
+
+ error = 0;
+
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ rw = ipfw_find_op_rw(cmd->opcode);
+ if (rw == NULL)
+ continue;
+
+ /* Check if there is an object index in the given opcode */
+ if (rw->classifier(cmd, &kidx, &subtype) != 0)
+ continue;
+
+ /* Try to find referenced kernel object */
+ no = rw->find_bykidx(ch, kidx);
+ if (no == NULL)
+ continue;
+
+ val = strtol(no->name, &end, 10);
+ if (*end == '\0' && val < 65535) {
+ uidx = val;
+ } else {
+
+ /*
+ * We are called via legacy opcode.
+ * Save the error and report the table as a fake number
+ * so as not to make ipfw(8) hang.
+ */
+ uidx = 65535;
+ error = 2;
+ }
+
+ rw->update(cmd, uidx);
+ }
+
+ return (error);
+}
+
+
+/*
+ * Unreferences all already-referenced objects in given @cmd rule,
+ * using information in @oib.
+ *
+ * Used to rollback partially converted rule on error.
+ */
+void
+unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib,
+ struct obj_idx *end)
+{
+ struct opcode_obj_rewrite *rw;
+ struct named_object *no;
+ struct obj_idx *p;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ for (p = oib; p < end; p++) {
+ if (p->kidx == 0)
+ continue;
+
+ rw = ipfw_find_op_rw((cmd + p->off)->opcode);
+ KASSERT(rw != NULL, ("Unable to find handler for op %d",
+ (cmd + p->off)->opcode));
+
+ /* Find & unref by existing idx */
+ no = rw->find_bykidx(ch, p->kidx);
+ KASSERT(no != NULL, ("Ref'd object %d disappeared", p->kidx));
+ no->refcnt--;
+ }
+}
+
+/*
+ * Remove references from every object used in @rule.
+ * Used at rule removal code.
+ */
+static void
+unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule)
+{
+ int cmdlen, l;
+ ipfw_insn *cmd;
+ struct named_object *no;
+ uint16_t kidx;
+ struct opcode_obj_rewrite *rw;
+ uint8_t subtype;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ l = rule->cmd_len;
+ cmd = rule->cmd;
+ cmdlen = 0;
+ for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+ cmdlen = F_LEN(cmd);
+
+ rw = ipfw_find_op_rw(cmd->opcode);
+ if (rw == NULL)
+ continue;
+ if (rw->classifier(cmd, &kidx, &subtype) != 0)
+ continue;
+
+ no = rw->find_bykidx(ch, kidx);
+
+ KASSERT(no != NULL, ("table id %d not found", kidx));
+ KASSERT(no->subtype == subtype,
+ ("wrong type %d (%d) for table id %d",
+ no->subtype, subtype, kidx));
+ KASSERT(no->refcnt > 0, ("refcount for table %d is %d",
+ kidx, no->refcnt));
+
+ no->refcnt--;
+ }
+}
+
+
+/*
+ * Find and reference object (if any) stored in instruction @cmd.
+ *
+ * Saves object info in @pidx and sets
+ * - @found to 1 if the object was found and referenced
+ * - @unresolved to 1 if the object should exist but was not found
+ *
+ * Returns non-zero value in case of error.
+ */
+int
+ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti,
+ struct obj_idx *pidx, int *found, int *unresolved)
+{
+ struct named_object *no;
+ struct opcode_obj_rewrite *rw;
+ int error;
+
+ *found = 0;
+ *unresolved = 0;
+
+ /* Check if this opcode is candidate for rewrite */
+ rw = ipfw_find_op_rw(cmd->opcode);
+ if (rw == NULL)
+ return (0);
+
+ /* Check if we need to rewrite this opcode */
+ if (rw->classifier(cmd, &ti->uidx, &ti->type) != 0)
+ return (0);
+
+ /* Need to rewrite. Save necessary fields */
+ pidx->uidx = ti->uidx;
+ pidx->type = ti->type;
+
+ /* Try to find referenced kernel object */
+ error = rw->find_byname(ch, ti, &no);
+ if (error != 0)
+ return (error);
+ if (no == NULL) {
+ *unresolved = 1;
+ return (0);
+ }
+
+ /* Found. bump refcount */
+ *found = 1;
+ no->refcnt++;
+ pidx->kidx = no->kidx;
+
+ return (0);
+}
+
+/*
* Adds one or more rules to ipfw @chain.
* Data layout (version 0)(current):
* Request:
@@ -2315,6 +2650,160 @@ dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
}
/*
+ * Compares two opcodes.
+ * Used both in qsort() and bsearch().
+ *
+ * Returns 0 when the opcodes match.
+ */
+static int
+compare_opcodes(const void *_a, const void *_b)
+{
+ const struct opcode_obj_rewrite *a, *b;
+
+ a = (const struct opcode_obj_rewrite *)_a;
+ b = (const struct opcode_obj_rewrite *)_b;
+
+ if (a->opcode < b->opcode)
+ return (-1);
+ else if (a->opcode > b->opcode)
+ return (1);
+
+ return (0);
+}
+
+/*
+ * Finds opcode object rewriter based on @code.
+ *
+ * Returns pointer to handler or NULL.
+ */
+struct opcode_obj_rewrite *
+ipfw_find_op_rw(uint16_t opcode)
+{
+ struct opcode_obj_rewrite *rw, h;
+
+ memset(&h, 0, sizeof(h));
+ h.opcode = opcode;
+
+ rw = (struct opcode_obj_rewrite *)bsearch(&h, ctl3_rewriters,
+ ctl3_rsize, sizeof(h), compare_opcodes);
+
+ return (rw);
+}
+
+int
+classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx)
+{
+ struct opcode_obj_rewrite *rw;
+ uint8_t subtype;
+
+ rw = ipfw_find_op_rw(cmd->opcode);
+ if (rw == NULL)
+ return (1);
+
+ return (rw->classifier(cmd, puidx, &subtype));
+}
+
+void
+update_opcode_kidx(ipfw_insn *cmd, uint16_t idx)
+{
+ struct opcode_obj_rewrite *rw;
+
+ rw = ipfw_find_op_rw(cmd->opcode);
+ KASSERT(rw != NULL, ("No handler to update opcode %d", cmd->opcode));
+ rw->update(cmd, idx);
+}
+
+void
+ipfw_init_obj_rewriter()
+{
+
+ ctl3_rewriters = NULL;
+ ctl3_rsize = 0;
+}
+
+void
+ipfw_destroy_obj_rewriter()
+{
+
+ if (ctl3_rewriters != NULL)
+ free(ctl3_rewriters, M_IPFW);
+ ctl3_rewriters = NULL;
+ ctl3_rsize = 0;
+}
+
+/*
+ * Adds one or more opcode object rewrite handlers to the global array.
+ * Function may sleep.
+ */
+void
+ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count)
+{
+ size_t sz;
+ struct opcode_obj_rewrite *tmp;
+
+ CTL3_LOCK();
+
+ for (;;) {
+ sz = ctl3_rsize + count;
+ CTL3_UNLOCK();
+ tmp = malloc(sizeof(*rw) * sz, M_IPFW, M_WAITOK | M_ZERO);
+ CTL3_LOCK();
+ if (ctl3_rsize + count <= sz)
+ break;
+
+ /* Retry */
+ free(tmp, M_IPFW);
+ }
+
+ /* Merge old & new arrays */
+ sz = ctl3_rsize + count;
+ memcpy(tmp, ctl3_rewriters, ctl3_rsize * sizeof(*rw));
+ memcpy(&tmp[ctl3_rsize], rw, count * sizeof(*rw));
+ qsort(tmp, sz, sizeof(*rw), compare_opcodes);
+ /* Switch new and free old */
+ if (ctl3_rewriters != NULL)
+ free(ctl3_rewriters, M_IPFW);
+ ctl3_rewriters = tmp;
+ ctl3_rsize = sz;
+
+ CTL3_UNLOCK();
+}
+
+/*
+ * Removes one or more object rewrite handlers from the global array.
+ */
+int
+ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count)
+{
+ size_t sz;
+ struct opcode_obj_rewrite *tmp, *h;
+ int i;
+
+ CTL3_LOCK();
+
+ for (i = 0; i < count; i++) {
+ tmp = &rw[i];
+ h = ipfw_find_op_rw(tmp->opcode);
+ if (h == NULL)
+ continue;
+
+ sz = (ctl3_rewriters + ctl3_rsize - (h + 1)) * sizeof(*h);
+ memmove(h, h + 1, sz);
+ ctl3_rsize--;
+ }
+
+ if (ctl3_rsize == 0) {
+ if (ctl3_rewriters != NULL)
+ free(ctl3_rewriters, M_IPFW);
+ ctl3_rewriters = NULL;
+ }
+
+ CTL3_UNLOCK();
+
+ return (0);
+}
+
+/*
* Compares two sopt handlers (code, version and handler ptr).
* Used both as qsort() and bsearch().
* Does not compare handler for latter case.
@@ -3150,6 +3639,23 @@ convert_rule_to_8(struct ip_fw_rule0 *rule)
*
*/
+void
+ipfw_init_srv(struct ip_fw_chain *ch)
+{
+
+ ch->srvmap = ipfw_objhash_create(IPFW_OBJECTS_DEFAULT);
+ ch->srvstate = malloc(sizeof(void *) * IPFW_OBJECTS_DEFAULT,
+ M_IPFW, M_WAITOK | M_ZERO);
+}
+
+void
+ipfw_destroy_srv(struct ip_fw_chain *ch)
+{
+
+ free(ch->srvstate, M_IPFW);
+ ipfw_objhash_destroy(ch->srvmap);
+}
+
/*
* Allocate new bitmask which can be used to enlarge/shrink
* named instance index.
@@ -3323,6 +3829,26 @@ ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name)
return (NULL);
}
+/*
+ * Find named object by name, considering also its TLV type.
+ */
+struct named_object *
+ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set,
+ uint32_t type, char *name)
+{
+ struct named_object *no;
+ uint32_t hash;
+
+ hash = ni->hash_f(ni, name, set) % ni->nn_size;
+
+ TAILQ_FOREACH(no, &ni->names[hash], nn_next) {
+ if (ni->cmp_f(no, name, set) == 0 && no->etlv == type)
+ return (no);
+ }
+
+ return (NULL);
+}
+
struct named_object *
ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx)
{
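
ipfw_find_op_rw() and compare_opcodes() above implement a classic sorted-array registry: qsort() after every insertion, bsearch() on lookup. A standalone model of the same mechanics:

#include <stdio.h>
#include <stdlib.h>

struct handler { unsigned opcode; const char *name; };

static int
cmp_handler(const void *_a, const void *_b)
{
	const struct handler *a = _a, *b = _b;

	if (a->opcode < b->opcode)
		return (-1);
	else if (a->opcode > b->opcode)
		return (1);
	return (0);
}

int
main(void)
{
	struct handler tbl[] = {
		{ 42, "via" }, { 7, "lookup" }, { 19, "flow" },
	};
	struct handler key, *h;

	qsort(tbl, 3, sizeof(tbl[0]), cmp_handler);	/* keep sorted */
	key.opcode = 19;
	h = bsearch(&key, tbl, 3, sizeof(tbl[0]), cmp_handler);
	printf("%s\n", h != NULL ? h->name : "none");	/* prints "flow" */
	return (0);
}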
diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c
index 4498ace..ca4e0e4 100644
--- a/sys/netpfil/ipfw/ip_fw_table.c
+++ b/sys/netpfil/ipfw/ip_fw_table.c
@@ -89,6 +89,8 @@ struct table_config {
struct namedobj_instance *vi;
};
+static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
+ struct table_config **tc);
static struct table_config *find_table(struct namedobj_instance *ni,
struct tid_info *ti);
static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
@@ -122,7 +124,6 @@ static struct table_algo *find_table_algo(struct tables_config *tableconf,
static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
-static int classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype);
#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash)
#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k]))
@@ -297,7 +298,7 @@ find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
tc = NULL;
if ((tc = find_table(ni, ti)) != NULL) {
/* check table type */
- if (tc->no.type != ti->type)
+ if (tc->no.subtype != ti->type)
return (EINVAL);
if (tc->locked != 0)
@@ -1116,7 +1117,7 @@ find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
}
/* check table type */
- if (tc->no.type != ti.type) {
+ if (tc->no.subtype != ti.type) {
IPFW_UH_RUNLOCK(ch);
return (EINVAL);
}
@@ -1399,7 +1400,7 @@ swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
}
/* Check type and value are the same */
- if (tc_a->no.type != tc_b->no.type || tc_a->tflags != tc_b->tflags) {
+ if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
IPFW_UH_WUNLOCK(ch);
return (EINVAL);
}
@@ -1613,7 +1614,6 @@ ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
ipfw_insn *cmd;
int cmdlen, i, l;
uint16_t kidx;
- uint8_t type;
IPFW_UH_WLOCK(ch);
@@ -1636,7 +1636,7 @@ ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
cmdlen = F_LEN(cmd);
- if (classify_table_opcode(cmd, &kidx, &type) != 0)
+ if (classify_opcode_kidx(cmd, &kidx) != 0)
continue;
no = ipfw_objhash_lookup_kidx(ni, kidx);
@@ -1920,7 +1920,7 @@ create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
* requesting to create existing table
* which has the same type
*/
- if (compat == 0 || tc_new->no.type != tc->no.type) {
+ if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
IPFW_UH_WUNLOCK(ch);
free_table_config(ni, tc);
return (EEXIST);
@@ -1940,6 +1940,7 @@ create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
return (EBUSY);
}
tc->no.kidx = kidx;
+ tc->no.etlv = IPFW_TLV_TBL_NAME;
IPFW_WLOCK(ch);
link_table(ch, tc);
@@ -1977,6 +1978,13 @@ objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
ntlv_to_ti(&oh->ntlv, ti);
}
+struct namedobj_instance *
+ipfw_get_table_objhash(struct ip_fw_chain *ch)
+{
+
+ return (CHAIN_TO_NI(ch));
+}
+
/*
* Exports basic table info as name TLV.
* Used inside dump_static_rules() to provide info
@@ -2009,40 +2017,6 @@ ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
return (0);
}
-/*
- * Marks every table kidx used in @rule with bit in @bmask.
- * Used to generate bitmask of referenced tables for given ruleset.
- *
- * Returns number of newly-referenced tables.
- */
-int
-ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule,
- uint32_t *bmask)
-{
- int cmdlen, l, count;
- ipfw_insn *cmd;
- uint16_t kidx;
- uint8_t type;
-
- l = rule->cmd_len;
- cmd = rule->cmd;
- cmdlen = 0;
- count = 0;
- for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
- cmdlen = F_LEN(cmd);
-
- if (classify_table_opcode(cmd, &kidx, &type) != 0)
- continue;
-
- if ((bmask[kidx / 32] & (1 << (kidx % 32))) == 0)
- count++;
-
- bmask[kidx / 32] |= 1 << (kidx % 32);
- }
-
- return (count);
-}
-
struct dump_args {
struct ip_fw_chain *ch;
struct table_info *ti;
@@ -2111,7 +2085,7 @@ export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
struct table_info *ti;
struct table_algo *ta;
- i->type = tc->no.type;
+ i->type = tc->no.subtype;
i->tflags = tc->tflags;
i->vmask = tc->vmask;
i->set = tc->no.set;
@@ -2296,7 +2270,7 @@ dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
xtbl->cnt = count;
xtbl->size = sz;
- xtbl->type = tc->no.type;
+ xtbl->type = tc->no.subtype;
xtbl->tbl = ti.uidx;
if (sd->valsize < sz) {
@@ -2440,7 +2414,7 @@ ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
ta = tc->ta;
/* This dump format supports IPv4 only */
- if (tc->no.type != IPFW_TABLE_ADDR)
+ if (tc->no.subtype != IPFW_TABLE_ADDR)
return (0);
memset(&da, 0, sizeof(da));
@@ -2531,7 +2505,7 @@ dump_table_xentry(void *e, void *arg)
pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
xent->value = ipfw_export_table_value_legacy(pval);
/* Apply some hacks */
- if (tc->no.type == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
+ if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
xent->flags = IPFW_TCF_INET;
} else
@@ -2781,114 +2755,157 @@ list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
return (0);
}
-/*
- * Tables rewriting code
- */
-
-/*
- * Determine table number and lookup type for @cmd.
- * Fill @tbl and @type with appropriate values.
- * Returns 0 for relevant opcodes, 1 otherwise.
- */
static int
-classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
{
- ipfw_insn_if *cmdif;
- int skip;
- uint16_t v;
-
- skip = 1;
-
- switch (cmd->opcode) {
- case O_IP_SRC_LOOKUP:
- case O_IP_DST_LOOKUP:
- /* Basic IPv4/IPv6 or u32 lookups */
- *puidx = cmd->arg1;
- /* Assume ADDR by default */
- *ptype = IPFW_TABLE_ADDR;
- skip = 0;
+ /* Basic IPv4/IPv6 or u32 lookups */
+ *puidx = cmd->arg1;
+ /* Assume ADDR by default */
+ *ptype = IPFW_TABLE_ADDR;
+ int v;
- if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
- /*
- * generic lookup. The key must be
- * in 32bit big-endian format.
- */
- v = ((ipfw_insn_u32 *)cmd)->d[1];
- switch (v) {
- case 0:
- case 1:
- /* IPv4 src/dst */
- break;
- case 2:
- case 3:
- /* src/dst port */
- *ptype = IPFW_TABLE_NUMBER;
- break;
- case 4:
- /* uid/gid */
- *ptype = IPFW_TABLE_NUMBER;
- break;
- case 5:
- /* jid */
- *ptype = IPFW_TABLE_NUMBER;
- break;
- case 6:
- /* dscp */
- *ptype = IPFW_TABLE_NUMBER;
- break;
- }
- }
- break;
- case O_XMIT:
- case O_RECV:
- case O_VIA:
- /* Interface table, possibly */
- cmdif = (ipfw_insn_if *)cmd;
- if (cmdif->name[0] != '\1')
+ if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
+ /*
+ * generic lookup. The key must be
+ * in 32bit big-endian format.
+ */
+ v = ((ipfw_insn_u32 *)cmd)->d[1];
+ switch (v) {
+ case 0:
+ case 1:
+ /* IPv4 src/dst */
break;
-
- *ptype = IPFW_TABLE_INTERFACE;
- *puidx = cmdif->p.kidx;
- skip = 0;
- break;
- case O_IP_FLOW_LOOKUP:
- *puidx = cmd->arg1;
- *ptype = IPFW_TABLE_FLOW;
- skip = 0;
- break;
+ case 2:
+ case 3:
+ /* src/dst port */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ case 4:
+ /* uid/gid */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ case 5:
+ /* jid */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ case 6:
+ /* dscp */
+ *ptype = IPFW_TABLE_NUMBER;
+ break;
+ }
}
- return (skip);
+ return (0);
+}
+
+static int
+classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+ ipfw_insn_if *cmdif;
+
+ /* Interface table, possibly */
+ cmdif = (ipfw_insn_if *)cmd;
+ if (cmdif->name[0] != '\1')
+ return (1);
+
+ *ptype = IPFW_TABLE_INTERFACE;
+ *puidx = cmdif->p.kidx;
+
+ return (0);
+}
+
+static int
+classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+
+ *puidx = cmd->arg1;
+ *ptype = IPFW_TABLE_FLOW;
+
+ return (0);
+}
+
+static void
+update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+ cmd->arg1 = idx;
}
-/*
- * Sets new table value for given opcode.
- * Assume the same opcodes as classify_table_opcode()
- */
static void
-update_table_opcode(ipfw_insn *cmd, uint16_t idx)
+update_via(ipfw_insn *cmd, uint16_t idx)
{
ipfw_insn_if *cmdif;
- switch (cmd->opcode) {
- case O_IP_SRC_LOOKUP:
- case O_IP_DST_LOOKUP:
- /* Basic IPv4/IPv6 or u32 lookups */
- cmd->arg1 = idx;
- break;
- case O_XMIT:
- case O_RECV:
- case O_VIA:
- /* Interface table, possibly */
- cmdif = (ipfw_insn_if *)cmd;
- cmdif->p.kidx = idx;
- break;
- case O_IP_FLOW_LOOKUP:
- cmd->arg1 = idx;
- break;
- }
+ cmdif = (ipfw_insn_if *)cmd;
+ cmdif->p.kidx = idx;
}
+static int
+table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+ struct named_object **pno)
+{
+ struct table_config *tc;
+ int error;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
+ if (error != 0)
+ return (error);
+
+ *pno = &tc->no;
+ return (0);
+}
+
+/* XXX: sets-sets! */
+static struct named_object *
+table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+ struct namedobj_instance *ni;
+ struct table_config *tc;
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+ ni = CHAIN_TO_NI(ch);
+ tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
+ KASSERT(tc != NULL, ("Table with index %d not found", idx));
+
+ return (&tc->no);
+}
+
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ O_IP_SRC_LOOKUP, IPFW_TLV_TBL_NAME,
+ classify_srcdst, update_arg1,
+ table_findbyname, table_findbykidx, create_table_compat
+ },
+ {
+ O_IP_DST_LOOKUP, IPFW_TLV_TBL_NAME,
+ classify_srcdst, update_arg1,
+ table_findbyname, table_findbykidx, create_table_compat
+ },
+ {
+ O_IP_FLOW_LOOKUP, IPFW_TLV_TBL_NAME,
+ classify_flow, update_arg1,
+ table_findbyname, table_findbykidx, create_table_compat
+ },
+ {
+ O_XMIT, IPFW_TLV_TBL_NAME,
+ classify_via, update_via,
+ table_findbyname, table_findbykidx, create_table_compat
+ },
+ {
+ O_RECV, IPFW_TLV_TBL_NAME,
+ classify_via, update_via,
+ table_findbyname, table_findbykidx, create_table_compat
+ },
+ {
+ O_VIA, IPFW_TLV_TBL_NAME,
+ classify_via, update_via,
+ table_findbyname, table_findbykidx, create_table_compat
+ },
+};
+
+
/*
* Checks table name for validity.
* Enforce basic length checks, the rest
@@ -2960,10 +2977,11 @@ find_name_tlv(void *tlvs, int len, uint16_t uidx)
* or name in ntlv.
* Note @ti structure contains unchecked data from userland.
*
- * Returns pointer to table_config or NULL.
+ * Returns 0 on success and fills in @tc with the found config.
*/
-static struct table_config *
-find_table(struct namedobj_instance *ni, struct tid_info *ti)
+static int
+find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
+ struct table_config **tc)
{
char *name, bname[16];
struct named_object *no;
@@ -2973,7 +2991,7 @@ find_table(struct namedobj_instance *ni, struct tid_info *ti)
if (ti->tlvs != NULL) {
ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx);
if (ntlv == NULL)
- return (NULL);
+ return (EINVAL);
name = ntlv->name;
/*
@@ -2989,8 +3007,27 @@ find_table(struct namedobj_instance *ni, struct tid_info *ti)
}
no = ipfw_objhash_lookup_name(ni, set, name);
+ *tc = (struct table_config *)no;
+
+ return (0);
+}
+
+/*
+ * Finds table config based on either legacy index
+ * or name in ntlv.
+ * Note @ti structure contains unchecked data from userland.
+ *
+ * Returns pointer to table_config or NULL.
+ */
+static struct table_config *
+find_table(struct namedobj_instance *ni, struct tid_info *ti)
+{
+ struct table_config *tc;
+
+ if (find_table_err(ni, ti, &tc) != 0)
+ return (NULL);
- return ((struct table_config *)no);
+ return (tc);
}
/*
@@ -3016,6 +3053,7 @@ alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
name = ntlv->name;
set = ntlv->set;
} else {
+ /* Compat part: convert number to string representation */
snprintf(bname, sizeof(bname), "%d", ti->uidx);
name = bname;
set = 0;
@@ -3023,7 +3061,7 @@ alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
tc->no.name = tc->tablename;
- tc->no.type = ta->type;
+ tc->no.subtype = ta->type;
tc->no.set = set;
tc->tflags = tflags;
tc->ta = ta;
@@ -3031,11 +3069,6 @@ alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
/* Set "shared" value type by default */
tc->vshared = 1;
- if (ti->tlvs == NULL) {
- tc->no.compat = 1;
- tc->no.uidx = ti->uidx;
- }
-
/* Preallocate data structures for new tables */
error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
if (error != 0) {
@@ -3211,7 +3244,6 @@ ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
struct namedobj_instance *ni;
int bad, i, l, cmdlen;
uint16_t kidx;
- uint8_t type;
ipfw_insn *cmd;
IPFW_UH_WLOCK_ASSERT(ch);
@@ -3229,7 +3261,7 @@ ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
cmdlen = 0;
for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
cmdlen = F_LEN(cmd);
- if (classify_table_opcode(cmd, &kidx, &type) != 0)
+ if (classify_opcode_kidx(cmd, &kidx) != 0)
continue;
no = ipfw_objhash_lookup_kidx(ni, kidx);
KASSERT(no != NULL,
@@ -3252,7 +3284,7 @@ ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
cmdlen = 0;
for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
cmdlen = F_LEN(cmd);
- if (classify_table_opcode(cmd, &kidx, &type) != 0)
+ if (classify_opcode_kidx(cmd, &kidx) != 0)
continue;
no = ipfw_objhash_lookup_kidx(ni, kidx);
KASSERT(no != NULL,
@@ -3291,7 +3323,7 @@ ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
cmdlen = 0;
for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
cmdlen = F_LEN(cmd);
- if (classify_table_opcode(cmd, &kidx, &type) != 0)
+ if (classify_opcode_kidx(cmd, &kidx) != 0)
continue;
no = ipfw_objhash_lookup_kidx(ni, kidx);
KASSERT(no != NULL,
@@ -3313,215 +3345,68 @@ ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
}
/*
- * Finds and bumps refcount for tables referenced by given @rule.
+ * Finds and bumps refcount for objects referenced by given @rule.
* Auto-creates non-existing tables.
* Fills in @oib array with userland/kernel indexes.
- * First free oidx pointer is saved back in @oib.
*
* Returns 0 on success.
*/
static int
-find_ref_rule_tables(struct ip_fw_chain *ch, struct ip_fw *rule,
- struct rule_check_info *ci, struct obj_idx **oib, struct tid_info *ti)
+ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
+ struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti)
{
- struct table_config *tc;
- struct namedobj_instance *ni;
- struct named_object *no;
int cmdlen, error, l, numnew;
- uint16_t kidx;
ipfw_insn *cmd;
- struct obj_idx *pidx, *pidx_first, *p;
+ struct obj_idx *pidx;
+ int found, unresolved;
- pidx_first = *oib;
- pidx = pidx_first;
+ pidx = oib;
l = rule->cmd_len;
cmd = rule->cmd;
cmdlen = 0;
error = 0;
numnew = 0;
+ found = 0;
+ unresolved = 0;
IPFW_UH_WLOCK(ch);
- ni = CHAIN_TO_NI(ch);
/* Increase refcount on each existing referenced table. */
for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
cmdlen = F_LEN(cmd);
- if (classify_table_opcode(cmd, &ti->uidx, &ti->type) != 0)
- continue;
-
- pidx->uidx = ti->uidx;
- pidx->type = ti->type;
-
- if ((tc = find_table(ni, ti)) != NULL) {
- if (tc->no.type != ti->type) {
- /* Incompatible types */
- error = EINVAL;
- break;
- }
-
- /* Reference found table and save kidx */
- tc->no.refcnt++;
- pidx->kidx = tc->no.kidx;
+ error = ref_opcode_object(ch, cmd, ti, pidx, &found, &unresolved);
+ if (error != 0)
+ break;
+ if (found || unresolved) {
+ pidx->off = rule->cmd_len - l;
pidx++;
- continue;
}
-
/*
* Compability stuff for old clients:
- * prepare to manually create non-existing tables.
+ * prepare to manually create non-existing objects.
*/
- pidx++;
- numnew++;
+ if (unresolved)
+ numnew++;
}
if (error != 0) {
/* Unref everything we have already done */
- for (p = *oib; p < pidx; p++) {
- if (p->kidx == 0)
- continue;
-
- /* Find & unref by existing idx */
- no = ipfw_objhash_lookup_kidx(ni, p->kidx);
- KASSERT(no != NULL, ("Ref'd table %d disappeared",
- p->kidx));
-
- no->refcnt--;
- }
- }
-
- IPFW_UH_WUNLOCK(ch);
-
- if (numnew == 0) {
- *oib = pidx;
- return (error);
- }
-
- /*
- * Compatibility stuff: do actual creation for non-existing,
- * but referenced tables.
- */
- for (p = pidx_first; p < pidx; p++) {
- if (p->kidx != 0)
- continue;
-
- ti->uidx = p->uidx;
- ti->type = p->type;
- ti->atype = 0;
-
- error = create_table_compat(ch, ti, &kidx);
- if (error == 0) {
- p->kidx = kidx;
- continue;
- }
-
- /* Error. We have to drop references */
- IPFW_UH_WLOCK(ch);
- for (p = pidx_first; p < pidx; p++) {
- if (p->kidx == 0)
- continue;
-
- /* Find & unref by existing idx */
- no = ipfw_objhash_lookup_kidx(ni, p->kidx);
- KASSERT(no != NULL, ("Ref'd table %d disappeared",
- p->kidx));
-
- no->refcnt--;
- }
+ unref_oib_objects(ch, rule->cmd, oib, pidx);
IPFW_UH_WUNLOCK(ch);
-
return (error);
}
- *oib = pidx;
-
- return (error);
-}
-
-/*
- * Remove references from every table used in @rule.
- */
-void
-ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule)
-{
- int cmdlen, l;
- ipfw_insn *cmd;
- struct namedobj_instance *ni;
- struct named_object *no;
- uint16_t kidx;
- uint8_t type;
-
- IPFW_UH_WLOCK_ASSERT(chain);
- ni = CHAIN_TO_NI(chain);
-
- l = rule->cmd_len;
- cmd = rule->cmd;
- cmdlen = 0;
- for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
- cmdlen = F_LEN(cmd);
-
- if (classify_table_opcode(cmd, &kidx, &type) != 0)
- continue;
-
- no = ipfw_objhash_lookup_kidx(ni, kidx);
-
- KASSERT(no != NULL, ("table id %d not found", kidx));
- KASSERT(no->type == type, ("wrong type %d (%d) for table id %d",
- no->type, type, kidx));
- KASSERT(no->refcnt > 0, ("refcount for table %d is %d",
- kidx, no->refcnt));
-
- no->refcnt--;
- }
-}
-
-/*
- * Compatibility function for old ipfw(8) binaries.
- * Rewrites table kernel indices with userland ones.
- * Convert tables matching '/^\d+$/' to their atoi() value.
- * Use number 65535 for other tables.
- *
- * Returns 0 on success.
- */
-int
-ipfw_rewrite_table_kidx(struct ip_fw_chain *chain, struct ip_fw_rule0 *rule)
-{
- int cmdlen, error, l;
- ipfw_insn *cmd;
- uint16_t kidx, uidx;
- uint8_t type;
- struct named_object *no;
- struct namedobj_instance *ni;
-
- ni = CHAIN_TO_NI(chain);
- error = 0;
-
- l = rule->cmd_len;
- cmd = rule->cmd;
- cmdlen = 0;
- for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
- cmdlen = F_LEN(cmd);
-
- if (classify_table_opcode(cmd, &kidx, &type) != 0)
- continue;
+ IPFW_UH_WUNLOCK(ch);
- if ((no = ipfw_objhash_lookup_kidx(ni, kidx)) == NULL)
- return (1);
+ found = pidx - oib;
+ KASSERT(found == ci->object_opcodes,
+ ("refcount inconsistency: found: %d total: %d",
+ found, ci->object_opcodes));
- uidx = no->uidx;
- if (no->compat == 0) {
-
- /*
- * We are called via legacy opcode.
- * Save error and show table as fake number
- * not to make ipfw(8) hang.
- */
- uidx = 65535;
- error = 2;
- }
-
- update_table_opcode(cmd, uidx);
- }
+ /* Perform auto-creation for non-existing objects */
+ if (numnew != 0)
+ error = create_objects_compat(ch, rule->cmd, oib, pidx, ti);
return (error);
}
@@ -3534,31 +3419,27 @@ ipfw_rewrite_table_kidx(struct ip_fw_chain *chain, struct ip_fw_rule0 *rule)
* Returns 0 on success and appropriate error code otherwise.
*/
int
-ipfw_rewrite_table_uidx(struct ip_fw_chain *chain,
+ipfw_rewrite_rule_uidx(struct ip_fw_chain *chain,
struct rule_check_info *ci)
{
- int cmdlen, error, l;
+ int error;
ipfw_insn *cmd;
- uint16_t uidx;
uint8_t type;
- struct namedobj_instance *ni;
struct obj_idx *p, *pidx_first, *pidx_last;
struct tid_info ti;
- ni = CHAIN_TO_NI(chain);
-
/*
* Prepare an array for storing opcode indices.
* Use stack allocation by default.
*/
- if (ci->table_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) {
+ if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) {
/* Stack */
pidx_first = ci->obuf;
} else
- pidx_first = malloc(ci->table_opcodes * sizeof(struct obj_idx),
+ pidx_first = malloc(ci->object_opcodes * sizeof(struct obj_idx),
M_IPFW, M_WAITOK | M_ZERO);
- pidx_last = pidx_first;
+ pidx_last = pidx_first + ci->object_opcodes;
error = 0;
type = 0;
memset(&ti, 0, sizeof(ti));
@@ -3573,28 +3454,18 @@ ipfw_rewrite_table_uidx(struct ip_fw_chain *chain,
ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv);
}
- /* Reference all used tables */
- error = find_ref_rule_tables(chain, ci->krule, ci, &pidx_last, &ti);
+ /* Reference all used tables and other objects */
+ error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti);
if (error != 0)
goto free;
- IPFW_UH_WLOCK(chain);
-
/* Perform rule rewrite */
- l = ci->krule->cmd_len;
- cmd = ci->krule->cmd;
- cmdlen = 0;
p = pidx_first;
- for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
- cmdlen = F_LEN(cmd);
- if (classify_table_opcode(cmd, &uidx, &type) != 0)
- continue;
- update_table_opcode(cmd, p->kidx);
- p++;
+ for (p = pidx_first; p < pidx_last; p++) {
+ cmd = ci->krule->cmd + p->off;
+ update_opcode_kidx(cmd, p->kidx);
}
- IPFW_UH_WUNLOCK(chain);
-
free:
if (pidx_first != ci->obuf)
free(pidx_first, M_IPFW);
@@ -3641,6 +3512,7 @@ ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
{
IPFW_DEL_SOPT_HANDLER(last, scodes);
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
/* Remove all tables from working set */
IPFW_UH_WLOCK(ch);
@@ -3678,6 +3550,7 @@ ipfw_init_tables(struct ip_fw_chain *ch, int first)
ipfw_table_value_init(ch, first);
ipfw_table_algo_init(ch);
+ IPFW_ADD_OBJ_REWRITER(first, opcodes);
IPFW_ADD_SOPT_HANDLER(first, scodes);
return (0);
}
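
The rewrite above dissolves the old classify_table_opcode()/update_table_opcode() switch statements into per-opcode classifier and updater callbacks, selected through the opcodes[] table. A compact standalone model of that table-driven dispatch:

#include <stdio.h>

struct insn_model { int opcode; unsigned arg; };

static int
classify_arg(struct insn_model *c, unsigned *idx)
{
	*idx = c->arg;
	return (0);
}

static void
update_arg(struct insn_model *c, unsigned idx)
{
	c->arg = idx;
}

struct op_handler {
	int opcode;
	int (*classify)(struct insn_model *, unsigned *);
	void (*update)(struct insn_model *, unsigned);
};

static struct op_handler handlers[] = {
	{ 1, classify_arg, update_arg },	/* think O_IP_SRC_LOOKUP */
	{ 2, classify_arg, update_arg },	/* think O_IP_FLOW_LOOKUP */
};

int
main(void)
{
	struct insn_model c = { 1, 7 };
	unsigned idx;
	size_t i;

	for (i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++) {
		if (handlers[i].opcode != c.opcode)
			continue;
		if (handlers[i].classify(&c, &idx) == 0)
			handlers[i].update(&c, idx + 100);	/* remap */
	}
	printf("%u\n", c.arg);	/* prints 107 */
	return (0);
}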
diff --git a/sys/netpfil/ipfw/ip_fw_table.h b/sys/netpfil/ipfw/ip_fw_table.h
index 028e450..ca49fd4 100644
--- a/sys/netpfil/ipfw/ip_fw_table.h
+++ b/sys/netpfil/ipfw/ip_fw_table.h
@@ -53,16 +53,6 @@ struct table_info {
u_long data; /* Hints for given func */
};
-/* Internal structures for handling sockopt data */
-struct tid_info {
- uint32_t set; /* table set */
- uint16_t uidx; /* table index */
- uint8_t type; /* table type */
- uint8_t atype;
- void *tlvs; /* Pointer to first TLV */
- int tlen; /* Total TLV size block */
-};
-
struct table_value;
struct tentry_info {
void *paddr;
@@ -189,13 +179,12 @@ void rollback_table_values(struct tableop_state *ts);
int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain,
struct rule_check_info *ci);
-int ipfw_rewrite_table_kidx(struct ip_fw_chain *chain,
- struct ip_fw_rule0 *rule);
int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule,
uint32_t *bmask);
int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
struct sockopt_data *sd);
void ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule);
+struct namedobj_instance *ipfw_get_table_objhash(struct ip_fw_chain *ch);
/* utility functions */
int ipfw_check_table_name(char *name);
diff --git a/sys/opencrypto/cryptodeflate.c b/sys/opencrypto/cryptodeflate.c
index 0ad86e3..c55210d 100644
--- a/sys/opencrypto/cryptodeflate.c
+++ b/sys/opencrypto/cryptodeflate.c
@@ -29,7 +29,7 @@
/*
* This file contains a wrapper around the deflate algo compression
- * functions using the zlib library (see net/zlib.{c,h})
+ * functions using the zlib library (see libkern/zlib.c and sys/zlib.h)
*/
#include <sys/cdefs.h>
@@ -42,7 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/sdt.h>
#include <sys/systm.h>
-#include <net/zlib.h>
+#include <sys/zlib.h>
#include <opencrypto/cryptodev.h>
#include <opencrypto/deflate.h>
diff --git a/sys/opencrypto/deflate.h b/sys/opencrypto/deflate.h
index dcf7a84..d31a3bf 100644
--- a/sys/opencrypto/deflate.h
+++ b/sys/opencrypto/deflate.h
@@ -36,7 +36,7 @@
#ifndef _CRYPTO_DEFLATE_H_
#define _CRYPTO_DEFLATE_H_
-#include <net/zlib.h>
+#include <sys/zlib.h>
#define Z_METHOD 8
#define Z_MEMLEVEL 8
diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/aim_machdep.c
index 1c22220..ab09ab6 100644
--- a/sys/powerpc/aim/machdep.c
+++ b/sys/powerpc/aim/aim_machdep.c
@@ -129,106 +129,14 @@ __FBSDID("$FreeBSD$");
#include <dev/ofw/openfirm.h>
-int cold = 1;
#ifdef __powerpc64__
extern int n_slbs;
-int cacheline_size = 128;
-#else
-int cacheline_size = 32;
#endif
-int hw_direct_map = 1;
-
-extern void *ap_pcpu;
-
-struct pcpu __pcpu[MAXCPU];
-
-static struct trapframe frame0;
-
-char machine[] = "powerpc";
-SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
-
-static void cpu_startup(void *);
-SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
-
-SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size,
- CTLFLAG_RD, &cacheline_size, 0, "");
-
-uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *);
-
-long Maxmem = 0;
-long realmem = 0;
#ifndef __powerpc64__
struct bat battable[16];
#endif
-struct kva_md_info kmi;
-
-static void
-cpu_startup(void *dummy)
-{
-
- /*
- * Initialise the decrementer-based clock.
- */
- decr_init();
-
- /*
- * Good {morning,afternoon,evening,night}.
- */
- cpu_setup(PCPU_GET(cpuid));
-
-#ifdef PERFMON
- perfmon_init();
-#endif
- printf("real memory = %ld (%ld MB)\n", ptoa(physmem),
- ptoa(physmem) / 1048576);
- realmem = physmem;
-
- if (bootverbose)
- printf("available KVA = %zd (%zd MB)\n",
- virtual_end - virtual_avail,
- (virtual_end - virtual_avail) / 1048576);
-
- /*
- * Display any holes after the first chunk of extended memory.
- */
- if (bootverbose) {
- int indx;
-
- printf("Physical memory chunk(s):\n");
- for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
- vm_offset_t size1 =
- phys_avail[indx + 1] - phys_avail[indx];
-
- #ifdef __powerpc64__
- printf("0x%016lx - 0x%016lx, %ld bytes (%ld pages)\n",
- #else
- printf("0x%08x - 0x%08x, %d bytes (%ld pages)\n",
- #endif
- phys_avail[indx], phys_avail[indx + 1] - 1, size1,
- size1 / PAGE_SIZE);
- }
- }
-
- vm_ksubmap_init(&kmi);
-
- printf("avail memory = %ld (%ld MB)\n", ptoa(vm_cnt.v_free_count),
- ptoa(vm_cnt.v_free_count) / 1048576);
-
- /*
- * Set up buffers, so they can be used to read disk labels.
- */
- bufinit();
- vm_pager_bufferinit();
-}
-
-extern vm_offset_t __startkernel, __endkernel;
-extern unsigned char __bss_start[];
-extern unsigned char __sbss_start[];
-extern unsigned char __sbss_end[];
-extern unsigned char _end[];
-
#ifndef __powerpc64__
/* Bits for running on 64-bit systems in 32-bit mode. */
extern void *testppc64, *testppc64size;
@@ -252,121 +160,25 @@ extern void *imisstrap, *imisssize;
extern void *dlmisstrap, *dlmisssize;
extern void *dsmisstrap, *dsmisssize;
-uintptr_t
-powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp)
+extern void *ap_pcpu;
+
+void aim_cpu_init(vm_offset_t toc);
+
+void
+aim_cpu_init(vm_offset_t toc)
{
- struct pcpu *pc;
- vm_offset_t startkernel, endkernel;
size_t trap_offset, trapsize;
vm_offset_t trap;
- void *kmdp;
- char *env;
register_t msr, scratch;
uint8_t *cache_check;
int cacheline_warn;
#ifndef __powerpc64__
int ppc64;
#endif
-#ifdef DDB
- vm_offset_t ksym_start;
- vm_offset_t ksym_end;
-#endif
- kmdp = NULL;
trap_offset = 0;
cacheline_warn = 0;
- /* First guess at start/end kernel positions */
- startkernel = __startkernel;
- endkernel = __endkernel;
-
- /* Check for ePAPR loader, which puts a magic value into r6 */
- if (mdp == (void *)0x65504150)
- mdp = NULL;
-
- /*
- * Parse metadata if present and fetch parameters. Must be done
- * before console is inited so cninit gets the right value of
- * boothowto.
- */
- if (mdp != NULL) {
- preload_metadata = mdp;
- kmdp = preload_search_by_type("elf kernel");
- if (kmdp != NULL) {
- boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
- kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
- endkernel = ulmax(endkernel, MD_FETCH(kmdp,
- MODINFOMD_KERNEND, vm_offset_t));
-#ifdef DDB
- ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
- ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
- db_fetch_ksymtab(ksym_start, ksym_end);
-#endif
- }
- } else {
- bzero(__sbss_start, __sbss_end - __sbss_start);
- bzero(__bss_start, _end - __bss_start);
- }
-
- /* Store boot environment state */
- OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry);
-
- /*
- * Init params/tunables that can be overridden by the loader
- */
- init_param1();
-
- /*
- * Start initializing proc0 and thread0.
- */
- proc_linkup0(&proc0, &thread0);
- thread0.td_frame = &frame0;
-
- /*
- * Set up per-cpu data.
- */
- pc = __pcpu;
- pcpu_init(pc, 0, sizeof(struct pcpu));
- pc->pc_curthread = &thread0;
-#ifdef __powerpc64__
- __asm __volatile("mr 13,%0" :: "r"(pc->pc_curthread));
-#else
- __asm __volatile("mr 2,%0" :: "r"(pc->pc_curthread));
-#endif
- pc->pc_cpuid = 0;
-
- __asm __volatile("mtsprg 0, %0" :: "r"(pc));
-
- /*
- * Init mutexes, which we use heavily in PMAP
- */
-
- mutex_init();
-
- /*
- * Install the OF client interface
- */
-
- OF_bootstrap();
-
- /*
- * Initialize the console before printing anything.
- */
- cninit();
-
- /*
- * Complain if there is no metadata.
- */
- if (mdp == NULL || kmdp == NULL) {
- printf("powerpc_init: no loader metadata.\n");
- }
-
- /*
- * Init KDB
- */
-
- kdb_init();
-
/* Various very early CPU fix ups */
switch (mfpvr() >> 16) {
/*
@@ -431,9 +243,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp)
cacheline_size = 32;
}
- /* Make sure the kernel icache is valid before we go too much further */
- __syncicache((caddr_t)startkernel, endkernel - startkernel);
-
#ifndef __powerpc64__
/*
* Figure out whether we need to use the 64 bit PMAP. This works by
@@ -552,12 +361,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp)
}
/*
- * Choose a platform module so we can get the physical memory map.
- */
-
- platform_probe_and_attach();
-
- /*
* Initialise virtual memory. Use BUS_PROBE_GENERIC priority
* in case the platform module had a better idea of what we
* should do.
@@ -566,96 +369,6 @@ powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp)
pmap_mmu_install(MMU_TYPE_G5, BUS_PROBE_GENERIC);
else
pmap_mmu_install(MMU_TYPE_OEA, BUS_PROBE_GENERIC);
-
- pmap_bootstrap(startkernel, endkernel);
- mtmsr(PSL_KERNSET & ~PSL_EE);
-
- /*
- * Initialize params/tunables that are derived from memsize
- */
- init_param2(physmem);
-
- /*
- * Grab booted kernel's name
- */
- env = kern_getenv("kernelname");
- if (env != NULL) {
- strlcpy(kernelname, env, sizeof(kernelname));
- freeenv(env);
- }
-
- /*
- * Finish setting up thread0.
- */
- thread0.td_pcb = (struct pcb *)
- ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE -
- sizeof(struct pcb)) & ~15UL);
- bzero((void *)thread0.td_pcb, sizeof(struct pcb));
- pc->pc_curpcb = thread0.td_pcb;
-
- /* Initialise the message buffer. */
- msgbufinit(msgbufp, msgbufsize);
-
-#ifdef KDB
- if (boothowto & RB_KDB)
- kdb_enter(KDB_WHY_BOOTFLAGS,
- "Boot flags requested debugger");
-#endif
-
- return (((uintptr_t)thread0.td_pcb -
- (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL);
-}
-
-void
-bzero(void *buf, size_t len)
-{
- caddr_t p;
-
- p = buf;
-
- while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
- *p++ = 0;
- len--;
- }
-
- while (len >= sizeof(u_long) * 8) {
- *(u_long*) p = 0;
- *((u_long*) p + 1) = 0;
- *((u_long*) p + 2) = 0;
- *((u_long*) p + 3) = 0;
- len -= sizeof(u_long) * 8;
- *((u_long*) p + 4) = 0;
- *((u_long*) p + 5) = 0;
- *((u_long*) p + 6) = 0;
- *((u_long*) p + 7) = 0;
- p += sizeof(u_long) * 8;
- }
-
- while (len >= sizeof(u_long)) {
- *(u_long*) p = 0;
- len -= sizeof(u_long);
- p += sizeof(u_long);
- }
-
- while (len) {
- *p++ = 0;
- len--;
- }
-}
-
-void
-cpu_boot(int howto)
-{
-}
-
-/*
- * Flush the D-cache for non-DMA I/O so that the I-cache can
- * be made coherent later.
- */
-void
-cpu_flush_dcache(void *ptr, size_t len)
-{
- /* TBD */
}
/*
@@ -669,17 +382,6 @@ cpu_halt(void)
}
int
-ptrace_set_pc(struct thread *td, unsigned long addr)
-{
- struct trapframe *tf;
-
- tf = td->td_frame;
- tf->srr0 = (register_t)addr;
-
- return (0);
-}
-
-int
ptrace_single_step(struct thread *td)
{
struct trapframe *tf;
@@ -727,41 +429,7 @@ memcpy(pcpu->pc_slb, PCPU_GET(slb), sizeof(pcpu->pc_slb));
#endif
}
-void
-spinlock_enter(void)
-{
- struct thread *td;
- register_t msr;
-
- td = curthread;
- if (td->td_md.md_spinlock_count == 0) {
- __asm __volatile("or 2,2,2"); /* Set high thread priority */
- msr = intr_disable();
- td->td_md.md_spinlock_count = 1;
- td->td_md.md_saved_msr = msr;
- } else
- td->td_md.md_spinlock_count++;
- critical_enter();
-}
-
-void
-spinlock_exit(void)
-{
- struct thread *td;
- register_t msr;
-
- td = curthread;
- critical_exit();
- msr = td->td_md.md_saved_msr;
- td->td_md.md_spinlock_count--;
- if (td->td_md.md_spinlock_count == 0) {
- intr_restore(msr);
- __asm __volatile("or 6,6,6"); /* Set normal thread priority */
- }
-}
-
#ifndef __powerpc64__
-
uint64_t
va_to_vsid(pmap_t pm, vm_offset_t va)
{
@@ -945,3 +613,4 @@ cpu_sleep()
enable_vec(curthread);
powerpc_sync();
}
+
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 23bd449..c45f941 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -137,8 +137,6 @@ struct ofw_map {
extern unsigned char _etext[];
extern unsigned char _end[];
-extern int ofw_real_mode;
-
/*
* Map of physical memory regions.
*/
@@ -852,8 +850,7 @@ moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend
*/
chosen = OF_finddevice("/chosen");
- if (!ofw_real_mode && chosen != -1 &&
- OF_getprop(chosen, "mmu", &mmui, 4) != -1) {
+ if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) {
mmu = OF_instance_to_package(mmui);
if (mmu == -1 ||
(sz = OF_getproplen(mmu, "translations")) == -1)
diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/booke_machdep.c
index 943063c..2a27775 100644
--- a/sys/powerpc/booke/machdep.c
+++ b/sys/powerpc/booke/booke_machdep.c
@@ -163,6 +163,7 @@ extern unsigned char __bss_start[];
extern unsigned char __sbss_start[];
extern unsigned char __sbss_end[];
extern unsigned char _end[];
+extern vm_offset_t __endkernel;
/*
* Bootinfo is passed to us by legacy loaders. Save the address of the
@@ -170,25 +171,6 @@ extern unsigned char _end[];
*/
uint32_t *bootinfo;
-struct kva_md_info kmi;
-struct pcpu __pcpu[MAXCPU];
-struct trapframe frame0;
-int cold = 1;
-long realmem = 0;
-long Maxmem = 0;
-char machine[] = "powerpc";
-SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
-
-int cacheline_size = 32;
-
-SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size,
- CTLFLAG_RD, &cacheline_size, 0, "");
-
-int hw_direct_map = 0;
-
-static void cpu_booke_startup(void *);
-SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_booke_startup, NULL);
-
void print_kernel_section_addr(void);
void print_kenv(void);
u_int booke_init(uint32_t, uint32_t);
@@ -219,6 +201,16 @@ extern void *int_performance_counter;
("Handler " #handler " too far from interrupt vector base")); \
mtspr(ivor, (uintptr_t)(&handler) & 0xffffUL);
+uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t, vm_offset_t, void *mdp);
+void booke_cpu_init(void);
+
+void
+booke_cpu_init(void)
+{
+
+ pmap_mmu_install(MMU_TYPE_BOOKE, BUS_PROBE_GENERIC);
+}
+
void
ivor_setup(void)
{
@@ -244,90 +236,6 @@ ivor_setup(void)
#endif
}
-static void
-cpu_booke_startup(void *dummy)
-{
- int indx;
- unsigned long size;
-
- /* Initialise the decrementer-based clock. */
- decr_init();
-
- /* Good {morning,afternoon,evening,night}. */
- cpu_setup(PCPU_GET(cpuid));
-
- printf("real memory = %lu (%ld MB)\n", ptoa(physmem),
- ptoa(physmem) / 1048576);
- realmem = physmem;
-
- /* Display any holes after the first chunk of extended memory. */
- if (bootverbose) {
- printf("Physical memory chunk(s):\n");
- for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
- size = phys_avail[indx + 1] - phys_avail[indx];
-
- printf("0x%08x - 0x%08x, %lu bytes (%lu pages)\n",
- phys_avail[indx], phys_avail[indx + 1] - 1,
- size, size / PAGE_SIZE);
- }
- }
-
- vm_ksubmap_init(&kmi);
-
- printf("avail memory = %lu (%ld MB)\n", ptoa(vm_cnt.v_free_count),
- ptoa(vm_cnt.v_free_count) / 1048576);
-
- /* Set up buffers, so they can be used to read disk labels. */
- bufinit();
- vm_pager_bufferinit();
-}
-
-static char *
-kenv_next(char *cp)
-{
-
- if (cp != NULL) {
- while (*cp != 0)
- cp++;
- cp++;
- if (*cp == 0)
- cp = NULL;
- }
- return (cp);
-}
-
-void
-print_kenv(void)
-{
- int len;
- char *cp;
-
- debugf("loader passed (static) kenv:\n");
- if (kern_envp == NULL) {
- debugf(" no env, null ptr\n");
- return;
- }
- debugf(" kern_envp = 0x%08x\n", (u_int32_t)kern_envp);
-
- len = 0;
- for (cp = kern_envp; cp != NULL; cp = kenv_next(cp))
- debugf(" %x %s\n", (u_int32_t)cp, cp);
-}
-
-void
-print_kernel_section_addr(void)
-{
-
- debugf("kernel image addresses:\n");
- debugf(" kernel_text = 0x%08x\n", (uint32_t)kernel_text);
- debugf(" _etext (sdata) = 0x%08x\n", (uint32_t)_etext);
- debugf(" _edata = 0x%08x\n", (uint32_t)_edata);
- debugf(" __sbss_start = 0x%08x\n", (uint32_t)__sbss_start);
- debugf(" __sbss_end = 0x%08x\n", (uint32_t)__sbss_end);
- debugf(" __sbss_start = 0x%08x\n", (uint32_t)__bss_start);
- debugf(" _end = 0x%08x\n", (uint32_t)_end);
-}
-
static int
booke_check_for_fdt(uint32_t arg1, vm_offset_t *dtbp)
{
@@ -345,24 +253,20 @@ booke_check_for_fdt(uint32_t arg1, vm_offset_t *dtbp)
return (0);
}
-u_int
+uintptr_t
booke_init(uint32_t arg1, uint32_t arg2)
{
- struct pcpu *pc;
- void *kmdp, *mdp;
+ uintptr_t ret;
+ void *mdp;
vm_offset_t dtbp, end;
-#ifdef DDB
- vm_offset_t ksym_start;
- vm_offset_t ksym_end;
-#endif
-
- kmdp = NULL;
end = (uintptr_t)_end;
dtbp = (vm_offset_t)NULL;
/* Set up TLB initially */
bootinfo = NULL;
+ bzero(__sbss_start, __sbss_end - __sbss_start);
+ bzero(__bss_start, _end - __bss_start);
tlb1_init();
/*
@@ -391,152 +295,22 @@ booke_init(uint32_t arg1, uint32_t arg2)
memmove((void *)end, (void *)dtbp, fdt_totalsize((void *)dtbp));
dtbp = end;
end += fdt_totalsize((void *)dtbp);
+ __endkernel = end;
mdp = NULL;
} else if (arg1 > (uintptr_t)kernel_text) /* FreeBSD loader */
mdp = (void *)arg1;
else /* U-Boot */
mdp = NULL;
- /*
- * Parse metadata and fetch parameters.
- */
- if (mdp != NULL) {
- preload_metadata = mdp;
- kmdp = preload_search_by_type("elf kernel");
- if (kmdp != NULL) {
- boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
- kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
- dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
- end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t);
-
- bootinfo = (uint32_t *)preload_search_info(kmdp,
- MODINFO_METADATA | MODINFOMD_BOOTINFO);
-
-#ifdef DDB
- ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
- ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
- db_fetch_ksymtab(ksym_start, ksym_end);
-#endif
- }
- } else {
- bzero(__sbss_start, __sbss_end - __sbss_start);
- bzero(__bss_start, _end - __bss_start);
- }
-
-#if defined(FDT_DTB_STATIC)
- /*
- * In case the device tree blob was not retrieved (from metadata) try
- * to use the statically embedded one.
- */
- if (dtbp == (vm_offset_t)NULL)
- dtbp = (vm_offset_t)&fdt_static_dtb;
-#endif
-
- if (OF_install(OFW_FDT, 0) == FALSE)
- while (1);
-
- if (OF_init((void *)dtbp) != 0)
- while (1);
-
- OF_interpret("perform-fixup", 0);
-
/* Reset TLB1 to get rid of temporary mappings */
tlb1_init();
- /* Reset Time Base */
- mttb(0);
-
- /* Init params/tunables that can be overridden by the loader. */
- init_param1();
-
- /* Start initializing proc0 and thread0. */
- proc_linkup0(&proc0, &thread0);
- thread0.td_frame = &frame0;
-
- /* Set up per-cpu data and store the pointer in SPR general 0. */
- pc = &__pcpu[0];
- pcpu_init(pc, 0, sizeof(struct pcpu));
- pc->pc_curthread = &thread0;
-#ifdef __powerpc64__
- __asm __volatile("mr 13,%0" :: "r"(pc->pc_curthread));
-#else
- __asm __volatile("mr 2,%0" :: "r"(pc->pc_curthread));
-#endif
- __asm __volatile("mtsprg 0, %0" :: "r"(pc));
-
- /* Initialize system mutexes. */
- mutex_init();
-
- /* Initialize the console before printing anything. */
- cninit();
-
- /* Print out some debug info... */
- debugf("%s: console initialized\n", __func__);
- debugf(" arg3 mdp = 0x%08x\n", (u_int32_t)mdp);
- debugf(" end = 0x%08x\n", (u_int32_t)end);
- debugf(" boothowto = 0x%08x\n", boothowto);
-#ifdef MPC85XX
- debugf(" kernel ccsrbar = 0x%08x\n", CCSRBAR_VA);
-#endif
- debugf(" MSR = 0x%08x\n", mfmsr());
-#if defined(BOOKE_E500)
- debugf(" HID0 = 0x%08x\n", mfspr(SPR_HID0));
- debugf(" HID1 = 0x%08x\n", mfspr(SPR_HID1));
- debugf(" BUCSR = 0x%08x\n", mfspr(SPR_BUCSR));
-#endif
-
- debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp);
-
- print_kernel_section_addr();
- print_kenv();
-#if defined(BOOKE_E500)
- //tlb1_print_entries();
- //tlb1_print_tlbentries();
-#endif
-
- kdb_init();
-
-#ifdef KDB
- if (boothowto & RB_KDB)
- kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
-#endif
-
- /* Initialise platform module */
- platform_probe_and_attach();
-
- /* Initialise virtual memory. */
- pmap_mmu_install(MMU_TYPE_BOOKE, 0);
- pmap_bootstrap((uintptr_t)kernel_text, end);
- debugf("MSR = 0x%08x\n", mfmsr());
-#if defined(BOOKE_E500)
- //tlb1_print_entries();
- //tlb1_print_tlbentries();
-#endif
-
- /* Initialize params/tunables that are derived from memsize. */
- init_param2(physmem);
-
- /* Finish setting up thread0. */
- thread0.td_pcb = (struct pcb *)
- ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE -
- sizeof(struct pcb)) & ~15);
- bzero((void *)thread0.td_pcb, sizeof(struct pcb));
- pc->pc_curpcb = thread0.td_pcb;
-
- /* Initialise the message buffer. */
- msgbufinit(msgbufp, msgbufsize);
-
- /* Enable Machine Check interrupt. */
- mtmsr(mfmsr() | PSL_ME);
- isync();
+ ret = powerpc_init(dtbp, 0, 0, mdp);
/* Enable L1 caches */
booke_enable_l1_cache();
- debugf("%s: SP = 0x%08x\n", __func__,
- ((uintptr_t)thread0.td_pcb - 16) & ~15);
-
- return (((uintptr_t)thread0.td_pcb - 16) & ~15);
+ return (ret);
}
#define RES_GRANULE 32
@@ -560,63 +334,6 @@ cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz)
#endif
}
-/*
- * Flush the D-cache for non-DMA I/O so that the I-cache can
- * be made coherent later.
- */
-void
-cpu_flush_dcache(void *ptr, size_t len)
-{
- register_t addr, off;
-
- /*
- * Align the address to a cacheline and adjust the length
- * accordingly. Then round the length to a multiple of the
- * cacheline for easy looping.
- */
- addr = (uintptr_t)ptr;
- off = addr & (cacheline_size - 1);
- addr -= off;
- len = (len + off + cacheline_size - 1) & ~(cacheline_size - 1);
-
- while (len > 0) {
- __asm __volatile ("dcbf 0,%0" :: "r"(addr));
- __asm __volatile ("sync");
- addr += cacheline_size;
- len -= cacheline_size;
- }
-}
-
-void
-spinlock_enter(void)
-{
- struct thread *td;
- register_t msr;
-
- td = curthread;
- if (td->td_md.md_spinlock_count == 0) {
- msr = intr_disable();
- td->td_md.md_spinlock_count = 1;
- td->td_md.md_saved_msr = msr;
- } else
- td->td_md.md_spinlock_count++;
- critical_enter();
-}
-
-void
-spinlock_exit(void)
-{
- struct thread *td;
- register_t msr;
-
- td = curthread;
- critical_exit();
- msr = td->td_md.md_saved_msr;
- td->td_md.md_spinlock_count--;
- if (td->td_md.md_spinlock_count == 0)
- intr_restore(msr);
-}
-
/* Shutdown the CPU as much as possible. */
void
cpu_halt(void)
@@ -628,17 +345,6 @@ cpu_halt(void)
}
int
-ptrace_set_pc(struct thread *td, unsigned long addr)
-{
- struct trapframe *tf;
-
- tf = td->td_frame;
- tf->srr0 = (register_t)addr;
-
- return (0);
-}
-
-int
ptrace_single_step(struct thread *td)
{
struct trapframe *tf;
@@ -680,40 +386,3 @@ kdb_cpu_set_singlestep(void)
kdb_frame->srr1 |= PSL_DE;
}
-void
-bzero(void *buf, size_t len)
-{
- caddr_t p;
-
- p = buf;
-
- while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
- *p++ = 0;
- len--;
- }
-
- while (len >= sizeof(u_long) * 8) {
- *(u_long*) p = 0;
- *((u_long*) p + 1) = 0;
- *((u_long*) p + 2) = 0;
- *((u_long*) p + 3) = 0;
- len -= sizeof(u_long) * 8;
- *((u_long*) p + 4) = 0;
- *((u_long*) p + 5) = 0;
- *((u_long*) p + 6) = 0;
- *((u_long*) p + 7) = 0;
- p += sizeof(u_long) * 8;
- }
-
- while (len >= sizeof(u_long)) {
- *(u_long*) p = 0;
- len -= sizeof(u_long);
- p += sizeof(u_long);
- }
-
- while (len) {
- *p++ = 0;
- len--;
- }
-}
-
diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c
index 2c6df63..c5e3bf3 100644
--- a/sys/powerpc/booke/pmap.c
+++ b/sys/powerpc/booke/pmap.c
@@ -1031,13 +1031,8 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
* Align kernel start and end address (kernel image).
* Note that kernel end does not necessarily relate to kernsize.
* kernsize is the size of the kernel that is actually mapped.
- * Also note that "start - 1" is deliberate. With SMP, the
- * entry point is exactly a page from the actual load address.
- * As such, trunc_page() has no effect and we're off by a page.
- * Since we always have the ELF header between the load address
- * and the entry point, we can safely subtract 1 to compensate.
*/
- kernstart = trunc_page(start - 1);
+ kernstart = trunc_page(start);
data_start = round_page(kernelend);
data_end = data_start;
diff --git a/sys/powerpc/conf/GENERIC b/sys/powerpc/conf/GENERIC
index e3939be..b5bf850 100644
--- a/sys/powerpc/conf/GENERIC
+++ b/sys/powerpc/conf/GENERIC
@@ -32,6 +32,7 @@ options PSIM #GDB PSIM ppc simulator
options MAMBO #IBM Mambo Full System Simulator
options PSERIES #PAPR-compliant systems
+options FDT
options SCHED_ULE #ULE scheduler
options PREEMPTION #Enable kernel thread preemption
options INET #InterNETworking
diff --git a/sys/powerpc/ofw/ofw_machdep.c b/sys/powerpc/ofw/ofw_machdep.c
index 05af094..31813fb 100644
--- a/sys/powerpc/ofw/ofw_machdep.c
+++ b/sys/powerpc/ofw/ofw_machdep.c
@@ -62,11 +62,12 @@ __FBSDID("$FreeBSD$");
#include <machine/ofw_machdep.h>
#include <machine/trap.h>
+static void *fdt;
+int ofw_real_mode;
+
#ifdef AIM
extern register_t ofmsr[5];
extern void *openfirmware_entry;
-static void *fdt;
-int ofw_real_mode;
char save_trap_init[0x2f00]; /* EXC_LAST */
char save_trap_of[0x2f00]; /* EXC_LAST */
@@ -336,10 +337,10 @@ ofw_mem_regions(struct mem_region *memp, int *memsz,
*availsz = asz;
}
-#ifdef AIM
void
OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *))
{
+#ifdef AIM
ofmsr[0] = mfmsr();
#ifdef __powerpc64__
ofmsr[0] &= ~PSL_SF;
@@ -348,22 +349,25 @@ OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *))
__asm __volatile("mfsprg1 %0" : "=&r"(ofmsr[2]));
__asm __volatile("mfsprg2 %0" : "=&r"(ofmsr[3]));
__asm __volatile("mfsprg3 %0" : "=&r"(ofmsr[4]));
+ openfirmware_entry = openfirm;
if (ofmsr[0] & PSL_DR)
ofw_real_mode = 0;
else
ofw_real_mode = 1;
+ ofw_save_trap_vec(save_trap_init);
+#else
+ ofw_real_mode = 1;
+#endif
+
fdt = fdt_ptr;
- openfirmware_entry = openfirm;
#ifdef FDT_DTB_STATIC
/* Check for a statically included blob */
if (fdt == NULL)
fdt = &fdt_static_dtb;
#endif
-
- ofw_save_trap_vec(save_trap_init);
}
boolean_t
@@ -371,6 +375,7 @@ OF_bootstrap()
{
boolean_t status = FALSE;
+#ifdef AIM
if (openfirmware_entry != NULL) {
if (ofw_real_mode) {
status = OF_install(OFW_STD_REAL, 0);
@@ -386,18 +391,22 @@ OF_bootstrap()
return status;
OF_init(openfirmware);
- } else if (fdt != NULL) {
+ } else
+#endif
+ if (fdt != NULL) {
status = OF_install(OFW_FDT, 0);
if (status != TRUE)
return status;
OF_init(fdt);
+ OF_interpret("perform-fixup", 0);
}
return (status);
}
+#ifdef AIM
void
ofw_quiesce(void)
{
diff --git a/sys/powerpc/powerpc/busdma_machdep.c b/sys/powerpc/powerpc/busdma_machdep.c
index bd226c8..9ea51ce 100644
--- a/sys/powerpc/powerpc/busdma_machdep.c
+++ b/sys/powerpc/powerpc/busdma_machdep.c
@@ -1121,8 +1121,8 @@ add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) {
/* Page offset needs to be preserved. */
- bpage->vaddr |= vaddr & PAGE_MASK;
- bpage->busaddr |= vaddr & PAGE_MASK;
+ bpage->vaddr |= addr & PAGE_MASK;
+ bpage->busaddr |= addr & PAGE_MASK;
}
bpage->datavaddr = vaddr;
bpage->dataaddr = addr;
diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c
new file mode 100644
index 0000000..2bc9496
--- /dev/null
+++ b/sys/powerpc/powerpc/machdep.c
@@ -0,0 +1,502 @@
+/*-
+ * Copyright (C) 1995, 1996 Wolfgang Solfrank.
+ * Copyright (C) 1995, 1996 TooLs GmbH.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*-
+ * Copyright (C) 2001 Benno Rice
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+#include "opt_ddb.h"
+#include "opt_kstack_pages.h"
+#include "opt_platform.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/cons.h>
+#include <sys/cpu.h>
+#include <sys/eventhandler.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/linker.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/msgbuf.h>
+#include <sys/mutex.h>
+#include <sys/ptrace.h>
+#include <sys/reboot.h>
+#include <sys/rwlock.h>
+#include <sys/signalvar.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/ucontext.h>
+#include <sys/uio.h>
+#include <sys/vmmeter.h>
+#include <sys/vnode.h>
+
+#include <net/netisr.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+
+#include <machine/altivec.h>
+#ifndef __powerpc64__
+#include <machine/bat.h>
+#endif
+#include <machine/cpu.h>
+#include <machine/elf.h>
+#include <machine/fpu.h>
+#include <machine/hid.h>
+#include <machine/kdb.h>
+#include <machine/md_var.h>
+#include <machine/metadata.h>
+#include <machine/mmuvar.h>
+#include <machine/pcb.h>
+#include <machine/reg.h>
+#include <machine/sigframe.h>
+#include <machine/spr.h>
+#include <machine/trap.h>
+#include <machine/vmparam.h>
+#include <machine/ofw_machdep.h>
+
+#include <ddb/ddb.h>
+
+#include <dev/ofw/openfirm.h>
+
+int cold = 1;
+#ifdef __powerpc64__
+int cacheline_size = 128;
+#else
+int cacheline_size = 32;
+#endif
+int hw_direct_map = 1;
+
+extern void *ap_pcpu;
+
+struct pcpu __pcpu[MAXCPU];
+
+static struct trapframe frame0;
+
+char machine[] = "powerpc";
+SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
+
+static void cpu_startup(void *);
+SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
+
+SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size,
+ CTLFLAG_RD, &cacheline_size, 0, "");
+
+uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *);
+
+long Maxmem = 0;
+long realmem = 0;
+
+struct kva_md_info kmi;
+
+static void
+cpu_startup(void *dummy)
+{
+
+ /*
+ * Initialise the decrementer-based clock.
+ */
+ decr_init();
+
+ /*
+ * Good {morning,afternoon,evening,night}.
+ */
+ cpu_setup(PCPU_GET(cpuid));
+
+#ifdef PERFMON
+ perfmon_init();
+#endif
+ printf("real memory = %ld (%ld MB)\n", ptoa(physmem),
+ ptoa(physmem) / 1048576);
+ realmem = physmem;
+
+ if (bootverbose)
+ printf("available KVA = %zd (%zd MB)\n",
+ virtual_end - virtual_avail,
+ (virtual_end - virtual_avail) / 1048576);
+
+ /*
+ * Display any holes after the first chunk of extended memory.
+ */
+ if (bootverbose) {
+ int indx;
+
+ printf("Physical memory chunk(s):\n");
+ for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
+ vm_offset_t size1 =
+ phys_avail[indx + 1] - phys_avail[indx];
+
+ #ifdef __powerpc64__
+ printf("0x%016lx - 0x%016lx, %ld bytes (%ld pages)\n",
+ #else
+ printf("0x%08x - 0x%08x, %d bytes (%ld pages)\n",
+ #endif
+ phys_avail[indx], phys_avail[indx + 1] - 1, size1,
+ size1 / PAGE_SIZE);
+ }
+ }
+
+ vm_ksubmap_init(&kmi);
+
+ printf("avail memory = %ld (%ld MB)\n", ptoa(vm_cnt.v_free_count),
+ ptoa(vm_cnt.v_free_count) / 1048576);
+
+ /*
+ * Set up buffers, so they can be used to read disk labels.
+ */
+ bufinit();
+ vm_pager_bufferinit();
+}
+
+extern vm_offset_t __startkernel, __endkernel;
+extern unsigned char __bss_start[];
+extern unsigned char __sbss_start[];
+extern unsigned char __sbss_end[];
+extern unsigned char _end[];
+
+void aim_cpu_init(vm_offset_t toc);
+void booke_cpu_init(void);
+
+uintptr_t
+powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp)
+{
+ struct pcpu *pc;
+ vm_offset_t startkernel, endkernel;
+ void *kmdp;
+ char *env;
+#ifdef DDB
+ vm_offset_t ksym_start;
+ vm_offset_t ksym_end;
+#endif
+
+ kmdp = NULL;
+
+ /* First guess at start/end kernel positions */
+ startkernel = __startkernel;
+ endkernel = __endkernel;
+
+ /* Check for ePAPR loader, which puts a magic value into r6 */
+ if (mdp == (void *)0x65504150)
+ mdp = NULL;
+
+ /*
+ * Parse metadata if present and fetch parameters. Must be done
+ * before console is inited so cninit gets the right value of
+ * boothowto.
+ */
+ if (mdp != NULL) {
+ preload_metadata = mdp;
+ kmdp = preload_search_by_type("elf kernel");
+ if (kmdp != NULL) {
+ boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
+ kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
+ endkernel = ulmax(endkernel, MD_FETCH(kmdp,
+ MODINFOMD_KERNEND, vm_offset_t));
+#ifdef DDB
+ ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
+ ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
+ db_fetch_ksymtab(ksym_start, ksym_end);
+#endif
+ }
+ } else {
+ bzero(__sbss_start, __sbss_end - __sbss_start);
+ bzero(__bss_start, _end - __bss_start);
+ }
+#ifdef BOOKE
+ tlb1_init();
+#endif
+
+ /* Store boot environment state */
+ OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry);
+
+ /*
+ * Init params/tunables that can be overridden by the loader
+ */
+ init_param1();
+
+ /*
+ * Start initializing proc0 and thread0.
+ */
+ proc_linkup0(&proc0, &thread0);
+ thread0.td_frame = &frame0;
+
+ /*
+ * Set up per-cpu data.
+ */
+ pc = __pcpu;
+ pcpu_init(pc, 0, sizeof(struct pcpu));
+ pc->pc_curthread = &thread0;
+#ifdef __powerpc64__
+ __asm __volatile("mr 13,%0" :: "r"(pc->pc_curthread));
+#else
+ __asm __volatile("mr 2,%0" :: "r"(pc->pc_curthread));
+#endif
+ pc->pc_cpuid = 0;
+
+ __asm __volatile("mtsprg 0, %0" :: "r"(pc));
+
+ /*
+ * Init mutexes, which we use heavily in PMAP
+ */
+
+ mutex_init();
+
+ /*
+ * Install the OF client interface
+ */
+
+ OF_bootstrap();
+
+ /*
+ * Initialize the console before printing anything.
+ */
+ cninit();
+
+ /*
+ * Complain if there is no metadata.
+ */
+ if (mdp == NULL || kmdp == NULL) {
+ printf("powerpc_init: no loader metadata.\n");
+ }
+
+ /*
+ * Init KDB
+ */
+
+ kdb_init();
+
+#ifdef AIM
+ aim_cpu_init(toc);
+#else /* BOOKE */
+ booke_cpu_init();
+
+ /* Make sure the kernel icache is valid before we go too much further */
+ __syncicache((caddr_t)startkernel, endkernel - startkernel);
+#endif
+
+ /*
+ * Choose a platform module so we can get the physical memory map.
+ */
+
+ platform_probe_and_attach();
+
+ /*
+ * Bring up MMU
+ */
+ pmap_bootstrap(startkernel, endkernel);
+ mtmsr(PSL_KERNSET & ~PSL_EE);
+
+ /*
+ * Initialize params/tunables that are derived from memsize
+ */
+ init_param2(physmem);
+
+ /*
+ * Grab booted kernel's name
+ */
+ env = kern_getenv("kernelname");
+ if (env != NULL) {
+ strlcpy(kernelname, env, sizeof(kernelname));
+ freeenv(env);
+ }
+
+ /*
+ * Finish setting up thread0.
+ */
+ thread0.td_pcb = (struct pcb *)
+ ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE -
+ sizeof(struct pcb)) & ~15UL);
+ bzero((void *)thread0.td_pcb, sizeof(struct pcb));
+ pc->pc_curpcb = thread0.td_pcb;
+
+ /* Initialise the message buffer. */
+ msgbufinit(msgbufp, msgbufsize);
+
+#ifdef KDB
+ if (boothowto & RB_KDB)
+ kdb_enter(KDB_WHY_BOOTFLAGS,
+ "Boot flags requested debugger");
+#endif
+
+ return (((uintptr_t)thread0.td_pcb -
+ (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL);
+}
+
+void
+bzero(void *buf, size_t len)
+{
+ caddr_t p;
+
+ p = buf;
+
+ while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
+ *p++ = 0;
+ len--;
+ }
+
+ while (len >= sizeof(u_long) * 8) {
+ *(u_long*) p = 0;
+ *((u_long*) p + 1) = 0;
+ *((u_long*) p + 2) = 0;
+ *((u_long*) p + 3) = 0;
+ len -= sizeof(u_long) * 8;
+ *((u_long*) p + 4) = 0;
+ *((u_long*) p + 5) = 0;
+ *((u_long*) p + 6) = 0;
+ *((u_long*) p + 7) = 0;
+ p += sizeof(u_long) * 8;
+ }
+
+ while (len >= sizeof(u_long)) {
+ *(u_long*) p = 0;
+ len -= sizeof(u_long);
+ p += sizeof(u_long);
+ }
+
+ while (len) {
+ *p++ = 0;
+ len--;
+ }
+}
+
+/*
+ * Flush the D-cache for non-DMA I/O so that the I-cache can
+ * be made coherent later.
+ */
+void
+cpu_flush_dcache(void *ptr, size_t len)
+{
+ register_t addr, off;
+
+ /*
+ * Align the address to a cacheline and adjust the length
+ * accordingly. Then round the length to a multiple of the
+ * cacheline for easy looping.
+ */
+ addr = (uintptr_t)ptr;
+ off = addr & (cacheline_size - 1);
+ addr -= off;
+ len = (len + off + cacheline_size - 1) & ~(cacheline_size - 1);
+
+ while (len > 0) {
+ __asm __volatile ("dcbf 0,%0" :: "r"(addr));
+ __asm __volatile ("sync");
+ addr += cacheline_size;
+ len -= cacheline_size;
+ }
+}
+
+int
+ptrace_set_pc(struct thread *td, unsigned long addr)
+{
+ struct trapframe *tf;
+
+ tf = td->td_frame;
+ tf->srr0 = (register_t)addr;
+
+ return (0);
+}
+
+void
+spinlock_enter(void)
+{
+ struct thread *td;
+ register_t msr;
+
+ td = curthread;
+ if (td->td_md.md_spinlock_count == 0) {
+ __asm __volatile("or 2,2,2"); /* Set high thread priority */
+ msr = intr_disable();
+ td->td_md.md_spinlock_count = 1;
+ td->td_md.md_saved_msr = msr;
+ } else
+ td->td_md.md_spinlock_count++;
+ critical_enter();
+}
+
+void
+spinlock_exit(void)
+{
+ struct thread *td;
+ register_t msr;
+
+ td = curthread;
+ critical_exit();
+ msr = td->td_md.md_saved_msr;
+ td->td_md.md_spinlock_count--;
+ if (td->td_md.md_spinlock_count == 0) {
+ intr_restore(msr);
+ __asm __volatile("or 6,6,6"); /* Set normal thread priority */
+ }
+}
+
diff --git a/sys/powerpc/aim/uma_machdep.c b/sys/powerpc/powerpc/uma_machdep.c
index 1c27e3e..1c27e3e 100644
--- a/sys/powerpc/aim/uma_machdep.c
+++ b/sys/powerpc/powerpc/uma_machdep.c
diff --git a/sys/sparc64/pci/apb.c b/sys/sparc64/pci/apb.c
index 9b3fa44..c0d6172 100644
--- a/sys/sparc64/pci/apb.c
+++ b/sys/sparc64/pci/apb.c
@@ -171,20 +171,14 @@ apb_attach(device_t dev)
* Get current bridge configuration.
*/
sc->sc_bsc.ops_pcib_sc.domain = pci_get_domain(dev);
- sc->sc_bsc.ops_pcib_sc.secstat =
- pci_read_config(dev, PCIR_SECSTAT_1, 2);
- sc->sc_bsc.ops_pcib_sc.command =
- pci_read_config(dev, PCIR_COMMAND, 2);
- sc->sc_bsc.ops_pcib_sc.pribus =
- pci_read_config(dev, PCIR_PRIBUS_1, 1);
+ sc->sc_bsc.ops_pcib_sc.pribus = pci_get_bus(dev);
+ pci_write_config(dev, PCIR_PRIBUS_1, sc->sc_bsc.ops_pcib_sc.pribus, 1);
sc->sc_bsc.ops_pcib_sc.bus.sec =
pci_read_config(dev, PCIR_SECBUS_1, 1);
sc->sc_bsc.ops_pcib_sc.bus.sub =
pci_read_config(dev, PCIR_SUBBUS_1, 1);
sc->sc_bsc.ops_pcib_sc.bridgectl =
pci_read_config(dev, PCIR_BRIDGECTL_1, 2);
- sc->sc_bsc.ops_pcib_sc.seclat =
- pci_read_config(dev, PCIR_SECLAT_1, 1);
sc->sc_iomap = pci_read_config(dev, APBR_IOMAP, 1);
sc->sc_memmap = pci_read_config(dev, APBR_MEMMAP, 1);
diff --git a/sys/sys/cdefs.h b/sys/sys/cdefs.h
index 553a899..ab7d59d 100644
--- a/sys/sys/cdefs.h
+++ b/sys/sys/cdefs.h
@@ -388,6 +388,12 @@
#define __alloc_size(x)
#endif
+#if __has_attribute(alloc_align) || __GNUC_PREREQ__(4, 9)
+#define __alloc_align(x) __attribute__((__alloc_align__(x)))
+#else
+#define __alloc_align(x)
+#endif
+
/* XXX: should use `#if __STDC_VERSION__ < 199901'. */
#if !__GNUC_PREREQ__(2, 7) && !defined(__INTEL_COMPILER)
#define __func__ NULL
@@ -763,6 +769,24 @@
#endif
/*
+ * Type Safety Checking
+ *
+ * Clang provides additional attributes to enable checking type safety
+ * properties that cannot be enforced by the C type system.
+ */
+
+#if __has_attribute(argument_with_type_tag) && \
+ __has_attribute(type_tag_for_datatype) && !defined(lint)
+#define __arg_type_tag(arg_kind, arg_idx, type_tag_idx) \
+ __attribute__((__argument_with_type_tag__(arg_kind, arg_idx, type_tag_idx)))
+#define __datatype_type_tag(kind, type) \
+ __attribute__((__type_tag_for_datatype__(kind, type)))
+#else
+#define __arg_type_tag(arg_kind, arg_idx, type_tag_idx)
+#define __datatype_type_tag(kind, type)
+#endif
+
+/*
* Lock annotations.
*
* Clang provides support for doing basic thread-safety tests at
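
For illustration only (the declarations below are hypothetical, not part of the tree), the new annotations would be applied like this: __alloc_align(1) tells the compiler that the returned pointer is aligned to the value of the first argument, while the type-tag pair lets Clang check that a void * argument matches the tag constant passed alongside it:

/* Hypothetical consumers of the new attributes. */
void	*my_alloc_aligned(size_t align, size_t size) __alloc_align(1);

/* Tag constant: buf must point to int whenever my_tag_int is passed. */
static const int my_tag_int __datatype_type_tag(my_api, int) = 1;

/* Argument 1 (buf) is checked against the tag given as argument 3. */
void	 my_send(void *buf, size_t len, int tag)
	    __arg_type_tag(my_api, 1, 3);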
diff --git a/sys/sys/nv.h b/sys/sys/nv.h
index 0876e57..5c342dc 100644
--- a/sys/sys/nv.h
+++ b/sys/sys/nv.h
@@ -75,6 +75,7 @@ nvlist_t *nvlist_create(int flags);
void nvlist_destroy(nvlist_t *nvl);
int nvlist_error(const nvlist_t *nvl);
bool nvlist_empty(const nvlist_t *nvl);
+int nvlist_flags(const nvlist_t *nvl);
void nvlist_set_error(nvlist_t *nvl, int error);
nvlist_t *nvlist_clone(const nvlist_t *nvl);
@@ -194,129 +195,6 @@ void nvlist_free_descriptor(nvlist_t *nvl, const char *name);
#endif
void nvlist_free_binary(nvlist_t *nvl, const char *name);
-/*
- * Below are the same functions, but which operate on format strings and
- * variable argument lists.
- *
- * Functions that are not inserting a new pair into the nvlist cannot handle
- * a failure to allocate the memory to hold the new name. Therefore these
- * functions are not provided in the kernel.
- */
-
-#ifndef _KERNEL
-bool nvlist_existsf(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-bool nvlist_existsf_type(const nvlist_t *nvl, int type, const char *namefmt, ...) __printflike(3, 4);
-
-bool nvlist_existsf_null(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-bool nvlist_existsf_bool(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-bool nvlist_existsf_number(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-bool nvlist_existsf_string(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-bool nvlist_existsf_nvlist(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-bool nvlist_existsf_descriptor(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-bool nvlist_existsf_binary(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-
-bool nvlist_existsv(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-bool nvlist_existsv_type(const nvlist_t *nvl, int type, const char *namefmt, va_list nameap) __printflike(3, 0);
-
-bool nvlist_existsv_null(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-bool nvlist_existsv_bool(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-bool nvlist_existsv_number(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-bool nvlist_existsv_string(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-bool nvlist_existsv_nvlist(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-bool nvlist_existsv_descriptor(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-bool nvlist_existsv_binary(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-#endif
-
-void nvlist_addf_null(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_addf_bool(nvlist_t *nvl, bool value, const char *namefmt, ...) __printflike(3, 4);
-void nvlist_addf_number(nvlist_t *nvl, uint64_t value, const char *namefmt, ...) __printflike(3, 4);
-void nvlist_addf_string(nvlist_t *nvl, const char *value, const char *namefmt, ...) __printflike(3, 4);
-void nvlist_addf_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, ...) __printflike(3, 4);
-#ifndef _KERNEL
-void nvlist_addf_descriptor(nvlist_t *nvl, int value, const char *namefmt, ...) __printflike(3, 4);
-#endif
-void nvlist_addf_binary(nvlist_t *nvl, const void *value, size_t size, const char *namefmt, ...) __printflike(4, 5);
-
-#if !defined(_KERNEL) || defined(_VA_LIST_DECLARED)
-void nvlist_addv_null(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_addv_bool(nvlist_t *nvl, bool value, const char *namefmt, va_list nameap) __printflike(3, 0);
-void nvlist_addv_number(nvlist_t *nvl, uint64_t value, const char *namefmt, va_list nameap) __printflike(3, 0);
-void nvlist_addv_string(nvlist_t *nvl, const char *value, const char *namefmt, va_list nameap) __printflike(3, 0);
-void nvlist_addv_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, va_list nameap) __printflike(3, 0);
-#ifndef _KERNEL
-void nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt, va_list nameap) __printflike(3, 0);
-#endif
-void nvlist_addv_binary(nvlist_t *nvl, const void *value, size_t size, const char *namefmt, va_list nameap) __printflike(4, 0);
-#endif
-
-void nvlist_movef_string(nvlist_t *nvl, char *value, const char *namefmt, ...) __printflike(3, 4);
-void nvlist_movef_nvlist(nvlist_t *nvl, nvlist_t *value, const char *namefmt, ...) __printflike(3, 4);
-#ifndef _KERNEL
-void nvlist_movef_descriptor(nvlist_t *nvl, int value, const char *namefmt, ...) __printflike(3, 4);
-#endif
-void nvlist_movef_binary(nvlist_t *nvl, void *value, size_t size, const char *namefmt, ...) __printflike(4, 5);
-
-#if !defined(_KERNEL) || defined(_VA_LIST_DECLARED)
-void nvlist_movev_string(nvlist_t *nvl, char *value, const char *namefmt, va_list nameap) __printflike(3, 0);
-void nvlist_movev_nvlist(nvlist_t *nvl, nvlist_t *value, const char *namefmt, va_list nameap) __printflike(3, 0);
-#ifndef _KERNEL
-void nvlist_movev_descriptor(nvlist_t *nvl, int value, const char *namefmt, va_list nameap) __printflike(3, 0);
-#endif
-void nvlist_movev_binary(nvlist_t *nvl, void *value, size_t size, const char *namefmt, va_list nameap) __printflike(4, 0);
-#endif
-
-#ifndef _KERNEL
-bool nvlist_getf_bool(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-uint64_t nvlist_getf_number(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-const char *nvlist_getf_string(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-const nvlist_t *nvlist_getf_nvlist(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-int nvlist_getf_descriptor(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-const void *nvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) __printflike(3, 4);
-
-bool nvlist_getv_bool(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-uint64_t nvlist_getv_number(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-const char *nvlist_getv_string(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-const nvlist_t *nvlist_getv_nvlist(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-int nvlist_getv_descriptor(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-const void *nvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, va_list nameap) __printflike(3, 0);
-
-bool nvlist_takef_bool(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-uint64_t nvlist_takef_number(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-char *nvlist_takef_string(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-nvlist_t *nvlist_takef_nvlist(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-int nvlist_takef_descriptor(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void *nvlist_takef_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) __printflike(3, 4);
-
-bool nvlist_takev_bool(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-uint64_t nvlist_takev_number(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-char *nvlist_takev_string(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvlist_t *nvlist_takev_nvlist(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-int nvlist_takev_descriptor(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void *nvlist_takev_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, va_list nameap) __printflike(3, 0);
-
-void nvlist_freef(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_freef_type(nvlist_t *nvl, int type, const char *namefmt, ...) __printflike(3, 4);
-
-void nvlist_freef_null(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_freef_bool(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_freef_number(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_freef_string(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_freef_nvlist(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_freef_descriptor(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-void nvlist_freef_binary(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3);
-
-void nvlist_freev(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_freev_type(nvlist_t *nvl, int type, const char *namefmt, va_list nameap) __printflike(3, 0);
-
-void nvlist_freev_null(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_freev_bool(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_freev_number(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_freev_string(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_freev_nvlist(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_freev_descriptor(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-void nvlist_freev_binary(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0);
-#endif /* _KERNEL */
-
__END_DECLS
#endif /* !_NV_H_ */
diff --git a/sys/sys/nv_impl.h b/sys/sys/nv_impl.h
index da90e79..c088c3d 100644
--- a/sys/sys/nv_impl.h
+++ b/sys/sys/nv_impl.h
@@ -41,23 +41,24 @@ typedef struct nvpair nvpair_t;
#define NV_TYPE_NVLIST_UP 255
-#define NV_TYPE_FIRST NV_TYPE_NULL
-#define NV_TYPE_LAST NV_TYPE_BINARY
+#define NV_TYPE_FIRST NV_TYPE_NULL
+#define NV_TYPE_LAST NV_TYPE_BINARY
#define NV_FLAG_BIG_ENDIAN 0x80
#ifdef _KERNEL
-#define nv_malloc(size) malloc((size), M_NVLIST, M_NOWAIT)
+#define nv_malloc(size) malloc((size), M_NVLIST, M_WAITOK)
#define nv_calloc(n, size) malloc((n) * (size), M_NVLIST, \
- M_NOWAIT | M_ZERO)
+ M_WAITOK | M_ZERO)
#define nv_realloc(buf, size) realloc((buf), (size), M_NVLIST, \
- M_NOWAIT)
+ M_WAITOK)
#define nv_free(buf) free((buf), M_NVLIST)
#define nv_strdup(buf) strdup((buf), M_NVLIST)
#define nv_vasprintf(ptr, ...) vasprintf(ptr, M_NVLIST, __VA_ARGS__)
-#define SAVE_ERRNO(var) ((void)(var))
-#define RESTORE_ERRNO(var) ((void)(var))
+#define ERRNO_SET(var) do { } while (0)
+#define ERRNO_SAVE() do { do { } while(0)
+#define ERRNO_RESTORE() } while (0)
#define ERRNO_OR_DEFAULT(default) (default)
@@ -70,8 +71,14 @@ typedef struct nvpair nvpair_t;
#define nv_strdup(buf) strdup((buf))
#define nv_vasprintf(ptr, ...) vasprintf(ptr, __VA_ARGS__)
-#define SAVE_ERRNO(var) (var) = errno
-#define RESTORE_ERRNO(var) errno = (var)
+#define ERRNO_SET(var) do { errno = (var); } while (0)
+#define ERRNO_SAVE() do { \
+ int _serrno; \
+ \
+ _serrno = errno
+
+#define ERRNO_RESTORE() errno = _serrno; \
+ } while (0)
#define ERRNO_OR_DEFAULT(default) (errno == 0 ? (default) : errno)
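
Note that the replacement macros are deliberately asymmetric: in userland ERRNO_SAVE() opens a block (declaring a local _serrno) that ERRNO_RESTORE() closes, and the kernel versions expand to a matching empty do/while pair, so the two must always be used as a bracket in the same scope. A hypothetical caller, sketched:

/* Sketch: preserving errno across a cleanup call that may clobber it. */
static void
cleanup_keep_errno(void *ptr)
{

	ERRNO_SAVE();
	nv_free(ptr);		/* may change errno in userland */
	ERRNO_RESTORE();
}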
@@ -128,30 +135,4 @@ const void *nvpair_get_binary(const nvpair_t *nvp, size_t *sizep);
void nvpair_free(nvpair_t *nvp);
-nvpair_t *nvpair_createf_null(const char *namefmt, ...) __printflike(1, 2);
-nvpair_t *nvpair_createf_bool(bool value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_createf_number(uint64_t value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_createf_string(const char *value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_createf_nvlist(const nvlist_t *value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_createf_descriptor(int value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_createf_binary(const void *value, size_t size, const char *namefmt, ...) __printflike(3, 4);
-
-nvpair_t *nvpair_createv_null(const char *namefmt, va_list nameap) __printflike(1, 0);
-nvpair_t *nvpair_createv_bool(bool value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_createv_number(uint64_t value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_createv_string(const char *value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_createv_descriptor(int value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_createv_binary(const void *value, size_t size, const char *namefmt, va_list nameap) __printflike(3, 0);
-
-nvpair_t *nvpair_movef_string(char *value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_movef_nvlist(nvlist_t *value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_movef_descriptor(int value, const char *namefmt, ...) __printflike(2, 3);
-nvpair_t *nvpair_movef_binary(void *value, size_t size, const char *namefmt, ...) __printflike(3, 4);
-
-nvpair_t *nvpair_movev_string(char *value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_movev_nvlist(nvlist_t *value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_movev_descriptor(int value, const char *namefmt, va_list nameap) __printflike(2, 0);
-nvpair_t *nvpair_movev_binary(void *value, size_t size, const char *namefmt, va_list nameap) __printflike(3, 0);
-
#endif /* !_NV_IMPL_H_ */
diff --git a/sys/sys/nvlist_impl.h b/sys/sys/nvlist_impl.h
index a59a90e..8aeac67 100644
--- a/sys/sys/nvlist_impl.h
+++ b/sys/sys/nvlist_impl.h
@@ -38,10 +38,6 @@
#include "nv.h"
-void *nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep);
-nvlist_t *nvlist_xunpack(const void *buf, size_t size, const int *fds,
- size_t nfds);
-
nvpair_t *nvlist_get_nvpair_parent(const nvlist_t *nvl);
const unsigned char *nvlist_unpack_header(nvlist_t *nvl,
const unsigned char *ptr, size_t nfds, bool *isbep, size_t *leftp);
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 33eaa71..867dbf1 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1100070 /* Master, propagated to newvers */
+#define __FreeBSD_version 1100071 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
@@ -233,10 +233,19 @@
* and may be made smaller at the risk of not being able to use
* filesystems which require a block size exceeding MAXBSIZE.
*
+ * MAXBCACHEBUF - Maximum size of a buffer in the buffer cache. This must
+ * be >= MAXBSIZE and can be set differently for different
+ * architectures by defining it in <machine/param.h>.
+ * Making this larger allows NFS to do larger reads/writes.
+ *
* BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the
* minimum KVM memory reservation the kernel is willing to make.
* Filesystems can of course request smaller chunks. Actual
* backing memory uses a chunk size of a page (PAGE_SIZE).
+ * The default value here can be overridden on a per-architecture
+ * basis by defining it in <machine/param.h>. This should
+ * probably be done to increase its value, when MAXBCACHEBUF is
+ * defined as a larger value in <machine/param.h>.
*
* If you make BKVASIZE too small you risk seriously fragmenting
* the buffer KVM map which may slow things down a bit. If you
@@ -248,7 +257,12 @@
* normal UFS filesystem.
*/
#define MAXBSIZE 65536 /* must be power of 2 */
+#ifndef MAXBCACHEBUF
+#define MAXBCACHEBUF MAXBSIZE /* must be a power of 2 >= MAXBSIZE */
+#endif
+#ifndef BKVASIZE
#define BKVASIZE 16384 /* must be power of 2 */
+#endif
#define BKVAMASK (BKVASIZE-1)
/*
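
As an example of the new knob (hypothetical values, not a recommendation), an architecture wanting 128 KiB NFS I/O sizes could define in its <machine/param.h>:

/* Hypothetical <machine/param.h> override; both must be powers of 2. */
#define	MAXBCACHEBUF	(128 * 1024)	/* >= MAXBSIZE */
#define	BKVASIZE	(4 * 16384)	/* scale nominal buffer KVA up to
					   limit buffer map fragmentation */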
diff --git a/sys/sys/procctl.h b/sys/sys/procctl.h
index c57eee6..75dbf53 100644
--- a/sys/sys/procctl.h
+++ b/sys/sys/procctl.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013 Advanced Computing Technologies LLC
+ * Copyright (c) 2013 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/sys/racct.h b/sys/sys/racct.h
index 966d27a..313d5d0 100644
--- a/sys/sys/racct.h
+++ b/sys/sys/racct.h
@@ -83,6 +83,10 @@ struct ucred;
#define RACCT_DECAYING 0x20
extern int racct_types[];
+extern int racct_enable;
+
+#define ASSERT_RACCT_ENABLED() KASSERT(racct_enable, \
+ ("%s called with !racct_enable", __func__))
/*
* Amount stored in c_resources[] is 10**6 times bigger than what's
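
A hypothetical use of the new assertion, guarding a routine that must only run when resource accounting was enabled at boot:

/* Sketch: racct internals assert the tunable instead of re-checking it. */
static void
racct_adjust(struct racct *racct, int resource, int64_t amount)
{

	ASSERT_RACCT_ENABLED();	/* KASSERT fires if racct_enable == 0 */
	/* ... actual accounting work would follow ... */
}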
diff --git a/sys/sys/seq.h b/sys/sys/seq.h
index eaab86e..21067cb 100644
--- a/sys/sys/seq.h
+++ b/sys/sys/seq.h
@@ -79,7 +79,7 @@ typedef uint32_t seq_t;
* on amd64 but still has unnecessary cost.
*/
static __inline int
-atomic_load_rmb_int(volatile u_int *p)
+atomic_load_rmb_int(volatile const u_int *p)
{
volatile u_int v;
@@ -89,7 +89,7 @@ atomic_load_rmb_int(volatile u_int *p)
}
static __inline int
-atomic_rmb_load_int(volatile u_int *p)
+atomic_rmb_load_int(volatile const u_int *p)
{
volatile u_int v = 0;
@@ -122,7 +122,7 @@ seq_write_end(seq_t *seqp)
}
static __inline seq_t
-seq_read(seq_t *seqp)
+seq_read(const seq_t *seqp)
{
seq_t ret;
@@ -139,7 +139,7 @@ seq_read(seq_t *seqp)
}
static __inline seq_t
-seq_consistent(seq_t *seqp, seq_t oldseq)
+seq_consistent(const seq_t *seqp, seq_t oldseq)
{
return (atomic_rmb_load_int(seqp) == oldseq);
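
The const-qualification above matters because readers of a seq-protected structure only ever load the counter. A typical reader, sketched over a hypothetical structure, retries until it observes the same stable sequence value before and after copying the data out:

/* Sketch: lockless reader of a hypothetical seq-protected struct. */
struct foo {
	seq_t	seq;
	int	a, b;
};

static void
foo_read(const struct foo *fp, int *ap, int *bp)
{
	seq_t seq;

	do {
		seq = seq_read(&fp->seq);	/* spins while odd, i.e.
						   a write is in progress */
		*ap = fp->a;
		*bp = fp->b;
	} while (!seq_consistent(&fp->seq, seq));
}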
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 8a358ce..786414f 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -187,6 +187,7 @@ void *phashinit(int count, struct malloc_type *type, u_long *nentries);
void g_waitidle(void);
void panic(const char *, ...) __dead2 __printflike(1, 2);
+void vpanic(const char *, __va_list) __dead2 __printflike(1, 0);
void cpu_boot(int);
void cpu_flush_dcache(void *, size_t);
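Exporting vpanic() lets printf-like wrappers forward their va_list instead of
pre-rendering into a buffer. A minimal sketch (my_fatal() is hypothetical):

#include <sys/param.h>
#include <sys/systm.h>
#include <machine/stdarg.h>

static void __dead2 __printflike(1, 2)
my_fatal(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vpanic(fmt, ap);	/* never returns, so no va_end() is needed */
}
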
diff --git a/sys/net/zlib.h b/sys/sys/zlib.h
index 16edae1..16edae1 100644
--- a/sys/net/zlib.h
+++ b/sys/sys/zlib.h
diff --git a/sys/net/zutil.h b/sys/sys/zutil.h
index 74f0221..1431b6f 100644
--- a/sys/net/zutil.h
+++ b/sys/sys/zutil.h
@@ -17,7 +17,7 @@
#define ZEXPORT
#ifdef _KERNEL
-#include <net/zlib.h>
+#include <sys/zlib.h>
#else
#include "zlib.h"
#endif
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index c918cd7..c384064 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -112,8 +112,7 @@ static void ffs_blkfree_trim_task(void *ctx, int pending __unused);
#ifdef INVARIANTS
static int ffs_checkblk(struct inode *, ufs2_daddr_t, long);
#endif
-static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int,
- int);
+static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int);
static ino_t ffs_dirpref(struct inode *);
static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t,
int, int);
@@ -460,10 +459,16 @@ nospace:
SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");
static int doasyncfree = 1;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
+"do not force synchronous writes when blocks are reallocated");
static int doreallocblks = 1;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0,
+"enable block reallocation");
+
+static int maxclustersearch = 10;
+SYSCTL_INT(_vfs_ffs, OID_AUTO, maxclustersearch, CTLFLAG_RW, &maxclustersearch,
+0, "max number of cylinder group to search for contigous blocks");
#ifdef DEBUG
static volatile int prtrealloc = 0;
@@ -510,7 +515,7 @@ ffs_reallocblks_ufs1(ap)
ufs1_daddr_t soff, newblk, blkno;
ufs2_daddr_t pref;
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
- int i, len, start_lvl, end_lvl, ssize;
+ int i, cg, len, start_lvl, end_lvl, ssize;
vp = ap->a_vp;
ip = VTOI(vp);
@@ -597,18 +602,39 @@ ffs_reallocblks_ufs1(ap)
ebap = (ufs1_daddr_t *)ebp->b_data;
}
/*
- * Find the preferred location for the cluster.
+ * Find the preferred location for the cluster. If we have not
+ * previously failed at this endeavor, then follow our standard
+ * preference calculation. If we have failed at it, then pick up
+ * where we last ended our search.
*/
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
+ if (ip->i_nextclustercg == -1)
+ pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
+ else
+ pref = cgdata(fs, ip->i_nextclustercg);
/*
* Search the block map looking for an allocation of the desired size.
+ * To avoid wasting too much time, we limit the number of cylinder
+ * groups that we will search.
+ */
+ cg = dtog(fs, pref);
+ for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
+ if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
+ break;
+ cg += 1;
+ if (cg >= fs->fs_ncg)
+ cg = 0;
+ }
+ /*
+ * If we have failed in our search, record where we gave up for
+ * next time. Otherwise, fall back to our usual search criterion.
*/
- if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
- len, len, ffs_clusteralloc)) == 0) {
+ if (newblk == 0) {
+ ip->i_nextclustercg = cg;
UFS_UNLOCK(ump);
goto fail;
}
+ ip->i_nextclustercg = -1;
/*
* We have found a new contiguous block.
*
@@ -737,7 +763,7 @@ ffs_reallocblks_ufs2(ap)
ufs_lbn_t start_lbn, end_lbn;
ufs2_daddr_t soff, newblk, blkno, pref;
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
- int i, len, start_lvl, end_lvl, ssize;
+ int i, cg, len, start_lvl, end_lvl, ssize;
vp = ap->a_vp;
ip = VTOI(vp);
@@ -824,18 +850,39 @@ ffs_reallocblks_ufs2(ap)
ebap = (ufs2_daddr_t *)ebp->b_data;
}
/*
- * Find the preferred location for the cluster.
+ * Find the preferred location for the cluster. If we have not
+ * previously failed at this endeavor, then follow our standard
+ * preference calculation. If we have failed at it, then pick up
+ * where we last ended our search.
*/
UFS_LOCK(ump);
- pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
+ if (ip->i_nextclustercg == -1)
+ pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
+ else
+ pref = cgdata(fs, ip->i_nextclustercg);
/*
* Search the block map looking for an allocation of the desired size.
+ * To avoid wasting too much time, we limit the number of cylinder
+ * groups that we will search.
+ */
+ cg = dtog(fs, pref);
+ for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
+ if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
+ break;
+ cg += 1;
+ if (cg >= fs->fs_ncg)
+ cg = 0;
+ }
+ /*
+ * If we have failed in our search, record where we gave up for
+ * next time. Otherwise, fall back to our usual search criterion.
*/
- if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
- len, len, ffs_clusteralloc)) == 0) {
+ if (newblk == 0) {
+ ip->i_nextclustercg = cg;
UFS_UNLOCK(ump);
goto fail;
}
+ ip->i_nextclustercg = -1;
/*
* We have found a new contiguous block.
*
@@ -1786,12 +1833,11 @@ gotit:
* take the first one that we find following bpref.
*/
static ufs2_daddr_t
-ffs_clusteralloc(ip, cg, bpref, len, unused)
+ffs_clusteralloc(ip, cg, bpref, len)
struct inode *ip;
u_int cg;
ufs2_daddr_t bpref;
int len;
- int unused;
{
struct fs *fs;
struct cg *cgp;
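Both reallocblks variants now share the same bounded, wrapping scan: start at
the preferred cylinder group, try at most maxclustersearch groups, and record
the stopping point in i_nextclustercg so the next attempt resumes there rather
than re-walking groups that just failed. A minimal userland model of the loop,
with search() standing in for ffs_clusteralloc():

/*
 * Try at most `limit` of the `ncg` cylinder groups, wrapping past the
 * end.  Returns the group that satisfied search(), or -1; on failure
 * the kernel code stores the final cg in ip->i_nextclustercg.
 */
static int
scan_cgs(int start_cg, int ncg, int limit, int (*search)(int cg))
{
	int cg, i;

	cg = start_cg;
	for (i = (limit < ncg ? limit : ncg); i > 0; i--) {
		if (search(cg) != 0)
			return (cg);
		if (++cg >= ncg)
			cg = 0;
	}
	return (-1);
}
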
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 88f952f..2a368bd 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1688,6 +1688,7 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
ip->i_dev = dev;
ip->i_number = ino;
ip->i_ea_refs = 0;
+ ip->i_nextclustercg = -1;
#ifdef QUOTA
{
int i;
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 37081d5..cd7c472 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -87,6 +87,8 @@ struct inode {
daddr_t *snapblklist; /* Collect expunged snapshot blocks. */
} i_un;
+ int i_nextclustercg; /* last cg searched for cluster */
+
/*
* Data for extended attribute modification.
*/
diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c
index 08e8fc7..e35741e 100644
--- a/sys/vm/sg_pager.c
+++ b/sys/vm/sg_pager.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Copyright (c) 2009 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 55e02c4..15e5b24 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -198,11 +198,13 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
panic("swap_reserve: & PAGE_MASK");
#ifdef RACCT
- PROC_LOCK(curproc);
- error = racct_add(curproc, RACCT_SWAP, incr);
- PROC_UNLOCK(curproc);
- if (error != 0)
- return (0);
+ if (racct_enable) {
+ PROC_LOCK(curproc);
+ error = racct_add(curproc, RACCT_SWAP, incr);
+ PROC_UNLOCK(curproc);
+ if (error != 0)
+ return (0);
+ }
#endif
res = 0;
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index 11ab24f..ad2a405 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -311,7 +311,7 @@ struct uma_zone {
* This HAS to be the last item because we adjust the zone size
* based on NCPU and then allocate the space for the zones.
*/
- struct uma_cache uz_cpu[]; /* Per cpu caches */
+ struct uma_cache uz_cpu[1]; /* Per cpu caches */
};
/*
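With the flexible array member turned back into uz_cpu[1],
sizeof(struct uma_zone) already accounts for one per-CPU cache, so the
allocation only adds space for the remaining CPUs. A sketch of the sizing
arithmetic this layout implies (the exact expression in the UMA code may
differ):

/* One cache lives inside the struct; add mp_maxid more for the rest. */
size_t zsize = sizeof(struct uma_zone) +
    mp_maxid * sizeof(struct uma_cache);
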
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 13f46e1..0c84d44 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -348,6 +348,10 @@ RetryFault:;
vm_map_lock(fs.map);
if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) &&
(fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) {
+ if (fs.vp != NULL) {
+ vput(fs.vp);
+ fs.vp = NULL;
+ }
fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
vm_map_unlock_and_wait(fs.map, 0);
} else
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index b7e668b..bad9994 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -300,11 +300,11 @@ vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
return (vm);
}
+#ifdef RACCT
static void
vmspace_container_reset(struct proc *p)
{
-#ifdef RACCT
PROC_LOCK(p);
racct_set(p, RACCT_DATA, 0);
racct_set(p, RACCT_STACK, 0);
@@ -312,8 +312,8 @@ vmspace_container_reset(struct proc *p)
racct_set(p, RACCT_MEMLOCK, 0);
racct_set(p, RACCT_VMEM, 0);
PROC_UNLOCK(p);
-#endif
}
+#endif
static inline void
vmspace_dofree(struct vmspace *vm)
@@ -415,7 +415,10 @@ vmspace_exit(struct thread *td)
pmap_activate(td);
vmspace_dofree(vm);
}
- vmspace_container_reset(p);
+#ifdef RACCT
+ if (racct_enable)
+ vmspace_container_reset(p);
+#endif
}
/* Acquire reference to vmspace owned by another process. */
@@ -3652,14 +3655,16 @@ Retry:
return (KERN_NO_SPACE);
}
#ifdef RACCT
- PROC_LOCK(p);
- if (is_procstack &&
- racct_set(p, RACCT_STACK, ctob(vm->vm_ssize) + grow_amount)) {
+ if (racct_enable) {
+ PROC_LOCK(p);
+ if (is_procstack && racct_set(p, RACCT_STACK,
+ ctob(vm->vm_ssize) + grow_amount)) {
+ PROC_UNLOCK(p);
+ vm_map_unlock_read(map);
+ return (KERN_NO_SPACE);
+ }
PROC_UNLOCK(p);
- vm_map_unlock_read(map);
- return (KERN_NO_SPACE);
}
- PROC_UNLOCK(p);
#endif
/* Round up the grow amount modulo sgrowsiz */
@@ -3685,15 +3690,17 @@ Retry:
goto out;
}
#ifdef RACCT
- PROC_LOCK(p);
- if (racct_set(p, RACCT_MEMLOCK,
- ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
+ if (racct_enable) {
+ PROC_LOCK(p);
+ if (racct_set(p, RACCT_MEMLOCK,
+ ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
+ PROC_UNLOCK(p);
+ vm_map_unlock_read(map);
+ rv = KERN_NO_SPACE;
+ goto out;
+ }
PROC_UNLOCK(p);
- vm_map_unlock_read(map);
- rv = KERN_NO_SPACE;
- goto out;
}
- PROC_UNLOCK(p);
#endif
}
/* If we would blow our VMEM resource limit, no go */
@@ -3703,14 +3710,16 @@ Retry:
goto out;
}
#ifdef RACCT
- PROC_LOCK(p);
- if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
+ if (racct_enable) {
+ PROC_LOCK(p);
+ if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
+ PROC_UNLOCK(p);
+ vm_map_unlock_read(map);
+ rv = KERN_NO_SPACE;
+ goto out;
+ }
PROC_UNLOCK(p);
- vm_map_unlock_read(map);
- rv = KERN_NO_SPACE;
- goto out;
}
- PROC_UNLOCK(p);
#endif
if (vm_map_lock_upgrade(map))
@@ -3809,7 +3818,7 @@ Retry:
out:
#ifdef RACCT
- if (rv != KERN_SUCCESS) {
+ if (racct_enable && rv != KERN_SUCCESS) {
PROC_LOCK(p);
error = racct_set(p, RACCT_VMEM, map->size);
KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 02634d6..1dd2479 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1122,16 +1122,18 @@ vm_mlock(struct proc *proc, struct ucred *cred, const void *addr0, size_t len)
if (npages + vm_cnt.v_wire_count > vm_page_max_wired)
return (EAGAIN);
#ifdef RACCT
- PROC_LOCK(proc);
- error = racct_set(proc, RACCT_MEMLOCK, nsize);
- PROC_UNLOCK(proc);
- if (error != 0)
- return (ENOMEM);
+ if (racct_enable) {
+ PROC_LOCK(proc);
+ error = racct_set(proc, RACCT_MEMLOCK, nsize);
+ PROC_UNLOCK(proc);
+ if (error != 0)
+ return (ENOMEM);
+ }
#endif
error = vm_map_wire(map, start, end,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
- if (error != KERN_SUCCESS) {
+ if (racct_enable && error != KERN_SUCCESS) {
PROC_LOCK(proc);
racct_set(proc, RACCT_MEMLOCK,
ptoa(pmap_wired_count(map->pmap)));
@@ -1179,11 +1181,13 @@ sys_mlockall(td, uap)
PROC_UNLOCK(td->td_proc);
}
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
- PROC_UNLOCK(td->td_proc);
- if (error != 0)
- return (ENOMEM);
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
+ PROC_UNLOCK(td->td_proc);
+ if (error != 0)
+ return (ENOMEM);
+ }
#endif
if (uap->how & MCL_FUTURE) {
@@ -1205,7 +1209,7 @@ sys_mlockall(td, uap)
error = (error == KERN_SUCCESS ? 0 : EAGAIN);
}
#ifdef RACCT
- if (error != KERN_SUCCESS) {
+ if (racct_enable && error != KERN_SUCCESS) {
PROC_LOCK(td->td_proc);
racct_set(td->td_proc, RACCT_MEMLOCK,
ptoa(pmap_wired_count(map->pmap)));
@@ -1247,7 +1251,7 @@ sys_munlockall(td, uap)
error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
#ifdef RACCT
- if (error == KERN_SUCCESS) {
+ if (racct_enable && error == KERN_SUCCESS) {
PROC_LOCK(td->td_proc);
racct_set(td->td_proc, RACCT_MEMLOCK, 0);
PROC_UNLOCK(td->td_proc);
@@ -1291,7 +1295,7 @@ sys_munlock(td, uap)
error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#ifdef RACCT
- if (error == KERN_SUCCESS) {
+ if (racct_enable && error == KERN_SUCCESS) {
PROC_LOCK(td->td_proc);
map = &td->td_proc->p_vmspace->vm_map;
racct_set(td->td_proc, RACCT_MEMLOCK,
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 64ca130..0aec01f 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -91,12 +91,14 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/limits.h>
+#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
+#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
@@ -142,6 +144,12 @@ static int pa_tryrelock_restart;
SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
&pa_tryrelock_restart, 0, "Number of tryrelock restarts");
+static TAILQ_HEAD(, vm_page) blacklist_head;
+static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
+
static uma_zone_t fakepg_zone;
static struct vnode *vm_page_alloc_init(vm_page_t m);
@@ -216,35 +224,153 @@ vm_set_page_size(void)
}
/*
- * vm_page_blacklist_lookup:
+ * vm_page_blacklist_next:
*
- * See if a physical address in this page has been listed
- * in the blacklist tunable. Entries in the tunable are
- * separated by spaces or commas. If an invalid integer is
- * encountered then the rest of the string is skipped.
+ * Find the next entry in the provided string of blacklist
+ * addresses. Entries are separated by space, comma, or newline.
+ * If an invalid integer is encountered then the rest of the
+ * string is skipped. Updates the list pointer to the next
+ * character, or NULL if the string is exhausted or invalid.
*/
-static int
-vm_page_blacklist_lookup(char *list, vm_paddr_t pa)
+static vm_paddr_t
+vm_page_blacklist_next(char **list, char *end)
{
vm_paddr_t bad;
char *cp, *pos;
- for (pos = list; *pos != '\0'; pos = cp) {
+ if (list == NULL || *list == NULL)
+ return (0);
+ if (**list == '\0') {
+ *list = NULL;
+ return (0);
+ }
+
+ /*
+ * If there's no end pointer then the buffer is coming from
+ * the kenv and we know it's null-terminated.
+ */
+ if (end == NULL)
+ end = *list + strlen(*list);
+
+ /* Ensure that strtoq() won't walk off the end */
+ if (*end != '\0') {
+ if (*end == '\n' || *end == ' ' || *end == ',')
+ *end = '\0';
+ else {
+ printf("Blacklist not terminated, skipping\n");
+ *list = NULL;
+ return (0);
+ }
+ }
+
+ for (pos = *list; *pos != '\0'; pos = cp) {
bad = strtoq(pos, &cp, 0);
- if (*cp != '\0') {
- if (*cp == ' ' || *cp == ',') {
- cp++;
- if (cp == pos)
+ if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') {
+ if (bad == 0) {
+ if (++cp < end)
continue;
- } else
- break;
- }
- if (pa == trunc_page(bad))
- return (1);
+ else
+ break;
+ }
+ } else
+ break;
+ if (*cp == '\0' || ++cp >= end)
+ *list = NULL;
+ else
+ *list = cp;
+ return (trunc_page(bad));
}
+ printf("Garbage in RAM blacklist, skipping\n");
+ *list = NULL;
return (0);
}
+/*
+ * vm_page_blacklist_check:
+ *
+ * Iterate through the provided string of blacklist addresses, pulling
+ * each entry out of the physical allocator free list and putting it
+ * onto a list for reporting via the vm.page_blacklist sysctl.
+ */
+static void
+vm_page_blacklist_check(char *list, char *end)
+{
+ vm_paddr_t pa;
+ vm_page_t m;
+ char *next;
+ int ret;
+
+ next = list;
+ while (next != NULL) {
+ if ((pa = vm_page_blacklist_next(&next, end)) == 0)
+ continue;
+ m = vm_phys_paddr_to_vm_page(pa);
+ if (m == NULL)
+ continue;
+ mtx_lock(&vm_page_queue_free_mtx);
+ ret = vm_phys_unfree_page(m);
+ mtx_unlock(&vm_page_queue_free_mtx);
+ if (ret == TRUE) {
+ TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
+ if (bootverbose)
+ printf("Skipping page with pa 0x%jx\n",
+ (uintmax_t)pa);
+ }
+ }
+}
+
+/*
+ * vm_page_blacklist_load:
+ *
+ * Search for a special module named "ram_blacklist". It'll be a
+ * plain text file provided by the user via the loader directive
+ * of the same name.
+ */
+static void
+vm_page_blacklist_load(char **list, char **end)
+{
+ void *mod;
+ u_char *ptr;
+ u_int len;
+
+ mod = NULL;
+ ptr = NULL;
+
+ mod = preload_search_by_type("ram_blacklist");
+ if (mod != NULL) {
+ ptr = preload_fetch_addr(mod);
+ len = preload_fetch_size(mod);
+ }
+ *list = ptr;
+ if (ptr != NULL)
+ *end = ptr + len;
+ else
+ *end = NULL;
+ return;
+}
+
+static int
+sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
+{
+ vm_page_t m;
+ struct sbuf sbuf;
+ int error, first;
+
+ first = 1;
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ TAILQ_FOREACH(m, &blacklist_head, listq) {
+ sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
+ (uintmax_t)m->phys_addr);
+ first = 0;
+ }
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+ return (error);
+}
+
static void
vm_page_domain_init(struct vm_domain *vmd)
{
@@ -290,7 +416,7 @@ vm_page_startup(vm_offset_t vaddr)
int i;
vm_paddr_t pa;
vm_paddr_t last_pa;
- char *list;
+ char *list, *listend;
vm_paddr_t end;
vm_paddr_t biggestsize;
vm_paddr_t low_water, high_water;
@@ -305,14 +431,6 @@ vm_page_startup(vm_offset_t vaddr)
phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
}
-#ifdef XEN
- /*
- * There is no obvious reason why i386 PV Xen needs vm_page structs
- * created for these pseudo-physical addresses. XXX
- */
- vm_phys_add_seg(0, phys_avail[0]);
-#endif
-
low_water = phys_avail[0];
high_water = phys_avail[1];
@@ -477,20 +595,22 @@ vm_page_startup(vm_offset_t vaddr)
*/
vm_cnt.v_page_count = 0;
vm_cnt.v_free_count = 0;
- list = kern_getenv("vm.blacklist");
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
pa = phys_avail[i];
last_pa = phys_avail[i + 1];
while (pa < last_pa) {
- if (list != NULL &&
- vm_page_blacklist_lookup(list, pa))
- printf("Skipping page with pa 0x%jx\n",
- (uintmax_t)pa);
- else
- vm_phys_add_page(pa);
+ vm_phys_add_page(pa);
pa += PAGE_SIZE;
}
}
+
+ TAILQ_INIT(&blacklist_head);
+ vm_page_blacklist_load(&list, &listend);
+ vm_page_blacklist_check(list, listend);
+
+ list = kern_getenv("vm.blacklist");
+ vm_page_blacklist_check(list, NULL);
+
freeenv(list);
#if VM_NRESERVLEVEL > 0
/*
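Because excluded pages are now kept on blacklist_head and reported through the
read-only vm.page_blacklist sysctl, the list can be inspected from userland.
A minimal sketch of such a reader, using standard sysctlbyname(3) (this
program is illustrative, not part of the change):

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *buf;
	size_t len;

	/* First call sizes the buffer, second fetches the list. */
	if (sysctlbyname("vm.page_blacklist", NULL, &len, NULL, 0) == -1)
		err(1, "sysctlbyname");
	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");
	if (sysctlbyname("vm.page_blacklist", buf, &len, NULL, 0) == -1)
		err(1, "sysctlbyname");
	printf("%s\n", buf);	/* comma-separated physical addresses */
	free(buf);
	return (0);
}
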
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 6f50053..872ffd8 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1787,11 +1787,13 @@ vm_daemon(void)
while (TRUE) {
mtx_lock(&vm_daemon_mtx);
+ msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep",
#ifdef RACCT
- msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", hz);
+ racct_enable ? hz : 0
#else
- msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", 0);
+ 0
#endif
+ );
swapout_flags = vm_pageout_req_swapout;
vm_pageout_req_swapout = 0;
mtx_unlock(&vm_daemon_mtx);
@@ -1866,33 +1868,40 @@ again:
&vm->vm_map, limit);
}
#ifdef RACCT
- rsize = IDX_TO_OFF(size);
- PROC_LOCK(p);
- racct_set(p, RACCT_RSS, rsize);
- ravailable = racct_get_available(p, RACCT_RSS);
- PROC_UNLOCK(p);
- if (rsize > ravailable) {
- /*
- * Don't be overly aggressive; this might be
- * an innocent process, and the limit could've
- * been exceeded by some memory hog. Don't
- * try to deactivate more than 1/4th of process'
- * resident set size.
- */
- if (attempts <= 8) {
- if (ravailable < rsize - (rsize / 4))
- ravailable = rsize - (rsize / 4);
- }
- vm_pageout_map_deactivate_pages(
- &vm->vm_map, OFF_TO_IDX(ravailable));
- /* Update RSS usage after paging out. */
- size = vmspace_resident_count(vm);
+ if (racct_enable) {
rsize = IDX_TO_OFF(size);
PROC_LOCK(p);
racct_set(p, RACCT_RSS, rsize);
+ ravailable = racct_get_available(p, RACCT_RSS);
PROC_UNLOCK(p);
- if (rsize > ravailable)
- tryagain = 1;
+ if (rsize > ravailable) {
+ /*
+ * Don't be overly aggressive; this
+ * might be an innocent process,
+ * and the limit could've been exceeded
+ * by some memory hog. Don't try
+ * to deactivate more than 1/4th
+ * of process' resident set size.
+ */
+ if (attempts <= 8) {
+ if (ravailable < rsize -
+ (rsize / 4)) {
+ ravailable = rsize -
+ (rsize / 4);
+ }
+ }
+ vm_pageout_map_deactivate_pages(
+ &vm->vm_map,
+ OFF_TO_IDX(ravailable));
+ /* Update RSS usage after paging out. */
+ size = vmspace_resident_count(vm);
+ rsize = IDX_TO_OFF(size);
+ PROC_LOCK(p);
+ racct_set(p, RACCT_RSS, rsize);
+ PROC_UNLOCK(p);
+ if (rsize > ravailable)
+ tryagain = 1;
+ }
}
#endif
vmspace_free(vm);
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index de9aa78..cc77ff7 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -130,35 +130,39 @@ sys_obreak(td, uap)
goto done;
}
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- error = racct_set(td->td_proc, RACCT_DATA, new - base);
- if (error != 0) {
- PROC_UNLOCK(td->td_proc);
- error = ENOMEM;
- goto done;
- }
- error = racct_set(td->td_proc, RACCT_VMEM,
- map->size + (new - old));
- if (error != 0) {
- racct_set_force(td->td_proc, RACCT_DATA, old - base);
- PROC_UNLOCK(td->td_proc);
- error = ENOMEM;
- goto done;
- }
- if (!old_mlock && map->flags & MAP_WIREFUTURE) {
- error = racct_set(td->td_proc, RACCT_MEMLOCK,
- ptoa(pmap_wired_count(map->pmap)) + (new - old));
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ error = racct_set(td->td_proc, RACCT_DATA, new - base);
+ if (error != 0) {
+ PROC_UNLOCK(td->td_proc);
+ error = ENOMEM;
+ goto done;
+ }
+ error = racct_set(td->td_proc, RACCT_VMEM,
+ map->size + (new - old));
if (error != 0) {
racct_set_force(td->td_proc, RACCT_DATA,
old - base);
- racct_set_force(td->td_proc, RACCT_VMEM,
- map->size);
PROC_UNLOCK(td->td_proc);
error = ENOMEM;
goto done;
}
+ if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+ error = racct_set(td->td_proc, RACCT_MEMLOCK,
+ ptoa(pmap_wired_count(map->pmap)) +
+ (new - old));
+ if (error != 0) {
+ racct_set_force(td->td_proc, RACCT_DATA,
+ old - base);
+ racct_set_force(td->td_proc, RACCT_VMEM,
+ map->size);
+ PROC_UNLOCK(td->td_proc);
+ error = ENOMEM;
+ goto done;
+ }
+ }
+ PROC_UNLOCK(td->td_proc);
}
- PROC_UNLOCK(td->td_proc);
#endif
prot = VM_PROT_RW;
#ifdef COMPAT_FREEBSD32
@@ -170,14 +174,19 @@ sys_obreak(td, uap)
rv = vm_map_insert(map, NULL, 0, old, new, prot, VM_PROT_ALL, 0);
if (rv != KERN_SUCCESS) {
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- racct_set_force(td->td_proc, RACCT_DATA, old - base);
- racct_set_force(td->td_proc, RACCT_VMEM, map->size);
- if (!old_mlock && map->flags & MAP_WIREFUTURE) {
- racct_set_force(td->td_proc, RACCT_MEMLOCK,
- ptoa(pmap_wired_count(map->pmap)));
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ racct_set_force(td->td_proc,
+ RACCT_DATA, old - base);
+ racct_set_force(td->td_proc,
+ RACCT_VMEM, map->size);
+ if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+ racct_set_force(td->td_proc,
+ RACCT_MEMLOCK,
+ ptoa(pmap_wired_count(map->pmap)));
+ }
+ PROC_UNLOCK(td->td_proc);
}
- PROC_UNLOCK(td->td_proc);
#endif
error = ENOMEM;
goto done;
@@ -205,14 +214,16 @@ sys_obreak(td, uap)
}
vm->vm_dsize -= btoc(old - new);
#ifdef RACCT
- PROC_LOCK(td->td_proc);
- racct_set_force(td->td_proc, RACCT_DATA, new - base);
- racct_set_force(td->td_proc, RACCT_VMEM, map->size);
- if (!old_mlock && map->flags & MAP_WIREFUTURE) {
- racct_set_force(td->td_proc, RACCT_MEMLOCK,
- ptoa(pmap_wired_count(map->pmap)));
+ if (racct_enable) {
+ PROC_LOCK(td->td_proc);
+ racct_set_force(td->td_proc, RACCT_DATA, new - base);
+ racct_set_force(td->td_proc, RACCT_VMEM, map->size);
+ if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+ racct_set_force(td->td_proc, RACCT_MEMLOCK,
+ ptoa(pmap_wired_count(map->pmap)));
+ }
+ PROC_UNLOCK(td->td_proc);
}
- PROC_UNLOCK(td->td_proc);
#endif
}
done:
diff --git a/sys/x86/acpica/srat.c b/sys/x86/acpica/srat.c
index 8335a8c..8d2a741 100644
--- a/sys/x86/acpica/srat.c
+++ b/sys/x86/acpica/srat.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2010 Advanced Computing Technologies LLC
+ * Copyright (c) 2010 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/x86/include/apicvar.h b/sys/x86/include/apicvar.h
index 818a831..0bd9fe5 100644
--- a/sys/x86/include/apicvar.h
+++ b/sys/x86/include/apicvar.h
@@ -454,6 +454,7 @@ void lapic_handle_error(void);
void lapic_handle_intr(int vector, struct trapframe *frame);
void lapic_handle_timer(struct trapframe *frame);
void xen_intr_handle_upcall(struct trapframe *frame);
+void hv_vector_handler(struct trapframe *frame);
extern int x2apic_mode;
extern int lapic_eoi_suppression;
diff --git a/sys/x86/include/mca.h b/sys/x86/include/mca.h
index 29728b9..6420eb8 100644
--- a/sys/x86/include/mca.h
+++ b/sys/x86/include/mca.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Copyright (c) 2009 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/x86/include/segments.h b/sys/x86/include/segments.h
index a5c1ea4..65b5870 100644
--- a/sys/x86/include/segments.h
+++ b/sys/x86/include/segments.h
@@ -46,11 +46,7 @@
*/
#define SEL_RPL_MASK 3 /* requester priv level */
#define ISPL(s) ((s)&3) /* priority level of a selector */
-#ifdef XEN
-#define SEL_KPL 1 /* kernel priority level */
-#else
#define SEL_KPL 0 /* kernel priority level */
-#endif
#define SEL_UPL 3 /* user priority level */
#define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */
#define SEL_LDT 4 /* local descriptor table */
@@ -244,11 +240,7 @@ union descriptor {
#define GBIOSUTIL_SEL 16 /* BIOS interface (Utility) */
#define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */
#define GNDIS_SEL 18 /* For the NDIS layer */
-#ifdef XEN
-#define NGDT 9
-#else
#define NGDT 19
-#endif
/*
* Entries in the Local Descriptor Table (LDT)
diff --git a/sys/x86/pci/qpi.c b/sys/x86/pci/qpi.c
index 2f88891..ae29daa 100644
--- a/sys/x86/pci/qpi.c
+++ b/sys/x86/pci/qpi.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2010 Advanced Computing Technologies LLC
+ * Copyright (c) 2010 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/x86/x86/busdma_bounce.c b/sys/x86/x86/busdma_bounce.c
index 1438053..dcdeafa 100644
--- a/sys/x86/x86/busdma_bounce.c
+++ b/sys/x86/x86/busdma_bounce.c
@@ -147,11 +147,6 @@ static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
int flags);
-#ifdef XEN
-#undef pmap_kextract
-#define pmap_kextract pmap_kextract_ma
-#endif
-
/*
* Allocate a device specific dma_tag.
*/
@@ -994,8 +989,8 @@ add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
/* Page offset needs to be preserved. */
- bpage->vaddr |= vaddr & PAGE_MASK;
- bpage->busaddr |= vaddr & PAGE_MASK;
+ bpage->vaddr |= addr & PAGE_MASK;
+ bpage->busaddr |= addr & PAGE_MASK;
}
bpage->datavaddr = vaddr;
bpage->dataaddr = addr;
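The substitution of addr for vaddr matters for unmapped I/O: a buffer with no
KVA mapping has vaddr == 0, which carries no page offset, so the offset bits
preserved in the bounce page must come from the data's physical address. A toy
userland check of the arithmetic (all addresses are made up):

#include <assert.h>
#include <stdint.h>

#define TOY_PAGE_MASK	0xfffUL		/* 4K pages */

int
main(void)
{
	uint64_t addr = 0x12345678UL;	/* physical data address */
	uint64_t vaddr = 0;		/* unmapped buffer: no KVA */
	uint64_t bounce = 0xabcde000UL;	/* page-aligned bounce page */

	bounce |= addr & TOY_PAGE_MASK;	/* the fix: offset taken from addr */
	assert((bounce & TOY_PAGE_MASK) == (addr & TOY_PAGE_MASK));
	/* The old code took the offset from vaddr, i.e. 0 here. */
	assert((vaddr & TOY_PAGE_MASK) != (addr & TOY_PAGE_MASK));
	return (0);
}
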
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
index 846a123..f8d1f08 100644
--- a/sys/x86/x86/cpu_machdep.c
+++ b/sys/x86/x86/cpu_machdep.c
@@ -100,15 +100,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pager.h>
#include <vm/vm_param.h>
-#ifdef XEN
-/* XEN includes */
-#include <xen/xen-os.h>
-#include <xen/hypervisor.h>
-#include <machine/xen/xenvar.h>
-#include <machine/xen/xenfunc.h>
-#include <xen/xen_intr.h>
-#endif
-
/*
* Machine dependent boot() routine
*
@@ -193,33 +184,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
return (0);
}
-#if defined(__i386__) && defined(XEN)
-
-static void
-idle_block(void)
-{
-
- HYPERVISOR_sched_op(SCHEDOP_block, 0);
-}
-
-void
-cpu_halt(void)
-{
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
-
-int scheduler_running;
-
-static void
-cpu_idle_hlt(sbintime_t sbt)
-{
-
- scheduler_running = 1;
- enable_intr();
- idle_block();
-}
-
-#else
/*
* Shutdown the CPU as much as possible
*/
@@ -230,8 +194,6 @@ cpu_halt(void)
halt();
}
-#endif
-
void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */
static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
@@ -263,7 +225,6 @@ cpu_idle_acpi(sbintime_t sbt)
}
#endif /* !PC98 */
-#if !defined(__i386__) || !defined(XEN)
static void
cpu_idle_hlt(sbintime_t sbt)
{
@@ -295,7 +256,6 @@ cpu_idle_hlt(sbintime_t sbt)
__asm __volatile("sti; hlt");
*state = STATE_RUNNING;
}
-#endif
static void
cpu_idle_mwait(sbintime_t sbt)
@@ -370,7 +330,7 @@ cpu_probe_amdc1e(void)
}
}
-#if defined(__i386__) && (defined(PC98) || defined(XEN))
+#if defined(__i386__) && defined(PC98)
void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt;
#else
void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
@@ -379,17 +339,15 @@ void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
-#if !defined(__i386__) || !defined(XEN)
uint64_t msr;
-#endif
sbintime_t sbt = -1;
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
-#if defined(MP_WATCHDOG) && (!defined(__i386__) || !defined(XEN))
+#ifdef MP_WATCHDOG
ap_watchdog(PCPU_GET(cpuid));
#endif
-#if !defined(__i386__) || !defined(XEN)
+
/* If we are busy - try to use fast methods. */
if (busy) {
if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
@@ -397,7 +355,6 @@ cpu_idle(int busy)
goto out;
}
}
-#endif
/* If we have time - switch timers into idle mode. */
if (!busy) {
@@ -405,14 +362,12 @@ cpu_idle(int busy)
sbt = cpu_idleclock();
}
-#if !defined(__i386__) || !defined(XEN)
/* Apply AMD APIC timer C1E workaround. */
if (cpu_ident_amdc1e && cpu_disable_c3_sleep) {
msr = rdmsr(MSR_AMDK8_IPM);
if (msr & AMDK8_CMPHALT)
wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
}
-#endif
/* Call main idle method. */
cpu_idle_fn(sbt);
@@ -422,9 +377,7 @@ cpu_idle(int busy)
cpu_activeclock();
critical_exit();
}
-#if !defined(__i386__) || !defined(XEN)
out:
-#endif
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
busy, curcpu);
}
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index ca2b5ca..9f39d1c 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -1190,7 +1190,6 @@ hook_tsc_freq(void *arg __unused)
SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL);
-#ifndef XEN
static const char *const vm_bnames[] = {
"QEMU", /* QEMU */
"Plex86", /* Plex86 */
@@ -1281,7 +1280,6 @@ identify_hypervisor(void)
freeenv(p);
}
}
-#endif
/*
* Final stage of CPU identification.
@@ -1314,9 +1312,7 @@ identify_cpu(void)
cpu_feature2 = regs[2];
#endif
-#ifndef XEN
identify_hypervisor();
-#endif
cpu_vendor_id = find_cpu_vendor_id();
/*
diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c
index b27df4a..d81c913 100644
--- a/sys/x86/x86/intr_machdep.c
+++ b/sys/x86/x86/intr_machdep.c
@@ -532,13 +532,6 @@ intr_shuffle_irqs(void *arg __unused)
struct intsrc *isrc;
int i;
-#ifdef XEN
- /*
- * Doesn't work yet
- */
- return;
-#endif
-
/* Don't bother on UP. */
if (mp_ncpus == 1)
return;
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index e0656ef..d75a452 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -1579,17 +1579,13 @@ apic_setup_io(void *dummy __unused)
* Local APIC must be registered before other PICs and pseudo PICs
* for proper suspend/resume order.
*/
-#ifndef XEN
intr_register_pic(&lapic_pic);
-#endif
retval = best_enum->apic_setup_io();
if (retval != 0)
printf("%s: Failed to setup I/O APICs: returned %d\n",
best_enum->apic_name, retval);
-#ifdef XEN
- return;
-#endif
+
/*
* Finish setting up the local APIC on the BSP once we know
* how to properly program the LINT pins. In particular, this
diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c
index 5c895a8..8026b15 100644
--- a/sys/x86/x86/mca.c
+++ b/sys/x86/x86/mca.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009 Advanced Computing Technologies LLC
+ * Copyright (c) 2009 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
new file mode 100644
index 0000000..b4c66a5
--- /dev/null
+++ b/sys/x86/x86/mp_x86.c
@@ -0,0 +1,1120 @@
+/*-
+ * Copyright (c) 1996, by Steve Passe
+ * Copyright (c) 2003, by Peter Wemm
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef __i386__
+#include "opt_apic.h"
+#endif
+#include "opt_cpu.h"
+#include "opt_kstack_pages.h"
+#include "opt_pmap.h"
+#include "opt_sched.h"
+#include "opt_smp.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cons.h> /* cngetc() */
+#include <sys/cpuset.h>
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+
+#include <x86/apicreg.h>
+#include <machine/clock.h>
+#include <machine/cputypes.h>
+#include <x86/mca.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/psl.h>
+#include <machine/smp.h>
+#include <machine/specialreg.h>
+#include <machine/cpu.h>
+
+#define WARMBOOT_TARGET 0
+#define WARMBOOT_OFF (KERNBASE + 0x0467)
+#define WARMBOOT_SEG (KERNBASE + 0x0469)
+
+#define CMOS_REG (0x70)
+#define CMOS_DATA (0x71)
+#define BIOS_RESET (0x0f)
+#define BIOS_WARM (0x0a)
+
+/* lock region used by kernel profiling */
+int mcount_lock;
+
+int mp_naps; /* # of application processors */
+int boot_cpu_id = -1; /* designated BSP */
+
+extern struct pcpu __pcpu[];
+
+/* AP uses this during bootstrap. Do not staticize. */
+char *bootSTK;
+int bootAP;
+
+/* Free these after use */
+void *bootstacks[MAXCPU];
+void *dpcpu;
+
+struct pcb stoppcbs[MAXCPU];
+struct susppcb **susppcbs;
+
+#ifdef COUNT_IPIS
+/* Interrupt counts. */
+static u_long *ipi_preempt_counts[MAXCPU];
+static u_long *ipi_ast_counts[MAXCPU];
+u_long *ipi_invltlb_counts[MAXCPU];
+u_long *ipi_invlrng_counts[MAXCPU];
+u_long *ipi_invlpg_counts[MAXCPU];
+u_long *ipi_invlcache_counts[MAXCPU];
+u_long *ipi_rendezvous_counts[MAXCPU];
+static u_long *ipi_hardclock_counts[MAXCPU];
+#endif
+
+/* Default cpu_ops implementation. */
+struct cpu_ops cpu_ops;
+
+/*
+ * Local data and functions.
+ */
+
+static volatile cpuset_t ipi_nmi_pending;
+
+/* used to hold the AP's until we are ready to release them */
+struct mtx ap_boot_mtx;
+
+/* Set to 1 once we're ready to let the APs out of the pen. */
+volatile int aps_ready = 0;
+
+/*
+ * Store data from cpu_add() until later in the boot when we actually setup
+ * the APs.
+ */
+struct cpu_info cpu_info[MAX_APIC_ID + 1];
+int cpu_apic_ids[MAXCPU];
+int apic_cpuids[MAX_APIC_ID + 1];
+
+/* Holds pending bitmap based IPIs per CPU */
+volatile u_int cpu_ipi_pending[MAXCPU];
+
+int cpu_logical; /* logical cpus per core */
+int cpu_cores; /* cores per package */
+
+static void release_aps(void *dummy);
+
+static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
+static int hyperthreading_allowed = 1;
+
+void
+mem_range_AP_init(void)
+{
+
+ if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
+ mem_range_softc.mr_op->initAP(&mem_range_softc);
+}
+
+static void
+topo_probe_amd(void)
+{
+ int core_id_bits;
+ int id;
+
+ /* AMD processors do not support HTT. */
+ cpu_logical = 1;
+
+ if ((amd_feature2 & AMDID2_CMP) == 0) {
+ cpu_cores = 1;
+ return;
+ }
+
+ core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
+ AMDID_COREID_SIZE_SHIFT;
+ if (core_id_bits == 0) {
+ cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
+ return;
+ }
+
+ /* Fam 10h and newer should get here. */
+ for (id = 0; id <= MAX_APIC_ID; id++) {
+ /* Check logical CPU availability. */
+ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
+ continue;
+ /* Check if logical CPU has the same package ID. */
+ if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
+ continue;
+ cpu_cores++;
+ }
+}
+
+/*
+ * Round up to the next power of two, if necessary, and then
+ * take log2.
+ * Returns -1 if argument is zero.
+ */
+static __inline int
+mask_width(u_int x)
+{
+
+ return (fls(x << (1 - powerof2(x))) - 1);
+}
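+
+/*
+ * Example: mask_width(6) sees that 6 is not a power of two, shifts it
+ * left once to 12, and returns fls(12) - 1 == 3, i.e. three APIC ID
+ * bits are needed to number six entities.
+ */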
+
+static void
+topo_probe_0x4(void)
+{
+ u_int p[4];
+ int pkg_id_bits;
+ int core_id_bits;
+ int max_cores;
+ int max_logical;
+ int id;
+
+ /* Both zero and one here mean one logical processor per package. */
+ max_logical = (cpu_feature & CPUID_HTT) != 0 ?
+ (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1;
+ if (max_logical <= 1)
+ return;
+
+ /*
+ * Because of uniformity assumption we examine only
+ * those logical processors that belong to the same
+ * package as BSP. Further, we count number of
+ * logical processors that belong to the same core
+ * as BSP thus deducing number of threads per core.
+ */
+ if (cpu_high >= 0x4) {
+ cpuid_count(0x04, 0, p);
+ max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ } else
+ max_cores = 1;
+ core_id_bits = mask_width(max_logical/max_cores);
+ if (core_id_bits < 0)
+ return;
+ pkg_id_bits = core_id_bits + mask_width(max_cores);
+
+ for (id = 0; id <= MAX_APIC_ID; id++) {
+ /* Check logical CPU availability. */
+ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
+ continue;
+ /* Check if logical CPU has the same package ID. */
+ if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
+ continue;
+ cpu_cores++;
+ /* Check if logical CPU has the same package and core IDs. */
+ if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
+ cpu_logical++;
+ }
+
+ KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
+ ("topo_probe_0x4 couldn't find BSP"));
+
+ cpu_cores /= cpu_logical;
+ hyperthreading_cpus = cpu_logical;
+}
+
+static void
+topo_probe_0xb(void)
+{
+ u_int p[4];
+ int bits;
+ int cnt;
+ int i;
+ int logical;
+ int type;
+ int x;
+
+ /* We only support three levels for now. */
+ for (i = 0; i < 3; i++) {
+ cpuid_count(0x0b, i, p);
+
+ /* Fall back if CPU leaf 11 doesn't really exist. */
+ if (i == 0 && p[1] == 0) {
+ topo_probe_0x4();
+ return;
+ }
+
+ bits = p[0] & 0x1f;
+ logical = p[1] &= 0xffff;
+ type = (p[2] >> 8) & 0xff;
+ if (type == 0 || logical == 0)
+ break;
+ /*
+ * Because of uniformity assumption we examine only
+ * those logical processors that belong to the same
+ * package as BSP.
+ */
+ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
+ if (!cpu_info[x].cpu_present ||
+ cpu_info[x].cpu_disabled)
+ continue;
+ if (x >> bits == boot_cpu_id >> bits)
+ cnt++;
+ }
+ if (type == CPUID_TYPE_SMT)
+ cpu_logical = cnt;
+ else if (type == CPUID_TYPE_CORE)
+ cpu_cores = cnt;
+ }
+ if (cpu_logical == 0)
+ cpu_logical = 1;
+ cpu_cores /= cpu_logical;
+}
+
+/*
+ * Both topology discovery code and code that consumes topology
+ * information assume top-down uniformity of the topology.
+ * That is, all physical packages must be identical and each
+ * core in a package must have the same number of threads.
+ * Topology information is queried only on BSP, on which this
+ * code runs and for which it can query CPUID information.
+ * Then topology is extrapolated on all packages using the
+ * uniformity assumption.
+ */
+void
+topo_probe(void)
+{
+ static int cpu_topo_probed = 0;
+
+ if (cpu_topo_probed)
+ return;
+
+ CPU_ZERO(&logical_cpus_mask);
+ if (mp_ncpus <= 1)
+ cpu_cores = cpu_logical = 1;
+ else if (cpu_vendor_id == CPU_VENDOR_AMD)
+ topo_probe_amd();
+ else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
+ /*
+ * See Intel(R) 64 Architecture Processor
+ * Topology Enumeration article for details.
+ *
+ * Note that 0x1 <= cpu_high < 4 case should be
+ * compatible with topo_probe_0x4() logic when
+ * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
+ * or it should trigger the fallback otherwise.
+ */
+ if (cpu_high >= 0xb)
+ topo_probe_0xb();
+ else if (cpu_high >= 0x1)
+ topo_probe_0x4();
+ }
+
+ /*
+ * Fallback: assume each logical CPU is in separate
+ * physical package. That is, no multi-core, no SMT.
+ */
+ if (cpu_cores == 0 || cpu_logical == 0)
+ cpu_cores = cpu_logical = 1;
+ cpu_topo_probed = 1;
+}
+
+struct cpu_group *
+cpu_topo(void)
+{
+ int cg_flags;
+
+ /*
+ * Determine whether any threading flags are
+ * necessary.
+ */
+ topo_probe();
+ if (cpu_logical > 1 && hyperthreading_cpus)
+ cg_flags = CG_FLAG_HTT;
+ else if (cpu_logical > 1)
+ cg_flags = CG_FLAG_SMT;
+ else
+ cg_flags = 0;
+ if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
+ printf("WARNING: Non-uniform processors.\n");
+ printf("WARNING: Using suboptimal topology.\n");
+ return (smp_topo_none());
+ }
+ /*
+ * No multi-core or hyper-threaded.
+ */
+ if (cpu_logical * cpu_cores == 1)
+ return (smp_topo_none());
+ /*
+ * Only HTT no multi-core.
+ */
+ if (cpu_logical > 1 && cpu_cores == 1)
+ return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
+ /*
+ * Only multi-core no HTT.
+ */
+ if (cpu_cores > 1 && cpu_logical == 1)
+ return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
+ /*
+ * Both HTT and multi-core.
+ */
+ return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
+ CG_SHARE_L1, cpu_logical, cg_flags));
+}
+
+void
+cpu_add(u_int apic_id, char boot_cpu)
+{
+
+ if (apic_id > MAX_APIC_ID) {
+ panic("SMP: APIC ID %d too high", apic_id);
+ return;
+ }
+ KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
+ apic_id));
+ cpu_info[apic_id].cpu_present = 1;
+ if (boot_cpu) {
+ KASSERT(boot_cpu_id == -1,
+ ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
+ boot_cpu_id));
+ boot_cpu_id = apic_id;
+ cpu_info[apic_id].cpu_bsp = 1;
+ }
+ if (mp_ncpus < MAXCPU) {
+ mp_ncpus++;
+ mp_maxid = mp_ncpus - 1;
+ }
+ if (bootverbose)
+ printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
+ "AP");
+}
+
+void
+cpu_mp_setmaxid(void)
+{
+
+ /*
+ * mp_maxid should be already set by calls to cpu_add().
+ * Just sanity check its value here.
+ */
+ if (mp_ncpus == 0)
+ KASSERT(mp_maxid == 0,
+ ("%s: mp_ncpus is zero, but mp_maxid is not", __func__));
+ else if (mp_ncpus == 1)
+ mp_maxid = 0;
+ else
+ KASSERT(mp_maxid >= mp_ncpus - 1,
+ ("%s: counters out of sync: max %d, count %d", __func__,
+ mp_maxid, mp_ncpus));
+}
+
+int
+cpu_mp_probe(void)
+{
+
+ /*
+ * Always record BSP in CPU map so that the mbuf init code works
+ * correctly.
+ */
+ CPU_SETOF(0, &all_cpus);
+ if (mp_ncpus == 0) {
+ /*
+ * No CPUs were found, so this must be a UP system. Setup
+ * the variables to represent a system with a single CPU
+ * with an id of 0.
+ */
+ mp_ncpus = 1;
+ return (0);
+ }
+
+ /* At least one CPU was found. */
+ if (mp_ncpus == 1) {
+ /*
+ * One CPU was found, so this must be a UP system with
+ * an I/O APIC.
+ */
+ mp_maxid = 0;
+ return (0);
+ }
+
+ /* At least two CPUs were found. */
+ return (1);
+}
+
+/*
+ * Print various information about the SMP system hardware and setup.
+ */
+void
+cpu_mp_announce(void)
+{
+ const char *hyperthread;
+ int i;
+
+ printf("FreeBSD/SMP: %d package(s) x %d core(s)",
+ mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
+ if (hyperthreading_cpus > 1)
+ printf(" x %d HTT threads", cpu_logical);
+ else if (cpu_logical > 1)
+ printf(" x %d SMT threads", cpu_logical);
+ printf("\n");
+
+ /* List active CPUs first. */
+ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
+ for (i = 1; i < mp_ncpus; i++) {
+ if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
+ hyperthread = "/HT";
+ else
+ hyperthread = "";
+ printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
+ cpu_apic_ids[i]);
+ }
+
+ /* List disabled CPUs last. */
+ for (i = 0; i <= MAX_APIC_ID; i++) {
+ if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
+ continue;
+ if (cpu_info[i].cpu_hyperthread)
+ hyperthread = "/HT";
+ else
+ hyperthread = "";
+ printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
+ i);
+ }
+}
+
+void
+init_secondary_tail(void)
+{
+ u_int cpuid;
+
+ /*
+ * On real hardware, switch to x2apic mode if possible. Do it
+ * after aps_ready was signalled, to avoid manipulating the
+ * mode while BSP might still want to send some IPI to us
+ * (second startup IPI is ignored on modern hardware etc).
+ */
+ lapic_xapic_mode();
+
+ /* Initialize the PAT MSR. */
+ pmap_init_pat();
+
+ /* set up CPU registers and state */
+ cpu_setregs();
+
+ /* set up SSE/NX */
+ initializecpu();
+
+ /* set up FPU state on the AP */
+#ifdef __amd64__
+ fpuinit();
+#else
+ npxinit(false);
+#endif
+
+ if (cpu_ops.cpu_init)
+ cpu_ops.cpu_init();
+
+ /* A quick check from sanity claus */
+ cpuid = PCPU_GET(cpuid);
+ if (PCPU_GET(apic_id) != lapic_id()) {
+ printf("SMP: cpuid = %d\n", cpuid);
+ printf("SMP: actual apic_id = %d\n", lapic_id());
+ printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
+ panic("cpuid mismatch! boom!!");
+ }
+
+ /* Initialize curthread. */
+ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
+ PCPU_SET(curthread, PCPU_GET(idlethread));
+
+ mca_init();
+
+ mtx_lock_spin(&ap_boot_mtx);
+
+ /* Init local apic for irq's */
+ lapic_setup(1);
+
+ /* Set memory range attributes for this CPU to match the BSP */
+ mem_range_AP_init();
+
+ smp_cpus++;
+
+ CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
+ printf("SMP: AP CPU #%d Launched!\n", cpuid);
+
+ /* Determine if we are a logical CPU. */
+ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
+ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
+ CPU_SET(cpuid, &logical_cpus_mask);
+
+ if (bootverbose)
+ lapic_dump("AP");
+
+ if (smp_cpus == mp_ncpus) {
+ /* enable IPI's, tlb shootdown, freezes etc */
+ atomic_store_rel_int(&smp_started, 1);
+ }
+
+#ifdef __amd64__
+ /*
+ * Enable global pages TLB extension
+ * This also implicitly flushes the TLB
+ */
+ load_cr4(rcr4() | CR4_PGE);
+ if (pmap_pcid_enabled)
+ load_cr4(rcr4() | CR4_PCIDE);
+ load_ds(_udatasel);
+ load_es(_udatasel);
+ load_fs(_ufssel);
+#endif
+
+ mtx_unlock_spin(&ap_boot_mtx);
+
+ /* Wait until all the AP's are up. */
+ while (smp_started == 0)
+ ia32_pause();
+
+ /* Start per-CPU event timers. */
+ cpu_initclocks_ap();
+
+ sched_throw(NULL);
+
+ panic("scheduler returned us to %s", __func__);
+ /* NOTREACHED */
+}
+
+/*******************************************************************
+ * local functions and data
+ */
+
+/*
+ * We tell the I/O APIC code about all the CPUs we want to receive
+ * interrupts. If we don't want certain CPUs to receive IRQs we
+ * can simply not tell the I/O APIC code about them in this function.
+ * We also do not tell it about the BSP since it tells itself about
+ * the BSP internally to work with UP kernels and on UP machines.
+ */
+void
+set_interrupt_apic_ids(void)
+{
+ u_int i, apic_id;
+
+ for (i = 0; i < MAXCPU; i++) {
+ apic_id = cpu_apic_ids[i];
+ if (apic_id == -1)
+ continue;
+ if (cpu_info[apic_id].cpu_bsp)
+ continue;
+ if (cpu_info[apic_id].cpu_disabled)
+ continue;
+
+ /* Don't let hyperthreads service interrupts. */
+ if (cpu_logical > 1 &&
+ apic_id % cpu_logical != 0)
+ continue;
+
+ intr_add_cpu(i);
+ }
+}
+
+/*
+ * Assign logical CPU IDs to local APICs.
+ */
+void
+assign_cpu_ids(void)
+{
+ u_int i;
+
+ TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
+ &hyperthreading_allowed);
+
+ /* Check for explicitly disabled CPUs. */
+ for (i = 0; i <= MAX_APIC_ID; i++) {
+ if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
+ continue;
+
+ if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
+ cpu_info[i].cpu_hyperthread = 1;
+
+ /*
+ * Don't use HT CPU if it has been disabled by a
+ * tunable.
+ */
+ if (hyperthreading_allowed == 0) {
+ cpu_info[i].cpu_disabled = 1;
+ continue;
+ }
+ }
+
+ /* Don't use this CPU if it has been disabled by a tunable. */
+ if (resource_disabled("lapic", i)) {
+ cpu_info[i].cpu_disabled = 1;
+ continue;
+ }
+ }
+
+ if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
+ hyperthreading_cpus = 0;
+ cpu_logical = 1;
+ }
+
+ /*
+ * Assign CPU IDs to local APIC IDs and disable any CPUs
+ * beyond MAXCPU. CPU 0 is always assigned to the BSP.
+ *
+ * To minimize confusion for userland, we attempt to number
+ * CPUs such that all threads and cores in a package are
+ * grouped together. For now we assume that the BSP is always
+ * the first thread in a package and just start adding APs
+ * starting with the BSP's APIC ID.
+ */
+ mp_ncpus = 1;
+ cpu_apic_ids[0] = boot_cpu_id;
+ apic_cpuids[boot_cpu_id] = 0;
+ for (i = boot_cpu_id + 1; i != boot_cpu_id;
+ i == MAX_APIC_ID ? i = 0 : i++) {
+ if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
+ cpu_info[i].cpu_disabled)
+ continue;
+
+ if (mp_ncpus < MAXCPU) {
+ cpu_apic_ids[mp_ncpus] = i;
+ apic_cpuids[i] = mp_ncpus;
+ mp_ncpus++;
+ } else
+ cpu_info[i].cpu_disabled = 1;
+ }
+ KASSERT(mp_maxid >= mp_ncpus - 1,
+ ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
+ mp_ncpus));
+}
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];
+u_int xhits_pg[MAXCPU];
+u_int xhits_rng[MAXCPU];
+static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+ sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+ sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+ sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;
+u_int ipi_page;
+u_int ipi_range;
+u_int ipi_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+ 0, "");
+
+u_int ipi_masked_global;
+u_int ipi_masked_page;
+u_int ipi_masked_range;
+u_int ipi_masked_range_size;
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+ &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+ &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+ &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+ &ipi_masked_range_size, 0, "");
+#endif /* COUNT_XINVLTLB_HITS */
+
+/*
+ * Init and startup IPI.
+ */
+void
+ipi_startup(int apic_id, int vector)
+{
+
+ /*
+ * This attempts to follow the algorithm described in the
+ * Intel Multiprocessor Specification v1.4 in section B.4.
+ * For each IPI, we allow the local APIC ~20us to deliver the
+ * IPI. If that times out, we panic.
+ */
+
+ /*
+ * first we do an INIT IPI: this INIT IPI might be run, resetting
+ * and running the target CPU. OR this INIT IPI might be latched (P5
+ * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
+ * ignored.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+ lapic_ipi_wait(100);
+
+ /* Explicitly deassert the INIT IPI. */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+ APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
+ apic_id);
+
+ DELAY(10000); /* wait ~10ms */
+
+ /*
+ * next we do a STARTUP IPI: the previous INIT IPI might still be
+ * latched, (P5 bug) this 1st STARTUP would then terminate
+ * immediately, and the previously started INIT IPI would continue. OR
+ * the previous INIT IPI has already run. and this STARTUP IPI will
+ * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
+ * will run.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ if (!lapic_ipi_wait(100))
+ panic("Failed to deliver first STARTUP IPI to APIC %d",
+ apic_id);
+ DELAY(200); /* wait ~200us */
+
+ /*
+ * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
+ * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
+ * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
+ * recognized after hardware RESET or INIT IPI.
+ */
+ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+ APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+ vector, apic_id);
+ if (!lapic_ipi_wait(100))
+ panic("Failed to deliver second STARTUP IPI to APIC %d",
+ apic_id);
+
+ DELAY(200); /* wait ~200us */
+}
+
+/*
+ * Send an IPI to specified CPU handling the bitmap logic.
+ */
+void
+ipi_send_cpu(int cpu, u_int ipi)
+{
+ u_int bitmap, old_pending, new_pending;
+
+ KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
+
+ if (IPI_IS_BITMAPED(ipi)) {
+ bitmap = 1 << ipi;
+ ipi = IPI_BITMAP_VECTOR;
+ do {
+ old_pending = cpu_ipi_pending[cpu];
+ new_pending = old_pending | bitmap;
+ } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
+ old_pending, new_pending));
+ if (old_pending)
+ return;
+ }
+ lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+}
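+
+/*
+ * Note that concurrent senders of bitmapped IPIs coalesce in
+ * cpu_ipi_pending: only the sender that finds old_pending == 0 raises
+ * IPI_BITMAP_VECTOR, so the target takes a single interrupt for any
+ * number of pending requests.
+ */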
+
+void
+ipi_bitmap_handler(struct trapframe frame)
+{
+ struct trapframe *oldframe;
+ struct thread *td;
+ int cpu = PCPU_GET(cpuid);
+ u_int ipi_bitmap;
+
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = &frame;
+ ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
+ if (ipi_bitmap & (1 << IPI_PREEMPT)) {
+#ifdef COUNT_IPIS
+ (*ipi_preempt_counts[cpu])++;
+#endif
+ sched_preempt(td);
+ }
+ if (ipi_bitmap & (1 << IPI_AST)) {
+#ifdef COUNT_IPIS
+ (*ipi_ast_counts[cpu])++;
+#endif
+ /* Nothing to do for AST */
+ }
+ if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
+#ifdef COUNT_IPIS
+ (*ipi_hardclock_counts[cpu])++;
+#endif
+ hardclockintr();
+ }
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
+}
+
+/*
+ * send an IPI to a set of cpus.
+ */
+void
+ipi_selected(cpuset_t cpus, u_int ipi)
+{
+ int cpu;
+
+ /*
+ * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+ * of help in order to understand what is the source.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
+
+ while ((cpu = CPU_FFS(&cpus)) != 0) {
+ cpu--;
+ CPU_CLR(cpu, &cpus);
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
+ ipi_send_cpu(cpu, ipi);
+ }
+}
+
+/*
+ * send an IPI to a specific CPU.
+ */
+void
+ipi_cpu(int cpu, u_int ipi)
+{
+
+ /*
+ * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+ * of help in order to understand what is the source.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
+
+ CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
+ ipi_send_cpu(cpu, ipi);
+}
+
+/*
+ * send an IPI to all CPUs EXCEPT myself
+ */
+void
+ipi_all_but_self(u_int ipi)
+{
+ cpuset_t other_cpus;
+
+ other_cpus = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+ if (IPI_IS_BITMAPED(ipi)) {
+ ipi_selected(other_cpus, ipi);
+ return;
+ }
+
+ /*
+ * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
+ * of help in order to understand what is the source.
+ * Set the mask of receiving CPUs for this purpose.
+ */
+ if (ipi == IPI_STOP_HARD)
+ CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
+
+ CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+ lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+}
+
+int
+ipi_nmi_handler(void)
+{
+ u_int cpuid;
+
+ /*
+ * Since there is no simple way to identify an NMI's source, assume
+ * that if the bit for the current CPU is set in the global pending
+ * bitmask, an IPI_STOP_HARD has been issued to this CPU and should
+ * be handled.
+ */
+ cpuid = PCPU_GET(cpuid);
+ if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
+ return (1);
+
+ CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
+ cpustop_handler();
+ return (0);
+}
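
Note the return convention: 0 means the NMI was consumed as an
IPI_STOP_HARD, non-zero means it belongs to some other source. A
hypothetical sketch of how a trap handler might chain on that result
(unknown_nmi() is not a real symbol):

static int unknown_nmi(void);		/* hypothetical fallback */

static int
nmi_dispatch(void)
{
	if (ipi_nmi_handler() == 0)
		return (0);		/* consumed: it was IPI_STOP_HARD */
	return (unknown_nmi());		/* not ours; pass it on */
}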
+
+/*
+ * Handle an IPI_STOP by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpustop_handler(void)
+{
+ u_int cpu;
+
+ cpu = PCPU_GET(cpuid);
+
+ savectx(&stoppcbs[cpu]);
+
+ /* Indicate that we are stopped */
+ CPU_SET_ATOMIC(cpu, &stopped_cpus);
+
+ /* Wait for restart */
+ while (!CPU_ISSET(cpu, &started_cpus))
+ ia32_pause();
+
+ CPU_CLR_ATOMIC(cpu, &started_cpus);
+ CPU_CLR_ATOMIC(cpu, &stopped_cpus);
+
+#if defined(__amd64__) && defined(DDB)
+ amd64_db_resume_dbreg();
+#endif
+
+ if (cpu == 0 && cpustop_restartfunc != NULL) {
+ cpustop_restartfunc();
+ cpustop_restartfunc = NULL;
+ }
+}
+
+/*
+ * Handle an IPI_SUSPEND by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpususpend_handler(void)
+{
+ u_int cpu;
+
+ mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+
+ cpu = PCPU_GET(cpuid);
+ if (savectx(&susppcbs[cpu]->sp_pcb)) {
+#ifdef __amd64__
+ fpususpend(susppcbs[cpu]->sp_fpususpend);
+#else
+ npxsuspend(susppcbs[cpu]->sp_fpususpend);
+#endif
+ wbinvd();
+ CPU_SET_ATOMIC(cpu, &suspended_cpus);
+ } else {
+#ifdef __amd64__
+ fpuresume(susppcbs[cpu]->sp_fpususpend);
+#else
+ npxresume(susppcbs[cpu]->sp_fpususpend);
+#endif
+ pmap_init_pat();
+ initializecpu();
+ PCPU_SET(switchtime, 0);
+ PCPU_SET(switchticks, ticks);
+
+ /* Indicate that we are resumed */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
+ }
+
+ /* Wait for resume */
+ while (!CPU_ISSET(cpu, &started_cpus))
+ ia32_pause();
+
+ if (cpu_ops.cpu_resume)
+ cpu_ops.cpu_resume();
+#ifdef __amd64__
+ if (vmm_resume_p)
+ vmm_resume_p();
+#endif
+
+ /* Resume MCA and local APIC */
+ lapic_xapic_mode();
+ mca_resume();
+ lapic_setup(0);
+
+ /* Clear the suspend state now that we are fully resumed */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
+ CPU_CLR_ATOMIC(cpu, &started_cpus);
+}
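
savectx() "returns twice": non-zero on the initial save (the suspend leg)
and zero when the saved context is later resumed, the mirror image of
setjmp(3), which returns zero first. A userland analogue of the branch
structure above, assuming only standard setjmp()/longjmp() behavior:

#include <setjmp.h>
#include <stdio.h>

static jmp_buf ctx;

static void
suspend_resume_demo(void)
{
	if (setjmp(ctx) == 0) {		/* first return: the "suspend" leg */
		printf("suspending\n");
		longjmp(ctx, 1);	/* stands in for the wakeup */
	}
	printf("resumed\n");		/* second return: the "resume" leg */
}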
+
+void
+invlcache_handler(void)
+{
+#ifdef COUNT_IPIS
+ (*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
+#endif /* COUNT_IPIS */
+
+ wbinvd();
+ atomic_add_int(&smp_tlb_wait, 1);
+}
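
The atomic_add_int() is the acknowledgement half of a shootdown
rendezvous: the initiator zeroes smp_tlb_wait, delivers the IPI to some
number of CPUs, and spins until every handler has incremented the counter.
A sketch of the initiator side with C11 atomics (names hypothetical; in
the kernel the sequencing is typically serialized by a spin lock such as
smp_ipi_mtx):

#include <stdatomic.h>

static _Atomic int tlb_wait;		/* stands in for smp_tlb_wait */

static void
wait_for_acks(int ncpus)
{
	atomic_store(&tlb_wait, 0);
	/* ... deliver the invalidation IPI to ncpus targets here ... */
	while (atomic_load(&tlb_wait) < ncpus)
		;	/* each handler does atomic_fetch_add(&tlb_wait, 1) */
}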
+
+/*
+ * This is called once the rest of the system is up and running and we're
+ * ready to let the APs out of the pen.
+ */
+static void
+release_aps(void *dummy __unused)
+{
+
+ if (mp_ncpus == 1)
+ return;
+ atomic_store_rel_int(&aps_ready, 1);
+ while (smp_started == 0)
+ ia32_pause();
+}
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
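
release_aps() pairs a release store with the loads the APs spin on, so
everything the BSP initialized is visible before the APs proceed;
smp_started is set once the last AP comes online. The handshake, sketched
with C11 atomics (flag names hypothetical):

#include <stdatomic.h>

static _Atomic int aps_ready_flag, smp_started_flag;

static void
bsp_release_aps(void)
{
	atomic_store_explicit(&aps_ready_flag, 1, memory_order_release);
	while (atomic_load_explicit(&smp_started_flag,
	    memory_order_acquire) == 0)
		;		/* spin until the last AP checks in */
}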
+
+#ifdef COUNT_IPIS
+/*
+ * Set up interrupt counters for IPI handlers.
+ */
+static void
+mp_ipi_intrcnt(void *dummy)
+{
+ char buf[64];
+ int i;
+
+ CPU_FOREACH(i) {
+ snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
+ intrcnt_add(buf, &ipi_invltlb_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
+ intrcnt_add(buf, &ipi_invlrng_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
+ intrcnt_add(buf, &ipi_invlpg_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:invlcache", i);
+ intrcnt_add(buf, &ipi_invlcache_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
+ intrcnt_add(buf, &ipi_preempt_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:ast", i);
+ intrcnt_add(buf, &ipi_ast_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
+ intrcnt_add(buf, &ipi_rendezvous_counts[i]);
+ snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
+ intrcnt_add(buf, &ipi_hardclock_counts[i]);
+ }
+}
+SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
+#endif
diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c
index 64979b1..3bc4b43 100644
--- a/sys/x86/xen/xen_intr.c
+++ b/sys/x86/xen/xen_intr.c
@@ -1,7 +1,7 @@
/******************************************************************************
* xen_intr.c
*
- * Xen event and interrupt services for x86 PV and HVM guests.
+ * Xen event and interrupt services for x86 HVM guests.
*
* Copyright (c) 2002-2005, K A Fraser
* Copyright (c) 2005, Intel Corporation <xiaofeng.ling@intel.com>
@@ -864,10 +864,8 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
u_int to_cpu, vcpu_id;
int error, masked;
-#ifdef XENHVM
if (xen_vector_callback_enabled == 0)
return (EOPNOTSUPP);
-#endif
to_cpu = apic_cpuid(apic_id);
vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id;
diff --git a/sys/x86/xen/xen_nexus.c b/sys/x86/xen/xen_nexus.c
index f25f970..73506fc 100644
--- a/sys/x86/xen/xen_nexus.c
+++ b/sys/x86/xen/xen_nexus.c
@@ -66,14 +66,11 @@ static int
nexus_xen_attach(device_t dev)
{
int error;
-#ifndef XEN
device_t acpi_dev = NULL;
-#endif
nexus_init_resources();
bus_generic_probe(dev);
-#ifndef XEN
if (xen_initial_domain()) {
/* Disable some ACPI devices that are not usable by Dom0 */
acpi_cpu_disabled = true;
@@ -84,13 +81,10 @@ nexus_xen_attach(device_t dev)
if (acpi_dev == NULL)
panic("Unable to add ACPI bus to Xen Dom0");
}
-#endif
error = bus_generic_attach(dev);
-#ifndef XEN
if (xen_initial_domain() && (error == 0))
acpi_install_wakeup_handler(device_get_softc(acpi_dev));
-#endif
return (error);
}