diff options
author | Renato Botelho <renato@netgate.com> | 2016-06-14 14:40:19 -0500 |
---|---|---|
committer | Renato Botelho <renato@netgate.com> | 2016-06-14 14:40:19 -0500 |
commit | 20a32898b6944f9ebcdbb846253d812943036066 (patch) | |
tree | 2d3bb1c4acf6d65a66c132d4c59643a3e99dfe34 /sys | |
parent | 4fdb7654ef71cc3e4f0353cc46f28f652cd35605 (diff) | |
parent | a048478c507785f68e86db1a32431aa36773ee06 (diff) | |
download | FreeBSD-src-20a32898b6944f9ebcdbb846253d812943036066.zip FreeBSD-src-20a32898b6944f9ebcdbb846253d812943036066.tar.gz |
Merge remote-tracking branch 'origin/master' into devel-11
Diffstat (limited to 'sys')
172 files changed, 4953 insertions, 1120 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 73f1250..c418532 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -386,6 +386,7 @@ static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static u_long pv_invl_gen[NPV_LIST_LOCKS]; static struct md_page *pv_table; +static struct md_page pv_dummy; /* * All those kernel PT submaps that BSD is so fond of @@ -1264,6 +1265,7 @@ pmap_init(void) M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); + TAILQ_INIT(&pv_dummy.pv_list); pmap_initialized = 1; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { @@ -3920,11 +3922,10 @@ pmap_remove_all(vm_page_t m) ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); lock = VM_PAGE_TO_PV_LIST_LOCK(m); - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry: rw_wlock(lock); - if ((m->flags & PG_FICTITIOUS) != 0) - goto small_mappings; while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -3943,7 +3944,6 @@ retry: (void)pmap_demote_pde_locked(pmap, pde, va, &lock); PMAP_UNLOCK(pmap); } -small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -5743,11 +5743,10 @@ pmap_remove_write(vm_page_t m) if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry_pv_loop: rw_wlock(lock); - if ((m->flags & PG_FICTITIOUS) != 0) - goto small_mappings; TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -5771,7 +5770,6 @@ retry_pv_loop: lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); PMAP_UNLOCK(pmap); } -small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -5877,12 +5875,11 @@ pmap_ts_referenced(vm_page_t m) cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); - pvh = pa_to_pvh(pa); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); rw_wlock(lock); retry: not_cleared = 0; - if ((m->flags & PG_FICTITIOUS) != 0 || - (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) + if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { @@ -6194,12 +6191,11 @@ pmap_clear_modify(vm_page_t m) */ if ((m->aflags & PGA_WRITEABLE) == 0) return; - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_wlock(lock); restart: - if ((m->flags & PG_FICTITIOUS) != 0) - goto small_mappings; TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -6243,7 +6239,6 @@ restart: } PMAP_UNLOCK(pmap); } -small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { diff --git a/sys/arm/arm/cpufunc.c b/sys/arm/arm/cpufunc.c index b2f9982..9a848f5 100644 --- a/sys/arm/arm/cpufunc.c +++ b/sys/arm/arm/cpufunc.c @@ -761,25 +761,19 @@ set_cpufuncs() } #endif /* CPU_ARM1176 */ #if defined(CPU_CORTEXA) || defined(CPU_KRAIT) - if (cputype == CPU_ID_CORTEXA5 || - cputype == CPU_ID_CORTEXA7 || - cputype == CPU_ID_CORTEXA8R1 || - cputype == CPU_ID_CORTEXA8R2 || - cputype == CPU_ID_CORTEXA8R3 || - cputype == CPU_ID_CORTEXA9R1 || - cputype == CPU_ID_CORTEXA9R2 || - cputype == CPU_ID_CORTEXA9R3 || - cputype == CPU_ID_CORTEXA9R4 || - cputype == CPU_ID_CORTEXA12R0 || - cputype == CPU_ID_CORTEXA15R0 || - cputype == CPU_ID_CORTEXA15R1 || - cputype == CPU_ID_CORTEXA15R2 || - cputype == CPU_ID_CORTEXA15R3 || - cputype == CPU_ID_KRAIT300R0 || - cputype == CPU_ID_KRAIT300R1 ) { + switch(cputype & CPU_ID_SCHEME_MASK) { + case CPU_ID_CORTEXA5: + case CPU_ID_CORTEXA7: + case CPU_ID_CORTEXA8: + case CPU_ID_CORTEXA9: + case CPU_ID_CORTEXA12: + case CPU_ID_CORTEXA15: + case CPU_ID_KRAIT300: cpufuncs = cortexa_cpufuncs; get_cachetype_cp15(); goto out; + default: + break; } #endif /* CPU_CORTEXA */ diff --git a/sys/arm/arm/genassym.c b/sys/arm/arm/genassym.c index 944eff8..34df028 100644 --- a/sys/arm/arm/genassym.c +++ b/sys/arm/arm/genassym.c @@ -101,8 +101,10 @@ ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(MD_TP, offsetof(struct mdthread, md_tp)); +#if __ARM_ARCH < 6 ASSYM(MD_RAS_START, offsetof(struct mdthread, 
md_ras_start)); ASSYM(MD_RAS_END, offsetof(struct mdthread, md_ras_end)); +#endif ASSYM(TF_SPSR, offsetof(struct trapframe, tf_spsr)); ASSYM(TF_R0, offsetof(struct trapframe, tf_r0)); diff --git a/sys/arm/arm/mp_machdep.c b/sys/arm/arm/mp_machdep.c index 24b5679..312ee58 100644 --- a/sys/arm/arm/mp_machdep.c +++ b/sys/arm/arm/mp_machdep.c @@ -199,6 +199,9 @@ init_secondary(int cpu) vfp_init(); #endif + /* Configure the interrupt controller */ + intr_pic_init_secondary(); + mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); @@ -237,7 +240,6 @@ init_secondary(int cpu) cpu_initclocks_ap(); CTR0(KTR_SMP, "go into scheduler"); - intr_pic_init_secondary(); /* Enter the scheduler */ sched_throw(NULL); diff --git a/sys/arm/include/_types.h b/sys/arm/include/_types.h index 267ea83..b627963 100644 --- a/sys/arm/include/_types.h +++ b/sys/arm/include/_types.h @@ -43,10 +43,6 @@ #error this file needs sys/cdefs.h as a prerequisite #endif -#if __ARM_ARCH >= 6 -#define __NO_STRICT_ALIGNMENT -#endif - /* * Basic types upon which most other types are built. */ diff --git a/sys/arm/include/armreg.h b/sys/arm/include/armreg.h index 411517b..b509d7d 100644 --- a/sys/arm/include/armreg.h +++ b/sys/arm/include/armreg.h @@ -72,10 +72,16 @@ #define CPU_ID_IMPLEMENTOR_MASK 0xff000000 #define CPU_ID_ARM_LTD 0x41000000 /* 'A' */ #define CPU_ID_DEC 0x44000000 /* 'D' */ -#define CPU_ID_INTEL 0x69000000 /* 'i' */ +#define CPU_ID_MOTOROLA 0x4D000000 /* 'M' */ +#define CPU_ID_QUALCOM 0x51000000 /* 'Q' */ #define CPU_ID_TI 0x54000000 /* 'T' */ +#define CPU_ID_MARVELL 0x56000000 /* 'V' */ +#define CPU_ID_INTEL 0x69000000 /* 'i' */ #define CPU_ID_FARADAY 0x66000000 /* 'f' */ +#define CPU_ID_VARIANT_SHIFT 20 +#define CPU_ID_VARIANT_MASK 0x00f00000 + /* How to decide what format the CPUID is in. 
*/ #define CPU_ID_ISOLD(x) (((x) & 0x0000f000) == 0x00000000) #define CPU_ID_IS7(x) (((x) & 0x0000f000) == 0x00007000) @@ -92,7 +98,6 @@ #define CPU_ID_ARCH_V5TEJ 0x00060000 #define CPU_ID_ARCH_V6 0x00070000 #define CPU_ID_CPUID_SCHEME 0x000f0000 -#define CPU_ID_VARIANT_MASK 0x00f00000 /* Next three nybbles are part number */ #define CPU_ID_PARTNO_MASK 0x0000fff0 @@ -123,22 +128,35 @@ #define CPU_ID_ARM1136JS 0x4107b360 #define CPU_ID_ARM1136JSR1 0x4117b360 #define CPU_ID_ARM1176JZS 0x410fb760 -#define CPU_ID_CORTEXA5 0x410fc050 -#define CPU_ID_CORTEXA7 0x410fc070 -#define CPU_ID_CORTEXA8R1 0x411fc080 -#define CPU_ID_CORTEXA8R2 0x412fc080 -#define CPU_ID_CORTEXA8R3 0x413fc080 -#define CPU_ID_CORTEXA9R1 0x411fc090 -#define CPU_ID_CORTEXA9R2 0x412fc090 -#define CPU_ID_CORTEXA9R3 0x413fc090 -#define CPU_ID_CORTEXA9R4 0x414fc090 -#define CPU_ID_CORTEXA12R0 0x410fc0d0 -#define CPU_ID_CORTEXA15R0 0x410fc0f0 -#define CPU_ID_CORTEXA15R1 0x411fc0f0 -#define CPU_ID_CORTEXA15R2 0x412fc0f0 -#define CPU_ID_CORTEXA15R3 0x413fc0f0 -#define CPU_ID_KRAIT300R0 0x510f06f0 /* Snapdragon S4 Pro/APQ8064 */ -#define CPU_ID_KRAIT300R1 0x511f06f0 + +/* CPUs that follow the CPUID scheme */ +#define CPU_ID_SCHEME_MASK \ + (CPU_ID_IMPLEMENTOR_MASK | CPU_ID_ARCH_MASK | CPU_ID_PARTNO_MASK) + +#define CPU_ID_CORTEXA5 (CPU_ID_ARM_LTD | CPU_ID_CPUID_SCHEME | 0xc050) +#define CPU_ID_CORTEXA7 (CPU_ID_ARM_LTD | CPU_ID_CPUID_SCHEME | 0xc070) +#define CPU_ID_CORTEXA8 (CPU_ID_ARM_LTD | CPU_ID_CPUID_SCHEME | 0xc080) +#define CPU_ID_CORTEXA8R1 (CPU_ID_CORTEXA8 | (1 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA8R2 (CPU_ID_CORTEXA8 | (2 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA8R3 (CPU_ID_CORTEXA8 | (3 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA9 (CPU_ID_ARM_LTD | CPU_ID_CPUID_SCHEME | 0xc090) +#define CPU_ID_CORTEXA9R1 (CPU_ID_CORTEXA9 | (1 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA9R2 (CPU_ID_CORTEXA9 | (2 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA9R3 (CPU_ID_CORTEXA9 | (3 
<< CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA9R4 (CPU_ID_CORTEXA9 | (4 << CPU_ID_VARIANT_SHIFT)) +/* XXX: Cortx-A12 is the old name for this part, it has been renamed the A17 */ +#define CPU_ID_CORTEXA12 (CPU_ID_ARM_LTD | CPU_ID_CPUID_SCHEME | 0xc0d0) +#define CPU_ID_CORTEXA12R0 (CPU_ID_CORTEXA12 | (0 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA15 (CPU_ID_ARM_LTD | CPU_ID_CPUID_SCHEME | 0xc0f0) +#define CPU_ID_CORTEXA15R0 (CPU_ID_CORTEXA15 | (0 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA15R1 (CPU_ID_CORTEXA15 | (1 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA15R2 (CPU_ID_CORTEXA15 | (2 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_CORTEXA15R3 (CPU_ID_CORTEXA15 | (3 << CPU_ID_VARIANT_SHIFT)) + +#define CPU_ID_KRAIT300 (CPU_ID_QUALCOM | CPU_ID_CPUID_SCHEME | 0x06f0) +/* Snapdragon S4 Pro/APQ8064 */ +#define CPU_ID_KRAIT300R0 (CPU_ID_KRAIT300 | (0 << CPU_ID_VARIANT_SHIFT)) +#define CPU_ID_KRAIT300R1 (CPU_ID_KRAIT300 | (1 << CPU_ID_VARIANT_SHIFT)) #define CPU_ID_TI925T 0x54029250 #define CPU_ID_MV88FR131 0x56251310 /* Marvell Feroceon 88FR131 Core */ diff --git a/sys/arm/include/proc.h b/sys/arm/include/proc.h index 090aaba..5d42d07 100644 --- a/sys/arm/include/proc.h +++ b/sys/arm/include/proc.h @@ -54,8 +54,10 @@ struct mdthread { int md_ptrace_instr_alt; int md_ptrace_addr_alt; register_t md_tp; +#if __ARM_ARCH < 6 void *md_ras_start; void *md_ras_end; +#endif }; struct mdproc { diff --git a/sys/arm64/conf/GENERIC b/sys/arm64/conf/GENERIC index 9300e77..d08c2db 100644 --- a/sys/arm64/conf/GENERIC +++ b/sys/arm64/conf/GENERIC @@ -70,6 +70,7 @@ options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits options SMP +options INTRNG # Debugging support. Always need this: options KDB # Enable kernel debugger support. 
diff --git a/sys/arm64/conf/GENERIC-INTRNG b/sys/arm64/conf/GENERIC-INTRNG deleted file mode 100644 index 9cab02b..0000000 --- a/sys/arm64/conf/GENERIC-INTRNG +++ /dev/null @@ -1,15 +0,0 @@ -# -# GENERIC-INTRNG -- intrng testing kernel for FreeBSD/arm64 -# -# This config adds intrng support for testing, and to ensure intrng is not -# broken before switching to it. The config is expected to be removed soon -# when intrng becomes the default on arm64. -# -# $FreeBSD$ -# - -include GENERIC - -ident GENERIC-INTRNG - -options INTRNG diff --git a/sys/boot/efi/Makefile.inc b/sys/boot/efi/Makefile.inc index 9ed0cda..9d457f4 100644 --- a/sys/boot/efi/Makefile.inc +++ b/sys/boot/efi/Makefile.inc @@ -18,4 +18,8 @@ CFLAGS+= -mno-red-zone CFLAGS+= -mno-aes .endif +.if ${MACHINE_CPUARCH} == "aarch64" +CFLAGS+= -fshort-wchar +.endif + .include "../Makefile.inc" diff --git a/sys/boot/efi/libefi/efi_console.c b/sys/boot/efi/libefi/efi_console.c index 68c9a6b..838becc 100644 --- a/sys/boot/efi/libefi/efi_console.c +++ b/sys/boot/efi/libefi/efi_console.c @@ -111,9 +111,9 @@ efi_cons_probe(struct console *cp) static int efi_cons_init(int arg) { +#ifdef TERM_EMU conout->SetAttribute(conout, EFI_TEXT_ATTR(DEFAULT_FGCOLOR, DEFAULT_BGCOLOR)); -#ifdef TERM_EMU end_term(); get_pos(&curx, &cury); curs_move(&curx, &cury, curx, cury); @@ -139,8 +139,7 @@ efi_cons_rawputchar(int c) #ifndef TERM_EMU if (c == '\n') efi_cons_efiputchar('\r'); - else - efi_cons_efiputchar(c); + efi_cons_efiputchar(c); #else switch (c) { case '\r': @@ -178,6 +177,7 @@ efi_cons_rawputchar(int c) } } +#ifdef TERM_EMU /* Gracefully exit ESC-sequence processing in case of misunderstanding. 
*/ static void bail_out(int c) @@ -412,6 +412,12 @@ efi_term_emu(int c) break; } } +#else +void +HO(void) +{ +} +#endif void efi_cons_putchar(int c) diff --git a/sys/boot/efi/libefi/efipart.c b/sys/boot/efi/libefi/efipart.c index 7e53643..87e38de 100644 --- a/sys/boot/efi/libefi/efipart.c +++ b/sys/boot/efi/libefi/efipart.c @@ -119,8 +119,11 @@ efipart_init(void) (void**)&blkio); if (EFI_ERROR(status)) continue; - if (!blkio->Media->LogicalPartition) + if (!blkio->Media->LogicalPartition) { + printf("%s%d isn't a logical partition, skipping\n", + efipart_dev.dv_name, n); continue; + } /* * If we come across a logical partition of subtype CDROM diff --git a/sys/boot/fdt/dts/arm/pcduino3b.dts b/sys/boot/fdt/dts/arm/pcduino3b.dts new file mode 100644 index 0000000..9072baa --- /dev/null +++ b/sys/boot/fdt/dts/arm/pcduino3b.dts @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 2016 Emmanuel Vadot <manu@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "sun7i-a20-pcduino3.dts" +#include "sun7i-a20-hdmi.dtsi" +#include "xpowers-axp209.dtsi" + +/ { + soc@01c00000 { + hdmi@01c16000 { + status = "okay"; + }; + + hdmiaudio { + status = "okay"; + }; + }; +}; + +&gmac { + pinctrl-0 = <&gmac_pins_rgmii_a>; + phy-mode = "rgmii"; +}; diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 2e75500..72ced86 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -1672,6 +1672,7 @@ adaregister(struct cam_periph *periph, void *arg) if (cam_iosched_init(&softc->cam_iosched, periph) != 0) { printf("adaregister: Unable to probe new device. 
" "Unable to allocate iosched memory\n"); + free(softc, M_DEVBUF); return(CAM_REQ_CMP_ERR); } diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h index 8362f6c..1745644 100644 --- a/sys/cam/cam_ccb.h +++ b/sys/cam/cam_ccb.h @@ -41,6 +41,7 @@ #include <cam/cam_debug.h> #include <cam/scsi/scsi_all.h> #include <cam/ata/ata_all.h> +#include <cam/nvme/nvme_all.h> /* General allocation length definitions for CCB structures */ #define IOCDBLEN CAM_MAX_CDBLEN /* Space for CDB bytes/pointer */ @@ -204,6 +205,12 @@ typedef enum { XPT_SMP_IO = 0x1b | XPT_FC_DEV_QUEUED, /* Serial Management Protocol */ + XPT_NVME_IO = 0x1c | XPT_FC_DEV_QUEUED, + /* Execiute the requestred NVMe I/O operation */ + + XPT_MMCSD_IO = 0x1d | XPT_FC_DEV_QUEUED, + /* Placeholder for MMC / SD / SDIO I/O stuff */ + XPT_SCAN_TGT = 0x1E | XPT_FC_QUEUED | XPT_FC_USER_CCB | XPT_FC_XPT_ONLY, /* Scan Target */ @@ -259,6 +266,7 @@ typedef enum { PROTO_ATAPI, /* AT Attachment Packetized Interface */ PROTO_SATAPM, /* SATA Port Multiplier */ PROTO_SEMB, /* SATA Enclosure Management Bridge */ + PROTO_NVME, /* NVME */ } cam_proto; typedef enum { @@ -274,6 +282,7 @@ typedef enum { XPORT_SATA, /* Serial AT Attachment */ XPORT_ISCSI, /* iSCSI */ XPORT_SRP, /* SCSI RDMA Protocol */ + XPORT_NVME, /* NVMe over PCIe */ } cam_xport; #define XPORT_IS_ATA(t) ((t) == XPORT_ATA || (t) == XPORT_SATA) @@ -350,6 +359,8 @@ struct ccb_getdev { u_int8_t serial_num[252]; u_int8_t inq_flags; u_int8_t serial_num_len; + const struct nvme_controller_data *nvme_cdata; + const struct nvme_namespace_data *nvme_data; }; /* Device Statistics CCB */ @@ -610,6 +621,11 @@ struct ccb_pathinq_settings_fc { struct ccb_pathinq_settings_sas { u_int32_t bitrate; /* Mbps */ }; + +struct ccb_pathinq_settings_nvme { + uint16_t nsid; /* Namespace ID for this path */ +}; + #define PATHINQ_SETTINGS_SIZE 128 struct ccb_pathinq { @@ -640,6 +656,7 @@ struct ccb_pathinq { struct ccb_pathinq_settings_spi spi; struct ccb_pathinq_settings_fc fc; struct 
ccb_pathinq_settings_sas sas; + struct ccb_pathinq_settings_nvme nvme; char ccb_pathinq_settings_opaque[PATHINQ_SETTINGS_SIZE]; } xport_specific; u_int maxio; /* Max supported I/O size, in bytes. */ @@ -777,6 +794,19 @@ struct ccb_relsim { }; /* + * NVMe I/O Request CCB used for the XPT_NVME_IO function code. + */ +struct ccb_nvmeio { + struct ccb_hdr ccb_h; + union ccb *next_ccb; /* Ptr for next CCB for action */ + struct nvme_command cmd; /* NVME command, per NVME standard */ + struct nvme_completion cpl; /* NVME completion, per NVME standard */ + uint8_t *data_ptr; /* Ptr to the data buf/SG list */ + uint32_t dxfer_len; /* Data transfer length */ + uint32_t resid; /* Transfer residual length: 2's comp unused ?*/ +}; + +/* * Definitions for the asynchronous callback CCB fields. */ typedef enum { @@ -953,6 +983,18 @@ struct ccb_trans_settings_sata { #define CTS_SATA_CAPS_D_APST 0x00020000 }; +struct ccb_trans_settings_nvme +{ + u_int valid; /* Which fields to honor */ +#define CTS_NVME_VALID_SPEC 0x01 +#define CTS_NVME_VALID_CAPS 0x02 + u_int spec_major; /* Major version of spec supported */ + u_int spec_minor; /* Minor verison of spec supported */ + u_int spec_tiny; /* Tiny version of spec supported */ + u_int max_xfer; /* Max transfer size (0 -> unlimited */ + u_int caps; +}; + /* Get/Set transfer rate/width/disconnection/tag queueing settings */ struct ccb_trans_settings { struct ccb_hdr ccb_h; @@ -965,6 +1007,7 @@ struct ccb_trans_settings { u_int valid; /* Which fields to honor */ struct ccb_trans_settings_ata ata; struct ccb_trans_settings_scsi scsi; + struct ccb_trans_settings_nvme nvme; } proto_specific; union { u_int valid; /* Which fields to honor */ @@ -973,6 +1016,7 @@ struct ccb_trans_settings { struct ccb_trans_settings_sas sas; struct ccb_trans_settings_pata ata; struct ccb_trans_settings_sata sata; + struct ccb_trans_settings_nvme nvme; } xport_specific; }; @@ -1228,6 +1272,7 @@ union ccb { struct ccb_ataio ataio; struct ccb_dev_advinfo cdai; 
struct ccb_async casync; + struct ccb_nvmeio nvmeio; }; #define CCB_CLEAR_ALL_EXCEPT_HDR(ccbp) \ @@ -1244,6 +1289,12 @@ cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries, u_int32_t timeout); static __inline void +cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, union ccb *), + u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len, + u_int32_t timeout); + +static __inline void cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int tag_action, u_int tag_id, @@ -1364,6 +1415,20 @@ cam_ccb_status(union ccb *ccb) void cam_calc_geometry(struct ccb_calc_geometry *ccg, int extended); +static __inline void +cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, union ccb *), + u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len, + u_int32_t timeout) +{ + nvmeio->ccb_h.func_code = XPT_NVME_IO; + nvmeio->ccb_h.flags = flags; + nvmeio->ccb_h.retry_count = retries; + nvmeio->ccb_h.cbfcnp = cbfcnp; + nvmeio->ccb_h.timeout = timeout; + nvmeio->data_ptr = data_ptr; + nvmeio->dxfer_len = dxfer_len; +} __END_DECLS #endif /* _CAM_CAM_CCB_H */ diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c index 8da3ce3..58ea13a 100644 --- a/sys/cam/cam_xpt.c +++ b/sys/cam/cam_xpt.c @@ -309,6 +309,7 @@ static xpt_devicefunc_t xptsetasyncfunc; static xpt_busfunc_t xptsetasyncbusfunc; static cam_status xptregister(struct cam_periph *periph, void *arg); +static const char * xpt_action_name(uint32_t action); static __inline int device_is_queued(struct cam_ed *device); static __inline int @@ -812,6 +813,10 @@ xpt_rescan(union ccb *ccb) xpt_free_ccb(ccb); return; } + CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, + ("xpt_rescan: func %#x %s\n", ccb->ccb_h.func_code, + xpt_action_name(ccb->ccb_h.func_code))); + ccb->ccb_h.ppriv_ptr1 = ccb->ccb_h.cbfcnp; ccb->ccb_h.cbfcnp = xpt_rescan_done; xpt_setup_ccb(&ccb->ccb_h, 
ccb->ccb_h.path, CAM_PRIORITY_XPT); @@ -2451,7 +2456,8 @@ xpt_action(union ccb *start_ccb) { CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE, - ("xpt_action: func=%#x\n", start_ccb->ccb_h.func_code)); + ("xpt_action: func %#x %s\n", start_ccb->ccb_h.func_code, + xpt_action_name(start_ccb->ccb_h.func_code))); start_ccb->ccb_h.status = CAM_REQ_INPROG; (*(start_ccb->ccb_h.path->bus->xport->action))(start_ccb); @@ -2466,7 +2472,8 @@ xpt_action_default(union ccb *start_ccb) path = start_ccb->ccb_h.path; CAM_DEBUG(path, CAM_DEBUG_TRACE, - ("xpt_action_default: func=%#x\n", start_ccb->ccb_h.func_code)); + ("xpt_action_default: func %#x %s\n", start_ccb->ccb_h.func_code, + xpt_action_name(start_ccb->ccb_h.func_code))); switch (start_ccb->ccb_h.func_code) { case XPT_SCSI_IO: @@ -3012,6 +3019,11 @@ call_sim: } break; } + CAM_DEBUG(path, CAM_DEBUG_TRACE, + ("xpt_action_default: func= %#x %s status %#x\n", + start_ccb->ccb_h.func_code, + xpt_action_name(start_ccb->ccb_h.func_code), + start_ccb->ccb_h.status)); } void @@ -4234,6 +4246,12 @@ xpt_async(u_int32_t async_code, struct cam_path *path, void *async_arg) ccb->casync.async_code = async_code; ccb->casync.async_arg_size = 0; size = xpt_async_size(async_code); + CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, + ("xpt_async: func %#x %s aync_code %d %s\n", + ccb->ccb_h.func_code, + xpt_action_name(ccb->ccb_h.func_code), + async_code, + xpt_async_string(async_code))); if (size > 0 && async_arg != NULL) { ccb->casync.async_arg_ptr = malloc(size, M_CAMXPT, M_NOWAIT); if (ccb->casync.async_arg_ptr == NULL) { @@ -4441,7 +4459,11 @@ xpt_done(union ccb *done_ccb) struct cam_doneq *queue; int run, hash; - CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("xpt_done\n")); + CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE, + ("xpt_done: func= %#x %s status %#x\n", + done_ccb->ccb_h.func_code, + xpt_action_name(done_ccb->ccb_h.func_code), + done_ccb->ccb_h.status)); if ((done_ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0) return; @@ -4463,7 
+4485,8 @@ void xpt_done_direct(union ccb *done_ccb) { - CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("xpt_done_direct\n")); + CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE, + ("xpt_done_direct: status %#x\n", done_ccb->ccb_h.status)); if ((done_ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0) return; @@ -5050,6 +5073,9 @@ xpt_register_async(int event, ac_callback_t *cbfunc, void *cbarg, xpt_action((union ccb *)&csa); status = csa.ccb_h.status; + CAM_DEBUG(csa.ccb_h.path, CAM_DEBUG_TRACE, + ("xpt_register_async: func %p\n", cbfunc)); + if (xptpath) { xpt_path_unlock(path); xpt_free_path(path); @@ -5301,3 +5327,69 @@ camisr_runqueue(void) mtx_unlock(&queue->cam_doneq_mtx); } } + +struct kv +{ + uint32_t v; + const char *name; +}; + +static struct kv map[] = { + { XPT_NOOP, "XPT_NOOP" }, + { XPT_SCSI_IO, "XPT_SCSI_IO" }, + { XPT_GDEV_TYPE, "XPT_GDEV_TYPE" }, + { XPT_GDEVLIST, "XPT_GDEVLIST" }, + { XPT_PATH_INQ, "XPT_PATH_INQ" }, + { XPT_REL_SIMQ, "XPT_REL_SIMQ" }, + { XPT_SASYNC_CB, "XPT_SASYNC_CB" }, + { XPT_SDEV_TYPE, "XPT_SDEV_TYPE" }, + { XPT_SCAN_BUS, "XPT_SCAN_BUS" }, + { XPT_DEV_MATCH, "XPT_DEV_MATCH" }, + { XPT_DEBUG, "XPT_DEBUG" }, + { XPT_PATH_STATS, "XPT_PATH_STATS" }, + { XPT_GDEV_STATS, "XPT_GDEV_STATS" }, + { XPT_DEV_ADVINFO, "XPT_DEV_ADVINFO" }, + { XPT_ASYNC, "XPT_ASYNC" }, + { XPT_ABORT, "XPT_ABORT" }, + { XPT_RESET_BUS, "XPT_RESET_BUS" }, + { XPT_RESET_DEV, "XPT_RESET_DEV" }, + { XPT_TERM_IO, "XPT_TERM_IO" }, + { XPT_SCAN_LUN, "XPT_SCAN_LUN" }, + { XPT_GET_TRAN_SETTINGS, "XPT_GET_TRAN_SETTINGS" }, + { XPT_SET_TRAN_SETTINGS, "XPT_SET_TRAN_SETTINGS" }, + { XPT_CALC_GEOMETRY, "XPT_CALC_GEOMETRY" }, + { XPT_ATA_IO, "XPT_ATA_IO" }, + { XPT_GET_SIM_KNOB, "XPT_GET_SIM_KNOB" }, + { XPT_SET_SIM_KNOB, "XPT_SET_SIM_KNOB" }, + { XPT_NVME_IO, "XPT_NVME_IO" }, + { XPT_MMCSD_IO, "XPT_MMCSD_IO" }, + { XPT_SMP_IO, "XPT_SMP_IO" }, + { XPT_SCAN_TGT, "XPT_SCAN_TGT" }, + { XPT_ENG_INQ, "XPT_ENG_INQ" }, + { XPT_ENG_EXEC, "XPT_ENG_EXEC" }, + { XPT_EN_LUN, "XPT_EN_LUN" 
}, + { XPT_TARGET_IO, "XPT_TARGET_IO" }, + { XPT_ACCEPT_TARGET_IO, "XPT_ACCEPT_TARGET_IO" }, + { XPT_CONT_TARGET_IO, "XPT_CONT_TARGET_IO" }, + { XPT_IMMED_NOTIFY, "XPT_IMMED_NOTIFY" }, + { XPT_NOTIFY_ACK, "XPT_NOTIFY_ACK" }, + { XPT_IMMEDIATE_NOTIFY, "XPT_IMMEDIATE_NOTIFY" }, + { XPT_NOTIFY_ACKNOWLEDGE, "XPT_NOTIFY_ACKNOWLEDGE" }, + { 0, 0 } +}; + +static const char * +xpt_action_name(uint32_t action) +{ + static char buffer[32]; /* Only for unknown messages -- racy */ + struct kv *walker = map; + + while (walker->name != NULL) { + if (walker->v == action) + return (walker->name); + walker++; + } + + snprintf(buffer, sizeof(buffer), "%#x", action); + return (buffer); +} diff --git a/sys/cam/cam_xpt_internal.h b/sys/cam/cam_xpt_internal.h index 23d6d34..b0624af 100644 --- a/sys/cam/cam_xpt_internal.h +++ b/sys/cam/cam_xpt_internal.h @@ -117,6 +117,8 @@ struct cam_ed { STAILQ_ENTRY(cam_ed) highpowerq_entry; struct mtx device_mtx; struct task device_destroy_task; + const struct nvme_controller_data *nvme_cdata; + const struct nvme_namespace_data *nvme_data; }; /* @@ -167,6 +169,7 @@ struct cam_path { struct xpt_xport * scsi_get_xport(void); struct xpt_xport * ata_get_xport(void); +struct xpt_xport * nvme_get_xport(void); struct cam_ed * xpt_alloc_device(struct cam_eb *bus, struct cam_et *target, diff --git a/sys/cam/nvme/nvme_all.c b/sys/cam/nvme/nvme_all.c new file mode 100644 index 0000000..3904891 --- /dev/null +++ b/sys/cam/nvme/nvme_all.c @@ -0,0 +1,124 @@ +/*- + * Copyright (c) 2015 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> + +#ifdef _KERNEL +#include "opt_scsi.h" + +#include <sys/systm.h> +#include <sys/libkern.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#else +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef min +#define min(a,b) (((a)<(b))?(a):(b)) +#endif +#endif + +#include <cam/cam.h> +#include <cam/cam_ccb.h> +#include <cam/cam_queue.h> +#include <cam/cam_xpt.h> +#include <cam/nvme/nvme_all.h> +#include <sys/sbuf.h> +#include <sys/endian.h> + +void +nvme_ns_cmd(struct ccb_nvmeio *nvmeio, uint8_t cmd, uint32_t nsid, + uint32_t cdw10, uint32_t cdw11, uint32_t cdw12, uint32_t cdw13, + uint32_t cdw14, uint32_t cdw15) +{ + bzero(&nvmeio->cmd, sizeof(struct nvme_command)); + nvmeio->cmd.opc = cmd; + nvmeio->cmd.nsid = nsid; + nvmeio->cmd.cdw10 = cdw10; + nvmeio->cmd.cdw11 = cdw11; + nvmeio->cmd.cdw12 = cdw12; + nvmeio->cmd.cdw13 = cdw13; + nvmeio->cmd.cdw14 = cdw14; + nvmeio->cmd.cdw15 = cdw15; +} + +int 
+nvme_identify_match(caddr_t identbuffer, caddr_t table_entry) +{ + return 0; +} + + +void +nvme_print_ident(const struct nvme_controller_data *cdata, + const struct nvme_namespace_data *data) +{ + printf("I'm a pretty NVME drive\n"); +} + +/* XXX need to do nvme admin opcodes too, but those aren't used yet by nda */ +static const char * +nvme_opc2str[] = { + "FLUSH", + "WRITE", + "READ", + "RSVD-3", + "WRITE_UNCORRECTABLE", + "COMPARE", + "RSVD-6", + "RSVD-7", + "DATASET_MANAGEMENT" +}; + +const char * +nvme_op_string(const struct nvme_command *cmd) +{ + if (cmd->opc > nitems(nvme_opc2str)) + return "UNKNOWN"; + + return nvme_opc2str[cmd->opc]; +} + +const char * +nvme_cmd_string(const struct nvme_command *cmd, char *cmd_string, size_t len) +{ + /* + * cid, rsvd areas and mptr not printed, since they are used + * only internally by the SIM. + */ + snprintf(cmd_string, len, + "opc=%x fuse=%x nsid=%x prp1=%llx prp2=%llx cdw=%x %x %x %x %x %x", + cmd->opc, cmd->fuse, cmd->nsid, + (unsigned long long)cmd->prp1, (unsigned long long)cmd->prp2, + cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14, cmd->cdw15); + + return cmd_string; +} diff --git a/sys/cam/nvme/nvme_all.h b/sys/cam/nvme/nvme_all.h new file mode 100644 index 0000000..3cff74d --- /dev/null +++ b/sys/cam/nvme/nvme_all.h @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 2015 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef CAM_NVME_NVME_ALL_H +#define CAM_NVME_NVME_ALL_H 1 + +#include <dev/nvme/nvme.h> + +struct ccb_nvmeio; + +#define NVME_REV_1 1 /* Supports NVMe 1.2 or earlier */ + +void nvme_ns_cmd(struct ccb_nvmeio *nvmeio, uint8_t cmd, uint32_t nsid, + uint32_t cdw10, uint32_t cdw11, uint32_t cdw12, uint32_t cdw13, + uint32_t cdw14, uint32_t cdw15); + +int nvme_identify_match(caddr_t identbuffer, caddr_t table_entry); + +void nvme_print_ident(const struct nvme_controller_data *, const struct nvme_namespace_data *); +const char *nvme_op_string(const struct nvme_command *); +const char *nvme_cmd_string(const struct nvme_command *, char *, size_t); + +#endif /* CAM_NVME_NVME_ALL_H */ diff --git a/sys/cam/nvme/nvme_da.c b/sys/cam/nvme/nvme_da.c new file mode 100644 index 0000000..9628530 --- /dev/null +++ b/sys/cam/nvme/nvme_da.c @@ -0,0 +1,1152 @@ +/*- + * Copyright (c) 2015 Netflix, Inc + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Derived from ata_da.c: + * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org> + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> + +#ifdef _KERNEL +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bio.h> +#include <sys/sysctl.h> +#include <sys/taskqueue.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/conf.h> +#include <sys/devicestat.h> +#include <sys/eventhandler.h> +#include <sys/malloc.h> +#include <sys/cons.h> +#include <sys/proc.h> +#include <sys/reboot.h> +#include <geom/geom_disk.h> +#endif /* _KERNEL */ + +#ifndef _KERNEL +#include <stdio.h> +#include <string.h> +#endif /* _KERNEL */ + +#include <cam/cam.h> +#include <cam/cam_ccb.h> +#include <cam/cam_periph.h> +#include <cam/cam_xpt_periph.h> +#include <cam/cam_sim.h> +#include <cam/cam_iosched.h> + +#include <cam/nvme/nvme_all.h> + +typedef enum { + NDA_STATE_NORMAL +} nda_state; + +typedef enum { + NDA_FLAG_OPEN = 0x0001, + NDA_FLAG_DIRTY = 0x0002, + NDA_FLAG_SCTX_INIT = 0x0004, +} nda_flags; + +typedef enum { + NDA_Q_4K = 0x01, + NDA_Q_NONE = 0x00, +} nda_quirks; + +#define NDA_Q_BIT_STRING \ + "\020" \ + "\001Bit 0" + +typedef enum { + NDA_CCB_BUFFER_IO = 0x01, + NDA_CCB_DUMP = 0x02, + NDA_CCB_TRIM = 0x03, + NDA_CCB_TYPE_MASK = 0x0F, +} nda_ccb_state; + +/* Offsets into our private area for storing information */ +#define ccb_state ppriv_field0 +#define ccb_bp ppriv_ptr1 + +struct trim_request { + TAILQ_HEAD(, bio) bps; +}; +struct nda_softc { + struct cam_iosched_softc *cam_iosched; + int outstanding_cmds; /* Number of active commands */ + int refcount; /* Active xpt_action() calls */ + nda_state state; + nda_flags flags; + nda_quirks quirks; + int unmappedio; + uint32_t nsid; /* Namespace ID for this nda device */ + struct disk *disk; + struct task sysctl_task; + struct sysctl_ctx_list sysctl_ctx; + struct sysctl_oid *sysctl_tree; + struct trim_request trim_req; +#ifdef CAM_IO_STATS + struct sysctl_ctx_list sysctl_stats_ctx; + 
struct sysctl_oid *sysctl_stats_tree; + u_int timeouts; + u_int errors; + u_int invalidations; +#endif +}; + +/* Need quirk table */ + +static disk_strategy_t ndastrategy; +static dumper_t ndadump; +static periph_init_t ndainit; +static void ndaasync(void *callback_arg, u_int32_t code, + struct cam_path *path, void *arg); +static void ndasysctlinit(void *context, int pending); +static periph_ctor_t ndaregister; +static periph_dtor_t ndacleanup; +static periph_start_t ndastart; +static periph_oninv_t ndaoninvalidate; +static void ndadone(struct cam_periph *periph, + union ccb *done_ccb); +static int ndaerror(union ccb *ccb, u_int32_t cam_flags, + u_int32_t sense_flags); +static void ndashutdown(void *arg, int howto); +static void ndasuspend(void *arg); + +#ifndef NDA_DEFAULT_SEND_ORDERED +#define NDA_DEFAULT_SEND_ORDERED 1 +#endif +#ifndef NDA_DEFAULT_TIMEOUT +#define NDA_DEFAULT_TIMEOUT 30 /* Timeout in seconds */ +#endif +#ifndef NDA_DEFAULT_RETRY +#define NDA_DEFAULT_RETRY 4 +#endif + + +//static int nda_retry_count = NDA_DEFAULT_RETRY; +static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED; +static int nda_default_timeout = NDA_DEFAULT_TIMEOUT; + +/* + * All NVMe media is non-rotational, so all nvme device instances + * share this to implement the sysctl. + */ +static int nda_rotating_media = 0; + +static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD, 0, + "CAM Direct Access Disk driver"); + +static struct periph_driver ndadriver = +{ + ndainit, "nda", + TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0 +}; + +PERIPHDRIVER_DECLARE(nda, ndadriver); + +static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers"); + +/* + * nice wrappers. Maybe these belong in nvme_all.c instead of + * here, but this is the only place that uses these. Should + * we ever grow another NVME periph, we should move them + * all there wholesale. 
+ */ + +static void +nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio) +{ + cam_fill_nvmeio(nvmeio, + 0, /* retries */ + ndadone, /* cbfcnp */ + CAM_DIR_NONE, /* flags */ + NULL, /* data_ptr */ + 0, /* dxfer_len */ + nda_default_timeout * 1000); /* timeout 5s */ + nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid); +} + +static void +nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, + void *payload, uint32_t num_ranges) +{ + cam_fill_nvmeio(nvmeio, + 0, /* retries */ + ndadone, /* cbfcnp */ + CAM_DIR_OUT, /* flags */ + payload, /* data_ptr */ + num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */ + nda_default_timeout * 1000); /* timeout 5s */ + nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges); +} + +static void +nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, + void *payload, uint64_t lba, uint32_t len, uint32_t count) +{ + cam_fill_nvmeio(nvmeio, + 0, /* retries */ + ndadone, /* cbfcnp */ + CAM_DIR_OUT, /* flags */ + payload, /* data_ptr */ + len, /* dxfer_len */ + nda_default_timeout * 1000); /* timeout 5s */ + nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count); +} + +static void +nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, + struct bio *bp, uint32_t rwcmd) +{ + int flags = rwcmd == NVME_OPC_READ ? 
CAM_DIR_IN : CAM_DIR_OUT; + void *payload; + uint64_t lba; + uint32_t count; + + if (bp->bio_flags & BIO_UNMAPPED) { + flags |= CAM_DATA_BIO; + payload = bp; + } else { + payload = bp->bio_data; + } + + lba = bp->bio_pblkno; + count = bp->bio_bcount / softc->disk->d_sectorsize; + + cam_fill_nvmeio(nvmeio, + 0, /* retries */ + ndadone, /* cbfcnp */ + flags, /* flags */ + payload, /* data_ptr */ + bp->bio_bcount, /* dxfer_len */ + nda_default_timeout * 1000); /* timeout 5s */ + nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count); +} + +static int +ndaopen(struct disk *dp) +{ + struct cam_periph *periph; + struct nda_softc *softc; + int error; + + periph = (struct cam_periph *)dp->d_drv1; + if (cam_periph_acquire(periph) != CAM_REQ_CMP) { + return(ENXIO); + } + + cam_periph_lock(periph); + if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) { + cam_periph_unlock(periph); + cam_periph_release(periph); + return (error); + } + + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, + ("ndaopen\n")); + + softc = (struct nda_softc *)periph->softc; + softc->flags |= NDA_FLAG_OPEN; + + cam_periph_unhold(periph); + cam_periph_unlock(periph); + return (0); +} + +static int +ndaclose(struct disk *dp) +{ + struct cam_periph *periph; + struct nda_softc *softc; + union ccb *ccb; + int error; + + periph = (struct cam_periph *)dp->d_drv1; + softc = (struct nda_softc *)periph->softc; + cam_periph_lock(periph); + + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, + ("ndaclose\n")); + + if ((softc->flags & NDA_FLAG_DIRTY) != 0 && + (periph->flags & CAM_PERIPH_INVALID) == 0 && + cam_periph_hold(periph, PRIBIO) == 0) { + + ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); + nda_nvme_flush(softc, &ccb->nvmeio); + error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, + /*sense_flags*/0, softc->disk->d_devstat); + + if (error != 0) + xpt_print(periph->path, "Synchronize cache failed\n"); + else + softc->flags &= ~NDA_FLAG_DIRTY; + 
xpt_release_ccb(ccb); + cam_periph_unhold(periph); + } + + softc->flags &= ~NDA_FLAG_OPEN; + + while (softc->refcount != 0) + cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1); + cam_periph_unlock(periph); + cam_periph_release(periph); + return (0); +} + +static void +ndaschedule(struct cam_periph *periph) +{ + struct nda_softc *softc = (struct nda_softc *)periph->softc; + + if (softc->state != NDA_STATE_NORMAL) + return; + + cam_iosched_schedule(softc->cam_iosched, periph); +} + +/* + * Actually translate the requested transfer into one the physical driver + * can understand. The transfer is described by a buf and will include + * only one physical transfer. + */ +static void +ndastrategy(struct bio *bp) +{ + struct cam_periph *periph; + struct nda_softc *softc; + + periph = (struct cam_periph *)bp->bio_disk->d_drv1; + softc = (struct nda_softc *)periph->softc; + + cam_periph_lock(periph); + + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp)); + + /* + * If the device has been made invalid, error out + */ + if ((periph->flags & CAM_PERIPH_INVALID) != 0) { + cam_periph_unlock(periph); + biofinish(bp, NULL, ENXIO); + return; + } + + /* + * Place it in the queue of disk activities for this disk + */ + cam_iosched_queue_work(softc->cam_iosched, bp); + + /* + * Schedule ourselves for performing the work. 
+ */ + ndaschedule(periph); + cam_periph_unlock(periph); + + return; +} + +static int +ndadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) +{ + struct cam_periph *periph; + struct nda_softc *softc; + u_int secsize; + union ccb ccb; + struct disk *dp; + uint64_t lba; + uint32_t count; + int error = 0; + + dp = arg; + periph = dp->d_drv1; + softc = (struct nda_softc *)periph->softc; + cam_periph_lock(periph); + secsize = softc->disk->d_sectorsize; + lba = offset / secsize; + count = length / secsize; + + if ((periph->flags & CAM_PERIPH_INVALID) != 0) { + cam_periph_unlock(periph); + return (ENXIO); + } + + if (length > 0) { + xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); + ccb.ccb_h.ccb_state = NDA_CCB_DUMP; + nda_nvme_write(softc, &ccb.nvmeio, virtual, lba, length, count); + xpt_polled_action(&ccb); + + error = cam_periph_error(&ccb, + 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); + if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, + /*reduction*/0, /*timeout*/0, /*getcount_only*/0); + if (error != 0) + printf("Aborting dump due to I/O error.\n"); + + cam_periph_unlock(periph); + return (error); + } + + /* Flush */ + xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); + + ccb.ccb_h.ccb_state = NDA_CCB_DUMP; + nda_nvme_flush(softc, &ccb.nvmeio); + xpt_polled_action(&ccb); + + error = cam_periph_error(&ccb, + 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); + if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, + /*reduction*/0, /*timeout*/0, /*getcount_only*/0); + if (error != 0) + xpt_print(periph->path, "flush cmd failed\n"); + cam_periph_unlock(periph); + return (error); +} + +static void +ndainit(void) +{ + cam_status status; + + /* + * Install a global async callback. This callback will + * receive async callbacks like "new device found". 
+ */ + status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL); + + if (status != CAM_REQ_CMP) { + printf("nda: Failed to attach master async callback " + "due to status 0x%x!\n", status); + } else if (nda_send_ordered) { + + /* Register our event handlers */ + if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend, + NULL, EVENTHANDLER_PRI_LAST)) == NULL) + printf("ndainit: power event registration failed!\n"); + if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown, + NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) + printf("ndainit: shutdown event registration failed!\n"); + } +} + +/* + * Callback from GEOM, called when it has finished cleaning up its + * resources. + */ +static void +ndadiskgonecb(struct disk *dp) +{ + struct cam_periph *periph; + + periph = (struct cam_periph *)dp->d_drv1; + + cam_periph_release(periph); +} + +static void +ndaoninvalidate(struct cam_periph *periph) +{ + struct nda_softc *softc; + + softc = (struct nda_softc *)periph->softc; + + /* + * De-register any async callbacks. + */ + xpt_register_async(0, ndaasync, periph, periph->path); +#ifdef CAM_IO_STATS + softc->invalidations++; +#endif + + /* + * Return all queued I/O with ENXIO. + * XXX Handle any transactions queued to the card + * with XPT_ABORT_CCB. + */ + cam_iosched_flush(softc->cam_iosched, NULL, ENXIO); + + disk_gone(softc->disk); +} + +static void +ndacleanup(struct cam_periph *periph) +{ + struct nda_softc *softc; + + softc = (struct nda_softc *)periph->softc; + + cam_periph_unlock(periph); + + cam_iosched_fini(softc->cam_iosched); + + /* + * If we can't free the sysctl tree, oh well... 
+ */ + if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) { +#ifdef CAM_IO_STATS + if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl stats context\n"); +#endif + if (sysctl_ctx_free(&softc->sysctl_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl context\n"); + } + + disk_destroy(softc->disk); + free(softc, M_DEVBUF); + cam_periph_lock(periph); +} + +static void +ndaasync(void *callback_arg, u_int32_t code, + struct cam_path *path, void *arg) +{ + struct cam_periph *periph; + + periph = (struct cam_periph *)callback_arg; + switch (code) { + case AC_FOUND_DEVICE: + { + struct ccb_getdev *cgd; + cam_status status; + + cgd = (struct ccb_getdev *)arg; + if (cgd == NULL) + break; + + if (cgd->protocol != PROTO_NVME) + break; + + /* + * Allocate a peripheral instance for + * this device and start the probe + * process. + */ + status = cam_periph_alloc(ndaregister, ndaoninvalidate, + ndacleanup, ndastart, + "nda", CAM_PERIPH_BIO, + path, ndaasync, + AC_FOUND_DEVICE, cgd); + + if (status != CAM_REQ_CMP + && status != CAM_REQ_INPROG) + printf("ndaasync: Unable to attach to new device " + "due to status 0x%x\n", status); + break; + } + case AC_ADVINFO_CHANGED: + { + uintptr_t buftype; + + buftype = (uintptr_t)arg; + if (buftype == CDAI_TYPE_PHYS_PATH) { + struct nda_softc *softc; + + softc = periph->softc; + disk_attr_changed(softc->disk, "GEOM::physpath", + M_NOWAIT); + } + break; + } + case AC_LOST_DEVICE: + default: + cam_periph_async(periph, code, path, arg); + break; + } +} + +static void +ndasysctlinit(void *context, int pending) +{ + struct cam_periph *periph; + struct nda_softc *softc; + char tmpstr[80], tmpstr2[80]; + + periph = (struct cam_periph *)context; + + /* periph was held for us when this task was enqueued */ + if ((periph->flags & CAM_PERIPH_INVALID) != 0) { + cam_periph_release(periph); + return; + } + + softc = (struct nda_softc *)periph->softc; + snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", 
periph->unit_number); + snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number); + + sysctl_ctx_init(&softc->sysctl_ctx); + softc->flags |= NDA_FLAG_SCTX_INIT; + softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx, + SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2, + CTLFLAG_RD, 0, tmpstr); + if (softc->sysctl_tree == NULL) { + printf("ndasysctlinit: unable to allocate sysctl tree\n"); + cam_periph_release(periph); + return; + } + + SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->unmappedio, 0, "Unmapped I/O leaf"); + + SYSCTL_ADD_INT(&softc->sysctl_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, + "rotating", + CTLFLAG_RD | CTLFLAG_MPSAFE, + &nda_rotating_media, + 0, + "Rotating media"); + +#ifdef CAM_IO_STATS + softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats", + CTLFLAG_RD, 0, "Statistics"); + if (softc->sysctl_stats_tree == NULL) { + printf("ndasysctlinit: unable to allocate sysctl tree for stats\n"); + cam_periph_release(periph); + return; + } + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->timeouts, 0, + "Device timeouts reported by the SIM"); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->errors, 0, + "Transport errors reported by the SIM."); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->invalidations, 0, + "Device pack invalidations."); +#endif + + cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, + softc->sysctl_tree); + + cam_periph_release(periph); +} + +static int +ndagetattr(struct bio *bp) +{ + int ret; + struct cam_periph *periph; + 
+ periph = (struct cam_periph *)bp->bio_disk->d_drv1; + cam_periph_lock(periph); + ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, + periph->path); + cam_periph_unlock(periph); + if (ret == 0) + bp->bio_completed = bp->bio_length; + return ret; +} + +static cam_status +ndaregister(struct cam_periph *periph, void *arg) +{ + struct nda_softc *softc; + struct disk *disk; + struct ccb_pathinq cpi; + struct ccb_getdev *cgd; + const struct nvme_namespace_data *nsd; + const struct nvme_controller_data *cd; + char announce_buf[80]; +// caddr_t match; + u_int maxio; + int quirks; + + cgd = (struct ccb_getdev *)arg; + if (cgd == NULL) { + printf("ndaregister: no getdev CCB, can't register device\n"); + return(CAM_REQ_CMP_ERR); + } + nsd = cgd->nvme_data; + cd = cgd->nvme_cdata; + + softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF, + M_NOWAIT | M_ZERO); + + if (softc == NULL) { + printf("ndaregister: Unable to probe new device. " + "Unable to allocate softc\n"); + return(CAM_REQ_CMP_ERR); + } + + if (cam_iosched_init(&softc->cam_iosched, periph) != 0) { + printf("ndaregister: Unable to probe new device. " + "Unable to allocate iosched memory\n"); + return(CAM_REQ_CMP_ERR); + } + + /* ident_data parsing */ + + periph->softc = softc; + +#if 0 + /* + * See if this device has any quirks. 
+ */ + match = cam_quirkmatch((caddr_t)&cgd->ident_data, + (caddr_t)nda_quirk_table, + sizeof(nda_quirk_table)/sizeof(*nda_quirk_table), + sizeof(*nda_quirk_table), ata_identify_match); + if (match != NULL) + softc->quirks = ((struct nda_quirk_entry *)match)->quirks; + else +#endif + softc->quirks = NDA_Q_NONE; + + bzero(&cpi, sizeof(cpi)); + xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + + TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph); + + /* + * The name space ID is the lun, save it for later I/O + */ + softc->nsid = (uint16_t)xpt_path_lun_id(periph->path); + + /* + * Register this media as a disk + */ + (void)cam_periph_hold(periph, PRIBIO); + cam_periph_unlock(periph); + snprintf(announce_buf, sizeof(announce_buf), + "kern.cam.nda.%d.quirks", periph->unit_number); + quirks = softc->quirks; + TUNABLE_INT_FETCH(announce_buf, &quirks); + softc->quirks = quirks; + cam_iosched_set_sort_queue(softc->cam_iosched, 0); + softc->disk = disk = disk_alloc(); + strlcpy(softc->disk->d_descr, cd->mn, + MIN(sizeof(softc->disk->d_descr), sizeof(cd->mn))); + strlcpy(softc->disk->d_ident, cd->sn, + MIN(sizeof(softc->disk->d_ident), sizeof(cd->sn))); + disk->d_rotation_rate = 0; /* Spinning rust need not apply */ + disk->d_open = ndaopen; + disk->d_close = ndaclose; + disk->d_strategy = ndastrategy; + disk->d_getattr = ndagetattr; + disk->d_dump = ndadump; + disk->d_gone = ndadiskgonecb; + disk->d_name = "nda"; + disk->d_drv1 = periph; + disk->d_unit = periph->unit_number; + maxio = cpi.maxio; /* Honor max I/O size of SIM */ + if (maxio == 0) + maxio = DFLTPHYS; /* traditional default */ + else if (maxio > MAXPHYS) + maxio = MAXPHYS; /* for safety */ + disk->d_maxsize = maxio; + disk->d_sectorsize = 1 << nsd->lbaf[nsd->flbas.format].lbads; + disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze); + disk->d_delmaxsize = disk->d_mediasize; + disk->d_flags = 
DISKFLAG_DIRECT_COMPLETION; +// if (cd->oncs.dsm) // XXX broken? + disk->d_flags |= DISKFLAG_CANDELETE; + if (cd->vwc.present) + disk->d_flags |= DISKFLAG_CANFLUSHCACHE; + if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { + disk->d_flags |= DISKFLAG_UNMAPPED_BIO; + softc->unmappedio = 1; + } + /* + * d_ident and d_descr are both far bigger than the length of either + * the serial or model number strings. + */ + nvme_strvis(disk->d_descr, cd->mn, + sizeof(disk->d_descr), NVME_MODEL_NUMBER_LENGTH); + nvme_strvis(disk->d_ident, cd->sn, + sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH); + disk->d_hba_vendor = cpi.hba_vendor; + disk->d_hba_device = cpi.hba_device; + disk->d_hba_subvendor = cpi.hba_subvendor; + disk->d_hba_subdevice = cpi.hba_subdevice; + disk->d_stripesize = disk->d_sectorsize; + disk->d_stripeoffset = 0; + disk->d_devstat = devstat_new_entry(periph->periph_name, + periph->unit_number, disk->d_sectorsize, + DEVSTAT_ALL_SUPPORTED, + DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport), + DEVSTAT_PRIORITY_DISK); + + /* + * Acquire a reference to the periph before we register with GEOM. + * We'll release this reference once GEOM calls us back (via + * ndadiskgonecb()) telling us that our provider has been freed. + */ + if (cam_periph_acquire(periph) != CAM_REQ_CMP) { + xpt_print(periph->path, "%s: lost periph during " + "registration!\n", __func__); + cam_periph_lock(periph); + return (CAM_REQ_CMP_ERR); + } + disk_create(softc->disk, DISK_VERSION); + cam_periph_lock(periph); + cam_periph_unhold(periph); + + snprintf(announce_buf, sizeof(announce_buf), + "%juMB (%ju %u byte sectors)", + (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)), + (uintmax_t)disk->d_mediasize / disk->d_sectorsize, + disk->d_sectorsize); + xpt_announce_periph(periph, announce_buf); + xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING); + + /* + * Create our sysctl variables, now that we know + * we have successfully attached. 
+ */ + if (cam_periph_acquire(periph) == CAM_REQ_CMP) + taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task); + + /* + * Register for device going away and info about the drive + * changing (though with NVMe, it can't) + */ + xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED, + ndaasync, periph, periph->path); + + softc->state = NDA_STATE_NORMAL; + return(CAM_REQ_CMP); +} + +static void +ndastart(struct cam_periph *periph, union ccb *start_ccb) +{ + struct nda_softc *softc = (struct nda_softc *)periph->softc; + struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio; + + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n")); + + switch (softc->state) { + case NDA_STATE_NORMAL: + { + struct bio *bp; + + bp = cam_iosched_next_bio(softc->cam_iosched); + CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp)); + if (bp == NULL) { + xpt_release_ccb(start_ccb); + break; + } + + switch (bp->bio_cmd) { + case BIO_WRITE: + softc->flags |= NDA_FLAG_DIRTY; + /* FALLTHROUGH */ + case BIO_READ: + { +#ifdef NDA_TEST_FAILURE + int fail = 0; + + /* + * Support the failure ioctls. If the command is a + * read, and there are pending forced read errors, or + * if a write and pending write errors, then fail this + * operation with EIO. This is useful for testing + * purposes. Also, support having every Nth read fail. + * + * This is a rather blunt tool. 
+ */ + if (bp->bio_cmd == BIO_READ) { + if (softc->force_read_error) { + softc->force_read_error--; + fail = 1; + } + if (softc->periodic_read_error > 0) { + if (++softc->periodic_read_count >= + softc->periodic_read_error) { + softc->periodic_read_count = 0; + fail = 1; + } + } + } else { + if (softc->force_write_error) { + softc->force_write_error--; + fail = 1; + } + } + if (fail) { + biofinish(bp, NULL, EIO); + xpt_release_ccb(start_ccb); + ndaschedule(periph); + return; + } +#endif + KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || + round_page(bp->bio_bcount + bp->bio_ma_offset) / + PAGE_SIZE == bp->bio_ma_n, + ("Short bio %p", bp)); + nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ? + NVME_OPC_READ : NVME_OPC_WRITE); + break; + } + case BIO_DELETE: + { + struct nvme_dsm_range *dsm_range; + + dsm_range = + malloc(sizeof(*dsm_range), M_NVMEDA, M_ZERO | M_WAITOK); + dsm_range->length = + bp->bio_bcount / softc->disk->d_sectorsize; + dsm_range->starting_lba = + bp->bio_offset / softc->disk->d_sectorsize; + bp->bio_driver2 = dsm_range; + nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1); + start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM; + start_ccb->ccb_h.flags |= CAM_UNLOCKED; + cam_iosched_submit_trim(softc->cam_iosched); /* XXX */ + goto out; + } + case BIO_FLUSH: + nda_nvme_flush(softc, nvmeio); + break; + } + start_ccb->ccb_h.ccb_state = NDA_CCB_BUFFER_IO; + start_ccb->ccb_h.flags |= CAM_UNLOCKED; +out: + start_ccb->ccb_h.ccb_bp = bp; + softc->outstanding_cmds++; + softc->refcount++; + cam_periph_unlock(periph); + xpt_action(start_ccb); + cam_periph_lock(periph); + softc->refcount--; + + /* May have more work to do, so ensure we stay scheduled */ + ndaschedule(periph); + break; + } + } +} + +static void +ndadone(struct cam_periph *periph, union ccb *done_ccb) +{ + struct nda_softc *softc; + struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio; + struct cam_path *path; + int state; + + softc = (struct nda_softc *)periph->softc; + path = 
done_ccb->ccb_h.path; + + CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n")); + + state = nvmeio->ccb_h.ccb_state & NDA_CCB_TYPE_MASK; + switch (state) { + case NDA_CCB_BUFFER_IO: + case NDA_CCB_TRIM: + { + struct bio *bp; + int error; + + cam_periph_lock(periph); + bp = (struct bio *)done_ccb->ccb_h.ccb_bp; + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + error = ndaerror(done_ccb, 0, 0); + if (error == ERESTART) { + /* A retry was scheduled, so just return. */ + cam_periph_unlock(periph); + return; + } + if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(path, + /*relsim_flags*/0, + /*reduction*/0, + /*timeout*/0, + /*getcount_only*/0); + } else { + if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) + panic("REQ_CMP with QFRZN"); + error = 0; + } + bp->bio_error = error; + if (error != 0) { + bp->bio_resid = bp->bio_bcount; + bp->bio_flags |= BIO_ERROR; + } else { + if (state == NDA_CCB_TRIM) + bp->bio_resid = 0; + else + bp->bio_resid = nvmeio->resid; + if (bp->bio_resid > 0) + bp->bio_flags |= BIO_ERROR; + } + if (state == NDA_CCB_TRIM) + free(bp->bio_driver2, M_NVMEDA); + softc->outstanding_cmds--; + + cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); + xpt_release_ccb(done_ccb); + if (state == NDA_CCB_TRIM) { +#ifdef notyet + TAILQ_HEAD(, bio) queue; + struct bio *bp1; + + TAILQ_INIT(&queue); + TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue); +#endif + cam_iosched_trim_done(softc->cam_iosched); + ndaschedule(periph); + cam_periph_unlock(periph); +#ifdef notyet +/* Not yet collapsing several BIO_DELETE requests into one TRIM */ + while ((bp1 = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, bp1, bio_queue); + bp1->bio_error = error; + if (error != 0) { + bp1->bio_flags |= BIO_ERROR; + bp1->bio_resid = bp1->bio_bcount; + } else + bp1->bio_resid = 0; + biodone(bp1); + } +#else + biodone(bp); +#endif + } else { + ndaschedule(periph); + cam_periph_unlock(periph); + biodone(bp); + } + return; + } + case 
NDA_CCB_DUMP: + /* No-op. We're polling */ + return; + default: + break; + } + xpt_release_ccb(done_ccb); +} + +static int +ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags) +{ + struct nda_softc *softc; + struct cam_periph *periph; + + periph = xpt_path_periph(ccb->ccb_h.path); + softc = (struct nda_softc *)periph->softc; + + switch (ccb->ccb_h.status & CAM_STATUS_MASK) { + case CAM_CMD_TIMEOUT: +#ifdef CAM_IO_STATS + softc->timeouts++; +#endif + break; + case CAM_REQ_ABORTED: + case CAM_REQ_CMP_ERR: + case CAM_REQ_TERMIO: + case CAM_UNREC_HBA_ERROR: + case CAM_DATA_RUN_ERR: + case CAM_ATA_STATUS_ERROR: +#ifdef CAM_IO_STATS + softc->errors++; +#endif + break; + default: + break; + } + + return(cam_periph_error(ccb, cam_flags, sense_flags, NULL)); +} + +/* + * Step through all NDA peripheral drivers, and if the device is still open, + * sync the disk cache to physical media. + */ +static void +ndaflush(void) +{ + struct cam_periph *periph; + struct nda_softc *softc; + union ccb *ccb; + int error; + + CAM_PERIPH_FOREACH(periph, &ndadriver) { + softc = (struct nda_softc *)periph->softc; + if (SCHEDULER_STOPPED()) { + /* If we paniced with the lock held, do not recurse. */ + if (!cam_periph_owned(periph) && + (softc->flags & NDA_FLAG_OPEN)) { + ndadump(softc->disk, NULL, 0, 0, 0); + } + continue; + } + cam_periph_lock(periph); + /* + * We only sync the cache if the drive is still open, and + * if the drive is capable of it.. 
+ */ + if ((softc->flags & NDA_FLAG_OPEN) == 0) { + cam_periph_unlock(periph); + continue; + } + + ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); + nda_nvme_flush(softc, &ccb->nvmeio); + error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, + /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY, + softc->disk->d_devstat); + if (error != 0) + xpt_print(periph->path, "Synchronize cache failed\n"); + xpt_release_ccb(ccb); + cam_periph_unlock(periph); + } +} + +static void +ndashutdown(void *arg, int howto) +{ + + ndaflush(); +} + +static void +ndasuspend(void *arg) +{ + + ndaflush(); +} diff --git a/sys/cam/nvme/nvme_xpt.c b/sys/cam/nvme/nvme_xpt.c new file mode 100644 index 0000000..8410fc3 --- /dev/null +++ b/sys/cam/nvme/nvme_xpt.c @@ -0,0 +1,607 @@ +/*- + * Copyright (c) 2015 Netflix, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification, immediately at the beginning of the file. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * derived from ata_xpt.c: Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org> + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/endian.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/time.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/interrupt.h> +#include <sys/sbuf.h> + +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> + +#include <cam/cam.h> +#include <cam/cam_ccb.h> +#include <cam/cam_queue.h> +#include <cam/cam_periph.h> +#include <cam/cam_sim.h> +#include <cam/cam_xpt.h> +#include <cam/cam_xpt_sim.h> +#include <cam/cam_xpt_periph.h> +#include <cam/cam_xpt_internal.h> +#include <cam/cam_debug.h> + +#include <cam/scsi/scsi_all.h> +#include <cam/scsi/scsi_message.h> +#include <cam/nvme/nvme_all.h> +#include <machine/stdarg.h> /* for xpt_print below */ +#include "opt_cam.h" + +struct nvme_quirk_entry { + u_int quirks; +#define CAM_QUIRK_MAXTAGS 1 + u_int mintags; + u_int maxtags; +}; + +/* Not even sure why we need this */ +static periph_init_t nvme_probe_periph_init; + +static struct periph_driver nvme_probe_driver = +{ + nvme_probe_periph_init, "nvme_probe", + TAILQ_HEAD_INITIALIZER(nvme_probe_driver.units), /* generation */ 0, + CAM_PERIPH_DRV_EARLY +}; + +PERIPHDRIVER_DECLARE(nvme_probe, nvme_probe_driver); + +typedef enum { + NVME_PROBE_IDENTIFY, + NVME_PROBE_DONE, + 
NVME_PROBE_INVALID, + NVME_PROBE_RESET +} nvme_probe_action; + +static char *nvme_probe_action_text[] = { + "NVME_PROBE_IDENTIFY", + "NVME_PROBE_DONE", + "NVME_PROBE_INVALID", + "NVME_PROBE_RESET", +}; + +#define NVME_PROBE_SET_ACTION(softc, newaction) \ +do { \ + char **text; \ + text = nvme_probe_action_text; \ + CAM_DEBUG((softc)->periph->path, CAM_DEBUG_PROBE, \ + ("Probe %s to %s\n", text[(softc)->action], \ + text[(newaction)])); \ + (softc)->action = (newaction); \ +} while(0) + +typedef enum { + NVME_PROBE_NO_ANNOUNCE = 0x04 +} nvme_probe_flags; + +typedef struct { + TAILQ_HEAD(, ccb_hdr) request_ccbs; + nvme_probe_action action; + nvme_probe_flags flags; + int restart; + struct cam_periph *periph; +} nvme_probe_softc; + +static struct nvme_quirk_entry nvme_quirk_table[] = +{ + { +// { +// T_ANY, SIP_MEDIA_REMOVABLE|SIP_MEDIA_FIXED, +// /*vendor*/"*", /*product*/"*", /*revision*/"*" +// }, + .quirks = 0, .mintags = 0, .maxtags = 0 + }, +}; + +static const int nvme_quirk_table_size = + sizeof(nvme_quirk_table) / sizeof(*nvme_quirk_table); + +static cam_status nvme_probe_register(struct cam_periph *periph, + void *arg); +static void nvme_probe_schedule(struct cam_periph *nvme_probe_periph); +static void nvme_probe_start(struct cam_periph *periph, union ccb *start_ccb); +static void nvme_probe_cleanup(struct cam_periph *periph); +//static void nvme_find_quirk(struct cam_ed *device); +static void nvme_scan_lun(struct cam_periph *periph, + struct cam_path *path, cam_flags flags, + union ccb *ccb); +static struct cam_ed * + nvme_alloc_device(struct cam_eb *bus, struct cam_et *target, + lun_id_t lun_id); +static void nvme_device_transport(struct cam_path *path); +static void nvme_dev_async(u_int32_t async_code, + struct cam_eb *bus, + struct cam_et *target, + struct cam_ed *device, + void *async_arg); +static void nvme_action(union ccb *start_ccb); +static void nvme_announce_periph(struct cam_periph *periph); + +static struct xpt_xport nvme_xport = { + 
.alloc_device = nvme_alloc_device, + .action = nvme_action, + .async = nvme_dev_async, + .announce = nvme_announce_periph, +}; + +struct xpt_xport * +nvme_get_xport(void) +{ + return (&nvme_xport); +} + +static void +nvme_probe_periph_init() +{ + printf("nvme cam probe device init\n"); +} + +static cam_status +nvme_probe_register(struct cam_periph *periph, void *arg) +{ + union ccb *request_ccb; /* CCB representing the probe request */ + cam_status status; + nvme_probe_softc *softc; + + request_ccb = (union ccb *)arg; + if (request_ccb == NULL) { + printf("nvme_probe_register: no probe CCB, " + "can't register device\n"); + return(CAM_REQ_CMP_ERR); + } + + softc = (nvme_probe_softc *)malloc(sizeof(*softc), M_CAMXPT, M_ZERO | M_NOWAIT); + + if (softc == NULL) { + printf("nvme_probe_register: Unable to probe new device. " + "Unable to allocate softc\n"); + return(CAM_REQ_CMP_ERR); + } + TAILQ_INIT(&softc->request_ccbs); + TAILQ_INSERT_TAIL(&softc->request_ccbs, &request_ccb->ccb_h, + periph_links.tqe); + softc->flags = 0; + periph->softc = softc; + softc->periph = periph; + softc->action = NVME_PROBE_INVALID; + status = cam_periph_acquire(periph); + if (status != CAM_REQ_CMP) { + return (status); + } + CAM_DEBUG(periph->path, CAM_DEBUG_PROBE, ("Probe started\n")); + +// nvme_device_transport(periph->path); + nvme_probe_schedule(periph); + + return(CAM_REQ_CMP); +} + +static void +nvme_probe_schedule(struct cam_periph *periph) +{ + union ccb *ccb; + nvme_probe_softc *softc; + + softc = (nvme_probe_softc *)periph->softc; + ccb = (union ccb *)TAILQ_FIRST(&softc->request_ccbs); + + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY); + + if (ccb->crcn.flags & CAM_EXPECT_INQ_CHANGE) + softc->flags |= NVME_PROBE_NO_ANNOUNCE; + else + softc->flags &= ~NVME_PROBE_NO_ANNOUNCE; + + xpt_schedule(periph, CAM_PRIORITY_XPT); +} + +static void +nvme_probe_start(struct cam_periph *periph, union ccb *start_ccb) +{ + struct ccb_nvmeio *nvmeio; + struct ccb_scsiio *csio; + 
nvme_probe_softc *softc; + struct cam_path *path; + const struct nvme_namespace_data *nvme_data; + lun_id_t lun; + + CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("nvme_probe_start\n")); + + softc = (nvme_probe_softc *)periph->softc; + path = start_ccb->ccb_h.path; + nvmeio = &start_ccb->nvmeio; + csio = &start_ccb->csio; + nvme_data = periph->path->device->nvme_data; + + if (softc->restart) { + softc->restart = 0; + if (periph->path->device->flags & CAM_DEV_UNCONFIGURED) + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_RESET); + else + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY); + } + + /* + * Other transports have to ask their SIM to do a lot of action. + * NVMe doesn't, so don't do the dance. Just do things + * directly. + */ + switch (softc->action) { + case NVME_PROBE_RESET: + /* FALLTHROUGH */ + case NVME_PROBE_IDENTIFY: + nvme_device_transport(path); + /* + * Test for lun == CAM_LUN_WILDCARD is lame, but + * appears to be necessary here. XXX + */ + lun = xpt_path_lun_id(periph->path); + if (lun == CAM_LUN_WILDCARD || + periph->path->device->flags & CAM_DEV_UNCONFIGURED) { + path->device->flags &= ~CAM_DEV_UNCONFIGURED; + xpt_acquire_device(path->device); + start_ccb->ccb_h.func_code = XPT_GDEV_TYPE; + xpt_action(start_ccb); + xpt_async(AC_FOUND_DEVICE, path, start_ccb); + } + NVME_PROBE_SET_ACTION(softc, NVME_PROBE_DONE); + break; + default: + panic("nvme_probe_start: invalid action state 0x%x\n", softc->action); + } + /* + * Probing is now done. We need to complete any lingering items + * in the queue, though there shouldn't be any. 
+ */ + xpt_release_ccb(start_ccb); + CAM_DEBUG(periph->path, CAM_DEBUG_PROBE, ("Probe completed\n")); + while ((start_ccb = (union ccb *)TAILQ_FIRST(&softc->request_ccbs))) { + TAILQ_REMOVE(&softc->request_ccbs, + &start_ccb->ccb_h, periph_links.tqe); + start_ccb->ccb_h.status = CAM_REQ_CMP; + xpt_done(start_ccb); + } +// XXX not sure I need this +// XXX unlike other XPTs, we never freeze the queue since we have a super-simple +// XXX state machine + /* Drop freeze taken due to CAM_DEV_QFREEZE flag set. -- did we really do this? */ +// cam_release_devq(path, 0, 0, 0, FALSE); + cam_periph_invalidate(periph); + cam_periph_release_locked(periph); +} + +static void +nvme_probe_cleanup(struct cam_periph *periph) +{ + free(periph->softc, M_CAMXPT); +} + +#if 0 +/* XXX should be used, don't delete */ +static void +nvme_find_quirk(struct cam_ed *device) +{ + struct nvme_quirk_entry *quirk; + caddr_t match; + + match = cam_quirkmatch((caddr_t)&device->nvme_data, + (caddr_t)nvme_quirk_table, + nvme_quirk_table_size, + sizeof(*nvme_quirk_table), nvme_identify_match); + + if (match == NULL) + panic("xpt_find_quirk: device didn't match wildcard entry!!"); + + quirk = (struct nvme_quirk_entry *)match; + device->quirk = quirk; + if (quirk->quirks & CAM_QUIRK_MAXTAGS) { + device->mintags = quirk->mintags; + device->maxtags = quirk->maxtags; + } +} +#endif + +static void +nvme_scan_lun(struct cam_periph *periph, struct cam_path *path, + cam_flags flags, union ccb *request_ccb) +{ + struct ccb_pathinq cpi; + cam_status status; + struct cam_periph *old_periph; + int lock; + + CAM_DEBUG(path, CAM_DEBUG_TRACE, ("nvme_scan_lun\n")); + + xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + + if (cpi.ccb_h.status != CAM_REQ_CMP) { + if (request_ccb != NULL) { + request_ccb->ccb_h.status = cpi.ccb_h.status; + xpt_done(request_ccb); + } + return; + } + + if (xpt_path_lun_id(path) == CAM_LUN_WILDCARD) { + CAM_DEBUG(path, 
CAM_DEBUG_TRACE, ("nvme_scan_lun ignoring bus\n")); + request_ccb->ccb_h.status = CAM_REQ_CMP; /* XXX signal error ? */ + xpt_done(request_ccb); + return; + } + + lock = (xpt_path_owned(path) == 0); + if (lock) + xpt_path_lock(path); + if ((old_periph = cam_periph_find(path, "nvme_probe")) != NULL) { + if ((old_periph->flags & CAM_PERIPH_INVALID) == 0) { + nvme_probe_softc *softc; + + softc = (nvme_probe_softc *)old_periph->softc; + TAILQ_INSERT_TAIL(&softc->request_ccbs, + &request_ccb->ccb_h, periph_links.tqe); + softc->restart = 1; + CAM_DEBUG(path, CAM_DEBUG_TRACE, + ("restarting nvme_probe device\n")); + } else { + request_ccb->ccb_h.status = CAM_REQ_CMP_ERR; + CAM_DEBUG(path, CAM_DEBUG_TRACE, + ("Failing to restart nvme_probe device\n")); + xpt_done(request_ccb); + } + } else { + CAM_DEBUG(path, CAM_DEBUG_TRACE, + ("Adding nvme_probe device\n")); + status = cam_periph_alloc(nvme_probe_register, NULL, nvme_probe_cleanup, + nvme_probe_start, "nvme_probe", + CAM_PERIPH_BIO, + request_ccb->ccb_h.path, NULL, 0, + request_ccb); + + if (status != CAM_REQ_CMP) { + xpt_print(path, "xpt_scan_lun: cam_alloc_periph " + "returned an error, can't continue probe\n"); + request_ccb->ccb_h.status = status; + xpt_done(request_ccb); + } + } + if (lock) + xpt_path_unlock(path); +} + +static struct cam_ed * +nvme_alloc_device(struct cam_eb *bus, struct cam_et *target, lun_id_t lun_id) +{ + struct nvme_quirk_entry *quirk; + struct cam_ed *device; + + device = xpt_alloc_device(bus, target, lun_id); + if (device == NULL) + return (NULL); + + /* + * Take the default quirk entry until we have inquiry + * data from nvme and can determine a better quirk to use. 
+ */ + quirk = &nvme_quirk_table[nvme_quirk_table_size - 1]; + device->quirk = (void *)quirk; + device->mintags = 0; + device->maxtags = 0; + device->inq_flags = 0; + device->queue_flags = 0; + device->device_id = NULL; /* XXX Need to set this somewhere */ + device->device_id_len = 0; + device->serial_num = NULL; /* XXX Need to set this somewhere */ + device->serial_num_len = 0; + return (device); +} + +static void +nvme_device_transport(struct cam_path *path) +{ + struct ccb_pathinq cpi; + struct ccb_trans_settings cts; + /* XXX get data from nvme namespace and other info ??? */ + + /* Get transport information from the SIM */ + xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + + path->device->transport = cpi.transport; + path->device->transport_version = cpi.transport_version; + + path->device->protocol = cpi.protocol; + path->device->protocol_version = cpi.protocol_version; + + /* Tell the controller what we think */ + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE); + cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + cts.transport = path->device->transport; + cts.transport_version = path->device->transport_version; + cts.protocol = path->device->protocol; + cts.protocol_version = path->device->protocol_version; + cts.proto_specific.valid = 0; + cts.xport_specific.valid = 0; + xpt_action((union ccb *)&cts); +} + +static void +nvme_dev_advinfo(union ccb *start_ccb) +{ + struct cam_ed *device; + struct ccb_dev_advinfo *cdai; + off_t amt; + + start_ccb->ccb_h.status = CAM_REQ_INVALID; + device = start_ccb->ccb_h.path->device; + cdai = &start_ccb->cdai; + switch(cdai->buftype) { + case CDAI_TYPE_SCSI_DEVID: + if (cdai->flags & CDAI_FLAG_STORE) + return; + cdai->provsiz = device->device_id_len; + if (device->device_id_len == 0) + break; + amt = device->device_id_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, 
device->device_id, amt); + break; + case CDAI_TYPE_SERIAL_NUM: + if (cdai->flags & CDAI_FLAG_STORE) + return; + cdai->provsiz = device->serial_num_len; + if (device->serial_num_len == 0) + break; + amt = device->serial_num_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, device->serial_num, amt); + break; + case CDAI_TYPE_PHYS_PATH: + if (cdai->flags & CDAI_FLAG_STORE) { + if (device->physpath != NULL) + free(device->physpath, M_CAMXPT); + device->physpath_len = cdai->bufsiz; + /* Clear existing buffer if zero length */ + if (cdai->bufsiz == 0) + break; + device->physpath = malloc(cdai->bufsiz, M_CAMXPT, M_NOWAIT); + if (device->physpath == NULL) { + start_ccb->ccb_h.status = CAM_REQ_ABORTED; + return; + } + memcpy(device->physpath, cdai->buf, cdai->bufsiz); + } else { + cdai->provsiz = device->physpath_len; + if (device->physpath_len == 0) + break; + amt = device->physpath_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, device->physpath, amt); + } + break; + default: + return; + } + start_ccb->ccb_h.status = CAM_REQ_CMP; + + if (cdai->flags & CDAI_FLAG_STORE) { + xpt_async(AC_ADVINFO_CHANGED, start_ccb->ccb_h.path, + (void *)(uintptr_t)cdai->buftype); + } +} + +static void +nvme_action(union ccb *start_ccb) +{ + CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE, + ("nvme_action: func= %#x\n", start_ccb->ccb_h.func_code)); + + switch (start_ccb->ccb_h.func_code) { + case XPT_SCAN_BUS: + printf("NVME scan BUS started -- ignored\n"); +// break; + case XPT_SCAN_TGT: + printf("NVME scan TGT started -- ignored\n"); +// break; + case XPT_SCAN_LUN: + printf("NVME scan started\n"); + nvme_scan_lun(start_ccb->ccb_h.path->periph, + start_ccb->ccb_h.path, start_ccb->crcn.flags, + start_ccb); + break; + case XPT_DEV_ADVINFO: + nvme_dev_advinfo(start_ccb); + break; + + default: + xpt_action_default(start_ccb); + break; + } +} + +/* + * Handle any per-device event notifications that require action by the XPT. 
+ */ +static void +nvme_dev_async(u_int32_t async_code, struct cam_eb *bus, struct cam_et *target, + struct cam_ed *device, void *async_arg) +{ + + /* + * We only need to handle events for real devices. + */ + if (target->target_id == CAM_TARGET_WILDCARD + || device->lun_id == CAM_LUN_WILDCARD) + return; + + if (async_code == AC_LOST_DEVICE && + (device->flags & CAM_DEV_UNCONFIGURED) == 0) { + device->flags |= CAM_DEV_UNCONFIGURED; + xpt_release_device(device); + } +} + +static void +nvme_announce_periph(struct cam_periph *periph) +{ + struct ccb_pathinq cpi; + struct ccb_trans_settings cts; + struct cam_path *path = periph->path; + + cam_periph_assert(periph, MA_OWNED); + + xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NORMAL); + cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; + cts.type = CTS_TYPE_CURRENT_SETTINGS; + xpt_action((union ccb*)&cts); + if ((cts.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) + return; + /* Ask the SIM for its base transfer speed */ + xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NORMAL); + cpi.ccb_h.func_code = XPT_PATH_INQ; + xpt_action((union ccb *)&cpi); + /* XXX NVME STUFF HERE */ + printf("\n"); +} diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index aa05b70..deef952 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -2377,6 +2377,7 @@ daregister(struct cam_periph *periph, void *arg) if (cam_iosched_init(&softc->cam_iosched, periph) != 0) { printf("daregister: Unable to probe new device. 
" "Unable to allocate iosched memory\n"); + free(softc, M_DEVBUF); return(CAM_REQ_CMP_ERR); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 3bb9146..cdc8009 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -2249,6 +2249,7 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr) ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, hdr->b_l1hdr.b_buf->b_data); ASSERT3U(hdr->b_l2hdr.b_compress, ==, ZIO_COMPRESS_OFF); + hdr->b_l1hdr.b_tmp_cdata = NULL; return; } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index a987138..c632618 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -7170,16 +7170,24 @@ zfs_vptocnp(struct vop_vptocnp_args *ap) int ltype; int error; + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + /* * If we are a snapshot mounted under .zfs, run the operation * on the covered vnode. 
*/ if ((error = sa_lookup(zp->z_sa_hdl, - SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) + SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) { + ZFS_EXIT(zfsvfs); return (error); + } - if (zp->z_id != parent || zfsvfs->z_parent == zfsvfs) + if (zp->z_id != parent || zfsvfs->z_parent == zfsvfs) { + ZFS_EXIT(zfsvfs); return (vop_stdvptocnp(ap)); + } + ZFS_EXIT(zfsvfs); covered_vp = vp->v_mount->mnt_vnodecovered; vhold(covered_vp); diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 2cd665a..56b2ade 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -490,7 +490,6 @@ linprocfs_doswaps(PFS_FILL_ARGS) char devname[SPECNAMELEN + 1]; sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); - mtx_lock(&Giant); for (n = 0; ; n++) { if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0) break; @@ -504,7 +503,6 @@ linprocfs_doswaps(PFS_FILL_ARGS) sbuf_printf(sb, "/dev/%-34s unknown\t\t%jd\t%jd\t-1\n", devname, total, used); } - mtx_unlock(&Giant); return (0); } @@ -1326,13 +1324,13 @@ linprocfs_dofilesystems(PFS_FILL_ARGS) { struct vfsconf *vfsp; - mtx_lock(&Giant); + vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_flags & VFCF_SYNTHETIC) sbuf_printf(sb, "nodev"); sbuf_printf(sb, "\t%s\n", vfsp->vfc_name); } - mtx_unlock(&Giant); + vfsconf_sunlock(); return(0); } diff --git a/sys/compat/linuxkpi/common/include/linux/etherdevice.h b/sys/compat/linuxkpi/common/include/linux/etherdevice.h index c581367..2c4a488 100644 --- a/sys/compat/linuxkpi/common/include/linux/etherdevice.h +++ b/sys/compat/linuxkpi/common/include/linux/etherdevice.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2015-2016 Mellanox Technologies, Ltd. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -102,7 +102,8 @@ eth_broadcast_addr(u8 *pa) static inline void random_ether_addr(u8 * dst) { - read_random(dst, 6); + if (read_random(dst, 6) == 0) + arc4rand(dst, 6, 0); dst[0] &= 0xfe; dst[0] |= 0x02; diff --git a/sys/compat/linuxkpi/common/include/linux/random.h b/sys/compat/linuxkpi/common/include/linux/random.h index caae7b3..28475ed 100644 --- a/sys/compat/linuxkpi/common/include/linux/random.h +++ b/sys/compat/linuxkpi/common/include/linux/random.h @@ -2,7 +2,7 @@ * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,11 +32,13 @@ #define _LINUX_RANDOM_H_ #include <sys/random.h> +#include <sys/libkern.h> static inline void get_random_bytes(void *buf, int nbytes) { - read_random(buf, nbytes); + if (read_random(buf, nbytes) == 0) + arc4rand(buf, nbytes, 0); } #endif /* _LINUX_RANDOM_H_ */ diff --git a/sys/compat/linuxkpi/common/src/linux_idr.c b/sys/compat/linuxkpi/common/src/linux_idr.c index 5a02119..99e45e3 100644 --- a/sys/compat/linuxkpi/common/src/linux_idr.c +++ b/sys/compat/linuxkpi/common/src/linux_idr.c @@ -292,7 +292,8 @@ idr_get(struct idr *idr) return (il); } il = malloc(sizeof(*il), M_IDR, M_ZERO | M_NOWAIT); - bitmap_fill(&il->bitmap, IDR_SIZE); + if (il != NULL) + bitmap_fill(&il->bitmap, IDR_SIZE); return (il); } diff --git a/sys/conf/files b/sys/conf/files index a364a62..7982fd7 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -86,6 +86,9 @@ cam/cam_xpt.c optional scbus cam/ata/ata_all.c optional scbus cam/ata/ata_xpt.c optional scbus cam/ata/ata_pmp.c optional scbus +cam/nvme/nvme_all.c optional scbus nvme +cam/nvme/nvme_da.c 
optional scbus nvme da !nvd +cam/nvme/nvme_xpt.c optional scbus nvme cam/scsi/scsi_xpt.c optional scbus cam/scsi/scsi_all.c optional scbus cam/scsi/scsi_cd.c optional cd diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 53c299f..4949d71 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -290,6 +290,7 @@ dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme +dev/nvme/nvme_sim.c optional nvme scbus !nvd dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme diff --git a/sys/conf/kern.opts.mk b/sys/conf/kern.opts.mk index 7b8f11c..343b4f8 100644 --- a/sys/conf/kern.opts.mk +++ b/sys/conf/kern.opts.mk @@ -45,6 +45,7 @@ __DEFAULT_YES_OPTIONS = \ __DEFAULT_NO_OPTIONS = \ EISA \ + EXTRA_TCP_STACKS \ NAND \ OFED diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index 70abb10..8c5a713 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -23,6 +23,10 @@ MKMODULESENV+= CONF_CFLAGS="${CONF_CFLAGS}" MKMODULESENV+= WITH_CTF="${WITH_CTF}" .endif +.if defined(WITH_EXTRA_TCP_STACKS) +MKMODULESENV+= WITH_EXTRA_TCP_STACKS="${WITH_EXTRA_TCP_STACKS}" +.endif + # Allow overriding the kernel debug directory, so kernel and user debug may be # installed in different directories. Setting it to "" restores the historical # behavior of installing debug files in the kernel directory. 
diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh index c011b6e..2d89104 100644 --- a/sys/conf/newvers.sh +++ b/sys/conf/newvers.sh @@ -32,7 +32,7 @@ TYPE="FreeBSD" REVISION="11.0" -BRANCH="ALPHA2" +BRANCH="ALPHA3" if [ -n "${BRANCH_OVERRIDE}" ]; then BRANCH=${BRANCH_OVERRIDE} fi diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h index eccafba..8dfc690 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300.h @@ -898,6 +898,8 @@ struct ath_hal_9300 { struct ar9300NfLimits nf_2GHz; struct ar9300NfLimits nf_5GHz; struct ar9300NfLimits *nfp; + + uint32_t ah_beaconInterval; }; #define AH9300(_ah) ((struct ath_hal_9300 *)(_ah)) diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c index e914057..0e12a3d 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c @@ -774,8 +774,7 @@ ar9300_beacon_set_beacon_timers(struct ath_hal *ah, OS_REG_WRITE(ah, AR_NEXT_NDP_TIMER, TU_TO_USEC(bt->bt_nextatim)); bperiod = TU_TO_USEC(bt->bt_intval & HAL_BEACON_PERIOD); - /* XXX TODO! 
*/ -// ahp->ah_beaconInterval = bt->bt_intval & HAL_BEACON_PERIOD; + AH9300(ah)->ah_beaconInterval = bt->bt_intval & HAL_BEACON_PERIOD; OS_REG_WRITE(ah, AR_BEACON_PERIOD, bperiod); OS_REG_WRITE(ah, AR_DMA_BEACON_PERIOD, bperiod); OS_REG_WRITE(ah, AR_SWBA_PERIOD, bperiod); diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c index 530b29a..f6ae73e 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c @@ -288,6 +288,7 @@ ar9300_reset_tx_queue(struct ath_hal *ah, u_int q) const struct ieee80211_channel *chan = AH_PRIVATE(ah)->ah_curchan; HAL_TX_QUEUE_INFO *qi; u_int32_t cw_min, chan_cw_min, value; + uint32_t qmisc, dmisc; if (q >= p_cap->halTotalQueues) { HALDEBUG(ah, HAL_DEBUG_QUEUE, "%s: invalid queue num %u\n", __func__, q); @@ -335,17 +336,15 @@ ar9300_reset_tx_queue(struct ath_hal *ah, u_int q) SM(qi->tqi_shretry, AR_D_RETRY_LIMIT_FR_SH)); /* enable early termination on the QCU */ - OS_REG_WRITE(ah, AR_QMISC(q), AR_Q_MISC_DCU_EARLY_TERM_REQ); + qmisc = AR_Q_MISC_DCU_EARLY_TERM_REQ; /* enable DCU to wait for next fragment from QCU */ if (AR_SREV_WASP(ah) && (AH_PRIVATE((ah))->ah_macRev <= AR_SREV_REVISION_WASP_12)) { /* WAR for EV#85395: Wasp Rx overrun issue - reduces Tx queue backoff * threshold to 1 to avoid Rx overruns - Fixed in Wasp 1.3 */ - OS_REG_WRITE(ah, AR_DMISC(q), - AR_D_MISC_CW_BKOFF_EN | AR_D_MISC_FRAG_WAIT_EN | 0x1); + dmisc = AR_D_MISC_CW_BKOFF_EN | AR_D_MISC_FRAG_WAIT_EN | 0x1; } else { - OS_REG_WRITE(ah, AR_DMISC(q), - AR_D_MISC_CW_BKOFF_EN | AR_D_MISC_FRAG_WAIT_EN | 0x2); + dmisc = AR_D_MISC_CW_BKOFF_EN | AR_D_MISC_FRAG_WAIT_EN | 0x2; } /* multiqueue support */ @@ -355,11 +354,9 @@ ar9300_reset_tx_queue(struct ath_hal *ah, u_int q) SM(qi->tqi_cbrPeriod, AR_Q_CBRCFG_INTERVAL) | SM(qi->tqi_cbrOverflowLimit, AR_Q_CBRCFG_OVF_THRESH)); - OS_REG_WRITE(ah, AR_QMISC(q), - OS_REG_READ(ah, AR_QMISC(q)) | - AR_Q_MISC_FSP_CBR | + qmisc 
|= AR_Q_MISC_FSP_CBR | (qi->tqi_cbrOverflowLimit ? - AR_Q_MISC_CBR_EXP_CNTR_LIMIT_EN : 0)); + AR_Q_MISC_CBR_EXP_CNTR_LIMIT_EN : 0); } if (qi->tqi_readyTime && (qi->tqi_type != HAL_TX_QUEUE_CAB)) { @@ -374,34 +371,27 @@ ar9300_reset_tx_queue(struct ath_hal *ah, u_int q) if (qi->tqi_burstTime && (qi->tqi_qflags & HAL_TXQ_RDYTIME_EXP_POLICY_ENABLE)) { - OS_REG_WRITE(ah, AR_QMISC(q), OS_REG_READ(ah, AR_QMISC(q)) | - AR_Q_MISC_RDYTIME_EXP_POLICY); + qmisc |= AR_Q_MISC_RDYTIME_EXP_POLICY; } if (qi->tqi_qflags & HAL_TXQ_BACKOFF_DISABLE) { - OS_REG_WRITE(ah, AR_DMISC(q), OS_REG_READ(ah, AR_DMISC(q)) | - AR_D_MISC_POST_FR_BKOFF_DIS); + dmisc |= AR_D_MISC_POST_FR_BKOFF_DIS; } if (qi->tqi_qflags & HAL_TXQ_FRAG_BURST_BACKOFF_ENABLE) { - OS_REG_WRITE(ah, AR_DMISC(q), OS_REG_READ(ah, AR_DMISC(q)) | - AR_D_MISC_FRAG_BKOFF_EN); + dmisc |= AR_D_MISC_FRAG_BKOFF_EN; } switch (qi->tqi_type) { case HAL_TX_QUEUE_BEACON: /* beacon frames */ - OS_REG_WRITE(ah, AR_QMISC(q), - OS_REG_READ(ah, AR_QMISC(q)) - | AR_Q_MISC_FSP_DBA_GATED + qmisc |= AR_Q_MISC_FSP_DBA_GATED | AR_Q_MISC_BEACON_USE - | AR_Q_MISC_CBR_INCR_DIS1); + | AR_Q_MISC_CBR_INCR_DIS1; - OS_REG_WRITE(ah, AR_DMISC(q), - OS_REG_READ(ah, AR_DMISC(q)) - | (AR_D_MISC_ARB_LOCKOUT_CNTRL_GLOBAL << + dmisc |= (AR_D_MISC_ARB_LOCKOUT_CNTRL_GLOBAL << AR_D_MISC_ARB_LOCKOUT_CNTRL_S) | AR_D_MISC_BEACON_USE - | AR_D_MISC_POST_FR_BKOFF_DIS); + | AR_D_MISC_POST_FR_BKOFF_DIS; /* XXX cwmin and cwmax should be 0 for beacon queue */ if (AH_PRIVATE(ah)->ah_opmode != HAL_M_IBSS) { OS_REG_WRITE(ah, AR_DLCL_IFS(q), SM(0, AR_D_LCL_IFS_CWMIN) @@ -416,21 +406,36 @@ ar9300_reset_tx_queue(struct ath_hal *ah, u_int q) * not properly refreshing the Tx descriptor if * the TXE clear setting is used. 
*/ - OS_REG_WRITE(ah, AR_QMISC(q), - OS_REG_READ(ah, AR_QMISC(q)) - | AR_Q_MISC_FSP_DBA_GATED + qmisc |= AR_Q_MISC_FSP_DBA_GATED | AR_Q_MISC_CBR_INCR_DIS1 - | AR_Q_MISC_CBR_INCR_DIS0); + | AR_Q_MISC_CBR_INCR_DIS0; - value = TU_TO_USEC(qi->tqi_readyTime) - - (ah->ah_config.ah_sw_beacon_response_time - - ah->ah_config.ah_dma_beacon_response_time) - - ah->ah_config.ah_additional_swba_backoff; - OS_REG_WRITE(ah, AR_QRDYTIMECFG(q), value | AR_Q_RDYTIMECFG_EN); + if (qi->tqi_readyTime) { + OS_REG_WRITE(ah, AR_QRDYTIMECFG(q), + SM(qi->tqi_readyTime, AR_Q_RDYTIMECFG_DURATION) | + AR_Q_RDYTIMECFG_EN); + } else { + + value = (ahp->ah_beaconInterval * 50 / 100) + - ah->ah_config.ah_additional_swba_backoff + - ah->ah_config.ah_sw_beacon_response_time + + ah->ah_config.ah_dma_beacon_response_time; + /* + * XXX Ensure it isn't too low - nothing lower + * XXX than 10 TU + */ + if (value < 10) + value = 10; + HALDEBUG(ah, HAL_DEBUG_TXQUEUE, + "%s: defaulting to rdytime = %d uS\n", + __func__, value); + OS_REG_WRITE(ah, AR_QRDYTIMECFG(q), + SM(TU_TO_USEC(value), AR_Q_RDYTIMECFG_DURATION) | + AR_Q_RDYTIMECFG_EN); + } - OS_REG_WRITE(ah, AR_DMISC(q), OS_REG_READ(ah, AR_DMISC(q)) - | (AR_D_MISC_ARB_LOCKOUT_CNTRL_GLOBAL << - AR_D_MISC_ARB_LOCKOUT_CNTRL_S)); + dmisc |= (AR_D_MISC_ARB_LOCKOUT_CNTRL_GLOBAL << + AR_D_MISC_ARB_LOCKOUT_CNTRL_S); break; case HAL_TX_QUEUE_PSPOLL: /* @@ -441,12 +446,10 @@ ar9300_reset_tx_queue(struct ath_hal *ah, u_int q) * non-TIM elements and send PS-poll PS poll processing * will be done in software */ - OS_REG_WRITE(ah, AR_QMISC(q), - OS_REG_READ(ah, AR_QMISC(q)) | AR_Q_MISC_CBR_INCR_DIS1); + qmisc |= AR_Q_MISC_CBR_INCR_DIS1; break; case HAL_TX_QUEUE_UAPSD: - OS_REG_WRITE(ah, AR_DMISC(q), OS_REG_READ(ah, AR_DMISC(q)) - | AR_D_MISC_POST_FR_BKOFF_DIS); + dmisc |= AR_D_MISC_POST_FR_BKOFF_DIS; break; default: /* NB: silence compiler */ break; @@ -461,15 +464,15 @@ ar9300_reset_tx_queue(struct ath_hal *ah, u_int q) * queue_info->dcumode. 
*/ if (qi->tqi_intFlags & HAL_TXQ_USE_LOCKOUT_BKOFF_DIS) { - OS_REG_WRITE(ah, AR_DMISC(q), - OS_REG_READ(ah, AR_DMISC(q)) | - SM(AR_D_MISC_ARB_LOCKOUT_CNTRL_GLOBAL, + dmisc |= SM(AR_D_MISC_ARB_LOCKOUT_CNTRL_GLOBAL, AR_D_MISC_ARB_LOCKOUT_CNTRL) | - AR_D_MISC_POST_FR_BKOFF_DIS); + AR_D_MISC_POST_FR_BKOFF_DIS; } #endif OS_REG_WRITE(ah, AR_Q_DESC_CRCCHK, AR_Q_DESC_CRCCHK_EN); + OS_REG_WRITE(ah, AR_QMISC(q), qmisc); + OS_REG_WRITE(ah, AR_DMISC(q), dmisc); /* * Always update the secondary interrupt mask registers - this diff --git a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300reg.h b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300reg.h index a5e0f79..2ec8625 100644 --- a/sys/contrib/dev/ath/ath_hal/ar9300/ar9300reg.h +++ b/sys/contrib/dev/ath/ath_hal/ar9300/ar9300reg.h @@ -506,6 +506,7 @@ #define AR_Q0_MISC AR_MAC_QCU_OFFSET(MAC_QCU_MISC) #define AR_QMISC(_i) (AR_Q0_MISC + ((_i)<<2)) #define AR_Q_MISC_FSP 0x0000000F // Mask for Frame Scheduling Policy +#define AR_Q_MISC_FSP_S 0 #define AR_Q_MISC_FSP_ASAP 0 // ASAP #define AR_Q_MISC_FSP_CBR 1 // CBR #define AR_Q_MISC_FSP_DBA_GATED 2 // DMA Beacon Alert gated diff --git a/sys/dev/acpica/acpi_thermal.c b/sys/dev/acpica/acpi_thermal.c index 5b01de5..fa1c2e8 100644 --- a/sys/dev/acpica/acpi_thermal.c +++ b/sys/dev/acpica/acpi_thermal.c @@ -311,21 +311,44 @@ acpi_tz_attach(device_t dev) "thermal sampling period for passive cooling"); /* - * Create thread to service all of the thermal zones. Register - * our power profile event handler. + * Register our power profile event handler. */ sc->tz_event = EVENTHANDLER_REGISTER(power_profile_change, acpi_tz_power_profile, sc, 0); - if (acpi_tz_proc == NULL) { - error = kproc_create(acpi_tz_thread, NULL, &acpi_tz_proc, - RFHIGHPID, 0, "acpi_thermal"); - if (error != 0) { - device_printf(sc->tz_dev, "could not create thread - %d", error); - goto out; - } + + /* + * Flag the event handler for a manual invocation by our timeout. 
+ * We defer it like this so that the rest of the subsystem has time + * to come up. Don't bother evaluating/printing the temperature at + * this point; on many systems it'll be bogus until the EC is running. + */ + sc->tz_flags |= TZ_FLAG_GETPROFILE; + + return_VALUE (0); +} + +static void +acpi_tz_startup(void *arg __unused) +{ + struct acpi_tz_softc *sc; + device_t *devs; + int devcount, error, i; + + devclass_get_devices(acpi_tz_devclass, &devs, &devcount); + if (devcount == 0) { + free(devs, M_TEMP); + return; } /* + * Create thread to service all of the thermal zones. + */ + error = kproc_create(acpi_tz_thread, NULL, &acpi_tz_proc, RFHIGHPID, 0, + "acpi_thermal"); + if (error != 0) + printf("acpi_tz: could not create thread - %d", error); + + /* * Create a thread to handle passive cooling for 1st zone which * has _PSV, _TSP, _TC1 and _TC2. Users can enable it for other * zones manually for now. @@ -335,34 +358,22 @@ acpi_tz_attach(device_t dev) * given frequency whereas it's possible for different thermal * zones to specify independent settings for multiple CPUs. */ - if (acpi_tz_cooling_unit < 0 && acpi_tz_cooling_is_available(sc)) - sc->tz_cooling_enabled = TRUE; - if (sc->tz_cooling_enabled) { - error = acpi_tz_cooling_thread_start(sc); - if (error != 0) { - sc->tz_cooling_enabled = FALSE; - goto out; + for (i = 0; i < devcount; i++) { + sc = device_get_softc(devs[i]); + if (acpi_tz_cooling_is_available(sc)) { + sc->tz_cooling_enabled = TRUE; + error = acpi_tz_cooling_thread_start(sc); + if (error != 0) { + sc->tz_cooling_enabled = FALSE; + break; + } + acpi_tz_cooling_unit = device_get_unit(devs[i]); + break; } - acpi_tz_cooling_unit = device_get_unit(dev); - } - - /* - * Flag the event handler for a manual invocation by our timeout. - * We defer it like this so that the rest of the subsystem has time - * to come up. Don't bother evaluating/printing the temperature at - * this point; on many systems it'll be bogus until the EC is running. 
- */ - sc->tz_flags |= TZ_FLAG_GETPROFILE; - -out: - if (error != 0) { - EVENTHANDLER_DEREGISTER(power_profile_change, sc->tz_event); - AcpiRemoveNotifyHandler(sc->tz_handle, ACPI_DEVICE_NOTIFY, - acpi_tz_notify_handler); - sysctl_ctx_free(&sc->tz_sysctl_ctx); } - return_VALUE (error); + free(devs, M_TEMP); } +SYSINIT(acpi_tz, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, acpi_tz_startup, NULL); /* * Parse the current state of this thermal zone and set up to use it. diff --git a/sys/dev/ath/if_ath_alq.h b/sys/dev/ath/if_ath_alq.h index 9f6ceac..988882d 100644 --- a/sys/dev/ath/if_ath_alq.h +++ b/sys/dev/ath/if_ath_alq.h @@ -113,6 +113,14 @@ struct if_ath_alq_mib_counters { #define ATH_ALQ_STUCK_BEACON 13 #define ATH_ALQ_RESUME_BEACON 14 +#define ATH_ALQ_TX_FIFO_PUSH 15 +struct if_ath_alq_tx_fifo_push { + uint32_t txq; + uint32_t nframes; + uint32_t fifo_depth; + uint32_t frame_cnt; +}; + /* * These will always be logged, regardless. */ diff --git a/sys/dev/ath/if_ath_sysctl.c b/sys/dev/ath/if_ath_sysctl.c index ab829df..749285a6a 100644 --- a/sys/dev/ath/if_ath_sysctl.c +++ b/sys/dev/ath/if_ath_sysctl.c @@ -1282,6 +1282,9 @@ ath_sysctl_stats_attach(struct ath_softc *sc) &sc->sc_stats.ast_rx_keymiss, 0, ""); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "ast_tx_swfiltered", CTLFLAG_RD, &sc->sc_stats.ast_tx_swfiltered, 0, ""); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "ast_tx_nodeq_overflow", + CTLFLAG_RD, &sc->sc_stats.ast_tx_nodeq_overflow, 0, + "tx dropped 'cuz nodeq overflow"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "ast_rx_stbc", CTLFLAG_RD, &sc->sc_stats.ast_rx_stbc, 0, "Number of STBC frames received"); diff --git a/sys/dev/bhnd/bcma/bcma_nexus.c b/sys/dev/bhnd/bcma/bcma_nexus.c index 7cc8c26..425e110 100644 --- a/sys/dev/bhnd/bcma/bcma_nexus.c +++ b/sys/dev/bhnd/bcma/bcma_nexus.c @@ -135,4 +135,5 @@ static device_method_t bcma_nexus_methods[] = { DEFINE_CLASS_2(bhnd, bcma_nexus_driver, bcma_nexus_methods, sizeof(struct bcma_nexus_softc), bhnd_nexus_driver, bcma_driver); 
-DRIVER_MODULE(bcma_nexus, nexus, bcma_nexus_driver, bhnd_devclass, 0, 0); +EARLY_DRIVER_MODULE(bcma_nexus, nexus, bcma_nexus_driver, bhnd_devclass, 0, 0, + BUS_PASS_BUS + BUS_PASS_ORDER_MIDDLE); diff --git a/sys/dev/bhnd/bhnd.h b/sys/dev/bhnd/bhnd.h index 53215cf..f011237 100644 --- a/sys/dev/bhnd/bhnd.h +++ b/sys/dev/bhnd/bhnd.h @@ -232,16 +232,8 @@ struct bhnd_device { BHND_COREID_ ## _device) }, _desc, _quirks, \ _flags } -#define BHND_MIPS_DEVICE(_device, _desc, _quirks, ...) \ - _BHND_DEVICE(MIPS, _device, _desc, _quirks, \ - ## __VA_ARGS__, 0) - -#define BHND_ARM_DEVICE(_device, _desc, _quirks, ...) \ - _BHND_DEVICE(ARM, _device, _desc, _quirks, \ - ## __VA_ARGS__, 0) - -#define BHND_DEVICE(_device, _desc, _quirks, ...) \ - _BHND_DEVICE(BCM, _device, _desc, _quirks, \ +#define BHND_DEVICE(_vendor, _device, _desc, _quirks, ...) \ + _BHND_DEVICE(_vendor, _device, _desc, _quirks, \ ## __VA_ARGS__, 0) #define BHND_DEVICE_END { { BHND_MATCH_ANY }, NULL, NULL, 0 } diff --git a/sys/dev/bhnd/bhnd_subr.c b/sys/dev/bhnd/bhnd_subr.c index 67b11cb..9a57995 100644 --- a/sys/dev/bhnd/bhnd_subr.c +++ b/sys/dev/bhnd/bhnd_subr.c @@ -86,7 +86,7 @@ static const struct bhnd_core_desc { BHND_CDESC(BCM, APHY, WLAN_PHY, "802.11a PHY"), BHND_CDESC(BCM, BPHY, WLAN_PHY, "802.11b PHY"), BHND_CDESC(BCM, GPHY, WLAN_PHY, "802.11g PHY"), - BHND_CDESC(BCM, MIPS33, CPU, "MIPS 3302 Core"), + BHND_CDESC(BCM, MIPS33, CPU, "MIPS3302 Core"), BHND_CDESC(BCM, USB11H, OTHER, "USB 1.1 Host Controller"), BHND_CDESC(BCM, USB11D, OTHER, "USB 1.1 Device Core"), BHND_CDESC(BCM, USB20H, OTHER, "USB 2.0 Host Controller"), @@ -108,7 +108,7 @@ static const struct bhnd_core_desc { BHND_CDESC(BCM, SDIOD, OTHER, "SDIO Device Core"), BHND_CDESC(BCM, ARMCM3, CPU, "ARM Cortex-M3 CPU"), BHND_CDESC(BCM, HTPHY, WLAN_PHY, "802.11n 4x4 PHY"), - BHND_CDESC(BCM, MIPS74K, CPU, "MIPS74k CPU"), + BHND_CDESC(MIPS,MIPS74K, CPU, "MIPS74k CPU"), BHND_CDESC(BCM, GMAC, ENET_MAC, "Gigabit MAC core"), BHND_CDESC(BCM, DMEMC, MEMC, 
"DDR1/DDR2 Memory Controller"), BHND_CDESC(BCM, PCIERC, OTHER, "PCIe Root Complex"), diff --git a/sys/dev/bhnd/cores/chipc/chipc.c b/sys/dev/bhnd/cores/chipc/chipc.c index b30cec1..41fccde 100644 --- a/sys/dev/bhnd/cores/chipc/chipc.c +++ b/sys/dev/bhnd/cores/chipc/chipc.c @@ -84,7 +84,7 @@ static struct bhnd_device_quirk chipc_quirks[]; /* Supported device identifiers */ static const struct bhnd_device chipc_devices[] = { - BHND_DEVICE(CC, NULL, chipc_quirks), + BHND_DEVICE(BCM, CC, NULL, chipc_quirks), BHND_DEVICE_END }; diff --git a/sys/dev/bhnd/cores/pci/bhnd_pci.c b/sys/dev/bhnd/cores/pci/bhnd_pci.c index 2445e65..87ff4bd 100644 --- a/sys/dev/bhnd/cores/pci/bhnd_pci.c +++ b/sys/dev/bhnd/cores/pci/bhnd_pci.c @@ -68,8 +68,8 @@ static struct bhnd_device_quirk bhnd_pcie_quirks[]; #define BHND_PCI_QUIRKS bhnd_pci_quirks #define BHND_PCIE_QUIRKS bhnd_pcie_quirks -#define BHND_PCI_DEV(_core, _desc, ...) \ - { BHND_DEVICE(_core, _desc, BHND_ ## _core ## _QUIRKS, \ +#define BHND_PCI_DEV(_core, _desc, ...) 
\ + { BHND_DEVICE(BCM, _core, _desc, BHND_ ## _core ## _QUIRKS, \ ## __VA_ARGS__), BHND_PCI_REGFMT_ ## _core } static const struct bhnd_pci_device { diff --git a/sys/dev/bhnd/cores/pci/bhnd_pci_hostb.c b/sys/dev/bhnd/cores/pci/bhnd_pci_hostb.c index 4ba7289..ec2b5b2 100644 --- a/sys/dev/bhnd/cores/pci/bhnd_pci_hostb.c +++ b/sys/dev/bhnd/cores/pci/bhnd_pci_hostb.c @@ -87,7 +87,7 @@ static int bhnd_pci_wars_hwdown(struct bhnd_pcihb_softc *sc, */ #define BHND_PCI_DEV(_core, _quirks) \ - BHND_DEVICE(_core, NULL, _quirks, BHND_DF_HOSTB) + BHND_DEVICE(BCM, _core, NULL, _quirks, BHND_DF_HOSTB) static const struct bhnd_device bhnd_pci_devs[] = { BHND_PCI_DEV(PCI, bhnd_pci_quirks), diff --git a/sys/dev/bhnd/cores/pcie2/bhnd_pcie2.c b/sys/dev/bhnd/cores/pcie2/bhnd_pcie2.c index 8641fcc..626d629 100644 --- a/sys/dev/bhnd/cores/pcie2/bhnd_pcie2.c +++ b/sys/dev/bhnd/cores/pcie2/bhnd_pcie2.c @@ -57,7 +57,7 @@ __FBSDID("$FreeBSD$"); static struct bhnd_device_quirk bhnd_pcie2_quirks[]; #define BHND_PCIE_DEV(_core, _desc, ...) 
\ - BHND_DEVICE(_core, _desc, bhnd_pcie2_quirks, ## __VA_ARGS__) + BHND_DEVICE(BCM, _core, _desc, bhnd_pcie2_quirks, ## __VA_ARGS__) static const struct bhnd_device bhnd_pcie2_devs[] = { BHND_PCIE_DEV(PCIE2, "PCIe-G2 Host-PCI bridge", BHND_DF_HOSTB), diff --git a/sys/dev/bhnd/cores/pcie2/bhnd_pcie2_hostb.c b/sys/dev/bhnd/cores/pcie2/bhnd_pcie2_hostb.c index f20de8e..5f4b0cb 100644 --- a/sys/dev/bhnd/cores/pcie2/bhnd_pcie2_hostb.c +++ b/sys/dev/bhnd/cores/pcie2/bhnd_pcie2_hostb.c @@ -83,7 +83,7 @@ static int bhnd_pcie2_wars_hwdown(struct bhnd_pcie2hb_softc *sc); */ #define BHND_PCI_DEV(_core, _quirks) \ - BHND_DEVICE(_core, NULL, _quirks, BHND_DF_HOSTB) + BHND_DEVICE(BCM, _core, NULL, _quirks, BHND_DF_HOSTB) static const struct bhnd_device bhnd_pcie2_devs[] = { BHND_PCI_DEV(PCIE2, bhnd_pcie2_quirks), diff --git a/sys/dev/bhnd/siba/siba_bhndb.c b/sys/dev/bhnd/siba/siba_bhndb.c index 04bdb76..462162e 100644 --- a/sys/dev/bhnd/siba/siba_bhndb.c +++ b/sys/dev/bhnd/siba/siba_bhndb.c @@ -74,7 +74,7 @@ static struct bhnd_device_quirk bridge_quirks[] = { }; static struct bhnd_device bridge_devs[] = { - BHND_DEVICE(PCI, NULL, bridge_quirks), + BHND_DEVICE(BCM, PCI, NULL, bridge_quirks), }; static int diff --git a/sys/dev/bhnd/siba/siba_nexus.c b/sys/dev/bhnd/siba/siba_nexus.c index 9b5e87e..9d7b6a5 100644 --- a/sys/dev/bhnd/siba/siba_nexus.c +++ b/sys/dev/bhnd/siba/siba_nexus.c @@ -115,4 +115,5 @@ static device_method_t siba_nexus_methods[] = { DEFINE_CLASS_2(bhnd, siba_nexus_driver, siba_nexus_methods, sizeof(struct siba_nexus_softc), bhnd_nexus_driver, siba_driver); -DRIVER_MODULE(siba_nexus, nexus, siba_nexus_driver, bhnd_devclass, 0, 0); +EARLY_DRIVER_MODULE(siba_nexus, nexus, siba_nexus_driver, bhnd_devclass, 0, 0, + BUS_PASS_BUS + BUS_PASS_ORDER_MIDDLE); diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c index c83bd66..76d40d81 100644 --- a/sys/dev/cxgb/cxgb_sge.c +++ b/sys/dev/cxgb/cxgb_sge.c @@ -2900,7 +2900,8 @@ process_responses(adapter_t *adap, struct 
sge_qset *qs, int budget) eop = get_packet(adap, drop_thresh, qs, mh, r); if (eop) { if (r->rss_hdr.hash_type && !adap->timestamp) { - M_HASHTYPE_SET(mh->mh_head, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(mh->mh_head, + M_HASHTYPE_OPAQUE_HASH); mh->mh_head->m_pkthdr.flowid = rss_hash; } } diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 5800faa..0efeadb 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -54,6 +54,7 @@ #include <netinet/tcp_lro.h> #include "offload.h" +#include "t4_ioctl.h" #include "common/t4_msg.h" #include "firmware/t4fw_interface.h" @@ -264,6 +265,17 @@ struct vi_info { uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; +enum { + /* tx_sched_class flags */ + TX_SC_OK = (1 << 0), /* Set up in hardware, active. */ +}; + +struct tx_sched_class { + int refcount; + int flags; + struct t4_sched_class_params params; +}; + struct port_info { device_t dev; struct adapter *adapter; @@ -273,6 +285,8 @@ struct port_info { int up_vis; int uld_vis; + struct tx_sched_class *tc; /* traffic classes for this channel */ + struct mtx pi_lock; char lockname[16]; unsigned long flags; @@ -522,6 +536,7 @@ struct sge_txq { struct tx_sdesc *sdesc; /* KVA of software descriptor ring */ struct sglist *gl; __be32 cpl_ctrl0; /* for convenience */ + int tc_idx; /* traffic class */ struct task tx_reclaim_task; /* stats for common events first */ diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h index fdc5e98..c85d970 100644 --- a/sys/dev/cxgbe/t4_ioctl.h +++ b/sys/dev/cxgbe/t4_ioctl.h @@ -215,6 +215,20 @@ struct t4_filter { struct t4_filter_specification fs; }; +/* Tx Scheduling Class parameters */ +struct t4_sched_class_params { + int8_t level; /* scheduler hierarchy level */ + int8_t mode; /* per-class or per-flow */ + int8_t rateunit; /* bit or packet rate */ + int8_t ratemode; /* %port relative or kbps absolute */ + int8_t channel; /* scheduler channel [0..N] */ + int8_t cl; /* scheduler class [0..N] 
*/ + int32_t minrate; /* minimum rate */ + int32_t maxrate; /* maximum rate */ + int16_t weight; /* percent weight */ + int16_t pktsize; /* average packet size */ +}; + /* * Support for "sched-class" command to allow a TX Scheduling Class to be * programmed with various parameters. @@ -226,19 +240,7 @@ struct t4_sched_params { struct { /* sub-command SCHED_CLASS_CONFIG */ int8_t minmax; /* minmax enable */ } config; - struct { /* sub-command SCHED_CLASS_PARAMS */ - int8_t level; /* scheduler hierarchy level */ - int8_t mode; /* per-class or per-flow */ - int8_t rateunit; /* bit or packet rate */ - int8_t ratemode; /* %port relative or kbps - absolute */ - int8_t channel; /* scheduler channel [0..N] */ - int8_t cl; /* scheduler class [0..N] */ - int32_t minrate; /* minimum rate */ - int32_t maxrate; /* maximum rate */ - int16_t weight; /* percent weight */ - int16_t pktsize; /* average packet size */ - } params; + struct t4_sched_class_params params; uint8_t reserved[6 + 8 * 8]; } u; }; diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 132d69e..01c4352 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -496,6 +496,7 @@ static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #endif #ifdef TCP_OFFLOAD static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); @@ -875,6 +876,9 @@ t4_attach(device_t dev) mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; + pi->tc = malloc(sizeof(struct tx_sched_class) * + sc->chip_params->nsched_cls, M_CXGBE, M_ZERO | M_WAITOK); + if (is_10G_port(pi) || is_40G_port(pi)) { n10g++; for_each_vi(pi, j, vi) { @@ -1131,6 +1135,7 @@ t4_detach(device_t dev) mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); + free(pi->tc, M_CXGBE); free(pi, M_CXGBE); } } @@ -5030,8 +5035,10 @@ cxgbe_sysctls(struct 
port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; - struct sysctl_oid_list *children; + struct sysctl_oid_list *children, *children2; struct adapter *sc = pi->adapter; + int i; + char name[16]; ctx = device_get_sysctl_ctx(pi->dev); @@ -5060,6 +5067,29 @@ cxgbe_sysctls(struct port_info *pi) port_top_speed(pi), "max speed (in Gbps)"); /* + * dev.(cxgbe|cxl).X.tc. + */ + oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, + "Tx scheduler traffic classes"); + for (i = 0; i < sc->chip_params->nsched_cls; i++) { + struct tx_sched_class *tc = &pi->tc[i]; + + snprintf(name, sizeof(name), "%d", i); + children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, + "traffic class")); + SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD, + &tc->flags, 0, "flags"); + SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", + CTLFLAG_RD, &tc->refcount, 0, "references to this class"); +#ifdef SBUF_DRAIN + SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", + CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, + sysctl_tc_params, "A", "traffic class parameters"); +#endif + } + + /* * dev.cxgbe.X.stats. 
*/ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, @@ -7452,6 +7482,101 @@ sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) return (rc); } + +static int +sysctl_tc_params(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct tx_sched_class *tc; + struct t4_sched_class_params p; + struct sbuf *sb; + int i, rc, port_id, flags, mbps, gbps; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + if (sb == NULL) + return (ENOMEM); + + port_id = arg2 >> 16; + MPASS(port_id < sc->params.nports); + MPASS(sc->port[port_id] != NULL); + i = arg2 & 0xffff; + MPASS(i < sc->chip_params->nsched_cls); + tc = &sc->port[port_id]->tc[i]; + + rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4tc_p"); + if (rc) + goto done; + flags = tc->flags; + p = tc->params; + end_synchronized_op(sc, LOCK_HELD); + + if ((flags & TX_SC_OK) == 0) { + sbuf_printf(sb, "none"); + goto done; + } + + if (p.level == SCHED_CLASS_LEVEL_CL_WRR) { + sbuf_printf(sb, "cl-wrr weight %u", p.weight); + goto done; + } else if (p.level == SCHED_CLASS_LEVEL_CL_RL) + sbuf_printf(sb, "cl-rl"); + else if (p.level == SCHED_CLASS_LEVEL_CH_RL) + sbuf_printf(sb, "ch-rl"); + else { + rc = ENXIO; + goto done; + } + + if (p.ratemode == SCHED_CLASS_RATEMODE_REL) { + /* XXX: top speed or actual link speed? 
*/ + gbps = port_top_speed(sc->port[port_id]); + sbuf_printf(sb, " %u%% of %uGbps", p.maxrate, gbps); + } + else if (p.ratemode == SCHED_CLASS_RATEMODE_ABS) { + switch (p.rateunit) { + case SCHED_CLASS_RATEUNIT_BITS: + mbps = p.maxrate / 1000; + gbps = p.maxrate / 1000000; + if (p.maxrate == gbps * 1000000) + sbuf_printf(sb, " %uGbps", gbps); + else if (p.maxrate == mbps * 1000) + sbuf_printf(sb, " %uMbps", mbps); + else + sbuf_printf(sb, " %uKbps", p.maxrate); + break; + case SCHED_CLASS_RATEUNIT_PKTS: + sbuf_printf(sb, " %upps", p.maxrate); + break; + default: + rc = ENXIO; + goto done; + } + } + + switch (p.mode) { + case SCHED_CLASS_MODE_CLASS: + sbuf_printf(sb, " aggregate"); + break; + case SCHED_CLASS_MODE_FLOW: + sbuf_printf(sb, " per-flow"); + break; + default: + rc = ENXIO; + goto done; + } + +done: + if (rc == 0) + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} #endif #ifdef TCP_OFFLOAD @@ -8297,155 +8422,147 @@ in_range(int val, int lo, int hi) } static int -set_sched_class(struct adapter *sc, struct t4_sched_params *p) +set_sched_class_config(struct adapter *sc, int minmax) { - int fw_subcmd, fw_type, rc; + int rc; + + if (minmax < 0) + return (EINVAL); - rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsc"); + rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc"); if (rc) return (rc); + rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1); + end_synchronized_op(sc, 0); - if (!(sc->flags & FULL_INIT_DONE)) { - rc = EAGAIN; - goto done; - } + return (rc); +} - /* - * Translate the cxgbetool parameters into T4 firmware parameters. (The - * sub-command and type are in common locations.) 
- */ - if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG) - fw_subcmd = FW_SCHED_SC_CONFIG; - else if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS) - fw_subcmd = FW_SCHED_SC_PARAMS; - else { - rc = EINVAL; - goto done; - } - if (p->type == SCHED_CLASS_TYPE_PACKET) - fw_type = FW_SCHED_TYPE_PKTSCHED; - else { - rc = EINVAL; - goto done; - } +static int +set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p, + int sleep_ok) +{ + int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode; + struct port_info *pi; + struct tx_sched_class *tc; + + if (p->level == SCHED_CLASS_LEVEL_CL_RL) + fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL; + else if (p->level == SCHED_CLASS_LEVEL_CL_WRR) + fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR; + else if (p->level == SCHED_CLASS_LEVEL_CH_RL) + fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL; + else + return (EINVAL); - if (fw_subcmd == FW_SCHED_SC_CONFIG) { - /* Vet our parameters ..*/ - if (p->u.config.minmax < 0) { - rc = EINVAL; - goto done; - } + if (p->mode == SCHED_CLASS_MODE_CLASS) + fw_mode = FW_SCHED_PARAMS_MODE_CLASS; + else if (p->mode == SCHED_CLASS_MODE_FLOW) + fw_mode = FW_SCHED_PARAMS_MODE_FLOW; + else + return (EINVAL); - /* And pass the request to the firmware ...*/ - rc = -t4_sched_config(sc, fw_type, p->u.config.minmax, 1); - goto done; - } + if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) + fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; + else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) + fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE; + else + return (EINVAL); - if (fw_subcmd == FW_SCHED_SC_PARAMS) { - int fw_level; - int fw_mode; - int fw_rateunit; - int fw_ratemode; + if (p->ratemode == SCHED_CLASS_RATEMODE_REL) + fw_ratemode = FW_SCHED_PARAMS_RATE_REL; + else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) + fw_ratemode = FW_SCHED_PARAMS_RATE_ABS; + else + return (EINVAL); - if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL) - fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL; - else if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) - 
fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR; - else if (p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) - fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL; - else { - rc = EINVAL; - goto done; - } + /* Vet our parameters ... */ + if (!in_range(p->channel, 0, sc->chip_params->nchan - 1)) + return (ERANGE); - if (p->u.params.mode == SCHED_CLASS_MODE_CLASS) - fw_mode = FW_SCHED_PARAMS_MODE_CLASS; - else if (p->u.params.mode == SCHED_CLASS_MODE_FLOW) - fw_mode = FW_SCHED_PARAMS_MODE_FLOW; - else { - rc = EINVAL; - goto done; - } + pi = sc->port[sc->chan_map[p->channel]]; + if (pi == NULL) + return (ENXIO); + MPASS(pi->tx_chan == p->channel); + top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */ - if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_BITS) - fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; - else if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_PKTS) - fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE; - else { - rc = EINVAL; - goto done; - } + if (!in_range(p->cl, 0, sc->chip_params->nsched_cls) || + !in_range(p->minrate, 0, top_speed) || + !in_range(p->maxrate, 0, top_speed) || + !in_range(p->weight, 0, 100)) + return (ERANGE); - if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_REL) - fw_ratemode = FW_SCHED_PARAMS_RATE_REL; - else if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_ABS) - fw_ratemode = FW_SCHED_PARAMS_RATE_ABS; - else { - rc = EINVAL; - goto done; - } + /* + * Translate any unset parameters into the firmware's + * nomenclature and/or fail the call if the parameters + * are required ... + */ + if (p->rateunit < 0 || p->ratemode < 0 || p->channel < 0 || p->cl < 0) + return (EINVAL); - /* Vet our parameters ... 
*/ - if (!in_range(p->u.params.channel, 0, 3) || - !in_range(p->u.params.cl, 0, sc->chip_params->nsched_cls) || - !in_range(p->u.params.minrate, 0, 10000000) || - !in_range(p->u.params.maxrate, 0, 10000000) || - !in_range(p->u.params.weight, 0, 100)) { - rc = ERANGE; - goto done; - } + if (p->minrate < 0) + p->minrate = 0; + if (p->maxrate < 0) { + if (p->level == SCHED_CLASS_LEVEL_CL_RL || + p->level == SCHED_CLASS_LEVEL_CH_RL) + return (EINVAL); + else + p->maxrate = 0; + } + if (p->weight < 0) { + if (p->level == SCHED_CLASS_LEVEL_CL_WRR) + return (EINVAL); + else + p->weight = 0; + } + if (p->pktsize < 0) { + if (p->level == SCHED_CLASS_LEVEL_CL_RL || + p->level == SCHED_CLASS_LEVEL_CH_RL) + return (EINVAL); + else + p->pktsize = 0; + } + rc = begin_synchronized_op(sc, NULL, + sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp"); + if (rc) + return (rc); + tc = &pi->tc[p->cl]; + tc->params = *p; + rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode, + fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate, + p->weight, p->pktsize, sleep_ok); + if (rc == 0) + tc->flags |= TX_SC_OK; + else { /* - * Translate any unset parameters into the firmware's - * nomenclature and/or fail the call if the parameters - * are required ... + * Unknown state at this point, see tc->params for what was + * attempted. 
*/ - if (p->u.params.rateunit < 0 || p->u.params.ratemode < 0 || - p->u.params.channel < 0 || p->u.params.cl < 0) { - rc = EINVAL; - goto done; - } - if (p->u.params.minrate < 0) - p->u.params.minrate = 0; - if (p->u.params.maxrate < 0) { - if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL || - p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) { - rc = EINVAL; - goto done; - } else - p->u.params.maxrate = 0; - } - if (p->u.params.weight < 0) { - if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) { - rc = EINVAL; - goto done; - } else - p->u.params.weight = 0; - } - if (p->u.params.pktsize < 0) { - if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL || - p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) { - rc = EINVAL; - goto done; - } else - p->u.params.pktsize = 0; - } - - /* See what the firmware thinks of the request ... */ - rc = -t4_sched_params(sc, fw_type, fw_level, fw_mode, - fw_rateunit, fw_ratemode, p->u.params.channel, - p->u.params.cl, p->u.params.minrate, p->u.params.maxrate, - p->u.params.weight, p->u.params.pktsize, 1); - goto done; + tc->flags &= ~TX_SC_OK; } + end_synchronized_op(sc, sleep_ok ? 
0 : LOCK_HELD); - rc = EINVAL; -done: - end_synchronized_op(sc, 0); return (rc); } static int +set_sched_class(struct adapter *sc, struct t4_sched_params *p) +{ + + if (p->type != SCHED_CLASS_TYPE_PACKET) + return (EINVAL); + + if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG) + return (set_sched_class_config(sc, p->u.config.minmax)); + + if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS) + return (set_sched_class_params(sc, &p->u.params, 1)); + + return (EINVAL); +} + +static int set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) { struct port_info *pi = NULL; @@ -8458,11 +8575,6 @@ set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) if (rc) return (rc); - if (!(sc->flags & FULL_INIT_DONE)) { - rc = EAGAIN; - goto done; - } - if (p->port >= sc->params.nports) { rc = EINVAL; goto done; @@ -8471,7 +8583,14 @@ set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) /* XXX: Only supported for the main VI. */ pi = sc->port[p->port]; vi = &pi->vi[0]; - if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, 7)) { + if (!(vi->flags & VI_INIT_DONE)) { + /* tx queues not set up yet */ + rc = EAGAIN; + goto done; + } + + if (!in_range(p->queue, 0, vi->ntxq - 1) || + !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1)) { rc = EINVAL; goto done; } diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index ad1da55..75a7c8a 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -248,6 +248,7 @@ static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); static int sysctl_uint16(SYSCTL_HANDLER_ARGS); static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); +static int sysctl_tc(SYSCTL_HANDLER_ARGS); static counter_u64_t extfree_refs; static counter_u64_t extfree_rels; @@ -3434,6 +3435,7 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | V_TXPKT_VF(vi->viid)); + txq->tc_idx = -1; txq->sdesc = malloc(eq->sidx * sizeof(struct 
tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); @@ -3451,6 +3453,10 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", "producer index"); + SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", + CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", + "traffic class (-1 means none)"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, &txq->txcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", @@ -4684,3 +4690,78 @@ sysctl_bufsizes(SYSCTL_HANDLER_ARGS) sbuf_delete(&sb); return (rc); } + +static int +sysctl_tc(SYSCTL_HANDLER_ARGS) +{ + struct vi_info *vi = arg1; + struct port_info *pi; + struct adapter *sc; + struct sge_txq *txq; + struct tx_sched_class *tc; + int qidx = arg2, rc, tc_idx; + uint32_t fw_queue, fw_class; + + MPASS(qidx >= 0 && qidx < vi->ntxq); + pi = vi->pi; + sc = pi->adapter; + txq = &sc->sge.txq[vi->first_txq + qidx]; + + tc_idx = txq->tc_idx; + rc = sysctl_handle_int(oidp, &tc_idx, 0, req); + if (rc != 0 || req->newptr == NULL) + return (rc); + + /* Note that -1 is legitimate input (it means unbind). */ + if (tc_idx < -1 || tc_idx >= sc->chip_params->nsched_cls) + return (EINVAL); + + rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4stc"); + if (rc) + return (rc); + + if (tc_idx == txq->tc_idx) { + rc = 0; /* No change, nothing to do. */ + goto done; + } + + fw_queue = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | + V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id); + + if (tc_idx == -1) + fw_class = 0xffffffff; /* Unbind. */ + else { + /* + * Bind to a different class. Ethernet txq's are only allowed + * to bind to cl-rl mode-class for now. XXX: too restrictive. + */ + tc = &pi->tc[tc_idx]; + if (tc->flags & TX_SC_OK && + tc->params.level == SCHED_CLASS_LEVEL_CL_RL && + tc->params.mode == SCHED_CLASS_MODE_CLASS) { + /* Ok to proceed. 
*/ + fw_class = tc_idx; + } else { + rc = tc->flags & TX_SC_OK ? EBUSY : ENXIO; + goto done; + } + } + + rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); + if (rc == 0) { + if (txq->tc_idx != -1) { + tc = &pi->tc[txq->tc_idx]; + MPASS(tc->refcount > 0); + tc->refcount--; + } + if (tc_idx != -1) { + tc = &pi->tc[tc_idx]; + tc->refcount++; + } + txq->tc_idx = tc_idx; + } +done: + end_synchronized_op(sc, 0); + return (rc); +} diff --git a/sys/dev/dwc/if_dwc.c b/sys/dev/dwc/if_dwc.c index 988516e..a34879b 100644 --- a/sys/dev/dwc/if_dwc.c +++ b/sys/dev/dwc/if_dwc.c @@ -587,19 +587,25 @@ dwc_setup_rxfilter(struct dwc_softc *sc) struct ifmultiaddr *ifma; struct ifnet *ifp; uint8_t *eaddr, val; - uint32_t crc, ffval, hashbit, hashreg, hi, lo, reg; + uint32_t crc, ffval, hashbit, hashreg, hi, lo, hash[8]; + int nhash, i; DWC_ASSERT_LOCKED(sc); ifp = sc->ifp; + nhash = sc->mactype == DWC_GMAC_ALT_DESC ? 2 : 8; /* * Set the multicast (group) filter hash. */ - if ((ifp->if_flags & IFF_ALLMULTI)) + if ((ifp->if_flags & IFF_ALLMULTI) != 0) { ffval = (FRAME_FILTER_PM); - else { + for (i = 0; i < nhash; i++) + hash[i] = ~0; + } else { ffval = (FRAME_FILTER_HMC); + for (i = 0; i < nhash; i++) + hash[i] = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &sc->ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) @@ -609,12 +615,11 @@ dwc_setup_rxfilter(struct dwc_softc *sc) /* Take lower 8 bits and reverse it */ val = bitreverse(~crc & 0xff); + if (sc->mactype == DWC_GMAC_ALT_DESC) + val >>= nhash; /* Only need lower 6 bits */ hashreg = (val >> 5); hashbit = (val & 31); - - reg = READ4(sc, HASH_TABLE_REG(hashreg)); - reg |= (1 << hashbit); - WRITE4(sc, HASH_TABLE_REG(hashreg), reg); + hash[hashreg] |= (1 << hashbit); } if_maddr_runlock(ifp); } @@ -635,6 +640,13 @@ dwc_setup_rxfilter(struct dwc_softc *sc) WRITE4(sc, MAC_ADDRESS_LOW(0), lo); WRITE4(sc, MAC_ADDRESS_HIGH(0), hi); WRITE4(sc, MAC_FRAME_FILTER, ffval); + if (sc->mactype == 
DWC_GMAC_ALT_DESC) { + WRITE4(sc, GMAC_MAC_HTLOW, hash[0]); + WRITE4(sc, GMAC_MAC_HTHIGH, hash[1]); + } else { + for (i = 0; i < nhash; i++) + WRITE4(sc, HASH_TABLE_REG(i), hash[i]); + } } static int @@ -759,6 +771,9 @@ dwc_rxfinish_locked(struct dwc_softc *sc) m->m_len = len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + /* Remove trailing FCS */ + m_adj(m, -ETHER_CRC_LEN); + DWC_UNLOCK(sc); (*ifp->if_input)(ifp, m); DWC_LOCK(sc); diff --git a/sys/dev/dwc/if_dwc.h b/sys/dev/dwc/if_dwc.h index c9403da..d88ca0d 100644 --- a/sys/dev/dwc/if_dwc.h +++ b/sys/dev/dwc/if_dwc.h @@ -53,6 +53,8 @@ #define FRAME_FILTER_HMC (1 << 2) #define FRAME_FILTER_HUC (1 << 1) #define FRAME_FILTER_PR (1 << 0) /* All Incoming Frames */ +#define GMAC_MAC_HTHIGH 0x08 +#define GMAC_MAC_HTLOW 0x0c #define GMII_ADDRESS 0x10 #define GMII_ADDRESS_PA_MASK 0x1f /* Phy device */ #define GMII_ADDRESS_PA_SHIFT 11 diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 120111d..83e1e83 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -5154,7 +5154,7 @@ igb_rxeof(struct igb_queue *que, int count, int *done) default: /* XXX fallthrough */ M_HASHTYPE_SET(rxr->fmp, - M_HASHTYPE_OPAQUE); + M_HASHTYPE_OPAQUE_HASH); } } else { #ifndef IGB_LEGACY_TX diff --git a/sys/dev/flash/mx25l.c b/sys/dev/flash/mx25l.c index 832e790..b41986f 100644 --- a/sys/dev/flash/mx25l.c +++ b/sys/dev/flash/mx25l.c @@ -130,6 +130,9 @@ struct mx25l_flash_ident flash_devices[] = { { "w25q64bv", 0xef, 0x4017, 64 * 1024, 128, FL_ERASE_4K }, { "w25q128", 0xef, 0x4018, 64 * 1024, 256, FL_ERASE_4K }, { "w25q256", 0xef, 0x4019, 64 * 1024, 512, FL_ERASE_4K }, + + /* Atmel */ + { "at25df641", 0x1f, 0x4800, 64 * 1024, 128, FL_ERASE_4K }, }; static uint8_t diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index f8b3b46..eac9d2b 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -79,22 +79,47 @@ static int gpiobus_pin_toggle(device_t, device_t, uint32_t); * data will be moved into struct 
resource. */ #ifdef INTRNG +static void +gpio_destruct_map_data(struct intr_map_data *map_data) +{ + + KASSERT(map_data->type == INTR_MAP_DATA_GPIO, + ("%s: bad map_data type %d", __func__, map_data->type)); + + free(map_data, M_DEVBUF); +} + struct resource * gpio_alloc_intr_resource(device_t consumer_dev, int *rid, u_int alloc_flags, gpio_pin_t pin, uint32_t intr_mode) { - u_int irqnum; - - /* - * Allocate new fictitious interrupt number and store configuration - * into it. - */ - irqnum = intr_gpio_map_irq(pin->dev, pin->pin, pin->flags, intr_mode); - if (irqnum == INTR_IRQ_INVALID) + int rv; + u_int irq; + struct intr_map_data_gpio *gpio_data; + struct resource *res; + + gpio_data = malloc(sizeof(*gpio_data), M_DEVBUF, M_WAITOK | M_ZERO); + gpio_data->hdr.type = INTR_MAP_DATA_GPIO; + gpio_data->hdr.destruct = gpio_destruct_map_data; + gpio_data->gpio_pin_num = pin->pin; + gpio_data->gpio_pin_flags = pin->flags; + gpio_data->gpio_intr_mode = intr_mode; + + rv = intr_map_irq(pin->dev, 0, (struct intr_map_data *)gpio_data, + &irq); + if (rv != 0) { + gpio_destruct_map_data((struct intr_map_data *)gpio_data); return (NULL); + } - return (bus_alloc_resource(consumer_dev, SYS_RES_IRQ, rid, - irqnum, irqnum, 1, alloc_flags)); + res = bus_alloc_resource(consumer_dev, SYS_RES_IRQ, rid, irq, irq, 1, + alloc_flags); + if (res == NULL) { + gpio_destruct_map_data((struct intr_map_data *)gpio_data); + return (NULL); + } + rman_set_virtual(res, gpio_data); + return (res); } #else struct resource * diff --git a/sys/dev/gpio/gpiobusvar.h b/sys/dev/gpio/gpiobusvar.h index f6b6d68..a271905 100644 --- a/sys/dev/gpio/gpiobusvar.h +++ b/sys/dev/gpio/gpiobusvar.h @@ -70,6 +70,13 @@ struct gpiobus_pin_data char *name; /* pin name. 
*/ }; +struct intr_map_data_gpio { + struct intr_map_data hdr; + u_int gpio_pin_num; + u_int gpio_pin_flags; + u_int gpio_intr_mode; +}; + struct gpiobus_softc { struct mtx sc_mtx; /* bus mutex */ diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index 04e998e..1199d20 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -1300,6 +1300,7 @@ netvsc_recv(struct hv_vmbus_channel *chan, netvsc_packet *packet, struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; + int hash_type = M_HASHTYPE_OPAQUE_HASH; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); @@ -1430,8 +1431,6 @@ skip: } if (hash_info != NULL && hash_value != NULL) { - int hash_type = M_HASHTYPE_OPAQUE; - rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = hash_value->hash_value; if ((hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) == @@ -1465,14 +1464,15 @@ skip: break; } } - M_HASHTYPE_SET(m_new, hash_type); } else { - if (hash_value != NULL) + if (hash_value != NULL) { m_new->m_pkthdr.flowid = hash_value->hash_value; - else + } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; - M_HASHTYPE_SET(m_new, M_HASHTYPE_OPAQUE); + hash_type = M_HASHTYPE_OPAQUE; + } } + M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c index c1d415f..88ba7ca 100644 --- a/sys/dev/hyperv/vmbus/hv_channel.c +++ b/sys/dev/hyperv/vmbus/hv_channel.c @@ -37,12 +37,16 @@ __FBSDID("$FreeBSD$"); #include <sys/lock.h> #include <sys/mutex.h> #include <sys/sysctl.h> + +#include <machine/atomic.h> #include <machine/bus.h> + #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> #include <dev/hyperv/vmbus/hv_vmbus_priv.h> +#include <dev/hyperv/vmbus/vmbus_reg.h> #include <dev/hyperv/vmbus/vmbus_var.h> static int vmbus_channel_create_gpadl_header( @@ -62,17 
+66,16 @@ static void VmbusProcessChannelEvent(void* channel, int pending); static void vmbus_channel_set_event(hv_vmbus_channel *channel) { - hv_vmbus_monitor_page *monitor_page; - if (channel->offer_msg.monitor_allocated) { - /* Each uint32_t represents 32 channels */ - synch_set_bit((channel->offer_msg.child_rel_id & 31), - ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page - + ((channel->offer_msg.child_rel_id >> 5)))); + struct vmbus_softc *sc = vmbus_get_softc(); + hv_vmbus_monitor_page *monitor_page; + uint32_t chanid = channel->offer_msg.child_rel_id; - monitor_page = (hv_vmbus_monitor_page *) - hv_vmbus_g_connection.monitor_page_2; + atomic_set_long( + &sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT], + 1UL << (chanid & VMBUS_EVTFLAG_MASK)); + monitor_page = sc->vmbus_mnf2; synch_set_bit(channel->monitor_bit, (uint32_t *)&monitor_page-> trigger_group[channel->monitor_group].u.pending); diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c index 8a0bb09..e4dcd19 100644 --- a/sys/dev/hyperv/vmbus/hv_connection.c +++ b/sys/dev/hyperv/vmbus/hv_connection.c @@ -74,8 +74,8 @@ hv_vmbus_get_next_version(uint32_t current_ver) * Negotiate the highest supported hypervisor version. 
*/ static int -hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, - uint32_t version) +hv_vmbus_negotiate_version(struct vmbus_softc *sc, + hv_vmbus_channel_msg_info *msg_info, uint32_t version) { int ret = 0; hv_vmbus_channel_initiate_contact *msg; @@ -86,14 +86,9 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; msg->vmbus_version_requested = version; - msg->interrupt_page = hv_get_phys_addr( - hv_vmbus_g_connection.interrupt_page); - - msg->monitor_page_1 = hv_get_phys_addr( - hv_vmbus_g_connection.monitor_page_1); - - msg->monitor_page_2 = hv_get_phys_addr( - hv_vmbus_g_connection.monitor_page_2); + msg->interrupt_page = sc->vmbus_evtflags_dma.hv_paddr; + msg->monitor_page_1 = sc->vmbus_mnf1_dma.hv_paddr; + msg->monitor_page_2 = sc->vmbus_mnf2_dma.hv_paddr; /** * Add to list before we send the request since we may receive the @@ -150,7 +145,7 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, * Send a connect request on the partition service connection */ int -hv_vmbus_connect(void) +hv_vmbus_connect(struct vmbus_softc *sc) { int ret = 0; uint32_t version; @@ -176,34 +171,6 @@ hv_vmbus_connect(void) mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", NULL, MTX_DEF); - /** - * Setup the vmbus event connection for channel interrupt abstraction - * stuff - */ - hv_vmbus_g_connection.interrupt_page = malloc( - PAGE_SIZE, M_DEVBUF, - M_WAITOK | M_ZERO); - - hv_vmbus_g_connection.recv_interrupt_page = - hv_vmbus_g_connection.interrupt_page; - - hv_vmbus_g_connection.send_interrupt_page = - ((uint8_t *) hv_vmbus_g_connection.interrupt_page + - (PAGE_SIZE >> 1)); - - /** - * Set up the monitor notification facility. 
The 1st page for - * parent->child and the 2nd page for child->parent - */ - hv_vmbus_g_connection.monitor_page_1 = malloc( - PAGE_SIZE, - M_DEVBUF, - M_WAITOK | M_ZERO); - hv_vmbus_g_connection.monitor_page_2 = malloc( - PAGE_SIZE, - M_DEVBUF, - M_WAITOK | M_ZERO); - msg_info = (hv_vmbus_channel_msg_info*) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_initiate_contact), @@ -217,7 +184,7 @@ hv_vmbus_connect(void) version = HV_VMBUS_VERSION_CURRENT; do { - ret = hv_vmbus_negotiate_version(msg_info, version); + ret = hv_vmbus_negotiate_version(sc, msg_info, version); if (ret == EWOULDBLOCK) { /* * We timed out. @@ -251,14 +218,6 @@ hv_vmbus_connect(void) mtx_destroy(&hv_vmbus_g_connection.channel_lock); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); - if (hv_vmbus_g_connection.interrupt_page != NULL) { - free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); - hv_vmbus_g_connection.interrupt_page = NULL; - } - - free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF); - free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF); - if (msg_info) { sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); @@ -281,8 +240,6 @@ hv_vmbus_disconnect(void) ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload)); - free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); - mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); free(hv_vmbus_g_connection.channels, M_DEVBUF); @@ -343,14 +300,13 @@ vmbus_event_proc(struct vmbus_softc *sc, int cpu) } void -vmbus_event_proc_compat(struct vmbus_softc *sc __unused, int cpu) +vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { - vmbus_event_flags_proc( - hv_vmbus_g_connection.recv_interrupt_page, + vmbus_event_flags_proc(sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } @@ -395,14 +351,12 @@ int 
hv_vmbus_post_message(void *buffer, size_t bufferLen) int hv_vmbus_set_event(hv_vmbus_channel *channel) { + struct vmbus_softc *sc = vmbus_get_softc(); int ret = 0; - uint32_t child_rel_id = channel->offer_msg.child_rel_id; - - /* Each uint32_t represents 32 channels */ + uint32_t chanid = channel->offer_msg.child_rel_id; - synch_set_bit(child_rel_id & 31, - (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page - + (child_rel_id >> 5)))); + atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT], + 1UL << (chanid & VMBUS_EVTFLAG_MASK)); ret = hv_vmbus_signal_event(channel->signal_event_param); return (ret); diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h index f7ea849..0008226 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h +++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h @@ -244,25 +244,7 @@ typedef enum { typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; - /** - * Represents channel interrupts. Each bit position - * represents a channel. - * When a channel sends an interrupt via VMBUS, it - * finds its bit in the send_interrupt_page, set it and - * calls Hv to generate a port event. 
The other end - * receives the port event and parse the - * recv_interrupt_page to see which bit is set - */ - void *interrupt_page; - void *send_interrupt_page; - void *recv_interrupt_page; - /* - * 2 pages - 1st page for parent->child - * notification and 2nd is child->parent - * notification - */ - void *monitor_page_1; - void *monitor_page_2; + TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** @@ -440,7 +422,8 @@ int hv_vmbus_child_device_unregister( /** * Connection interfaces */ -int hv_vmbus_connect(void); +struct vmbus_softc; +int hv_vmbus_connect(struct vmbus_softc *); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); diff --git a/sys/dev/hyperv/vmbus/vmbus.c b/sys/dev/hyperv/vmbus/vmbus.c index bd63067..b5d1561 100644 --- a/sys/dev/hyperv/vmbus/vmbus.c +++ b/sys/dev/hyperv/vmbus/vmbus.c @@ -309,30 +309,50 @@ vmbus_synic_teardown(void *arg) static int vmbus_dma_alloc(struct vmbus_softc *sc) { + bus_dma_tag_t parent_dtag; + uint8_t *evtflags; int cpu; + parent_dtag = bus_get_dma_tag(sc->vmbus_dev); CPU_FOREACH(cpu) { void *ptr; /* * Per-cpu messages and event flags. 
*/ - ptr = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), - PAGE_SIZE, 0, PAGE_SIZE, - VMBUS_PCPU_PTR(sc, message_dma, cpu), + ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, message, cpu) = ptr; - ptr = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), - PAGE_SIZE, 0, PAGE_SIZE, - VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), + ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr; } + + evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (evtflags == NULL) + return ENOMEM; + sc->vmbus_rx_evtflags = (u_long *)evtflags; + sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2)); + sc->vmbus_evtflags = evtflags; + + sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (sc->vmbus_mnf1 == NULL) + return ENOMEM; + + sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, + PAGE_SIZE, &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (sc->vmbus_mnf2 == NULL) + return ENOMEM; + return 0; } @@ -341,6 +361,21 @@ vmbus_dma_free(struct vmbus_softc *sc) { int cpu; + if (sc->vmbus_evtflags != NULL) { + hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags); + sc->vmbus_evtflags = NULL; + sc->vmbus_rx_evtflags = NULL; + sc->vmbus_tx_evtflags = NULL; + } + if (sc->vmbus_mnf1 != NULL) { + hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1); + sc->vmbus_mnf1 = NULL; + } + if (sc->vmbus_mnf2 != NULL) { + hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2); + sc->vmbus_mnf2 = NULL; + } + CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) { hyperv_dmamem_free( @@ -609,8 
+644,7 @@ vmbus_bus_init(void) /* * Connect to VMBus in the root partition */ - ret = hv_vmbus_connect(); - + ret = hv_vmbus_connect(sc); if (ret != 0) goto cleanup; diff --git a/sys/dev/hyperv/vmbus/vmbus_reg.h b/sys/dev/hyperv/vmbus/vmbus_reg.h index 4be7f1b..4be8614 100644 --- a/sys/dev/hyperv/vmbus/vmbus_reg.h +++ b/sys/dev/hyperv/vmbus/vmbus_reg.h @@ -66,6 +66,7 @@ CTASSERT(sizeof(struct vmbus_message) == VMBUS_MSG_SIZE); #define VMBUS_EVTFLAG_SHIFT 5 #endif #define VMBUS_EVTFLAG_LEN (1 << VMBUS_EVTFLAG_SHIFT) +#define VMBUS_EVTFLAG_MASK (VMBUS_EVTFLAG_LEN - 1) #define VMBUS_EVTFLAGS_SIZE 256 struct vmbus_evtflags { diff --git a/sys/dev/hyperv/vmbus/vmbus_var.h b/sys/dev/hyperv/vmbus/vmbus_var.h index 440f4d3..70ac5ee 100644 --- a/sys/dev/hyperv/vmbus/vmbus_var.h +++ b/sys/dev/hyperv/vmbus/vmbus_var.h @@ -51,7 +51,7 @@ struct vmbus_pcpu_data { struct vmbus_message *message; /* shared messages */ uint32_t vcpuid; /* virtual cpuid */ int event_flags_cnt;/* # of event flags */ - struct vmbus_evtflags *event_flags; /* shared event flags */ + struct vmbus_evtflags *event_flags; /* event flags from host */ /* Rarely used fields */ struct hyperv_dma message_dma; /* busdma glue */ @@ -63,12 +63,26 @@ struct vmbus_pcpu_data { struct vmbus_softc { void (*vmbus_event_proc)(struct vmbus_softc *, int); + u_long *vmbus_tx_evtflags; + /* event flags to host */ + void *vmbus_mnf2; /* monitored by host */ + + u_long *vmbus_rx_evtflags; + /* compat evtflgs from host */ struct vmbus_pcpu_data vmbus_pcpu[MAXCPU]; /* Rarely used fields */ device_t vmbus_dev; int vmbus_idtvec; uint32_t vmbus_flags; /* see VMBUS_FLAG_ */ + + /* Shared memory for vmbus_{rx,tx}_evtflags */ + void *vmbus_evtflags; + struct hyperv_dma vmbus_evtflags_dma; + + void *vmbus_mnf1; /* monitored by VM, unused */ + struct hyperv_dma vmbus_mnf1_dma; + struct hyperv_dma vmbus_mnf2_dma; }; #define VMBUS_FLAG_ATTACHED 0x0001 /* vmbus was attached */ diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c index 
abdfe61..1a10c25 100644 --- a/sys/dev/ioat/ioat.c +++ b/sys/dev/ioat/ioat.c @@ -27,6 +27,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_ddb.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> @@ -48,6 +50,10 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> #include <machine/stdarg.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + #include "ioat.h" #include "ioat_hw.h" #include "ioat_internal.h" @@ -2090,3 +2096,96 @@ ioat_drain_locked(struct ioat_softc *ioat) while (ioat->refcnt > 0) msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0); } + +#ifdef DDB +#define _db_show_lock(lo) LOCK_CLASS(lo)->lc_ddb_show(lo) +#define db_show_lock(lk) _db_show_lock(&(lk)->lock_object) +DB_SHOW_COMMAND(ioat, db_show_ioat) +{ + struct ioat_softc *sc; + unsigned idx; + + if (!have_addr) + goto usage; + idx = (unsigned)addr; + if (addr >= ioat_channel_index) + goto usage; + + sc = ioat_channel[idx]; + db_printf("ioat softc at %p\n", sc); + if (sc == NULL) + return; + + db_printf(" version: %d\n", sc->version); + db_printf(" chan_idx: %u\n", sc->chan_idx); + db_printf(" submit_lock: "); + db_show_lock(&sc->submit_lock); + + db_printf(" capabilities: %b\n", (int)sc->capabilities, + IOAT_DMACAP_STR); + db_printf(" cached_intrdelay: %u\n", sc->cached_intrdelay); + db_printf(" *comp_update: 0x%jx\n", (uintmax_t)*sc->comp_update); + + db_printf(" timer:\n"); + db_printf(" c_time: %ju\n", (uintmax_t)sc->timer.c_time); + db_printf(" c_arg: %p\n", sc->timer.c_arg); + db_printf(" c_func: %p\n", sc->timer.c_func); + db_printf(" c_lock: %p\n", sc->timer.c_lock); + db_printf(" c_flags: 0x%x\n", (unsigned)sc->timer.c_flags); + + db_printf(" quiescing: %d\n", (int)sc->quiescing); + db_printf(" destroying: %d\n", (int)sc->destroying); + db_printf(" is_resize_pending: %d\n", (int)sc->is_resize_pending); + db_printf(" is_completion_pending: %d\n", (int)sc->is_completion_pending); + db_printf(" is_reset_pending: %d\n", (int)sc->is_reset_pending); + db_printf(" 
is_channel_running: %d\n", (int)sc->is_channel_running); + db_printf(" intrdelay_supported: %d\n", (int)sc->intrdelay_supported); + + db_printf(" head: %u\n", sc->head); + db_printf(" tail: %u\n", sc->tail); + db_printf(" hw_head: %u\n", sc->hw_head); + db_printf(" ring_size_order: %u\n", sc->ring_size_order); + db_printf(" last_seen: 0x%lx\n", sc->last_seen); + db_printf(" ring: %p\n", sc->ring); + + db_printf(" cleanup_lock: "); + db_show_lock(&sc->cleanup_lock); + + db_printf(" refcnt: %u\n", sc->refcnt); +#ifdef INVARIANTS + CTASSERT(IOAT_NUM_REF_KINDS == 2); + db_printf(" refkinds: [ENG=%u, DESCR=%u]\n", sc->refkinds[0], + sc->refkinds[1]); +#endif + db_printf(" stats:\n"); + db_printf(" interrupts: %lu\n", sc->stats.interrupts); + db_printf(" descriptors_processed: %lu\n", sc->stats.descriptors_processed); + db_printf(" descriptors_error: %lu\n", sc->stats.descriptors_error); + db_printf(" descriptors_submitted: %lu\n", sc->stats.descriptors_submitted); + + db_printf(" channel_halts: %u\n", sc->stats.channel_halts); + db_printf(" last_halt_chanerr: %u\n", sc->stats.last_halt_chanerr); + + if (db_pager_quit) + return; + + db_printf(" hw status:\n"); + db_printf(" status: 0x%lx\n", ioat_get_chansts(sc)); + db_printf(" chanctrl: 0x%x\n", + (unsigned)ioat_read_2(sc, IOAT_CHANCTRL_OFFSET)); + db_printf(" chancmd: 0x%x\n", + (unsigned)ioat_read_1(sc, IOAT_CHANCMD_OFFSET)); + db_printf(" dmacount: 0x%x\n", + (unsigned)ioat_read_2(sc, IOAT_DMACOUNT_OFFSET)); + db_printf(" chainaddr: 0x%lx\n", + ioat_read_double_4(sc, IOAT_CHAINADDR_OFFSET_LOW)); + db_printf(" chancmp: 0x%lx\n", + ioat_read_double_4(sc, IOAT_CHANCMP_OFFSET_LOW)); + db_printf(" chanerr: %b\n", + (int)ioat_read_4(sc, IOAT_CHANERR_OFFSET), IOAT_CHANERR_STR); + return; +usage: + db_printf("usage: show ioat <0-%u>\n", ioat_channel_index); + return; +} +#endif /* DDB */ diff --git a/sys/dev/iscsi/iscsi_ioctl.h b/sys/dev/iscsi/iscsi_ioctl.h index 21da22e..a494cd5 100644 --- a/sys/dev/iscsi/iscsi_ioctl.h +++ 
b/sys/dev/iscsi/iscsi_ioctl.h @@ -68,7 +68,7 @@ struct iscsi_session_conf { int isc_iser; char isc_offload[ISCSI_OFFLOAD_LEN]; int isc_enable; - int isc_spare[1]; + int isc_spare[4]; }; /* @@ -77,6 +77,7 @@ struct iscsi_session_conf { */ struct iscsi_session_limits { size_t isl_max_data_segment_length; + int isl_spare[8]; }; /* @@ -89,12 +90,13 @@ struct iscsi_session_state { int iss_header_digest; int iss_data_digest; int iss_max_data_segment_length; + int iss_max_burst_length; + int iss_first_burst_length; int iss_immediate_data; int iss_connected; char iss_reason[ISCSI_REASON_LEN]; char iss_offload[ISCSI_OFFLOAD_LEN]; - int iss_max_burst_length; - int iss_first_burst_length; + int iss_spare[4]; }; /* @@ -107,7 +109,7 @@ struct iscsi_daemon_request { uint16_t idr_tsih; uint16_t idr_spare_cid; struct iscsi_session_limits idr_limits; - int idr_spare[2]; + int idr_spare[4]; }; struct iscsi_daemon_handoff { @@ -120,11 +122,11 @@ struct iscsi_daemon_handoff { uint32_t idh_statsn; int idh_header_digest; int idh_data_digest; - int idh_initial_r2t; - int idh_immediate_data; size_t idh_max_data_segment_length; size_t idh_max_burst_length; size_t idh_first_burst_length; + int idh_immediate_data; + int idh_initial_r2t; int idh_spare[4]; }; @@ -156,7 +158,7 @@ struct iscsi_daemon_fail { */ struct iscsi_daemon_connect { - int idc_session_id; + unsigned int idc_session_id; int idc_iser; int idc_domain; int idc_socktype; @@ -169,7 +171,7 @@ struct iscsi_daemon_connect { }; struct iscsi_daemon_send { - int ids_session_id; + unsigned int ids_session_id; void *ids_bhs; size_t ids_spare; void *ids_spare2; @@ -179,7 +181,7 @@ struct iscsi_daemon_send { }; struct iscsi_daemon_receive { - int idr_session_id; + unsigned int idr_session_id; void *idr_bhs; size_t idr_spare; void *idr_spare2; diff --git a/sys/dev/iwm/if_iwm.c b/sys/dev/iwm/if_iwm.c index b60d053..5821555 100644 --- a/sys/dev/iwm/if_iwm.c +++ b/sys/dev/iwm/if_iwm.c @@ -866,10 +866,28 @@ iwm_alloc_rx_ring(struct iwm_softc 
*sc, struct iwm_rx_ring *ring) goto fail; } + /* Allocate spare bus_dmamap_t for iwm_rx_addbuf() */ + error = bus_dmamap_create(ring->data_dmat, 0, &ring->spare_map); + if (error != 0) { + device_printf(sc->sc_dev, + "%s: could not create RX buf DMA map, error %d\n", + __func__, error); + goto fail; + } /* * Allocate and map RX buffers. */ for (i = 0; i < IWM_RX_RING_COUNT; i++) { + struct iwm_rx_data *data = &ring->data[i]; + error = bus_dmamap_create(ring->data_dmat, 0, &data->map); + if (error != 0) { + device_printf(sc->sc_dev, + "%s: could not create RX buf DMA map, error %d\n", + __func__, error); + goto fail; + } + data->m = NULL; + if ((error = iwm_rx_addbuf(sc, IWM_RBUF_SIZE, i)) != 0) { goto fail; } @@ -923,6 +941,10 @@ iwm_free_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) data->map = NULL; } } + if (ring->spare_map != NULL) { + bus_dmamap_destroy(ring->data_dmat, ring->spare_map); + ring->spare_map = NULL; + } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; @@ -2119,6 +2141,7 @@ iwm_rx_addbuf(struct iwm_softc *sc, int size, int idx) struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &ring->data[idx]; struct mbuf *m; + bus_dmamap_t dmamap = NULL; int error; bus_addr_t paddr; @@ -2126,28 +2149,26 @@ iwm_rx_addbuf(struct iwm_softc *sc, int size, int idx) if (m == NULL) return ENOBUFS; - if (data->m != NULL) - bus_dmamap_unload(ring->data_dmat, data->map); - m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; - error = bus_dmamap_create(ring->data_dmat, 0, &data->map); - if (error != 0) { - device_printf(sc->sc_dev, - "%s: could not create RX buf DMA map, error %d\n", - __func__, error); - goto fail; - } - data->m = m; - error = bus_dmamap_load(ring->data_dmat, data->map, - mtod(data->m, void *), IWM_RBUF_SIZE, iwm_dma_map_addr, + error = bus_dmamap_load(ring->data_dmat, ring->spare_map, + mtod(m, void *), IWM_RBUF_SIZE, iwm_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0 && error != 
EFBIG) { device_printf(sc->sc_dev, - "%s: can't not map mbuf, error %d\n", __func__, - error); + "%s: can't map mbuf, error %d\n", __func__, error); goto fail; } + + if (data->m != NULL) + bus_dmamap_unload(ring->data_dmat, data->map); + + /* Swap ring->spare_map with data->map */ + dmamap = data->map; + data->map = ring->spare_map; + ring->spare_map = dmamap; + bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREREAD); + data->m = m; /* Update RX descriptor. */ ring->desc[idx] = htole32(paddr >> 8); @@ -2156,6 +2177,7 @@ iwm_rx_addbuf(struct iwm_softc *sc, int size, int idx) return 0; fail: + m_free(m); return error; } diff --git a/sys/dev/iwm/if_iwmvar.h b/sys/dev/iwm/if_iwmvar.h index ef2eac4..e923aa4 100644 --- a/sys/dev/iwm/if_iwmvar.h +++ b/sys/dev/iwm/if_iwmvar.h @@ -289,6 +289,7 @@ struct iwm_rx_ring { uint32_t *desc; struct iwm_rb_status *stat; struct iwm_rx_data data[IWM_RX_RING_COUNT]; + bus_dmamap_t spare_map; /* for iwm_rx_addbuf() */ bus_dma_tag_t data_dmat; int cur; }; diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c index 2f5663f..61c2ef0 100644 --- a/sys/dev/ixgbe/ix_txrx.c +++ b/sys/dev/ixgbe/ix_txrx.c @@ -1964,7 +1964,7 @@ ixgbe_rxeof(struct ix_queue *que) #endif default: M_HASHTYPE_SET(sendmp, - M_HASHTYPE_OPAQUE); + M_HASHTYPE_OPAQUE_HASH); } } else { sendmp->m_pkthdr.flowid = que->msix; diff --git a/sys/dev/ixl/ixl_txrx.c b/sys/dev/ixl/ixl_txrx.c index fc57f4e..d3aa7bf 100644 --- a/sys/dev/ixl/ixl_txrx.c +++ b/sys/dev/ixl/ixl_txrx.c @@ -1453,10 +1453,10 @@ ixl_ptype_to_hash(u8 ptype) ex = decoded.outer_frag; if (!decoded.known) - return M_HASHTYPE_OPAQUE; + return M_HASHTYPE_OPAQUE_HASH; if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) - return M_HASHTYPE_OPAQUE; + return M_HASHTYPE_OPAQUE_HASH; /* Note: anything that gets to this point is IP */ if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { @@ -1492,7 +1492,7 @@ ixl_ptype_to_hash(u8 ptype) } } /* We should never get here!! 
*/ - return M_HASHTYPE_OPAQUE; + return M_HASHTYPE_OPAQUE_HASH; } #endif /* RSS */ diff --git a/sys/dev/mii/miidevs b/sys/dev/mii/miidevs index e2c408f..ef6550d 100644 --- a/sys/dev/mii/miidevs +++ b/sys/dev/mii/miidevs @@ -343,3 +343,4 @@ model xxXAQTI XMACII 0x0000 XaQti Corp. XMAC II gigabit interface /* SMC */ model SMC LAN8710A 0x000F SMC LAN8710A 10/100 interface +model SMC LAN8700 0x000C SMC LAN8700 10/100 interface diff --git a/sys/dev/mii/smscphy.c b/sys/dev/mii/smscphy.c index 1d6ce61..43b7087 100644 --- a/sys/dev/mii/smscphy.c +++ b/sys/dev/mii/smscphy.c @@ -77,6 +77,7 @@ DRIVER_MODULE(smscphy, miibus, smscphy_driver, smscphy_devclass, 0, 0); static const struct mii_phydesc smscphys[] = { MII_PHY_DESC(SMC, LAN8710A), + MII_PHY_DESC(SMC, LAN8700), MII_PHY_END }; diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h index 8136e57..7f64850 100644 --- a/sys/dev/mlx5/driver.h +++ b/sys/dev/mlx5/driver.h @@ -33,6 +33,7 @@ #include <linux/pci.h> #include <linux/cache.h> #include <linux/rbtree.h> +#include <linux/if_ether.h> #include <linux/semaphore.h> #include <linux/slab.h> #include <linux/vmalloc.h> diff --git a/sys/dev/mlx5/mlx5_core/mlx5_vport.c b/sys/dev/mlx5/mlx5_core/mlx5_vport.c index 5c8626b..6ef5019 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_vport.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_vport.c @@ -471,6 +471,241 @@ int mlx5_set_nic_vport_promisc(struct mlx5_core_dev *mdev, int vport, return mlx5_modify_nic_vport_context(mdev, in, sizeof(in)); } EXPORT_SYMBOL_GPL(mlx5_set_nic_vport_promisc); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + u8 *mac_addr; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC) ? 
+ 1 << MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN_MAX(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NIC_VPORT_LIST_TYPE_UC ? 
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_query_nic_vport_vlan_list(struct mlx5_core_dev *dev, + u32 vport, + u16 *vlan_list, + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + void *vlan_addr; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = 1 << MLX5_CAP_GEN_MAX(dev, log_max_vlan_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + 
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_VLAN_LIST); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + vlan_addr = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + current_uc_mac_address[i]); + vlan_list[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlan_list); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NIC_VPORT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + 
current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); + int mlx5_set_nic_vport_permanent_mac(struct mlx5_core_dev *mdev, int vport, u8 *addr) { @@ -785,6 +1020,129 @@ int mlx5_set_eswitch_cvlan_info(struct mlx5_core_dev *mdev, u8 vport, } EXPORT_SYMBOL_GPL(mlx5_set_eswitch_cvlan_info); +int mlx5_arm_vport_context_events(struct mlx5_core_dev *mdev, + u8 vport, + u32 events_mask) +{ + u32 *in; + u32 inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_ctx; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, + in, + opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, + in, + field_select.change_event, + 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & MLX5_UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_uc_address_change, + 1); + if (events_mask & MLX5_MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_mc_address_change, + 1); + if (events_mask & MLX5_VLAN_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_vlan_change, + 1); + if (events_mask & MLX5_PROMISC_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_promisc_change, + 1); + if (events_mask & MLX5_MTU_CHANGE) + MLX5_SET(nic_vport_context, + nic_vport_ctx, + event_on_mtu, + 1); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_arm_vport_context_events); + +int mlx5_query_vport_promisc(struct mlx5_core_dev 
*mdev, + u32 vport, + u8 *promisc_uc, + u8 *promisc_mc, + u8 *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); + int mlx5_query_vport_counter(struct mlx5_core_dev *dev, u8 port_num, u16 vport_num, void *out, int out_size) diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c index c618c92..a0dcd2d 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -390,6 +390,49 @@ add_eth_addr_rule_out: return (err); } +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct ifnet *ifp = priv->ifp; + 
int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + if_printf(ifp, + "ifnet vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + if_printf(ifp, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_VID, @@ -448,6 +491,7 @@ mlx5e_add_vlan_rule(struct mlx5e_priv *priv, outer_headers.first_vid); MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, vid); + mlx5e_vport_context_update_vlans(priv); break; } @@ -478,6 +522,7 @@ mlx5e_del_vlan_rule(struct mlx5e_priv *priv, case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5_del_flow_table_entry(priv->ft.vlan, priv->vlan.active_vlans_ft_ix[vid]); + mlx5e_vport_context_update_vlans(priv); break; } } @@ -628,6 +673,91 @@ mlx5e_sync_ifp_addr(struct mlx5e_priv *priv) if_maddr_runlock(ifp); } +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC); + struct ifnet *ifp = priv->ifp; + struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_eth_addr_hash_head *addr_list; + struct mlx5e_eth_addr_hash_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? 
priv->eth_addr.if_uc : priv->eth_addr.if_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], IF_LLADDR(ifp)); + else if (priv->eth_addr.broadcast_enabled) + ether_addr_copy(addr_array[i++], ifp->if_broadcastaddr); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(IF_LLADDR(ifp), hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NIC_VPORT_LIST_TYPE_UC); + struct mlx5e_eth_addr_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct mlx5e_eth_addr_hash_head *addr_list; + struct mlx5e_eth_addr_hash_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->eth_addr.if_uc : priv->eth_addr.if_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + if_printf(priv->ifp, + "ifp %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + if_printf(priv->ifp, + "Failed to modify vport %s list err(%d)\n", + is_uc ? 
"UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NIC_VPORT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + static void mlx5e_apply_ifp_addr(struct mlx5e_priv *priv) { @@ -701,6 +831,8 @@ mlx5e_set_rx_mode_core(struct mlx5e_priv *priv) ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); } void diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index d71cbb3..4a8029b 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -3001,6 +3001,13 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev) } mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr); + /* check if we should generate a random MAC address */ + if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 && + is_zero_ether_addr(dev_addr)) { + random_ether_addr(dev_addr); + if_printf(ifp, "Assigned random MAC address\n"); + } + /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index 230dfcc..6bc2c17 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -222,11 +222,11 @@ mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV6); break; default: /* Other */ - M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH); break; } #else - M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH); #endif } else { mb->m_pkthdr.flowid = rq->ix; diff --git a/sys/dev/mlx5/vport.h b/sys/dev/mlx5/vport.h index c5948e7..cf52785 100644 --- 
a/sys/dev/mlx5/vport.h +++ b/sys/dev/mlx5/vport.h @@ -38,6 +38,18 @@ int mlx5_vport_query_out_of_rx_buffer(struct mlx5_core_dev *mdev, u32 *out_of_rx_buffer); u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +int mlx5_arm_vport_context_events(struct mlx5_core_dev *mdev, + u8 vport, + u32 events_mask); +int mlx5_query_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + u8 *promisc_uc, + u8 *promisc_mc, + u8 *promisc_all); +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u32 vport, u8 *addr); int mlx5_set_nic_vport_current_mac(struct mlx5_core_dev *mdev, int vport, @@ -49,6 +61,22 @@ int mlx5_set_nic_vport_mc_list(struct mlx5_core_dev *mdev, int vport, int mlx5_set_nic_vport_promisc(struct mlx5_core_dev *mdev, int vport, bool promisc_mc, bool promisc_uc, bool promisc_all); +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size); +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size); +int mlx5_query_nic_vport_vlan_list(struct mlx5_core_dev *dev, + u32 vport, + u16 *vlan_list, + int *list_size); +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size); int mlx5_set_nic_vport_permanent_mac(struct mlx5_core_dev *mdev, int vport, u8 *addr); int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h index 7c65fbb..9709022 100644 --- a/sys/dev/nvme/nvme.h +++ b/sys/dev/nvme/nvme.h @@ -47,7 +47,8 @@ */ #define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF) -#define NVME_MAX_XFER_SIZE MAXPHYS +/* Cap nvme to 1MB transfers driver explodes with larger sizes */ +#define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? 
MAXPHYS : (1<<20)) union cap_lo_register { uint32_t raw; @@ -903,6 +904,52 @@ uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns); int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn); +/* Command building helper functions -- shared with CAM */ +static inline +void nvme_ns_flush_cmd(struct nvme_command *cmd, uint16_t nsid) +{ + + cmd->opc = NVME_OPC_FLUSH; + cmd->nsid = nsid; +} + +static inline +void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint16_t nsid, + uint64_t lba, uint32_t count) +{ + cmd->opc = rwcmd; + cmd->nsid = nsid; + *(uint64_t *)&cmd->cdw10 = lba; + cmd->cdw12 = count-1; + cmd->cdw13 = 0; + cmd->cdw14 = 0; + cmd->cdw15 = 0; +} + +static inline +void nvme_ns_write_cmd(struct nvme_command *cmd, uint16_t nsid, + uint64_t lba, uint32_t count) +{ + nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count); +} + +static inline +void nvme_ns_read_cmd(struct nvme_command *cmd, uint16_t nsid, + uint64_t lba, uint32_t count) +{ + nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count); +} + +static inline +void nvme_ns_trim_cmd(struct nvme_command *cmd, uint16_t nsid, + uint32_t num_ranges) +{ + cmd->opc = NVME_OPC_DATASET_MANAGEMENT; + cmd->nsid = nsid; + cmd->cdw10 = num_ranges - 1; + cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE; +} + #endif /* _KERNEL */ #endif /* __NVME_H__ */ diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 00277f0..fa889eb 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -27,6 +27,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_cam.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/buf.h> @@ -801,7 +803,7 @@ nvme_ctrlr_reset_task(void *arg, int pending) atomic_cmpset_32(&ctrlr->is_resetting, 1, 0); } -static void +void nvme_ctrlr_intx_handler(void *arg) { struct nvme_controller *ctrlr = arg; diff --git a/sys/dev/nvme/nvme_ns_cmd.c b/sys/dev/nvme/nvme_ns_cmd.c index b5c45dc..303e03c 100644 --- 
a/sys/dev/nvme/nvme_ns_cmd.c +++ b/sys/dev/nvme/nvme_ns_cmd.c @@ -34,20 +34,14 @@ nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload, uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; - struct nvme_command *cmd; req = nvme_allocate_request_vaddr(payload, lba_count*nvme_ns_get_sector_size(ns), cb_fn, cb_arg); if (req == NULL) return (ENOMEM); - cmd = &req->cmd; - cmd->opc = NVME_OPC_READ; - cmd->nsid = ns->id; - /* TODO: create a read command data structure */ - *(uint64_t *)&cmd->cdw10 = lba; - cmd->cdw12 = lba_count-1; + nvme_ns_read_cmd(&req->cmd, ns->id, lba, lba_count); nvme_ctrlr_submit_io_request(ns->ctrlr, req); @@ -59,7 +53,6 @@ nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; - struct nvme_command *cmd; uint64_t lba; uint64_t lba_count; @@ -67,16 +60,10 @@ nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp, if (req == NULL) return (ENOMEM); - cmd = &req->cmd; - cmd->opc = NVME_OPC_READ; - cmd->nsid = ns->id; lba = bp->bio_offset / nvme_ns_get_sector_size(ns); lba_count = bp->bio_bcount / nvme_ns_get_sector_size(ns); - - /* TODO: create a read command data structure */ - *(uint64_t *)&cmd->cdw10 = lba; - cmd->cdw12 = lba_count-1; + nvme_ns_read_cmd(&req->cmd, ns->id, lba, lba_count); nvme_ctrlr_submit_io_request(ns->ctrlr, req); @@ -88,7 +75,6 @@ nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload, uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; - struct nvme_command *cmd; req = nvme_allocate_request_vaddr(payload, lba_count*nvme_ns_get_sector_size(ns), cb_fn, cb_arg); @@ -96,13 +82,7 @@ nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload, uint64_t lba, if (req == NULL) return (ENOMEM); - cmd = &req->cmd; - cmd->opc = NVME_OPC_WRITE; - cmd->nsid = ns->id; - - /* TODO: create a write command data structure */ - *(uint64_t *)&cmd->cdw10 = lba; - cmd->cdw12 = 
lba_count-1; + nvme_ns_write_cmd(&req->cmd, ns->id, lba, lba_count); nvme_ctrlr_submit_io_request(ns->ctrlr, req); @@ -114,7 +94,6 @@ nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; - struct nvme_command *cmd; uint64_t lba; uint64_t lba_count; @@ -122,16 +101,9 @@ nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp, if (req == NULL) return (ENOMEM); - cmd = &req->cmd; - cmd->opc = NVME_OPC_WRITE; - cmd->nsid = ns->id; - lba = bp->bio_offset / nvme_ns_get_sector_size(ns); lba_count = bp->bio_bcount / nvme_ns_get_sector_size(ns); - - /* TODO: create a write command data structure */ - *(uint64_t *)&cmd->cdw10 = lba; - cmd->cdw12 = lba_count-1; + nvme_ns_write_cmd(&req->cmd, ns->id, lba, lba_count); nvme_ctrlr_submit_io_request(ns->ctrlr, req); @@ -168,17 +140,13 @@ int nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; - struct nvme_command *cmd; req = nvme_allocate_request_null(cb_fn, cb_arg); if (req == NULL) return (ENOMEM); - cmd = &req->cmd; - cmd->opc = NVME_OPC_FLUSH; - cmd->nsid = ns->id; - + nvme_ns_flush_cmd(&req->cmd, ns->id); nvme_ctrlr_submit_io_request(ns->ctrlr, req); return (0); diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 3330711..aa02b45 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -245,6 +245,10 @@ struct nvme_controller { struct mtx lock; + struct cam_sim *sim; + struct cam_path *path; + int cam_ref; + uint32_t ready_timeout_in_ms; bus_space_tag_t bus_tag; @@ -528,4 +532,6 @@ void nvme_notify_async_consumers(struct nvme_controller *ctrlr, void nvme_notify_fail_consumers(struct nvme_controller *ctrlr); void nvme_notify_new_controller(struct nvme_controller *ctrlr); +void nvme_ctrlr_intx_handler(void *arg); + #endif /* __NVME_PRIVATE_H__ */ diff --git a/sys/dev/qlxgbe/ql_isr.c b/sys/dev/qlxgbe/ql_isr.c index c94a068..766e3cb 100644 --- 
a/sys/dev/qlxgbe/ql_isr.c +++ b/sys/dev/qlxgbe/ql_isr.c @@ -159,7 +159,7 @@ qla_rx_intr(qla_host_t *ha, qla_sgl_rcv_t *sgc, uint32_t sds_idx) if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); mpf->m_pkthdr.flowid = sgc->rss_hash; - M_HASHTYPE_SET(mpf, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(mpf, M_HASHTYPE_OPAQUE_HASH); (*ifp->if_input)(ifp, mpf); @@ -324,7 +324,7 @@ qla_lro_intr(qla_host_t *ha, qla_sgl_lro_t *sgc, uint32_t sds_idx) mpf->m_pkthdr.csum_data = 0xFFFF; mpf->m_pkthdr.flowid = sgc->rss_hash; - M_HASHTYPE_SET(mpf, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(mpf, M_HASHTYPE_OPAQUE_HASH); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); diff --git a/sys/dev/qlxge/qls_isr.c b/sys/dev/qlxge/qls_isr.c index 1eb833e..9b571f5 100644 --- a/sys/dev/qlxge/qls_isr.c +++ b/sys/dev/qlxge/qls_isr.c @@ -190,7 +190,7 @@ qls_rx_comp(qla_host_t *ha, uint32_t rxr_idx, uint32_t cq_idx, q81_rx_t *cq_e) if ((cq_e->flags1 & Q81_RX_FLAGS1_RSS_MATCH_MASK)) { rxr->rss_int++; mp->m_pkthdr.flowid = cq_e->rss; - M_HASHTYPE_SET(mp, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(mp, M_HASHTYPE_OPAQUE_HASH); } if (cq_e->flags0 & (Q81_RX_FLAGS0_TE | Q81_RX_FLAGS0_NU | Q81_RX_FLAGS0_IE)) { diff --git a/sys/dev/ral/rt2860.c b/sys/dev/ral/rt2860.c index 79989fc..1b15a0f 100644 --- a/sys/dev/ral/rt2860.c +++ b/sys/dev/ral/rt2860.c @@ -900,7 +900,7 @@ rt2860_ampdu_rx_stop(struct ieee80211com *ic, struct ieee80211_node *ni, } #endif -int +static int rt2860_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct rt2860_vap *rvp = RT2860_VAP(vap); @@ -3824,9 +3824,16 @@ rt2860_init_locked(struct rt2860_softc *sc) /* disable DMA */ tmp = RAL_READ(sc, RT2860_WPDMA_GLO_CFG); - tmp &= 0xff0; + tmp &= ~(RT2860_RX_DMA_BUSY | RT2860_RX_DMA_EN | RT2860_TX_DMA_BUSY | + RT2860_TX_DMA_EN); + tmp |= RT2860_TX_WB_DDONE; RAL_WRITE(sc, RT2860_WPDMA_GLO_CFG, tmp); + /* reset DMA indexes */ + RAL_WRITE(sc, RT2860_WPDMA_RST_IDX, RT2860_RST_DRX_IDX0 | + RT2860_RST_DTX_IDX5 | RT2860_RST_DTX_IDX4 | RT2860_RST_DTX_IDX3 | 
+ RT2860_RST_DTX_IDX2 | RT2860_RST_DTX_IDX1 | RT2860_RST_DTX_IDX0); + /* PBF hardware reset */ RAL_WRITE(sc, RT2860_SYS_CTRL, 0xe1f); RAL_BARRIER_WRITE(sc); @@ -3858,7 +3865,9 @@ rt2860_init_locked(struct rt2860_softc *sc) rt2860_stop_locked(sc); return; } - tmp &= 0xff0; + tmp &= ~(RT2860_RX_DMA_BUSY | RT2860_RX_DMA_EN | RT2860_TX_DMA_BUSY | + RT2860_TX_DMA_EN); + tmp |= RT2860_TX_WB_DDONE; RAL_WRITE(sc, RT2860_WPDMA_GLO_CFG, tmp); /* reset Rx ring and all 6 Tx rings */ @@ -3958,7 +3967,9 @@ rt2860_init_locked(struct rt2860_softc *sc) rt2860_stop_locked(sc); return; } - tmp &= 0xff0; + tmp &= ~(RT2860_RX_DMA_BUSY | RT2860_RX_DMA_EN | RT2860_TX_DMA_BUSY | + RT2860_TX_DMA_EN); + tmp |= RT2860_TX_WB_DDONE; RAL_WRITE(sc, RT2860_WPDMA_GLO_CFG, tmp); /* disable interrupts mitigation */ diff --git a/sys/dev/ral/rt2860reg.h b/sys/dev/ral/rt2860reg.h index 5bd7dbb..f79874e 100644 --- a/sys/dev/ral/rt2860reg.h +++ b/sys/dev/ral/rt2860reg.h @@ -257,6 +257,15 @@ #define RT2860_TX_DMA_BUSY (1 << 1) #define RT2860_TX_DMA_EN (1 << 0) +/* flags for register WPDMA_RST_IDX */ +#define RT2860_RST_DRX_IDX0 (1 << 16) +#define RT2860_RST_DTX_IDX5 (1 << 5) +#define RT2860_RST_DTX_IDX4 (1 << 4) +#define RT2860_RST_DTX_IDX3 (1 << 3) +#define RT2860_RST_DTX_IDX2 (1 << 2) +#define RT2860_RST_DTX_IDX1 (1 << 1) +#define RT2860_RST_DTX_IDX0 (1 << 0) + /* possible flags for register DELAY_INT_CFG */ #define RT2860_TXDLY_INT_EN (1U << 31) #define RT2860_TXMAX_PINT_SHIFT 24 @@ -1233,7 +1242,7 @@ static const struct rt2860_rate { { 20, 0xba }, \ { 21, 0xdb }, \ { 24, 0x16 }, \ - { 25, 0x01 }, \ + { 25, 0x03 }, \ { 29, 0x1f } #define RT5390_DEF_RF \ diff --git a/sys/dev/sfxge/common/ef10_nvram.c b/sys/dev/sfxge/common/ef10_nvram.c index 7f93df3..73459d6 100644 --- a/sys/dev/sfxge/common/ef10_nvram.c +++ b/sys/dev/sfxge/common/ef10_nvram.c @@ -1658,7 +1658,7 @@ ef10_nvram_partn_write_tlv( * Read a segment from nvram at the given offset into a buffer (segment_data) * and optionally write a new tag to 
it. */ - static __checkReturn efx_rc_t +static __checkReturn efx_rc_t ef10_nvram_segment_write_tlv( __in efx_nic_t *enp, __in uint32_t partn, @@ -1684,20 +1684,25 @@ ef10_nvram_segment_write_tlv( */ status = ef10_nvram_read_tlv_segment(enp, partn, *partn_offsetp, *seg_datap, *src_remain_lenp); - if (status != 0) - return (EINVAL); + if (status != 0) { + rc = EINVAL; + goto fail1; + } status = ef10_nvram_buf_segment_size(*seg_datap, *src_remain_lenp, &original_segment_size); - if (status != 0) - return (EINVAL); + if (status != 0) { + rc = EINVAL; + goto fail2; + } if (write) { /* Update the contents of the segment in the buffer */ if ((rc = ef10_nvram_buf_write_tlv(*seg_datap, *dest_remain_lenp, tag, data, size, - &modified_segment_size)) != 0) - goto fail1; + &modified_segment_size)) != 0) { + goto fail3; + } *dest_remain_lenp -= modified_segment_size; *seg_datap += modified_segment_size; } else { @@ -1714,6 +1719,10 @@ ef10_nvram_segment_write_tlv( return (0); +fail3: + EFSYS_PROBE(fail3); +fail2: + EFSYS_PROBE(fail2); fail1: EFSYS_PROBE1(fail1, efx_rc_t, rc); diff --git a/sys/dev/sfxge/sfxge_ev.c b/sys/dev/sfxge/sfxge_ev.c index 4fb8e0e..7bdcf22 100644 --- a/sys/dev/sfxge/sfxge_ev.c +++ b/sys/dev/sfxge/sfxge_ev.c @@ -453,7 +453,7 @@ sfxge_ev_stat_update(struct sfxge_softc *sc) goto out; now = ticks; - if (now - sc->ev_stats_update_time < hz) + if ((unsigned int)(now - sc->ev_stats_update_time) < (unsigned int)hz) goto out; sc->ev_stats_update_time = now; diff --git a/sys/dev/sfxge/sfxge_port.c b/sys/dev/sfxge/sfxge_port.c index c8b0334..2ebc521 100644 --- a/sys/dev/sfxge/sfxge_port.c +++ b/sys/dev/sfxge/sfxge_port.c @@ -62,7 +62,7 @@ sfxge_mac_stat_update(struct sfxge_softc *sc) } now = ticks; - if (now - port->mac_stats.update_time < hz) { + if ((unsigned int)(now - port->mac_stats.update_time) < (unsigned int)hz) { rc = 0; goto out; } @@ -570,7 +570,7 @@ sfxge_phy_stat_update(struct sfxge_softc *sc) } now = ticks; - if (now - port->phy_stats.update_time < hz) 
{ + if ((unsigned int)(now - port->phy_stats.update_time) < (unsigned int)hz) { rc = 0; goto out; } diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c index 08fc885..198b1e5 100644 --- a/sys/dev/sfxge/sfxge_tx.c +++ b/sys/dev/sfxge/sfxge_tx.c @@ -992,7 +992,7 @@ static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso, tso->protocol = TSO_MBUF_PROTO(mbuf); tso->nh_off = mbuf->m_pkthdr.l2hlen; tso->tcph_off = mbuf->m_pkthdr.l3hlen; - tso->packet_id = TSO_MBUF_PACKETID(mbuf); + tso->packet_id = ntohs(TSO_MBUF_PACKETID(mbuf)); #endif #if !SFXGE_TX_PARSE_EARLY @@ -1001,7 +1001,7 @@ static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso, KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP, ("TSO required on non-TCP packet")); tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl; - tso->packet_id = tso_iph(tso)->ip_id; + tso->packet_id = ntohs(tso_iph(tso)->ip_id); } else { KASSERT(tso->protocol == htons(ETHERTYPE_IPV6), ("TSO required on non-IP packet")); diff --git a/sys/dev/sfxge/sfxge_version.h b/sys/dev/sfxge/sfxge_version.h index ab0ad54..af639ee 100644 --- a/sys/dev/sfxge/sfxge_version.h +++ b/sys/dev/sfxge/sfxge_version.h @@ -36,6 +36,6 @@ #ifndef _SFXGE_VERSION_H #define _SFXGE_VERSION_H -#define SFXGE_VERSION_STRING "v4.8.0.1071" +#define SFXGE_VERSION_STRING "v4.8.0.1081" #endif /* _SFXGE_DRIVER_VERSION_H */ diff --git a/sys/dev/urtwn/if_urtwn.c b/sys/dev/urtwn/if_urtwn.c index 2107fe1..999bb6e 100644 --- a/sys/dev/urtwn/if_urtwn.c +++ b/sys/dev/urtwn/if_urtwn.c @@ -373,6 +373,8 @@ static void urtwn_set_chan(struct urtwn_softc *, static void urtwn_iq_calib(struct urtwn_softc *); static void urtwn_lc_calib(struct urtwn_softc *); static void urtwn_temp_calib(struct urtwn_softc *); +static void urtwn_setup_static_keys(struct urtwn_softc *, + struct urtwn_vap *); static int urtwn_init(struct urtwn_softc *); static void urtwn_stop(struct urtwn_softc *); static void urtwn_abort_xfers(struct urtwn_softc *); @@ -2340,12 +2342,28 
@@ static int urtwn_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) { struct urtwn_softc *sc = vap->iv_ic->ic_softc; + struct urtwn_vap *uvp = URTWN_VAP(vap); if (k->wk_flags & IEEE80211_KEY_SWCRYPT) { /* Not for us. */ return (1); } + if (&vap->iv_nw_keys[0] <= k && + k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { + URTWN_LOCK(sc); + uvp->keys[k->wk_keyix] = k; + if ((sc->sc_flags & URTWN_RUNNING) == 0) { + /* + * The device was not started; + * the key will be installed later. + */ + URTWN_UNLOCK(sc); + return (1); + } + URTWN_UNLOCK(sc); + } + return (!urtwn_cmd_sleepable(sc, k, sizeof(*k), urtwn_key_set_cb)); } @@ -2353,12 +2371,25 @@ static int urtwn_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) { struct urtwn_softc *sc = vap->iv_ic->ic_softc; + struct urtwn_vap *uvp = URTWN_VAP(vap); if (k->wk_flags & IEEE80211_KEY_SWCRYPT) { /* Not for us. */ return (1); } + if (&vap->iv_nw_keys[0] <= k && + k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { + URTWN_LOCK(sc); + uvp->keys[k->wk_keyix] = NULL; + if ((sc->sc_flags & URTWN_RUNNING) == 0) { + /* All keys are removed on device reset. */ + URTWN_UNLOCK(sc); + return (1); + } + URTWN_UNLOCK(sc); + } + return (!urtwn_cmd_sleepable(sc, k, sizeof(*k), urtwn_key_del_cb)); } @@ -5230,6 +5261,20 @@ urtwn_temp_calib(struct urtwn_softc *sc) } } +static void +urtwn_setup_static_keys(struct urtwn_softc *sc, struct urtwn_vap *uvp) +{ + int i; + + for (i = 0; i < IEEE80211_WEP_NKID; i++) { + const struct ieee80211_key *k = uvp->keys[i]; + if (k != NULL) { + urtwn_cmd_sleepable(sc, k, sizeof(*k), + urtwn_key_set_cb); + } + } +} + static int urtwn_init(struct urtwn_softc *sc) { @@ -5418,12 +5463,6 @@ urtwn_init(struct urtwn_softc *sc) R92C_SECCFG_TXENC_ENA | R92C_SECCFG_RXDEC_ENA | R92C_SECCFG_TXBCKEY_DEF | R92C_SECCFG_RXBCKEY_DEF); - /* - * Install static keys (if any). - * Must be called after urtwn_cam_init(). - */ - ieee80211_runtask(ic, &sc->cmdq_task); - /* Enable hardware sequence numbering. 
*/ urtwn_write_1(sc, R92C_HWSEQ_CTRL, R92C_TX_QUEUE_ALL); @@ -5459,6 +5498,13 @@ urtwn_init(struct urtwn_softc *sc) sc->sc_flags |= URTWN_RUNNING; + /* + * Install static keys (if any). + * Must be called after urtwn_cam_init(). + */ + if (vap != NULL) + urtwn_setup_static_keys(sc, URTWN_VAP(vap)); + callout_reset(&sc->sc_watchdog_ch, hz, urtwn_watchdog, sc); fail: if (usb_err != USB_ERR_NORMAL_COMPLETION) diff --git a/sys/dev/urtwn/if_urtwnvar.h b/sys/dev/urtwn/if_urtwnvar.h index 7313f07..ada6439 100644 --- a/sys/dev/urtwn/if_urtwnvar.h +++ b/sys/dev/urtwn/if_urtwnvar.h @@ -107,6 +107,8 @@ struct urtwn_vap { struct mbuf *bcn_mbuf; struct task tsf_task_adhoc; + const struct ieee80211_key *keys[IEEE80211_WEP_NKID]; + int (*newstate)(struct ieee80211vap *, enum ieee80211_state, int); void (*recv_mgmt)(struct ieee80211_node *, diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index d28f656..3d02a52 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -146,7 +146,8 @@ static int setup_device(device_t dev, struct netfront_info *info, static int xn_ifmedia_upd(struct ifnet *ifp); static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); -int xn_connect(struct netfront_info *); +static int xn_connect(struct netfront_info *); +static void xn_kick_rings(struct netfront_info *); static int xn_get_responses(struct netfront_rxq *, struct netfront_rx_info *, RING_IDX, RING_IDX *, @@ -976,7 +977,9 @@ netfront_backend_changed(device_t dev, XenbusState newstate) break; if (xn_connect(sc) != 0) break; - xenbus_set_state(dev, XenbusStateConnected); + /* Switch to connected state before kicking the rings. 
*/ + xenbus_set_state(sc->xbdev, XenbusStateConnected); + xn_kick_rings(sc); break; case XenbusStateClosing: xenbus_set_state(dev, XenbusStateClosed); @@ -1924,7 +1927,7 @@ xn_rebuild_rx_bufs(struct netfront_rxq *rxq) } /* START of Xenolinux helper functions adapted to FreeBSD */ -int +static int xn_connect(struct netfront_info *np) { int i, error; @@ -1968,8 +1971,20 @@ xn_connect(struct netfront_info *np) * packets. */ netfront_carrier_on(np); + + return (0); +} + +static void +xn_kick_rings(struct netfront_info *np) +{ + struct netfront_rxq *rxq; + struct netfront_txq *txq; + int i; + for (i = 0; i < np->num_queues; i++) { txq = &np->txq[i]; + rxq = &np->rxq[i]; xen_intr_signal(txq->xen_intr_handle); XN_TX_LOCK(txq); xn_txeof(txq); @@ -1978,8 +1993,6 @@ xn_connect(struct netfront_info *np) xn_alloc_rx_buffers(rxq); XN_RX_UNLOCK(rxq); } - - return (0); } static void diff --git a/sys/dev/xen/timer/timer.c b/sys/dev/xen/timer/timer.c index 996399b..0b26847 100644 --- a/sys/dev/xen/timer/timer.c +++ b/sys/dev/xen/timer/timer.c @@ -477,6 +477,9 @@ xentimer_resume(device_t dev) /* Reset the last uptime value */ pvclock_resume(); + /* Reset the RTC clock */ + inittodr(time_second); + /* Kick the timers on all CPUs */ smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev); diff --git a/sys/fs/ext2fs/ext2_alloc.c b/sys/fs/ext2fs/ext2_alloc.c index 35e24c3..e0201cc 100644 --- a/sys/fs/ext2fs/ext2_alloc.c +++ b/sys/fs/ext2fs/ext2_alloc.c @@ -407,9 +407,11 @@ ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) /* * Set up a new generation number for this inode. + * Avoid zero values. 
*/ - while (ip->i_gen == 0 || ++ip->i_gen == 0) + do { ip->i_gen = arc4random(); + } while ( ip->i_gen == 0); vfs_timestamp(&ts); ip->i_birthtime = ts.tv_sec; diff --git a/sys/fs/ext2fs/ext2_bmap.c b/sys/fs/ext2fs/ext2_bmap.c index 7966b9b..6f4ec34 100644 --- a/sys/fs/ext2fs/ext2_bmap.c +++ b/sys/fs/ext2fs/ext2_bmap.c @@ -86,8 +86,8 @@ ext2_bmap(struct vop_bmap_args *ap) } /* - * This function converts the logical block number of a file to - * its physical block number on the disk within ext4 extents. + * Convert the logical block number of a file to its physical block number + * on the disk within ext4 extents. */ static int ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) @@ -97,7 +97,7 @@ ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) struct ext4_extent *ep; struct ext4_extent_path path = { .ep_bp = NULL }; daddr_t lbn; - int ret = 0; + int error; ip = VTOI(vp); fs = ip->i_e2fs; @@ -105,9 +105,9 @@ ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) if (runp != NULL) *runp = 0; - if (runb != NULL) *runb = 0; + error = 0; ext4_ext_find_extent(fs, ip, lbn, &path); if (path.ep_is_sparse) { @@ -118,29 +118,28 @@ ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) if (runb != NULL) *runb = lbn - path.ep_sparse_ext.e_blk; } else { - ep = path.ep_ext; - if (ep == NULL) - ret = EIO; - else { - *bnp = fsbtodb(fs, lbn - ep->e_blk + - (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); - - if (*bnp == 0) - *bnp = -1; - - if (runp != NULL) - *runp = ep->e_len - (lbn - ep->e_blk) - 1; - if (runb != NULL) - *runb = lbn - ep->e_blk; + if (path.ep_ext == NULL) { + error = EIO; + goto out; } + ep = path.ep_ext; + *bnp = fsbtodb(fs, lbn - ep->e_blk + + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); + + if (*bnp == 0) + *bnp = -1; + + if (runp != NULL) + *runp = ep->e_len - (lbn - ep->e_blk) - 1; + if (runb != NULL) + *runb = lbn - ep->e_blk; } - if (path.ep_bp != 
NULL) { +out: + if (path.ep_bp != NULL) brelse(path.ep_bp); - path.ep_bp = NULL; - } - return (ret); + return (error); } /* diff --git a/sys/fs/ext2fs/ext2_extents.h b/sys/fs/ext2fs/ext2_extents.h index 70eb685..b6a7fc2 100644 --- a/sys/fs/ext2fs/ext2_extents.h +++ b/sys/fs/ext2fs/ext2_extents.h @@ -40,8 +40,8 @@ * Ext4 file system extent on disk. */ struct ext4_extent { - uint32_t e_blk; /* first logical block */ - uint16_t e_len; /* number of blocks */ + uint32_t e_blk; /* first logical block */ + uint16_t e_len; /* number of blocks */ uint16_t e_start_hi; /* high 16 bits of physical block */ uint32_t e_start_lo; /* low 32 bits of physical block */ }; diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c index 9f73357..2083fb6 100644 --- a/sys/fs/ext2fs/ext2_vfsops.c +++ b/sys/fs/ext2fs/ext2_vfsops.c @@ -158,11 +158,9 @@ ext2_mount(struct mount *mp) } fs->e2fs_ronly = 1; vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); - DROP_GIANT(); g_topology_lock(); g_access(ump->um_cp, 0, -1, 0); g_topology_unlock(); - PICKUP_GIANT(); } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ext2_reload(mp, td); @@ -187,11 +185,9 @@ ext2_mount(struct mount *mp) return (error); } VOP_UNLOCK(devvp, 0); - DROP_GIANT(); g_topology_lock(); error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); - PICKUP_GIANT(); if (error) return (error); @@ -547,11 +543,9 @@ ext2_mountfs(struct vnode *devvp, struct mount *mp) ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); /* XXX: use VOP_ACESS to check FS perms */ - DROP_GIANT(); g_topology_lock(); error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); g_topology_unlock(); - PICKUP_GIANT(); VOP_UNLOCK(devvp, 0); if (error) return (error); @@ -559,11 +553,9 @@ ext2_mountfs(struct vnode *devvp, struct mount *mp) /* XXX: should we check for some sectorsize or 512 instead? 
*/ if (((SBSIZE % cp->provider->sectorsize) != 0) || (SBSIZE < cp->provider->sectorsize)) { - DROP_GIANT(); g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); - PICKUP_GIANT(); return (EINVAL); } @@ -683,11 +675,9 @@ out: if (bp) brelse(bp); if (cp != NULL) { - DROP_GIANT(); g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); - PICKUP_GIANT(); } if (ump) { mtx_destroy(EXT2_MTX(ump)); @@ -729,11 +719,9 @@ ext2_unmount(struct mount *mp, int mntflags) ext2_sbupdate(ump, MNT_WAIT); } - DROP_GIANT(); g_topology_lock(); g_vfs_close(ump->um_cp); g_topology_unlock(); - PICKUP_GIANT(); vrele(ump->um_devvp); sump = fs->e2fs_clustersum; for (i = 0; i < fs->e2fs_gcount; i++, sump++) @@ -993,16 +981,6 @@ ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) * Finish inode initialization. */ - /* - * Set up a generation number for this inode if it does not - * already have one. This should only happen on old filesystems. - */ - if (ip->i_gen == 0) { - while (ip->i_gen == 0) - ip->i_gen = arc4random(); - if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) - ip->i_flag |= IN_MODIFIED; - } *vpp = vp; return (0); } diff --git a/sys/fs/ext2fs/ext2fs.h b/sys/fs/ext2fs/ext2fs.h index c23f2a2..b005ddd 100644 --- a/sys/fs/ext2fs/ext2fs.h +++ b/sys/fs/ext2fs/ext2fs.h @@ -87,7 +87,7 @@ struct ext2fs { uint32_t e3fs_journal_dev; /* device number of journal file */ uint32_t e3fs_last_orphan; /* start of list of inodes to delete */ uint32_t e3fs_hash_seed[4]; /* HTREE hash seed */ - char e3fs_def_hash_version; /* Default hash version to use */ + char e3fs_def_hash_version;/* Default hash version to use */ char e3fs_jnl_backup_type; uint16_t e3fs_desc_size; /* size of group descriptor */ uint32_t e3fs_default_mount_opts; @@ -97,13 +97,13 @@ struct ext2fs { uint32_t e4fs_bcount_hi; /* high bits of blocks count */ uint32_t e4fs_rbcount_hi; /* high bits of reserved blocks count */ uint32_t e4fs_fbcount_hi; /* high bits of free blocks count */ - uint16_t e4fs_min_extra_isize; 
/* all inodes have at least some bytes */ - uint16_t e4fs_want_extra_isize; /* inodes must reserve some bytes */ + uint16_t e4fs_min_extra_isize; /* all inodes have some bytes */ + uint16_t e4fs_want_extra_isize;/* inodes must reserve some bytes */ uint32_t e4fs_flags; /* miscellaneous flags */ uint16_t e4fs_raid_stride; /* RAID stride */ - uint16_t e4fs_mmpintv; /* number of seconds to wait in MMP checking */ + uint16_t e4fs_mmpintv; /* seconds to wait in MMP checking */ uint64_t e4fs_mmpblk; /* block for multi-mount protection */ - uint32_t e4fs_raid_stripe_wid; /* blocks on all data disks (N * stride) */ + uint32_t e4fs_raid_stripe_wid; /* blocks on data disks (N * stride) */ uint8_t e4fs_log_gpf; /* FLEX_BG group size */ uint8_t e4fs_chksum_type; /* metadata checksum algorithm used */ uint8_t e4fs_encrypt; /* versioning level for encryption */ @@ -117,7 +117,7 @@ struct ext2fs { uint32_t e4fs_first_errtime; /* first time an error happened */ uint32_t e4fs_first_errino; /* inode involved in first error */ uint64_t e4fs_first_errblk; /* block involved of first error */ - uint8_t e4fs_first_errfunc[32]; /* function where error happened */ + uint8_t e4fs_first_errfunc[32];/* function where error happened */ uint32_t e4fs_first_errline; /* line number where error happened */ uint32_t e4fs_last_errtime; /* most recent time of an error */ uint32_t e4fs_last_errino; /* inode involved in last error */ @@ -127,10 +127,10 @@ struct ext2fs { uint8_t e4fs_mount_opts[64]; uint32_t e4fs_usrquota_inum; /* inode for tracking user quota */ uint32_t e4fs_grpquota_inum; /* inode for tracking group quota */ - uint32_t e4fs_overhead_clusters; /* overhead blocks/clusters */ + uint32_t e4fs_overhead_clusters;/* overhead blocks/clusters */ uint32_t e4fs_backup_bgs[2]; /* groups with sparse_super2 SBs */ - uint8_t e4fs_encrypt_algos[4]; /* encryption algorithms in use */ - uint8_t e4fs_encrypt_pw_salt[16]; /* salt used for string2key */ + uint8_t e4fs_encrypt_algos[4];/* encryption 
algorithms in use */ + uint8_t e4fs_encrypt_pw_salt[16];/* salt used for string2key */ uint32_t e4fs_lpf_ino; /* location of the lost+found inode */ uint32_t e4fs_proj_quota_inum; /* inode for tracking project quota */ uint32_t e4fs_chksum_seed; /* checksum seed */ diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index db1ee9d..a3e2c70 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -763,37 +763,37 @@ nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep, /* * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs(). */ - if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL && - *(delimp + 1) == ':') { - hostp = spec + 1; - spec = delimp + 2; - have_bracket = 1; - } else if ((delimp = strrchr(spec, ':')) != NULL) { - hostp = spec; - spec = delimp + 1; - } else if ((delimp = strrchr(spec, '@')) != NULL) { - printf("%s: path@server syntax is deprecated, " + if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL && + *(delimp + 1) == ':') { + hostp = spec + 1; + spec = delimp + 2; + have_bracket = 1; + } else if ((delimp = strrchr(spec, ':')) != NULL) { + hostp = spec; + spec = delimp + 1; + } else if ((delimp = strrchr(spec, '@')) != NULL) { + printf("%s: path@server syntax is deprecated, " "use server:path\n", __func__); - hostp = delimp + 1; - } else { - printf("%s: no <host>:<dirpath> nfs-name\n", __func__); - return (EINVAL); - } - *delimp = '\0'; - - /* - * If there has been a trailing slash at mounttime it seems - * that some mountd implementations fail to remove the mount - * entries from their mountlist while unmounting. 
- */ - for (speclen = strlen(spec); - speclen > 1 && spec[speclen - 1] == '/'; - speclen--) - spec[speclen - 1] = '\0'; - if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) { - printf("%s: %s:%s: name too long", __func__, hostp, spec); - return (EINVAL); - } + hostp = delimp + 1; + } else { + printf("%s: no <host>:<dirpath> nfs-name\n", __func__); + return (EINVAL); + } + *delimp = '\0'; + + /* + * If there has been a trailing slash at mounttime it seems + * that some mountd implementations fail to remove the mount + * entries from their mountlist while unmounting. + */ + for (speclen = strlen(spec); + speclen > 1 && spec[speclen - 1] == '/'; + speclen--) + spec[speclen - 1] = '\0'; + if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) { + printf("%s: %s:%s: name too long", __func__, hostp, spec); + return (EINVAL); + } /* Make both '@' and ':' notations equal */ if (*hostp != '\0') { len = strlen(hostp); @@ -806,7 +806,8 @@ nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep, nam[len + offset++] = ':'; memmove(nam + len + offset, spec, speclen); nam[len + speclen + offset] = '\0'; - } + } else + nam[0] = '\0'; /* * XXX: IPv6 diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index d2de3c8..63d855d 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -806,8 +806,11 @@ interpret: /* * Set the new credentials. */ - if (imgp->newcred != NULL) + if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); + crfree(oldcred); + oldcred = NULL; + } /* * Store the vp for use in procfs. 
This vnode was referenced by namei @@ -918,8 +921,9 @@ exec_fail: SDT_PROBE1(proc, , , exec__failure, error); } - if (imgp->newcred != NULL) - crfree(oldcred); + if (imgp->newcred != NULL && oldcred != NULL) + crfree(imgp->newcred); + #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); diff --git a/sys/kern/kern_fail.c b/sys/kern/kern_fail.c index ec466dd..b5bf066 100644 --- a/sys/kern/kern_fail.c +++ b/sys/kern/kern_fail.c @@ -82,13 +82,13 @@ static MALLOC_DEFINE(M_FAIL_POINT, "Fail Points", "fail points system"); #define fp_malloc(size, flags) malloc((size), M_FAIL_POINT, (flags)) #define fs_free(ptr) fp_free(ptr) #define fs_malloc() fp_malloc(sizeof(struct fail_point_setting), \ - M_WAITOK | M_ZERO) + M_WAITOK | M_ZERO) - /** - * These define the wchans that are used for sleeping, pausing respectively. - * They are chosen arbitrarily but need to be distinct to the failpoint and - * the sleep/pause distinction. - */ +/** + * These define the wchans that are used for sleeping, pausing respectively. + * They are chosen arbitrarily but need to be distinct to the failpoint and + * the sleep/pause distinction. + */ #define FP_SLEEP_CHANNEL(fp) (void*)(fp) #define FP_PAUSE_CHANNEL(fp) __DEVOLATILE(void*, &fp->fp_setting) @@ -166,7 +166,7 @@ struct fail_point_entry { enum fail_point_t fe_type; /**< type of entry */ int fe_arg; /**< argument to type (e.g. 
return value) */ int fe_prob; /**< likelihood of firing in millionths */ - int fe_count; /**< number of times to fire, -1 means infinite */ + int32_t fe_count; /**< number of times to fire, -1 means infinite */ pid_t fe_pid; /**< only fail for this process */ struct fail_point *fe_parent; /**< backpointer to fp */ TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry ptr */ @@ -354,7 +354,7 @@ fail_point_eval_swap_out(struct fail_point *fp, /* Free up any zero-ref entries in the garbage queue */ static void -fail_point_garbage_collect() +fail_point_garbage_collect(void) { struct fail_point_setting *fs_current, *fs_next; struct fail_point_setting_garbage fp_ents_free_list; @@ -441,7 +441,7 @@ fail_point_sleep(struct fail_point *fp, int msecs, fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); timeout(fp->fp_post_sleep_fn, fp->fp_post_sleep_arg, - timo); + timo); *pret = FAIL_POINT_RC_QUEUED; } } @@ -635,7 +635,6 @@ abort: fail_point_setting_release_ref(fp); return (ret); - } /** @@ -844,19 +843,17 @@ end: /** * Handle kernel failpoint set/get. */ - int fail_point_sysctl(SYSCTL_HANDLER_ARGS) { struct fail_point *fp; char *buf; - struct sbuf *sb_check; - struct sbuf sb; + struct sbuf sb, *sb_check; int error; + buf = NULL; error = 0; fp = arg1; - buf = NULL; sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND); if (sb_check != &sb) @@ -949,7 +946,6 @@ fail_sysctl_drain_func(void *sysctl_args, const char *buf, int len) return (len); } - /** * Internal helper function to translate a human-readable failpoint string * into a internally-parsable fail_point structure. diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index e004097..1260c98 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -827,6 +827,10 @@ fork1(struct thread *td, struct fork_req *fr) /* Must provide a place to put a procdesc if creating one. */ if (fr->fr_pd_fd == NULL) return (EINVAL); + + /* Check if we are using supported flags. 
*/ + if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0) + return (EINVAL); } p1 = td->td_proc; @@ -878,8 +882,8 @@ fork1(struct thread *td, struct fork_req *fr) * later. */ if (flags & RFPROCDESC) { - error = falloc_caps(td, &fp_procdesc, fr->fr_pd_fd, 0, - fr->fr_pd_fcaps); + error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd, + fr->fr_pd_flags, fr->fr_pd_fcaps); if (error != 0) goto fail2; } diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 35e270a..ebd714f 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -920,6 +920,46 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) } #endif + error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); + if (error == ENOENT) + osrelstr = NULL; + else if (error != 0) + goto done_free; + else { + if (flags & JAIL_UPDATE) { + error = EINVAL; + vfs_opterror(opts, + "osrelease cannot be changed after creation"); + goto done_errmsg; + } + if (len == 0 || len >= OSRELEASELEN) { + error = EINVAL; + vfs_opterror(opts, + "osrelease string must be 1-%d bytes long", + OSRELEASELEN - 1); + goto done_errmsg; + } + } + + error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); + if (error == ENOENT) + osreldt = 0; + else if (error != 0) + goto done_free; + else { + if (flags & JAIL_UPDATE) { + error = EINVAL; + vfs_opterror(opts, + "osreldate cannot be changed after creation"); + goto done_errmsg; + } + if (osreldt == 0) { + error = EINVAL; + vfs_opterror(opts, "osreldate cannot be 0"); + goto done_errmsg; + } + } + fullpath_disabled = 0; root = NULL; error = vfs_getopt(opts, "path", (void **)&path, &len); @@ -970,51 +1010,12 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") ? 
strlen(mypr->pr_path) : 0) > MAXPATHLEN) { error = ENAMETOOLONG; + vrele(root); goto done_free; } } } - error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); - if (error == ENOENT) - osrelstr = NULL; - else if (error != 0) - goto done_free; - else { - if (flags & JAIL_UPDATE) { - error = EINVAL; - vfs_opterror(opts, - "osrelease cannot be changed after creation"); - goto done_errmsg; - } - if (len == 0 || len >= OSRELEASELEN) { - error = EINVAL; - vfs_opterror(opts, - "osrelease string must be 1-%d bytes long", - OSRELEASELEN - 1); - goto done_errmsg; - } - } - - error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); - if (error == ENOENT) - osreldt = 0; - else if (error != 0) - goto done_free; - else { - if (flags & JAIL_UPDATE) { - error = EINVAL; - vfs_opterror(opts, - "osreldate cannot be changed after creation"); - goto done_errmsg; - } - if (osreldt == 0) { - error = EINVAL; - vfs_opterror(opts, "osreldate cannot be 0"); - goto done_errmsg; - } - } - /* * Find the specified jail, or at least its parent. * This abuses the file error codes ENOENT and EEXIST. 
@@ -1929,19 +1930,17 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) vrele(root); done_errmsg: if (error) { - vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); - if (errmsg_len > 0) { + if (vfs_getopt(opts, "errmsg", (void **)&errmsg, + &errmsg_len) == 0 && errmsg_len > 0) { errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; - if (errmsg_pos > 0) { - if (optuio->uio_segflg == UIO_SYSSPACE) - bcopy(errmsg, - optuio->uio_iov[errmsg_pos].iov_base, - errmsg_len); - else - copyout(errmsg, - optuio->uio_iov[errmsg_pos].iov_base, - errmsg_len); - } + if (optuio->uio_segflg == UIO_SYSSPACE) + bcopy(errmsg, + optuio->uio_iov[errmsg_pos].iov_base, + errmsg_len); + else + copyout(errmsg, + optuio->uio_iov[errmsg_pos].iov_base, + errmsg_len); } } done_free: @@ -2383,7 +2382,14 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap) if (error) return (error); - sx_slock(&allprison_lock); + /* + * Start with exclusive hold on allprison_lock to ensure that a possible + * PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove. + * But then immediately downgrade it since we don't need to stop + * readers. + */ + sx_xlock(&allprison_lock); + sx_downgrade(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_sunlock(&allprison_lock); @@ -2601,9 +2607,11 @@ prison_complete(void *context, int pending) { struct prison *pr = context; + sx_xlock(&allprison_lock); mtx_lock(&pr->pr_mtx); prison_deref(pr, pr->pr_uref - ? PD_DEREF | PD_DEUREF | PD_LOCKED : PD_LOCKED); + ? 
PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED + : PD_LOCKED | PD_LIST_XLOCKED); } /* @@ -2647,13 +2655,8 @@ prison_deref(struct prison *pr, int flags) */ if (lasturef) { if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) { - if (ref > 1) { - sx_slock(&allprison_lock); - flags |= PD_LIST_SLOCKED; - } else { - sx_xlock(&allprison_lock); - flags |= PD_LIST_XLOCKED; - } + sx_xlock(&allprison_lock); + flags |= PD_LIST_XLOCKED; } (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); mtx_lock(&pr->pr_mtx); diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index 16c39f0..4758cdc 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -54,6 +54,10 @@ __FBSDID("$FreeBSD$"); #include <sys/syscallsubr.h> #include <sys/sysctl.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + #include <net/vnet.h> #include <security/mac/mac_framework.h> @@ -1256,6 +1260,23 @@ kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat) return (0); } +#ifdef DDB +DB_COMMAND(kldstat, db_kldstat) +{ + linker_file_t lf; + +#define POINTER_WIDTH ((int)(sizeof(void *) * 2 + 2)) + db_printf("Id Refs Address%*c Size Name\n", POINTER_WIDTH - 7, ' '); +#undef POINTER_WIDTH + TAILQ_FOREACH(lf, &linker_files, link) { + if (db_pager_quit) + return; + db_printf("%2d %4d %p %-8zx %s\n", lf->id, lf->refs, + lf->address, lf->size, lf->filename); + } +} +#endif /* DDB */ + int sys_kldfirstmod(struct thread *td, struct kldfirstmod_args *uap) { diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index db93e34..78227e7 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -929,3 +929,14 @@ mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); kdh->parity = kerneldump_parity(kdh); } + +#ifdef DDB +DB_SHOW_COMMAND(panic, db_show_panic) +{ + + if (panicstr == NULL) + db_printf("panicstr not set\n"); + else + db_printf("panic: %s\n", panicstr); +} +#endif diff --git 
a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 75a1259..f8bb87e 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -3123,6 +3123,7 @@ static int compress_user_cores = 0; #define corefilename_lock allproc_lock static char corefilename[MAXPATHLEN] = {"%N.core"}; +TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); static int sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) @@ -3136,7 +3137,7 @@ sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) return (error); } -SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RWTUN | +SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", "Process corefile name format string"); diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 50e6db9..244e5c9 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include "opt_callout_profiling.h" +#include "opt_ddb.h" #if defined(__arm__) #include "opt_timer.h" #endif @@ -60,6 +61,11 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/smp.h> +#ifdef DDB +#include <ddb/ddb.h> +#include <machine/_inttypes.h> +#endif + #ifdef SMP #include <machine/cpu.h> #endif @@ -1615,3 +1621,34 @@ SYSCTL_PROC(_kern, OID_AUTO, callout_stat, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_callout_stat, "I", "Dump immediate statistic snapshot of the scheduled callouts"); + +#ifdef DDB +static void +_show_callout(struct callout *c) +{ + + db_printf("callout %p\n", c); +#define C_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, c->e); + db_printf(" &c_links = %p\n", &(c->c_links)); + C_DB_PRINTF("%" PRId64, c_time); + C_DB_PRINTF("%" PRId64, c_precision); + C_DB_PRINTF("%p", c_arg); + C_DB_PRINTF("%p", c_func); + C_DB_PRINTF("%p", c_lock); + C_DB_PRINTF("%#x", c_flags); + C_DB_PRINTF("%#x", c_iflags); + C_DB_PRINTF("%d", c_cpu); +#undef C_DB_PRINTF +} + +DB_SHOW_COMMAND(callout, db_show_callout) +{ + + if (!have_addr) { + 
db_printf("usage: show callout <struct callout *>\n"); + return; + } + + _show_callout((struct callout *)addr); +} +#endif /* DDB */ diff --git a/sys/kern/subr_intr.c b/sys/kern/subr_intr.c index 5d18ea2..64d37ed 100644 --- a/sys/kern/subr_intr.c +++ b/sys/kern/subr_intr.c @@ -35,10 +35,8 @@ __FBSDID("$FreeBSD$"); * - to complete things for removable PICs */ -#include "opt_acpi.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" -#include "opt_platform.h" #include <sys/param.h> #include <sys/systm.h> @@ -123,35 +121,6 @@ static struct mtx isrc_table_lock; static struct intr_irqsrc *irq_sources[NIRQ]; u_int irq_next_free; -/* - * XXX - All stuff around struct intr_dev_data is considered as temporary - * until better place for storing struct intr_map_data will be find. - * - * For now, there are two global interrupt numbers spaces: - * <0, NIRQ) ... interrupts without config data - * managed in irq_sources[] - * IRQ_DDATA_BASE + <0, 2 * NIRQ) ... interrupts with config data - * managed in intr_ddata_tab[] - * - * Read intr_ddata_lookup() to see how these spaces are worked with. - * Note that each interrupt number from second space duplicates some number - * from first space at this moment. An interrupt number from first space can - * be duplicated even multiple times in second space. 
- */ -struct intr_dev_data { - device_t idd_dev; - intptr_t idd_xref; - u_int idd_irq; - struct intr_map_data * idd_data; - struct intr_irqsrc * idd_isrc; -}; - -static struct intr_dev_data *intr_ddata_tab[2 * NIRQ]; -static u_int intr_ddata_first_unused; - -#define IRQ_DDATA_BASE 10000 -CTASSERT(IRQ_DDATA_BASE > nitems(irq_sources)); - #ifdef SMP static boolean_t irq_assign_cpu = FALSE; #endif @@ -534,116 +503,6 @@ intr_isrc_init_on_cpu(struct intr_irqsrc *isrc, u_int cpu) } #endif -static struct intr_dev_data * -intr_ddata_alloc(u_int extsize) -{ - struct intr_dev_data *ddata; - size_t size; - - size = sizeof(*ddata); - ddata = malloc(size + extsize, M_INTRNG, M_WAITOK | M_ZERO); - - mtx_lock(&isrc_table_lock); - if (intr_ddata_first_unused >= nitems(intr_ddata_tab)) { - mtx_unlock(&isrc_table_lock); - free(ddata, M_INTRNG); - return (NULL); - } - intr_ddata_tab[intr_ddata_first_unused] = ddata; - ddata->idd_irq = IRQ_DDATA_BASE + intr_ddata_first_unused++; - mtx_unlock(&isrc_table_lock); - - ddata->idd_data = (struct intr_map_data *)((uintptr_t)ddata + size); - return (ddata); -} - -static struct intr_irqsrc * -intr_ddata_lookup(u_int irq, struct intr_map_data **datap) -{ - int error; - struct intr_irqsrc *isrc; - struct intr_dev_data *ddata; - - isrc = isrc_lookup(irq); - if (isrc != NULL) { - if (datap != NULL) - *datap = NULL; - return (isrc); - } - - if (irq < IRQ_DDATA_BASE) - return (NULL); - - irq -= IRQ_DDATA_BASE; - if (irq >= nitems(intr_ddata_tab)) - return (NULL); - - ddata = intr_ddata_tab[irq]; - if (ddata->idd_isrc == NULL) { - error = intr_map_irq(ddata->idd_dev, ddata->idd_xref, - ddata->idd_data, &irq); - if (error != 0) - return (NULL); - ddata->idd_isrc = isrc_lookup(irq); - } - if (datap != NULL) - *datap = ddata->idd_data; - return (ddata->idd_isrc); -} - -#ifdef DEV_ACPI -/* - * Map interrupt source according to ACPI info into framework. If such mapping - * does not exist, create it. 
Return unique interrupt number (resource handle) - * associated with mapped interrupt source. - */ -u_int -intr_acpi_map_irq(device_t dev, u_int irq, enum intr_polarity pol, - enum intr_trigger trig) -{ - struct intr_map_data_acpi *daa; - struct intr_dev_data *ddata; - - ddata = intr_ddata_alloc(sizeof(struct intr_map_data_acpi)); - if (ddata == NULL) - return (INTR_IRQ_INVALID); /* no space left */ - - ddata->idd_dev = dev; - ddata->idd_data->type = INTR_MAP_DATA_ACPI; - - daa = (struct intr_map_data_acpi *)ddata->idd_data; - daa->irq = irq; - daa->pol = pol; - daa->trig = trig; - - return (ddata->idd_irq); -} -#endif - -/* - * Store GPIO interrupt decription in framework and return unique interrupt - * number (resource handle) associated with it. - */ -u_int -intr_gpio_map_irq(device_t dev, u_int pin_num, u_int pin_flags, u_int intr_mode) -{ - struct intr_dev_data *ddata; - struct intr_map_data_gpio *dag; - - ddata = intr_ddata_alloc(sizeof(struct intr_map_data_gpio)); - if (ddata == NULL) - return (INTR_IRQ_INVALID); /* no space left */ - - ddata->idd_dev = dev; - ddata->idd_data->type = INTR_MAP_DATA_GPIO; - - dag = (struct intr_map_data_gpio *)ddata->idd_data; - dag->gpio_pin_num = pin_num; - dag->gpio_pin_flags = pin_flags; - dag->gpio_intr_mode = intr_mode; - return (ddata->idd_irq); -} - #ifdef INTR_SOLO /* * Setup filter into interrupt source. 
@@ -1074,14 +933,11 @@ intr_alloc_irq(device_t dev, struct resource *res) KASSERT(rman_get_start(res) == rman_get_end(res), ("%s: more interrupts in resource", __func__)); - data = rman_get_virtual(res); - if (data == NULL) - isrc = intr_ddata_lookup(rman_get_start(res), &data); - else - isrc = isrc_lookup(rman_get_start(res)); + isrc = isrc_lookup(rman_get_start(res)); if (isrc == NULL) return (EINVAL); + data = rman_get_virtual(res); return (PIC_ALLOC_INTR(isrc->isrc_dev, isrc, res, data)); } @@ -1094,14 +950,11 @@ intr_release_irq(device_t dev, struct resource *res) KASSERT(rman_get_start(res) == rman_get_end(res), ("%s: more interrupts in resource", __func__)); - data = rman_get_virtual(res); - if (data == NULL) - isrc = intr_ddata_lookup(rman_get_start(res), &data); - else - isrc = isrc_lookup(rman_get_start(res)); + isrc = isrc_lookup(rman_get_start(res)); if (isrc == NULL) return (EINVAL); + data = rman_get_virtual(res); return (PIC_RELEASE_INTR(isrc->isrc_dev, isrc, res, data)); } @@ -1117,14 +970,11 @@ intr_setup_irq(device_t dev, struct resource *res, driver_filter_t filt, KASSERT(rman_get_start(res) == rman_get_end(res), ("%s: more interrupts in resource", __func__)); - data = rman_get_virtual(res); - if (data == NULL) - isrc = intr_ddata_lookup(rman_get_start(res), &data); - else - isrc = isrc_lookup(rman_get_start(res)); + isrc = isrc_lookup(rman_get_start(res)); if (isrc == NULL) return (EINVAL); + data = rman_get_virtual(res); name = device_get_nameunit(dev); #ifdef INTR_SOLO @@ -1181,14 +1031,12 @@ intr_teardown_irq(device_t dev, struct resource *res, void *cookie) KASSERT(rman_get_start(res) == rman_get_end(res), ("%s: more interrupts in resource", __func__)); - data = rman_get_virtual(res); - if (data == NULL) - isrc = intr_ddata_lookup(rman_get_start(res), &data); - else - isrc = isrc_lookup(rman_get_start(res)); + isrc = isrc_lookup(rman_get_start(res)); if (isrc == NULL || isrc->isrc_handlers == 0) return (EINVAL); + data = 
rman_get_virtual(res); + #ifdef INTR_SOLO if (isrc->isrc_filter != NULL) { if (isrc != cookie) @@ -1231,7 +1079,7 @@ intr_describe_irq(device_t dev, struct resource *res, void *cookie, KASSERT(rman_get_start(res) == rman_get_end(res), ("%s: more interrupts in resource", __func__)); - isrc = intr_ddata_lookup(rman_get_start(res), NULL); + isrc = isrc_lookup(rman_get_start(res)); if (isrc == NULL || isrc->isrc_handlers == 0) return (EINVAL); #ifdef INTR_SOLO @@ -1263,7 +1111,7 @@ intr_bind_irq(device_t dev, struct resource *res, int cpu) KASSERT(rman_get_start(res) == rman_get_end(res), ("%s: more interrupts in resource", __func__)); - isrc = intr_ddata_lookup(rman_get_start(res), NULL); + isrc = isrc_lookup(rman_get_start(res)); if (isrc == NULL || isrc->isrc_handlers == 0) return (EINVAL); #ifdef INTR_SOLO diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index ce781d9..5bcf39b 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -730,7 +730,15 @@ reswitch: switch (ch = (u_char)*fmt++) { PCHAR('>'); break; case 'c': + width -= 1; + + if (!ladjust && width > 0) + while (width--) + PCHAR(padc); PCHAR(va_arg(ap, int)); + if (ladjust && width > 0) + while (width--) + PCHAR(padc); break; case 'D': up = va_arg(ap, u_char *); diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c index 0d3b1f4..37139c1 100644 --- a/sys/kern/sys_procdesc.c +++ b/sys/kern/sys_procdesc.c @@ -243,6 +243,22 @@ procdesc_new(struct proc *p, int flags) } /* + * Create a new process decriptor for the process that refers to it. + */ +int +procdesc_falloc(struct thread *td, struct file **resultfp, int *resultfd, + int flags, struct filecaps *fcaps) +{ + int fflags; + + fflags = 0; + if (flags & PD_CLOEXEC) + fflags = O_CLOEXEC; + + return (falloc_caps(td, resultfp, resultfd, fflags, fcaps)); +} + +/* * Initialize a file with a process descriptor. 
*/ void diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c index 2a855a6..ba3995c 100644 --- a/sys/kern/sysv_msg.c +++ b/sys/kern/sysv_msg.c @@ -320,12 +320,6 @@ msgunload() #endif for (msqid = 0; msqid < msginfo.msgmni; msqid++) { - /* - * Look for an unallocated and unlocked msqid_ds. - * msqid_ds's can be locked by msgsnd or msgrcv while - * they are copying the message in/out. We can't - * re-use the entry until they release it. - */ msqkptr = &msqids[msqid]; if (msqkptr->u.msg_qbytes != 0 || (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) diff --git a/sys/mips/broadcom/bcm_mipscore.c b/sys/mips/broadcom/bcm_mipscore.c index ddd4c80..e48a68f 100644 --- a/sys/mips/broadcom/bcm_mipscore.c +++ b/sys/mips/broadcom/bcm_mipscore.c @@ -53,10 +53,13 @@ static const struct resource_spec mipscore_rspec[MIPSCORE_MAX_RSPEC] = { { -1, -1, 0 } }; +#define MIPSCORE_DEV(_vendor, _core) \ + BHND_DEVICE(_vendor, _core, NULL, NULL, BHND_DF_SOC) + struct bhnd_device mipscore_match[] = { - BHND_MIPS_DEVICE(MIPS, "BHND MIPS processor", NULL), - BHND_MIPS_DEVICE(MIPS33, "BHND MIPS3302 processor", NULL), - BHND_MIPS_DEVICE(MIPS74K, "BHND MIPS74K processor", NULL), + MIPSCORE_DEV(BCM, MIPS), + MIPSCORE_DEV(BCM, MIPS33), + MIPSCORE_DEV(MIPS, MIPS74K), BHND_DEVICE_END }; @@ -116,8 +119,8 @@ static device_method_t mipscore_methods[] = { devclass_t bhnd_mipscore_devclass; -DEFINE_CLASS_0(bhnd_mipscore, mipscore_driver, mipscore_methods, - sizeof(struct mipscore_softc)); -DRIVER_MODULE(bhnd_mipscore, bhnd, mipscore_driver, bhnd_mipscore_devclass, - 0, 0); -MODULE_VERSION(bhnd_mipscore, 1); +DEFINE_CLASS_0(bhnd_mips, mipscore_driver, mipscore_methods, + sizeof(struct mipscore_softc)); +EARLY_DRIVER_MODULE(bhnd_mips, bhnd, mipscore_driver, + bhnd_mipscore_devclass, 0, 0, BUS_PASS_CPU + BUS_PASS_ORDER_EARLY); +MODULE_VERSION(bhnd_mips, 1); diff --git a/sys/mips/mips/swtch.S b/sys/mips/mips/swtch.S index 056d1f0..1cd7df4 100644 --- a/sys/mips/mips/swtch.S +++ b/sys/mips/mips/swtch.S @@ -401,6 
+401,8 @@ END(cpu_switch) *---------------------------------------------------------------------------- */ LEAF(MipsSwitchFPState) + .set push + .set hardfloat mfc0 t1, MIPS_COP_0_STATUS # Save old SR li t0, MIPS_SR_COP_1_BIT # enable the coprocessor mtc0 t0, MIPS_COP_0_STATUS @@ -502,6 +504,7 @@ LEAF(MipsSwitchFPState) ITLBNOPFIX j ra nop + .set pop END(MipsSwitchFPState) /*---------------------------------------------------------------------------- @@ -522,6 +525,8 @@ END(MipsSwitchFPState) *---------------------------------------------------------------------------- */ LEAF(MipsSaveCurFPState) + .set push + .set hardfloat PTR_L a0, TD_PCB(a0) # get pointer to pcb for thread mfc0 t1, MIPS_COP_0_STATUS # Disable interrupts and li t0, MIPS_SR_COP_1_BIT # enable the coprocessor @@ -580,6 +585,7 @@ LEAF(MipsSaveCurFPState) ITLBNOPFIX j ra nop + .set pop END(MipsSaveCurFPState) /* diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 745d414..426d12f 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -353,7 +353,7 @@ SUBDIR= \ ${_syscons} \ sysvipc \ ${_ti} \ - tcp/fastpath \ + ${_tcp_fastpath} \ tests/framework \ tests/callout_test \ tl \ @@ -436,6 +436,10 @@ _random_other= random_other SUBDIR+= cuse .endif +.if ${MK_EXTRA_TCP_STACKS} != "no" || defined(ALL_MODULES) +_tcp_fastpath= tcp/fastpath +.endif + .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _carp= carp diff --git a/sys/modules/bwn/Makefile b/sys/modules/bwn/Makefile index dbad3c3..f8eac6e 100644 --- a/sys/modules/bwn/Makefile +++ b/sys/modules/bwn/Makefile @@ -12,7 +12,7 @@ SRCS+= if_bwn_phy_g.c if_bwn_phy_lp.c SRCS+= if_bwn_phy_n.c # Other -SRCS+= device_if.h bus_if.h pci_if.h +SRCS+= device_if.h bus_if.h pci_if.h opt_bwn.h opt_wlan.h # Uncomment this for the GPL PHY code; this requires the # module be built with BWN_GPL_PHY set in the kernel diff --git a/sys/modules/bwn_pci/Makefile b/sys/modules/bwn_pci/Makefile index 355bc2a..15b9ded 
100644 --- a/sys/modules/bwn_pci/Makefile +++ b/sys/modules/bwn_pci/Makefile @@ -7,5 +7,6 @@ SRCS= if_bwn_pci.c bwn_mac.c SRCS+= bhnd_bus_if.h bhndb_bus_if.h \ bhndb_if.h bhnd_nvram_map.h SRCS+= device_if.h bus_if.h pci_if.h \ + opt_bwn.h opt_wlan.h opt_global.h .include <bsd.kmod.mk> diff --git a/sys/modules/dtb/allwinner/Makefile b/sys/modules/dtb/allwinner/Makefile index e95d7bb..9dd0799 100644 --- a/sys/modules/dtb/allwinner/Makefile +++ b/sys/modules/dtb/allwinner/Makefile @@ -7,6 +7,7 @@ DTS= \ cubieboard2.dts \ olimex-a20-som-evb.dts \ olinuxino-lime.dts \ + pcduino3b.dts \ sinovoip-bpi-m3.dts .include <bsd.dtb.mk> diff --git a/sys/modules/netgraph/mppc/Makefile b/sys/modules/netgraph/mppc/Makefile index 40b3717..1ab69e6 100644 --- a/sys/modules/netgraph/mppc/Makefile +++ b/sys/modules/netgraph/mppc/Makefile @@ -4,11 +4,10 @@ KMOD= ng_mppc SRCS= ng_mppc.c opt_netgraph.h -NETGRAPH_MPPC_COMPRESSION?= 0 +NETGRAPH_MPPC_COMPRESSION?= 1 NETGRAPH_MPPC_ENCRYPTION?= 1 .if ${NETGRAPH_MPPC_COMPRESSION} > 0 -# XXX These files don't exist yet, but hopefully someday they will... 
.PATH: ${.CURDIR}/../../../net SRCS+= mppcc.c mppcd.c .endif diff --git a/sys/modules/siba_bwn/Makefile b/sys/modules/siba_bwn/Makefile index 433d37f..02fbd5d 100644 --- a/sys/modules/siba_bwn/Makefile +++ b/sys/modules/siba_bwn/Makefile @@ -4,6 +4,6 @@ KMOD= siba_bwn SRCS= siba_core.c siba_bwn.c sibareg.h sibavar.h -SRCS+= device_if.h bus_if.h pci_if.h +SRCS+= device_if.h bus_if.h pci_if.h opt_siba.h .include <bsd.kmod.mk> diff --git a/sys/modules/tcp/fastpath/Makefile b/sys/modules/tcp/fastpath/Makefile index d9a6587..c6469fe 100644 --- a/sys/modules/tcp/fastpath/Makefile +++ b/sys/modules/tcp/fastpath/Makefile @@ -7,7 +7,7 @@ KMOD= fastpath SRCS= fastpath.c -SRCS+= opt_ipfw.h opt_inet.h opt_inet6.h opt_ipsec.h +SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h SRCS+= opt_tcpdebug.h # diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c index 35aff00..b837a8e 100644 --- a/sys/net/flowtable.c +++ b/sys/net/flowtable.c @@ -689,7 +689,7 @@ flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro) return (EHOSTUNREACH); if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) { - M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE_HASH); m->m_pkthdr.flowid = fle->f_hash; } diff --git a/sys/net/if.c b/sys/net/if.c index 6845bbf..4d475d9 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -848,6 +848,7 @@ if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa, *next; + /* XXX cannot hold IF_ADDR_WLOCK over called functions. */ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family == AF_LINK) continue; @@ -872,7 +873,9 @@ if_purgeaddrs(struct ifnet *ifp) continue; } #endif /* INET6 */ + IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } @@ -1004,11 +1007,14 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) ifp->if_addr = NULL; /* We can now free link ifaddr. 
*/ + IF_ADDR_WLOCK(ifp); if (!TAILQ_EMPTY(&ifp->if_addrhead)) { ifa = TAILQ_FIRST(&ifp->if_addrhead); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); - } + } else + IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); @@ -1024,9 +1030,11 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { - if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) + if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); + ifp->if_afdata[dp->dom_family] = NULL; + } } return (0); diff --git a/sys/net/if_vxlan.c b/sys/net/if_vxlan.c index a5631fc..f830b06 100644 --- a/sys/net/if_vxlan.c +++ b/sys/net/if_vxlan.c @@ -2237,8 +2237,7 @@ vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m) range = sc->vxl_max_port - sc->vxl_min_port + 1; /* check if flowid is set and not opaque */ - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE && - M_HASHTYPE_GET(m) != M_HASHTYPE_OPAQUE) + if (M_HASHTYPE_ISHASH(m)) hash = m->m_pkthdr.flowid; else hash = jenkins_hash(m->m_data, ETHER_HDR_LEN, diff --git a/sys/net/iflib.c b/sys/net/iflib.c index be5b85b..955a1b2 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -3085,7 +3085,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) next = next->m_nextpkt; } while (next != NULL); - if (count > 8) + if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); @@ -3112,7 +3112,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) m_freem(mp[i]); ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE); } - if (count > 16) + if (count > nitems(marr)) free(mp, M_IFLIB); return (err); @@ -3863,6 +3863,9 @@ iflib_queues_alloc(if_ctx_t ctx) KASSERT(ntxqs > 0, ("number of queues must be at least 1")); KASSERT(nrxqs > 0, ("number of queues must be at least 1")); + brscp = 
NULL; + rxq = NULL; + /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * @@ -3888,6 +3891,8 @@ iflib_queues_alloc(if_ctx_t ctx) ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; + txq = NULL; + rxq = NULL; /* * XXX handle allocation failure @@ -3898,7 +3903,7 @@ iflib_queues_alloc(if_ctx_t ctx) if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; - goto fail; + goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { @@ -3940,7 +3945,7 @@ iflib_queues_alloc(if_ctx_t ctx) if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); - goto fail; + goto err_tx_desc; } } } @@ -3951,7 +3956,7 @@ iflib_queues_alloc(if_ctx_t ctx) if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; - goto fail; + goto err_tx_desc; } rxq->ifr_ifdi = ifdip; @@ -3975,7 +3980,7 @@ iflib_queues_alloc(if_ctx_t ctx) (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; - goto fail; + goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { @@ -4042,10 +4047,16 @@ err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; -rx_fail: if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; +rx_fail: + if (brscp != NULL) + free(brscp, M_IFLIB); + if (rxq != NULL) + free(rxq, M_IFLIB); + if (txq != NULL) + free(txq, M_IFLIB); fail: return (err); } diff --git a/sys/net/mppc.h b/sys/net/mppc.h new file mode 100644 index 0000000..29b5113 --- /dev/null +++ b/sys/net/mppc.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2007 Alexander Motin <mav@freebsd.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * MPPC decompression library. + * Version 1.0 + * + * Note that Hi/Fn (later acquired by Exar Corporation) held US patents + * on some implementation-critical aspects of MPPC compression. + * These patents lapsed due to non-payment of fees in 2007 and by 2015 + * expired altogether. 
+ */ + +#ifndef _NET_MPPC_H_ +#define _NET_MPPC_H_ + +#define MPPC_MANDATORY_COMPRESS_FLAGS 0 +#define MPPC_MANDATORY_DECOMPRESS_FLAGS 0 + +#define MPPC_SAVE_HISTORY 1 + +#define MPPC_OK 5 +#define MPPC_EXPANDED 8 +#define MPPC_RESTART_HISTORY 16 +#define MPPC_DEST_EXHAUSTED 32 + +extern size_t MPPC_SizeOfCompressionHistory(void); +extern size_t MPPC_SizeOfDecompressionHistory(void); + +extern void MPPC_InitCompressionHistory(char *history); +extern void MPPC_InitDecompressionHistory(char *history); + +extern int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef); +extern int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags); + +#endif diff --git a/sys/net/mppcc.c b/sys/net/mppcc.c new file mode 100644 index 0000000..01ce3ff --- /dev/null +++ b/sys/net/mppcc.c @@ -0,0 +1,299 @@ +/*- + * Copyright (c) 2002-2004 Jan Dubiec <jdx@slackware.pl> + * Copyright (c) 2007 Alexander Motin <mav@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * MPPC compression library. + * Version 1.0 + * + * Note that Hi/Fn (later acquired by Exar Corporation) held US patents + * on some implementation-critical aspects of MPPC compression. + * These patents lapsed due to non-payment of fees in 2007 and by 2015 + * expired altogether. + */ + +#include <sys/param.h> +#include <sys/systm.h> + +#include <net/mppc.h> + +#define MPPE_HIST_LEN 8192 + +#define HASH(x) (((40543*(((((x)[0]<<4)^(x)[1])<<4)^(x)[2]))>>4) & 0x1fff) + +struct MPPC_comp_state { + uint8_t hist[2*MPPE_HIST_LEN]; + uint16_t histptr; + uint16_t hash[MPPE_HIST_LEN]; +}; + +/* Inserts 1 to 8 bits into the output buffer. */ +static void __inline +putbits8(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) +{ + buf += *i; + if (*l >= n) { + *l = (*l) - n; + val <<= *l; + *buf = *buf | (val & 0xff); + if (*l == 0) { + *l = 8; + (*i)++; + *(++buf) = 0; + } + } else { + (*i)++; + *l = 8 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 8) & 0xff); + *(++buf) = val & 0xff; + } +} + +/* Inserts 9 to 16 bits into the output buffer. 
*/ +static void __inline +putbits16(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) +{ + buf += *i; + if (*l >= n - 8) { + (*i)++; + *l = 8 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 8) & 0xff); + *(++buf) = val & 0xff; + if (*l == 0) { + *l = 8; + (*i)++; + *(++buf) = 0; + } + } else { + (*i)++; (*i)++; + *l = 16 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 16) & 0xff); + *(++buf) = (val >> 8) & 0xff; + *(++buf) = val & 0xff; + } +} + +/* Inserts 17 to 24 bits into the output buffer. */ +static void __inline +putbits24(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) +{ + buf += *i; + if (*l >= n - 16) { + (*i)++; (*i)++; + *l = 16 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 16) & 0xff); + *(++buf) = (val >> 8) & 0xff; + *(++buf) = val & 0xff; + if (*l == 0) { + *l = 8; + (*i)++; + *(++buf) = 0; + } + } else { + (*i)++; (*i)++; (*i)++; + *l = 24 - n + (*l); + val <<= *l; + *buf = *buf | ((val >> 24) & 0xff); + *(++buf) = (val >> 16) & 0xff; + *(++buf) = (val >> 8) & 0xff; + *(++buf) = val & 0xff; + } +} + +size_t MPPC_SizeOfCompressionHistory(void) +{ + return (sizeof(struct MPPC_comp_state)); +} + +void MPPC_InitCompressionHistory(char *history) +{ + struct MPPC_comp_state *state = (struct MPPC_comp_state*)history; + + bzero(history, sizeof(struct MPPC_comp_state)); + state->histptr = MPPE_HIST_LEN; +} + +int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef) +{ + struct MPPC_comp_state *state = (struct MPPC_comp_state*)history; + uint32_t olen, off, len, idx, i, l; + uint8_t *hist, *sbuf, *p, *q, *r, *s; + int rtn = MPPC_OK; + + /* + * At this point, to avoid possible buffer overflow caused by packet + * expansion during/after compression, we should make sure we have + * space for the worst case. + + * Maximum MPPC packet expansion is 12.5%. 
This is the worst case when + * all octets in the input buffer are >= 0x80 and we cannot find any + * repeated tokens. + */ + if (*dstCnt < (*srcCnt * 9 / 8 + 2)) { + rtn &= ~MPPC_OK; + return (rtn); + } + + /* We can't compress more than MPPE_HIST_LEN bytes in a call. */ + if (*srcCnt > MPPE_HIST_LEN) { + rtn &= ~MPPC_OK; + return (rtn); + } + + hist = state->hist + MPPE_HIST_LEN; + /* check if there is enough room at the end of the history */ + if (state->histptr + *srcCnt >= 2*MPPE_HIST_LEN) { + rtn |= MPPC_RESTART_HISTORY; + state->histptr = MPPE_HIST_LEN; + memcpy(state->hist, hist, MPPE_HIST_LEN); + } + /* Add packet to the history. */ + sbuf = state->hist + state->histptr; + memcpy(sbuf, *src, *srcCnt); + state->histptr += *srcCnt; + + /* compress data */ + r = sbuf + *srcCnt; + **dst = olen = i = 0; + l = 8; + while (i < *srcCnt - 2) { + s = q = sbuf + i; + + /* Prognose matching position using hash function. */ + idx = HASH(s); + p = hist + state->hash[idx]; + state->hash[idx] = (uint16_t) (s - hist); + if (p > s) /* It was before MPPC_RESTART_HISTORY. */ + p -= MPPE_HIST_LEN; /* Try previous history buffer. */ + off = s - p; + + /* Check our prognosis. */ + if (off > MPPE_HIST_LEN - 1 || off < 1 || *p++ != *s++ || + *p++ != *s++ || *p++ != *s++) { + /* No match found; encode literal byte. */ + if ((*src)[i] < 0x80) { /* literal byte < 0x80 */ + putbits8(*dst, (uint32_t) (*src)[i], 8, &olen, &l); + } else { /* literal byte >= 0x80 */ + putbits16(*dst, (uint32_t) (0x100|((*src)[i]&0x7f)), 9, + &olen, &l); + } + ++i; + continue; + } + + /* Find length of the matching fragment */ +#if defined(__amd64__) || defined(__i386__) + /* Optimization for CPUs without strict data alignment requirements */ + while ((*((uint32_t*)p) == *((uint32_t*)s)) && (s < (r - 3))) { + p+=4; + s+=4; + } +#endif + while((*p++ == *s++) && (s <= r)); + len = s - q - 1; + i += len; + + /* At least 3 character match found; code data. */ + /* Encode offset. 
*/ + if (off < 64) { /* 10-bit offset; 0 <= offset < 64 */ + putbits16(*dst, 0x3c0|off, 10, &olen, &l); + } else if (off < 320) { /* 12-bit offset; 64 <= offset < 320 */ + putbits16(*dst, 0xe00|(off-64), 12, &olen, &l); + } else if (off < 8192) { /* 16-bit offset; 320 <= offset < 8192 */ + putbits16(*dst, 0xc000|(off-320), 16, &olen, &l); + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return rtn; + } + + /* Encode length of match. */ + if (len < 4) { /* length = 3 */ + putbits8(*dst, 0, 1, &olen, &l); + } else if (len < 8) { /* 4 <= length < 8 */ + putbits8(*dst, 0x08|(len&0x03), 4, &olen, &l); + } else if (len < 16) { /* 8 <= length < 16 */ + putbits8(*dst, 0x30|(len&0x07), 6, &olen, &l); + } else if (len < 32) { /* 16 <= length < 32 */ + putbits8(*dst, 0xe0|(len&0x0f), 8, &olen, &l); + } else if (len < 64) { /* 32 <= length < 64 */ + putbits16(*dst, 0x3c0|(len&0x1f), 10, &olen, &l); + } else if (len < 128) { /* 64 <= length < 128 */ + putbits16(*dst, 0xf80|(len&0x3f), 12, &olen, &l); + } else if (len < 256) { /* 128 <= length < 256 */ + putbits16(*dst, 0x3f00|(len&0x7f), 14, &olen, &l); + } else if (len < 512) { /* 256 <= length < 512 */ + putbits16(*dst, 0xfe00|(len&0xff), 16, &olen, &l); + } else if (len < 1024) { /* 512 <= length < 1024 */ + putbits24(*dst, 0x3fc00|(len&0x1ff), 18, &olen, &l); + } else if (len < 2048) { /* 1024 <= length < 2048 */ + putbits24(*dst, 0xff800|(len&0x3ff), 20, &olen, &l); + } else if (len < 4096) { /* 2048 <= length < 4096 */ + putbits24(*dst, 0x3ff000|(len&0x7ff), 22, &olen, &l); + } else if (len < 8192) { /* 4096 <= length < 8192 */ + putbits24(*dst, 0xffe000|(len&0xfff), 24, &olen, &l); + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + + /* Add remaining octets to the output. 
*/ + while(*srcCnt - i > 0) { + if ((*src)[i] < 0x80) { /* literal byte < 0x80 */ + putbits8(*dst, (uint32_t) (*src)[i++], 8, &olen, &l); + } else { /* literal byte >= 0x80 */ + putbits16(*dst, (uint32_t) (0x100|((*src)[i++]&0x7f)), 9, &olen, + &l); + } + } + + /* Reset unused bits of the last output octet. */ + if ((l != 0) && (l != 8)) { + putbits8(*dst, 0, l, &olen, &l); + } + + /* If result is bigger than original, set flag and flush history. */ + if ((*srcCnt < olen) || ((flags & MPPC_SAVE_HISTORY) == 0)) { + if (*srcCnt < olen) + rtn |= MPPC_EXPANDED; + bzero(history, sizeof(struct MPPC_comp_state)); + state->histptr = MPPE_HIST_LEN; + } + + *src += *srcCnt; + *srcCnt = 0; + *dst += olen; + *dstCnt -= olen; + + return (rtn); +} diff --git a/sys/net/mppcd.c b/sys/net/mppcd.c new file mode 100644 index 0000000..c1730e5 --- /dev/null +++ b/sys/net/mppcd.c @@ -0,0 +1,284 @@ +/*- + * Copyright (c) 2002-2004 Jan Dubiec <jdx@slackware.pl> + * Copyright (c) 2007 Alexander Motin <mav@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * MPPC decompression library. + * Version 1.0 + * + * Note that Hi/Fn (later acquired by Exar Corporation) held US patents + * on some implementation-critical aspects of MPPC compression. + * These patents lapsed due to non-payment of fees in 2007 and by 2015 + * expired altogether. + */ + +#include <sys/param.h> +#include <sys/systm.h> + +#include <net/mppc.h> + +#define MPPE_HIST_LEN 8192 + +struct MPPC_decomp_state { + uint8_t hist[2*MPPE_HIST_LEN]; + uint16_t histptr; +}; + +static uint32_t __inline +getbits(const uint8_t *buf, const uint32_t n, uint32_t *i, uint32_t *l) +{ + static const uint32_t m[] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; + uint32_t res, ol; + + ol = *l; + if (*l >= n) { + *l = (*l) - n; + res = (buf[*i] & m[ol]) >> (*l); + if (*l == 0) { + *l = 8; + (*i)++; + } + } else { + *l = 8 - n + (*l); + res = (buf[(*i)++] & m[ol]) << 8; + res = (res | buf[*i]) >> (*l); + } + + return (res); +} + +static uint32_t __inline +getbyte(const uint8_t *buf, const uint32_t i, const uint32_t l) +{ + if (l == 8) { + return (buf[i]); + } else { + return ((((buf[i] << 8) | buf[i+1]) >> l) & 0xff); + } +} + +static void __inline +lamecopy(uint8_t *dst, uint8_t *src, uint32_t len) +{ + while (len--) + *dst++ = *src++; +} + +size_t MPPC_SizeOfDecompressionHistory(void) +{ + return (sizeof(struct MPPC_decomp_state)); +} + +void MPPC_InitDecompressionHistory(char *history) +{ + struct 
MPPC_decomp_state *state = (struct MPPC_decomp_state*)history; + + bzero(history, sizeof(struct MPPC_decomp_state)); + state->histptr = MPPE_HIST_LEN; +} + +int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags) +{ + struct MPPC_decomp_state *state = (struct MPPC_decomp_state*)history; + uint32_t olen, off, len, bits, val, sig, i, l; + uint8_t *hist, *s; + u_char *isrc = *src; + int rtn = MPPC_OK; + + if ((flags & MPPC_RESTART_HISTORY) != 0) { + memcpy(state->hist, state->hist + MPPE_HIST_LEN, MPPE_HIST_LEN); + state->histptr = MPPE_HIST_LEN; + } + + hist = state->hist + state->histptr; + olen = len = i = 0; + l = 8; + bits = *srcCnt * 8; + while (bits >= 8) { + val = getbyte(isrc, i++, l); + if (val < 0x80) { /* literal byte < 0x80 */ + if (state->histptr < 2*MPPE_HIST_LEN) { + /* Copy uncompressed byte to the history. */ + (state->hist)[(state->histptr)++] = (uint8_t) val; + } else { + /* Buffer overflow; drop packet. */ + rtn &= ~MPPC_OK; + return rtn; + } + olen++; + bits -= 8; + continue; + } + + sig = val & 0xc0; + if (sig == 0x80) { /* literal byte >= 0x80 */ + if (state->histptr < 2*MPPE_HIST_LEN) { + /* Copy uncompressed byte to the history. 
*/ + (state->hist)[(state->histptr)++] = + (uint8_t) (0x80|((val&0x3f)<<1)|getbits(isrc, 1 , &i ,&l)); + } else { + /* buffer overflow; drop packet */ + rtn &= ~MPPC_OK; + return (rtn); + } + olen++; + bits -= 9; + continue; + } + + /* Not a literal byte so it must be an (offset,length) pair */ + /* decode offset */ + sig = val & 0xf0; + if (sig == 0xf0) { /* 10-bit offset; 0 <= offset < 64 */ + off = (((val&0x0f)<<2)|getbits(isrc, 2 , &i ,&l)); + bits -= 10; + } else { + if (sig == 0xe0) { /* 12-bit offset; 64 <= offset < 320 */ + off = ((((val&0x0f)<<4)|getbits(isrc, 4 , &i ,&l))+64); + bits -= 12; + } else { + if ((sig&0xe0) == 0xc0) {/* 16-bit offset; 320 <= offset < 8192 */ + off = ((((val&0x1f)<<8)|getbyte(isrc, i++, l))+320); + bits -= 16; + if (off > MPPE_HIST_LEN - 1) { + rtn &= ~MPPC_OK; + return (rtn); + } + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + } + /* Decode length of match. */ + val = getbyte(isrc, i, l); + if ((val & 0x80) == 0x00) { /* len = 3 */ + len = 3; + bits--; + getbits(isrc, 1 , &i ,&l); + } else if ((val & 0xc0) == 0x80) { /* 4 <= len < 8 */ + len = 0x04 | ((val>>4) & 0x03); + bits -= 4; + getbits(isrc, 4 , &i ,&l); + } else if ((val & 0xe0) == 0xc0) { /* 8 <= len < 16 */ + len = 0x08 | ((val>>2) & 0x07); + bits -= 6; + getbits(isrc, 6 , &i ,&l); + } else if ((val & 0xf0) == 0xe0) { /* 16 <= len < 32 */ + len = 0x10 | (val & 0x0f); + bits -= 8; + i++; + } else { + bits -= 8; + val = (val << 8) | getbyte(isrc, ++i, l); + if ((val & 0xf800) == 0xf000) { /* 32 <= len < 64 */ + len = 0x0020 | ((val >> 6) & 0x001f); + bits -= 2; + getbits(isrc, 2 , &i ,&l); + } else if ((val & 0xfc00) == 0xf800) { /* 64 <= len < 128 */ + len = 0x0040 | ((val >> 4) & 0x003f); + bits -= 4; + getbits(isrc, 4 , &i ,&l); + } else if ((val & 0xfe00) == 0xfc00) { /* 128 <= len < 256 */ + len = 0x0080 | ((val >> 2) & 0x007f); + bits -= 6; + getbits(isrc, 6 , &i ,&l); + } else if ((val & 0xff00) == 0xfe00) { /* 256 <= len < 512 */ + len = 
0x0100 | (val & 0x00ff); + bits -= 8; + i++; + } else { + bits -= 8; + val = (val << 8) | getbyte(isrc, ++i, l); + if ((val & 0xff8000) == 0xff0000) { /* 512 <= len < 1024 */ + len = 0x000200 | ((val >> 6) & 0x0001ff); + bits -= 2; + getbits(isrc, 2 , &i ,&l); + } else if ((val & 0xffc000) == 0xff8000) {/* 1024 <= len < 2048 */ + len = 0x000400 | ((val >> 4) & 0x0003ff); + bits -= 4; + getbits(isrc, 4 , &i ,&l); + } else if ((val & 0xffe000) == 0xffc000) {/* 2048 <= len < 4096 */ + len = 0x000800 | ((val >> 2) & 0x0007ff); + bits -= 6; + getbits(isrc, 6 , &i ,&l); + } else if ((val & 0xfff000) == 0xffe000) {/* 4096 <= len < 8192 */ + len = 0x001000 | (val & 0x000fff); + bits -= 8; + i++; + } else { /* NOTREACHED */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + } + + s = state->hist + state->histptr; + state->histptr += len; + olen += len; + if (state->histptr < 2*MPPE_HIST_LEN) { + /* Copy uncompressed bytes to the history. */ + + /* + * In some cases len may be greater than off. It means that memory + * areas pointed by s and s-off overlap. To decode that strange case + * data should be copied exactly by address increasing to make + * some data repeated. + */ + lamecopy(s, s - off, len); + } else { + /* Buffer overflow; drop packet. */ + rtn &= ~MPPC_OK; + return (rtn); + } + } + + /* Do PFC decompression. */ + len = olen; + if ((hist[0] & 0x01) != 0) { + (*dst)[0] = 0; + (*dst)++; + len++; + } + + if (len <= *dstCnt) { + /* Copy uncompressed packet to the output buffer. */ + memcpy(*dst, hist, olen); + } else { + /* Buffer overflow; drop packet. */ + rtn |= MPPC_DEST_EXHAUSTED; + } + + *src += *srcCnt; + *srcCnt = 0; + *dst += len; + *dstCnt -= len; + + return (rtn); +} diff --git a/sys/net/pfil.c b/sys/net/pfil.c index 90660e7..94a95ac 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -508,26 +508,24 @@ pfil_chain_remove(pfil_chain_t *chain, pfil_func_t func, void *arg) * Stuff that must be initialized for every instance (including the first of * course). 
*/ -static int -vnet_pfil_init(const void *unused) +static void +vnet_pfil_init(const void *unused __unused) { LIST_INIT(&V_pfil_head_list); PFIL_LOCK_INIT_REAL(&V_pfil_lock, "shared"); - return (0); } /* * Called for the removal of each instance. */ -static int -vnet_pfil_uninit(const void *unused) +static void +vnet_pfil_uninit(const void *unused __unused) { KASSERT(LIST_EMPTY(&V_pfil_head_list), ("%s: pfil_head_list %p not empty", __func__, &V_pfil_head_list)); PFIL_LOCK_DESTROY_REAL(&V_pfil_lock); - return (0); } /* Define startup order. */ diff --git a/sys/net/route.c b/sys/net/route.c index 26e3b85..f2f8897 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1141,6 +1141,15 @@ rt_ifdelroute(const struct rtentry *rt, void *arg) * to this interface...oh well... */ void +rt_flushifroutes_af(struct ifnet *ifp, int af) +{ + KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d", + __func__, af, AF_MAX)); + + rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp); +} + +void rt_flushifroutes(struct ifnet *ifp) { diff --git a/sys/net/route.h b/sys/net/route.h index 46e2ace..b6aa36d 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -468,6 +468,7 @@ typedef int rt_walktree_f_t(struct rtentry *, void *); typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *); void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *); void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg); +void rt_flushifroutes_af(struct ifnet *, int); void rt_flushifroutes(struct ifnet *ifp); /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ diff --git a/sys/net80211/ieee80211_hostap.c b/sys/net80211/ieee80211_hostap.c index 5b7611e..643a610 100644 --- a/sys/net80211/ieee80211_hostap.c +++ b/sys/net80211/ieee80211_hostap.c @@ -412,8 +412,8 @@ hostap_deliver_data(struct ieee80211vap *vap, ieee80211_free_node(sta); } } - if (mcopy != NULL && ieee80211_vap_xmitpkt(vap, mcopy) == 0) - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if (mcopy != 
NULL) + (void) ieee80211_vap_xmitpkt(vap, mcopy); } if (m != NULL) { /* diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c index 1f49237..5a6cce6 100644 --- a/sys/net80211/ieee80211_output.c +++ b/sys/net80211/ieee80211_output.c @@ -608,6 +608,8 @@ ieee80211_output(struct ifnet *ifp, struct mbuf *m, if ((wh->i_fc[0] & IEEE80211_FC0_VERSION_MASK) != IEEE80211_FC0_VERSION_0) senderr(EIO); /* XXX */ + if (m->m_pkthdr.len < ieee80211_anyhdrsize(wh)) + senderr(EIO); /* XXX */ /* locate destination node */ switch (wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) { @@ -617,8 +619,6 @@ ieee80211_output(struct ifnet *ifp, struct mbuf *m, break; case IEEE80211_FC1_DIR_TODS: case IEEE80211_FC1_DIR_DSTODS: - if (m->m_pkthdr.len < sizeof(struct ieee80211_frame)) - senderr(EIO); /* XXX */ ni = ieee80211_find_txnode(vap, wh->i_addr3); break; default: @@ -647,7 +647,6 @@ ieee80211_output(struct ifnet *ifp, struct mbuf *m, if (ieee80211_classify(ni, m)) senderr(EIO); /* XXX */ - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); IEEE80211_NODE_STAT(ni, tx_data); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) { IEEE80211_NODE_STAT(ni, tx_mcast); diff --git a/sys/netgraph/bluetooth/l2cap/ng_l2cap_misc.c b/sys/netgraph/bluetooth/l2cap/ng_l2cap_misc.c index f80a7b0..3ec2fa3 100644 --- a/sys/netgraph/bluetooth/l2cap/ng_l2cap_misc.c +++ b/sys/netgraph/bluetooth/l2cap/ng_l2cap_misc.c @@ -346,7 +346,8 @@ ng_l2cap_new_chan(ng_l2cap_p l2cap, ng_l2cap_con_p con, u_int16_t psm, int idtyp ch->scid = ng_l2cap_get_cid(l2cap, (con->linktype!= NG_HCI_LINK_ACL)); } - + + ch->idtype = idtype; if (ch->scid != NG_L2CAP_NULL_CID) { /* Initialize channel */ ch->psm = psm; diff --git a/sys/netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c b/sys/netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c index 4b88bc8..2dc370f 100644 --- a/sys/netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c +++ b/sys/netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c @@ -1389,7 +1389,13 @@ ng_l2cap_l2ca_discon_ind(ng_l2cap_chan_p ch) error = ENOMEM; else { ip = 
(ng_l2cap_l2ca_discon_ind_ip *)(msg->data); - ip->lcid = ch->scid; + ip->idtype = ch->idtype; + if(ch->idtype == NG_L2CAP_L2CA_IDTYPE_ATT|| + ch->idtype == NG_L2CAP_L2CA_IDTYPE_SMP) + ip->lcid = ch->con->con_handle; + else + ip->lcid = ch->scid; + NG_SEND_MSG_HOOK(error, l2cap->node, msg, l2cap->l2c, 0); } diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c index 7653263..d787219 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c @@ -1115,7 +1115,7 @@ ng_btsocket_l2cap_process_l2ca_discon_ind(struct ng_mesg *msg, /* Look for the socket with given channel ID */ pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ip->lcid, - NG_L2CAP_L2CA_IDTYPE_BREDR); + ip->idtype); if (pcb == NULL) { mtx_unlock(&ng_btsocket_l2cap_sockets_mtx); return (0); diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index 90c00c62..1fc1812 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -50,6 +50,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_ddb.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/module.h> @@ -64,6 +66,10 @@ __FBSDID("$FreeBSD$"); #include <sys/ktr.h> #include <sys/condvar.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + #include <net/if.h> #include <net/if_var.h> #include <net/netisr.h> @@ -3641,6 +3647,37 @@ vnet_igmp_uninit(const void *unused __unused) VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_uninit, NULL); +#ifdef DDB +DB_SHOW_COMMAND(igi_list, db_show_igi_list) +{ + struct igmp_ifsoftc *igi, *tigi; + LIST_HEAD(_igi_list, igmp_ifsoftc) *igi_head; + + if (!have_addr) { + db_printf("usage: show igi_list <addr>\n"); + return; + } + igi_head = (struct _igi_list *)addr; + + LIST_FOREACH_SAFE(igi, igi_head, igi_link, tigi) { + db_printf("igmp_ifsoftc %p:\n", igi); + db_printf(" ifp %p\n", igi->igi_ifp); + db_printf(" version %u\n", igi->igi_version); + db_printf(" v1_timer %u\n", 
igi->igi_v1_timer); + db_printf(" v2_timer %u\n", igi->igi_v2_timer); + db_printf(" v3_timer %u\n", igi->igi_v3_timer); + db_printf(" flags %#x\n", igi->igi_flags); + db_printf(" rv %u\n", igi->igi_rv); + db_printf(" qi %u\n", igi->igi_qi); + db_printf(" qri %u\n", igi->igi_qri); + db_printf(" uri %u\n", igi->igi_uri); + /* SLIST_HEAD(,in_multi) igi_relinmhead */ + /* struct mbufq igi_gq; */ + db_printf("\n"); + } +} +#endif + static int igmp_modevent(module_t mod, int type, void *unused __unused) { diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 3fc7c74..ea59c10 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -76,7 +76,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_ipfw.h" #include "opt_ipstealth.h" #include <sys/param.h> diff --git a/sys/netinet/ip_id.c b/sys/netinet/ip_id.c index b0efbee..de4ae75 100644 --- a/sys/netinet/ip_id.c +++ b/sys/netinet/ip_id.c @@ -287,11 +287,11 @@ static void ipid_sysuninit(void) { - mtx_destroy(&V_ip_id_mtx); if (V_id_array != NULL) { free(V_id_array, M_IPID); free(V_id_bits, M_IPID); } counter_u64_free(V_ip_id); + mtx_destroy(&V_ip_id_mtx); } VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysuninit, NULL); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 36b51bd..dbf9ba6 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -33,7 +33,6 @@ __FBSDID("$FreeBSD$"); #include "opt_bootp.h" -#include "opt_ipfw.h" #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_route.h" diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 9fdd59d..b5d4951 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -33,7 +33,6 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" -#include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_mbuf_stress_test.h" #include "opt_mpath.h" diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index 1fff331..c07019c 100644 --- a/sys/netinet/sctp_input.c +++ 
b/sys/netinet/sctp_input.c @@ -1969,7 +1969,8 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset, asoc->strmout[i].abandoned_unsent[0] = 0; #endif stcb->asoc.strmout[i].stream_no = i; - stcb->asoc.strmout[i].next_sequence_send = 0; + stcb->asoc.strmout[i].next_mid_ordered = 0; + stcb->asoc.strmout[i].next_mid_unordered = 0; stcb->asoc.strmout[i].last_msg_incomplete = 0; } /* process the INIT-ACK info (my info) */ @@ -3521,11 +3522,13 @@ sctp_reset_out_streams(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t /* no such stream */ continue; } - stcb->asoc.strmout[temp].next_sequence_send = 0; + stcb->asoc.strmout[temp].next_mid_ordered = 0; + stcb->asoc.strmout[temp].next_mid_unordered = 0; } } else { for (i = 0; i < stcb->asoc.streamoutcnt; i++) { - stcb->asoc.strmout[i].next_sequence_send = 0; + stcb->asoc.strmout[i].next_mid_ordered = 0; + stcb->asoc.strmout[i].next_mid_unordered = 0; } } sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED); @@ -6236,7 +6239,7 @@ sctp_input(struct mbuf **mp, int *offp, int proto SCTP_UNUSED) tag = htonl(sh->v_tag); flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port); m->m_pkthdr.flowid = flowid; - M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE_HASH); } cpu_to_use = sctp_cpuarry[flowid % mp_ncpus]; sctp_queue_to_mcore(m, off, cpu_to_use); diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index 6d71f7d..3544d69 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -3643,7 +3643,8 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er for (i = 0; i < stcb->asoc.streamoutcnt; i++) { TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); stcb->asoc.strmout[i].chunks_on_queues = 0; - stcb->asoc.strmout[i].next_sequence_send = 0; + stcb->asoc.strmout[i].next_mid_ordered = 0; + stcb->asoc.strmout[i].next_mid_unordered = 0; #if defined(SCTP_DETAILED_STR_STATS) for (j = 0; j < 
SCTP_PR_SCTP_MAX + 1; j++) { stcb->asoc.strmout[i].abandoned_sent[j] = 0; @@ -6381,7 +6382,6 @@ sctp_msg_append(struct sctp_tcb *stcb, sp->ppid = srcv->sinfo_ppid; sp->context = srcv->sinfo_context; sp->fsn = 0; - sp->msg_id = atomic_fetchadd_int(&stcb->asoc.assoc_msg_id, 1); if (sp->sinfo_flags & SCTP_ADDR_OVER) { sp->net = net; atomic_add_int(&sp->net->ref_count, 1); @@ -7568,10 +7568,28 @@ dont_do_it: chk->asoc = &stcb->asoc; chk->pad_inplace = 0; chk->no_fr_allowed = 0; - chk->rec.data.stream_seq = strq->next_sequence_send; - if ((rcv_flags & SCTP_DATA_LAST_FRAG) && - !(rcv_flags & SCTP_DATA_UNORDERED)) { - strq->next_sequence_send++; + if (stcb->asoc.idata_supported == 0) { + if (rcv_flags & SCTP_DATA_UNORDERED) { + /* Just use 0. The receiver ignores the values. */ + chk->rec.data.stream_seq = 0; + } else { + chk->rec.data.stream_seq = strq->next_mid_ordered; + if (rcv_flags & SCTP_DATA_LAST_FRAG) { + strq->next_mid_ordered++; + } + } + } else { + if (rcv_flags & SCTP_DATA_UNORDERED) { + chk->rec.data.stream_seq = strq->next_mid_unordered; + if (rcv_flags & SCTP_DATA_LAST_FRAG) { + strq->next_mid_unordered++; + } + } else { + chk->rec.data.stream_seq = strq->next_mid_ordered; + if (rcv_flags & SCTP_DATA_LAST_FRAG) { + strq->next_mid_ordered++; + } + } } chk->rec.data.stream_number = sp->stream; chk->rec.data.payloadtype = sp->ppid; @@ -7630,7 +7648,7 @@ dont_do_it: dchkh->ch.chunk_flags = chk->rec.data.rcv_flags; dchkh->dp.tsn = htonl(chk->rec.data.TSN_seq); dchkh->dp.stream_id = htons((strq->stream_no & 0x0000ffff)); - dchkh->dp.stream_sequence = htons(chk->rec.data.stream_seq); + dchkh->dp.stream_sequence = htons((uint16_t) chk->rec.data.stream_seq); dchkh->dp.protocol_id = chk->rec.data.payloadtype; dchkh->ch.chunk_length = htons(chk->send_size); } else { @@ -7638,9 +7656,8 @@ dont_do_it: ndchkh->ch.chunk_flags = chk->rec.data.rcv_flags; ndchkh->dp.tsn = htonl(chk->rec.data.TSN_seq); ndchkh->dp.stream_id = htons(strq->stream_no); - /* WHAT DO WE DO HERE??? 
*/ ndchkh->dp.reserved = htons(0); - ndchkh->dp.msg_id = htonl(sp->msg_id); + ndchkh->dp.msg_id = htonl(chk->rec.data.stream_seq); if (sp->fsn == 0) ndchkh->dp.ppid_fsn.protocol_id = chk->rec.data.payloadtype; else @@ -12235,7 +12252,8 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb, for (i = 0; i < stcb->asoc.streamoutcnt; i++) { TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues; - stcb->asoc.strmout[i].next_sequence_send = oldstream[i].next_sequence_send; + stcb->asoc.strmout[i].next_mid_ordered = oldstream[i].next_mid_ordered; + stcb->asoc.strmout[i].next_mid_unordered = oldstream[i].next_mid_unordered; stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete; stcb->asoc.strmout[i].stream_no = i; stcb->asoc.strmout[i].state = oldstream[i].state; @@ -12267,7 +12285,8 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb, stcb->asoc.strmout[i].abandoned_sent[0] = 0; stcb->asoc.strmout[i].abandoned_unsent[0] = 0; #endif - stcb->asoc.strmout[i].next_sequence_send = 0x0; + stcb->asoc.strmout[i].next_mid_ordered = 0; + stcb->asoc.strmout[i].next_mid_unordered = 0; stcb->asoc.strmout[i].stream_no = i; stcb->asoc.strmout[i].last_msg_incomplete = 0; stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL); @@ -12425,7 +12444,6 @@ sctp_copy_it_in(struct sctp_tcb *stcb, sp->ppid = srcv->sinfo_ppid; sp->context = srcv->sinfo_context; sp->fsn = 0; - sp->msg_id = atomic_fetchadd_int(&stcb->asoc.assoc_msg_id, 1); (void)SCTP_GETTIME_TIMEVAL(&sp->ts); sp->stream = srcv->sinfo_stream; diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index 704e5d9..36e4c01 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -4070,7 +4070,7 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, net->flowid = stcb->asoc.my_vtag ^ ntohs(stcb->rport) ^ ntohs(stcb->sctp_ep->sctp_lport); - net->flowtype = M_HASHTYPE_OPAQUE; + net->flowtype = 
M_HASHTYPE_OPAQUE_HASH; if (netp) { *netp = net; } diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h index c64d2b6..ad9d978 100644 --- a/sys/netinet/sctp_structs.h +++ b/sys/netinet/sctp_structs.h @@ -522,7 +522,6 @@ struct sctp_stream_queue_pending { TAILQ_ENTRY(sctp_stream_queue_pending) next; TAILQ_ENTRY(sctp_stream_queue_pending) ss_next; uint32_t fsn; - uint32_t msg_id; uint32_t length; uint32_t timetolive; uint32_t ppid; @@ -619,7 +618,12 @@ struct sctp_stream_out { uint32_t abandoned_unsent[1]; uint32_t abandoned_sent[1]; #endif - uint32_t next_sequence_send; /* next one I expect to send out */ + /* + * For associations using DATA chunks, the lower 16-bit of + * next_mid_ordered are used as the next SSN. + */ + uint32_t next_mid_ordered; + uint32_t next_mid_unordered; uint16_t stream_no; uint8_t last_msg_incomplete; uint8_t state; @@ -893,7 +897,6 @@ struct sctp_association { uint32_t stream_scheduling_module; uint32_t vrf_id; - uint32_t assoc_msg_id; uint32_t cookie_preserve_req; /* ASCONF next seq I am sending out, inits at init-tsn */ uint32_t asconf_seq_out; diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index cc18d9f..3f6ec55 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -1123,7 +1123,8 @@ sctp_init_asoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, * that were dropped must be notified to the upper layer as * failed to send. 
*/ - asoc->strmout[i].next_sequence_send = 0x0; + asoc->strmout[i].next_mid_ordered = 0; + asoc->strmout[i].next_mid_unordered = 0; TAILQ_INIT(&asoc->strmout[i].outqueue); asoc->strmout[i].chunks_on_queues = 0; #if defined(SCTP_DETAILED_STR_STATS) @@ -4836,10 +4837,22 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1, goto oh_well; } memset(chk, 0, sizeof(*chk)); - chk->rec.data.rcv_flags = SCTP_DATA_LAST_FRAG; + chk->rec.data.rcv_flags = 0; chk->sent = SCTP_FORWARD_TSN_SKIP; chk->asoc = &stcb->asoc; - chk->rec.data.stream_seq = strq->next_sequence_send; + if (stcb->asoc.idata_supported == 0) { + if (sp->sinfo_flags & SCTP_UNORDERED) { + chk->rec.data.stream_seq = 0; + } else { + chk->rec.data.stream_seq = strq->next_mid_ordered; + } + } else { + if (sp->sinfo_flags & SCTP_UNORDERED) { + chk->rec.data.stream_seq = strq->next_mid_unordered; + } else { + chk->rec.data.stream_seq = strq->next_mid_ordered; + } + } chk->rec.data.stream_number = sp->stream; chk->rec.data.payloadtype = sp->ppid; chk->rec.data.context = sp->context; @@ -4850,10 +4863,19 @@ sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1, TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, chk, sctp_next); stcb->asoc.sent_queue_cnt++; stcb->asoc.pr_sctp_cnt++; + } + chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG; + if (stcb->asoc.idata_supported == 0) { + if ((sp->sinfo_flags & SCTP_UNORDERED) == 0) { + strq->next_mid_ordered++; + } } else { - chk->rec.data.rcv_flags |= SCTP_DATA_LAST_FRAG; + if (sp->sinfo_flags & SCTP_UNORDERED) { + strq->next_mid_unordered++; + } else { + strq->next_mid_ordered++; + } } - strq->next_sequence_send++; oh_well: if (sp->data) { /* diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index c106996..c72f01f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -50,7 +50,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_ipfw.h" /* for ipfw_fwd */ #include "opt_inet.h" #include 
"opt_inet6.h" #include "opt_ipsec.h" diff --git a/sys/netinet/tcp_stacks/fastpath.c b/sys/netinet/tcp_stacks/fastpath.c index 7d573c5..7be9f96 100644 --- a/sys/netinet/tcp_stacks/fastpath.c +++ b/sys/netinet/tcp_stacks/fastpath.c @@ -54,7 +54,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_ipfw.h" /* for ipfw_fwd */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index a1d9a43..173c44c 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -39,7 +39,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_ipfw.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index d571dbd..e972ac7 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -360,7 +360,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCGIFALIFETIME_IN6: - case SIOCSIFALIFETIME_IN6: case SIOCGIFSTAT_IN6: case SIOCGIFSTAT_ICMP6: sa6 = &ifr->ifr_addr; @@ -459,34 +458,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, goto out; } break; - - case SIOCSIFALIFETIME_IN6: - { - struct in6_addrlifetime *lt; - - if (td != NULL) { - error = priv_check(td, PRIV_NETINET_ALIFETIME6); - if (error) - goto out; - } - if (ia == NULL) { - error = EADDRNOTAVAIL; - goto out; - } - /* sanity for overflow - beware unsigned */ - lt = &ifr->ifr_ifru.ifru_lifetime; - if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && - lt->ia6t_vltime + time_uptime < time_uptime) { - error = EINVAL; - goto out; - } - if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && - lt->ia6t_pltime + time_uptime < time_uptime) { - error = EINVAL; - goto out; - } - break; - } } switch (cmd) { @@ -572,21 +543,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, } break; - case SIOCSIFALIFETIME_IN6: - ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; - /* for sanity */ - if 
(ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { - ia->ia6_lifetime.ia6t_expire = - time_uptime + ia->ia6_lifetime.ia6t_vltime; - } else - ia->ia6_lifetime.ia6t_expire = 0; - if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { - ia->ia6_lifetime.ia6t_preferred = - time_uptime + ia->ia6_lifetime.ia6t_pltime; - } else - ia->ia6_lifetime.ia6t_preferred = 0; - break; - case SIOCAIFADDR_IN6: { struct nd_prefixctl pr0; diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h index 007fd66..77e5920 100644 --- a/sys/netinet6/in6_var.h +++ b/sys/netinet6/in6_var.h @@ -453,7 +453,6 @@ struct in6_rrenumreq { #define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq) #define SIOCGIFALIFETIME_IN6 _IOWR('i', 81, struct in6_ifreq) -#define SIOCSIFALIFETIME_IN6 _IOWR('i', 82, struct in6_ifreq) #define SIOCGIFSTAT_IN6 _IOWR('i', 83, struct in6_ifreq) #define SIOCGIFSTAT_ICMP6 _IOWR('i', 84, struct in6_ifreq) diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index 3b3058b..002c761 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -34,7 +34,6 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_ipstealth.h" diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 1903857..9802c49 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index fde6710..c03bf5e 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c index a2f2d7e..dcae9f8 
100644 --- a/sys/netinet6/mld6.c +++ b/sys/netinet6/mld6.c @@ -300,7 +300,8 @@ mld_restore_context(struct mbuf *m) #if defined(VIMAGE) && defined(INVARIANTS) KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr, - ("%s: called when curvnet was not restored", __func__)); + ("%s: called when curvnet was not restored: cuvnet %p m ptr %p", + __func__, curvnet, m->m_pkthdr.PH_loc.ptr)); #endif return (m->m_pkthdr.flowid); } diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 0585cd7..a75d588 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -896,9 +896,6 @@ nd6_timer(void *arg) struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; - callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, - nd6_timer, curvnet); - TAILQ_INIT(&drq); /* expire default router list */ @@ -1025,6 +1022,10 @@ nd6_timer(void *arg) prelist_remove(pr); } } + + callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, + nd6_timer, curvnet); + CURVNET_RESTORE(); } diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 347eb56..b44fb9e 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -72,7 +72,6 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_rss.h" diff --git a/sys/ofed/drivers/net/mlx4/en_rx.c b/sys/ofed/drivers/net/mlx4/en_rx.c index f4425af..2711b22 100644 --- a/sys/ofed/drivers/net/mlx4/en_rx.c +++ b/sys/ofed/drivers/net/mlx4/en_rx.c @@ -619,7 +619,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* forward Toeplitz compatible hash value */ mb->m_pkthdr.flowid = be32_to_cpu(cqe->immed_rss_invalid); - M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); + M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE_HASH); mb->m_pkthdr.rcvif = dev; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { diff --git a/sys/riscv/include/riscvreg.h b/sys/riscv/include/riscvreg.h index 6984ab1..b18df17 100644 --- a/sys/riscv/include/riscvreg.h +++ 
b/sys/riscv/include/riscvreg.h @@ -124,7 +124,6 @@ #define XLEN 8 #define INSN_SIZE 4 -#define INSN_SIZE 4 #define RISCV_INSN_NOP 0x00000013 #define RISCV_INSN_BREAK 0x00100073 #define RISCV_INSN_RET 0x00008067 diff --git a/sys/rpc/clnt_bck.c b/sys/rpc/clnt_bck.c index f1c8dde..66f3c30 100644 --- a/sys/rpc/clnt_bck.c +++ b/sys/rpc/clnt_bck.c @@ -175,14 +175,9 @@ clnt_bck_create( return (cl); err: - if (cl) { - if (ct) { - mtx_destroy(&ct->ct_lock); - mem_free(ct, sizeof (struct ct_data)); - } - if (cl) - mem_free(cl, sizeof (CLIENT)); - } + mtx_destroy(&ct->ct_lock); + mem_free(ct, sizeof (struct ct_data)); + mem_free(cl, sizeof (CLIENT)); return (NULL); } diff --git a/sys/rpc/rpcb_clnt.c b/sys/rpc/rpcb_clnt.c index a68793e..f3781f6 100644 --- a/sys/rpc/rpcb_clnt.c +++ b/sys/rpc/rpcb_clnt.c @@ -1051,7 +1051,7 @@ done: * * Assuming that the address is all properly allocated */ -int +bool_t rpcb_getaddr(program, version, nconf, address, host) rpcprog_t program; rpcvers_t version; diff --git a/sys/security/audit/audit_bsm.c b/sys/security/audit/audit_bsm.c index 9f29ece..fed111a 100644 --- a/sys/security/audit/audit_bsm.c +++ b/sys/security/audit/audit_bsm.c @@ -952,6 +952,8 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) case AUE_GETDIRENTRIESATTR: case AUE_LSEEK: case AUE_POLL: + case AUE_PREAD: + case AUE_PWRITE: case AUE_READ: case AUE_READV: case AUE_WRITE: diff --git a/sys/sys/intr.h b/sys/sys/intr.h index e513517..394aa5c 100644 --- a/sys/sys/intr.h +++ b/sys/sys/intr.h @@ -34,22 +34,6 @@ #define INTR_IRQ_INVALID 0xFFFFFFFF -#ifdef DEV_ACPI -struct intr_map_data_acpi { - struct intr_map_data hdr; - u_int irq; - enum intr_polarity pol; - enum intr_trigger trig; -}; -#endif - -struct intr_map_data_gpio { - struct intr_map_data hdr; - u_int gpio_pin_num; - u_int gpio_pin_flags; - u_int gpio_intr_mode; -}; - #ifdef notyet #define INTR_SOLO INTR_MD1 typedef int intr_irq_filter_t(void *arg, struct trapframe *tf); @@ -124,14 +108,6 @@ int 
intr_map_msi(device_t, device_t, intptr_t, int, uint64_t *, uint32_t *); int intr_alloc_msix(device_t, device_t, intptr_t, int *); int intr_release_msix(device_t, device_t, intptr_t, int); -#ifdef DEV_ACPI -u_int intr_acpi_map_irq(device_t, u_int, enum intr_polarity, - enum intr_trigger); -#endif - -u_int intr_gpio_map_irq(device_t dev, u_int pin_num, u_int pin_flags, - u_int intr_mode); - #ifdef SMP int intr_bind_irq(device_t, struct resource *, int); diff --git a/sys/sys/param.h b/sys/sys/param.h index d7fa2c7..b095aa7 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100116 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100117 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/procdesc.h b/sys/sys/procdesc.h index 1a3bc98..a9bad27 100644 --- a/sys/sys/procdesc.h +++ b/sys/sys/procdesc.h @@ -101,6 +101,9 @@ void procdesc_finit(struct procdesc *, struct file *); pid_t procdesc_pid(struct file *); void procdesc_reap(struct proc *); +int procdesc_falloc(struct thread *, struct file **, int *, int, + struct filecaps *); + #else /* !_KERNEL */ #include <sys/_types.h> @@ -127,5 +130,8 @@ __END_DECLS * Flags which can be passed to pdfork(2). */ #define PD_DAEMON 0x00000001 /* Don't exit when procdesc closes. */ +#define PD_CLOEXEC 0x00000002 /* Close file descriptor on exec. 
*/ + +#define PD_ALLOWED_AT_FORK (PD_DAEMON | PD_CLOEXEC) #endif /* !_SYS_PROCDESC_H_ */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 6ab965b..37d8fd1 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -345,7 +345,6 @@ static struct sx sw_alloc_sx; #define NOBJLIST(handle) \ (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) -static struct mtx sw_alloc_mtx; /* protect list manipulation */ static struct pagerlst swap_pager_object_list[NOBJLISTS]; static uma_zone_t swap_zone; @@ -486,8 +485,8 @@ swap_pager_init(void) for (i = 0; i < NOBJLISTS; ++i) TAILQ_INIT(&swap_pager_object_list[i]); - mtx_init(&sw_alloc_mtx, "swap_pager list", NULL, MTX_DEF); mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); + sx_init(&sw_alloc_sx, "swspsx"); sx_init(&swdev_syscall_lock, "swsysc"); /* @@ -582,28 +581,46 @@ swap_pager_swap_init(void) mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF); } +static vm_object_t +swap_pager_alloc_init(void *handle, struct ucred *cred, vm_ooffset_t size, + vm_ooffset_t offset) +{ + vm_object_t object; + + if (cred != NULL) { + if (!swap_reserve_by_cred(size, cred)) + return (NULL); + crhold(cred); + } + object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset + + PAGE_MASK + size)); + object->handle = handle; + if (cred != NULL) { + object->cred = cred; + object->charge = size; + } + object->un_pager.swp.swp_bcount = 0; + return (object); +} + /* * SWAP_PAGER_ALLOC() - allocate a new OBJT_SWAP VM object and instantiate * its metadata structures. * * This routine is called from the mmap and fork code to create a new - * OBJT_SWAP object. We do this by creating an OBJT_DEFAULT object - * and then converting it with swp_pager_meta_build(). - * - * This routine may block in vm_object_allocate() and create a named - * object lookup race, so we must interlock. + * OBJT_SWAP object. 
* - * MPSAFE + * This routine must ensure that no live duplicate is created for + * the named object request, which is protected against by + * holding the sw_alloc_sx lock in case handle != NULL. */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; - vm_pindex_t pindex; - pindex = OFF_TO_IDX(offset + PAGE_MASK + size); - if (handle) { + if (handle != NULL) { /* * Reference existing named region or allocate new one. There * should not be a race here against swp_pager_meta_build() @@ -613,38 +630,16 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, sx_xlock(&sw_alloc_sx); object = vm_pager_object_lookup(NOBJLIST(handle), handle); if (object == NULL) { - if (cred != NULL) { - if (!swap_reserve_by_cred(size, cred)) { - sx_xunlock(&sw_alloc_sx); - return (NULL); - } - crhold(cred); - } - object = vm_object_allocate(OBJT_DEFAULT, pindex); - VM_OBJECT_WLOCK(object); - object->handle = handle; - if (cred != NULL) { - object->cred = cred; - object->charge = size; + object = swap_pager_alloc_init(handle, cred, size, + offset); + if (object != NULL) { + TAILQ_INSERT_TAIL(NOBJLIST(object->handle), + object, pager_object_list); } - swp_pager_meta_build(object, 0, SWAPBLK_NONE); - VM_OBJECT_WUNLOCK(object); } sx_xunlock(&sw_alloc_sx); } else { - if (cred != NULL) { - if (!swap_reserve_by_cred(size, cred)) - return (NULL); - crhold(cred); - } - object = vm_object_allocate(OBJT_DEFAULT, pindex); - VM_OBJECT_WLOCK(object); - if (cred != NULL) { - object->cred = cred; - object->charge = size; - } - swp_pager_meta_build(object, 0, SWAPBLK_NONE); - VM_OBJECT_WUNLOCK(object); + object = swap_pager_alloc_init(handle, cred, size, offset); } return (object); } @@ -663,17 +658,22 @@ static void swap_pager_dealloc(vm_object_t object) { + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((object->flags & OBJ_DEAD) != 0, ("dealloc of reachable obj")); + /* * Remove from list right 
away so lookups will fail if we block for * pageout completion. */ if (object->handle != NULL) { - mtx_lock(&sw_alloc_mtx); - TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list); - mtx_unlock(&sw_alloc_mtx); + VM_OBJECT_WUNLOCK(object); + sx_xlock(&sw_alloc_sx); + TAILQ_REMOVE(NOBJLIST(object->handle), object, + pager_object_list); + sx_xunlock(&sw_alloc_sx); + VM_OBJECT_WLOCK(object); } - VM_OBJECT_ASSERT_WLOCKED(object); vm_object_pip_wait(object, "swpdea"); /* @@ -900,16 +900,19 @@ swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, * If destroysource is set, we remove the source object from the * swap_pager internal queue now. */ - if (destroysource) { - if (srcobject->handle != NULL) { - mtx_lock(&sw_alloc_mtx); - TAILQ_REMOVE( - NOBJLIST(srcobject->handle), - srcobject, - pager_object_list - ); - mtx_unlock(&sw_alloc_mtx); - } + if (destroysource && srcobject->handle != NULL) { + vm_object_pip_add(srcobject, 1); + VM_OBJECT_WUNLOCK(srcobject); + vm_object_pip_add(dstobject, 1); + VM_OBJECT_WUNLOCK(dstobject); + sx_xlock(&sw_alloc_sx); + TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject, + pager_object_list); + sx_xunlock(&sw_alloc_sx); + VM_OBJECT_WLOCK(dstobject); + vm_object_pip_wakeup(dstobject); + VM_OBJECT_WLOCK(srcobject); + vm_object_pip_wakeup(srcobject); } /* @@ -1745,16 +1748,7 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) if (object->type != OBJT_SWAP) { object->type = OBJT_SWAP; object->un_pager.swp.swp_bcount = 0; - - if (object->handle != NULL) { - mtx_lock(&sw_alloc_mtx); - TAILQ_INSERT_TAIL( - NOBJLIST(object->handle), - object, - pager_object_list - ); - mtx_unlock(&sw_alloc_mtx); - } + KASSERT(object->handle == NULL, ("default pager with handle")); } /* diff --git a/sys/xen/xen-os.h b/sys/xen/xen-os.h index 0aa3bb50..96e084f 100644 --- a/sys/xen/xen-os.h +++ b/sys/xen/xen-os.h @@ -118,7 +118,7 @@ xen_clear_bit(int bit, volatile long *addr) atomic_clear_long(&addr[bit / NBPL], 1UL 
<< (bit % NBPL)); } -#undef NPBL +#undef NBPL /* * Functions to allocate/free unused memory in order |