diff options
Diffstat (limited to 'sys')
557 files changed, 33227 insertions, 11317 deletions
diff --git a/sys/Makefile b/sys/Makefile index 3b24ed9..969994f 100644 --- a/sys/Makefile +++ b/sys/Makefile @@ -12,7 +12,7 @@ CSCOPEDIRS= boot bsm cam cddl compat conf contrib crypto ddb dev fs gdb \ geom gnu isa kern libkern modules net net80211 netatalk \ netgraph netinet netinet6 netipsec netipx netnatm netncp \ netsmb nfs nfsclient nfsserver nlm opencrypto \ - pci rpc security sys ufs vm xdr ${CSCOPE_ARCHDIR} + pci rpc security sys ufs vm xdr xen ${CSCOPE_ARCHDIR} .if defined(ALL_ARCH) CSCOPE_ARCHDIR ?= amd64 arm i386 ia64 mips pc98 powerpc sparc64 x86 .else diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index 57341c9..29e66c5 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -78,7 +78,7 @@ static void acpi_stop_beep(void *); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); -static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t); +static void acpi_wakeup_cpus(struct acpi_softc *, const cpuset_t *); #endif #define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE)) @@ -173,7 +173,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, int cpu) #define BIOS_WARM (0x0a) static void -acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) +acpi_wakeup_cpus(struct acpi_softc *sc, const cpuset_t *wakeup_cpus) { uint32_t mpbioswarmvec; int cpu; @@ -192,7 +192,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) /* Wake up each AP. 
*/ for (cpu = 1; cpu < mp_ncpus; cpu++) { - if ((wakeup_cpus & (1 << cpu)) == 0) + if (!CPU_ISSET(cpu, wakeup_cpus)) continue; if (acpi_wakeup_ap(sc, cpu) == 0) { /* restore the warmstart vector */ @@ -214,7 +214,7 @@ int acpi_sleep_machdep(struct acpi_softc *sc, int state) { #ifdef SMP - cpumask_t wakeup_cpus; + cpuset_t wakeup_cpus; #endif register_t cr3, rf; ACPI_STATUS status; @@ -244,10 +244,9 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) if (savectx(susppcbs[0])) { #ifdef SMP - if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { - device_printf(sc->acpi_dev, - "Failed to suspend APs: CPU mask = 0x%jx\n", - (uintmax_t)(wakeup_cpus & ~stopped_cpus)); + if (!CPU_EMPTY(&wakeup_cpus) && + suspend_cpus(wakeup_cpus) == 0) { + device_printf(sc->acpi_dev, "Failed to suspend APs\n"); goto out; } #endif @@ -282,8 +281,8 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); #ifdef SMP - if (wakeup_cpus != 0) - acpi_wakeup_cpus(sc, wakeup_cpus); + if (!CPU_EMPTY(&wakeup_cpus)) + acpi_wakeup_cpus(sc, &wakeup_cpus); #endif acpi_resync_clock(sc); ret = 0; @@ -291,7 +290,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) out: #ifdef SMP - if (wakeup_cpus != 0) + if (!CPU_EMPTY(&wakeup_cpus)) restart_cpus(wakeup_cpus); #endif diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c index caab1d6..4557177 100644 --- a/sys/amd64/amd64/intr_machdep.c +++ b/sys/amd64/amd64/intr_machdep.c @@ -445,8 +445,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. 
*/ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -472,7 +471,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -503,7 +502,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -516,6 +515,9 @@ intr_shuffle_irqs(void *arg __unused) struct intsrc *isrc; int i; + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/amd64/amd64/legacy.c b/sys/amd64/amd64/legacy.c index 100ce7c..06d7d17 100644 --- a/sys/amd64/amd64/legacy.c +++ b/sys/amd64/amd64/legacy.c @@ -81,6 +81,7 @@ static device_method_t legacy_methods[] = { DEVMETHOD(bus_read_ivar, legacy_read_ivar), DEVMETHOD(bus_write_ivar, legacy_write_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 94b4037..f90ad03 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" +#include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_sched.h" #include "opt_kdtrace.h" @@ -116,6 +117,7 @@ __FBSDID("$FreeBSD$"); #include <x86/mca.h> #include <machine/md_var.h> #include <machine/metadata.h> +#include <machine/mp_watchdog.h> #include <machine/pc/bios.h> #include <machine/pcb.h> #include <machine/proc.h> @@ -734,9 +736,8 @@ 
cpu_idle(int busy) CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); -#ifdef SMP - if (mp_grab_cpu_hlt()) - return; +#ifdef MP_WATCHDOG + ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy - try to use fast methods. */ if (busy) { diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index fde9df2..dcf70d4 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -29,13 +29,13 @@ __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_kstack_pages.h" -#include "opt_mp_watchdog.h" #include "opt_sched.h" #include "opt_smp.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -63,7 +63,6 @@ __FBSDID("$FreeBSD$"); #include <machine/cpufunc.h> #include <x86/mca.h> #include <machine/md_var.h> -#include <machine/mp_watchdog.h> #include <machine/pcb.h> #include <machine/psl.h> #include <machine/smp.h> @@ -125,7 +124,7 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); * Local data and functions. */ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -179,11 +178,8 @@ static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); -static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; -static struct sysctl_ctx_list logical_cpu_clist; static u_int bootMP_size; static void @@ -261,8 +257,11 @@ topo_probe_0x4(void) * logical processors that belong to the same core * as BSP thus deducing number of threads per core. 
*/ - cpuid_count(0x04, 0, p); - max_cores = ((p[0] >> 26) & 0x3f) + 1; + if (cpu_high >= 0x4) { + cpuid_count(0x04, 0, p); + max_cores = ((p[0] >> 26) & 0x3f) + 1; + } else + max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; @@ -354,7 +353,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -498,7 +497,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -625,6 +624,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; @@ -756,19 +756,17 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); - - /* Determine if we are a hyperthread. */ - if (hyperthreading_cpus > 1 && - PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -855,7 +853,7 @@ assign_cpu_ids(void) if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; -#if defined(SCHED_ULE) + /* * Don't use HT CPU if it has been disabled by a * tunable. 
@@ -864,7 +862,6 @@ assign_cpu_ids(void) cpu_info[i].cpu_disabled = 1; continue; } -#endif } /* Don't use this CPU if it has been disabled by a tunable. */ @@ -874,6 +871,11 @@ assign_cpu_ids(void) } } + if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { + hyperthreading_cpus = 0; + cpu_logical = 1; + } + /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. @@ -911,6 +913,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus, tcpuset; vm_offset_t va = boot_address + KERNBASE; u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; @@ -998,11 +1001,14 @@ start_all_aps(void) panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + tcpuset = PCPU_GET(cpumask); + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1131,6 +1137,30 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, #endif /* COUNT_XINVLTLB_HITS */ /* + * Send an IPI to specified CPU handling the bitmap logic. 
+ */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + +/* * Flush the TLB on all other CPU's */ static void @@ -1154,28 +1184,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (cpumask_t)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_rflags() & PSL_I)) @@ -1184,39 +1205,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (cpumask_t)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: 
%d ipi: %x", __func__, + cpu, vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1263,7 +1270,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1275,7 +1282,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1287,7 +1294,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1340,7 +1347,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1350,12 +1357,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. 
*/ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1373,7 +1380,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1386,8 +1393,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1397,7 +1406,8 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); @@ -1406,7 +1416,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1414,11 +1424,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1430,23 +1442,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1461,7 +1475,7 @@ cpustop_handler(void) void cpususpend_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; register_t cr3, rf; u_int cpu; @@ -1473,7 +1487,7 @@ cpususpend_handler(void) if (savectx(susppcbs[cpu])) { wbinvd(); - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); } else { pmap_init_pat(); PCPU_SET(switchtime, 0); @@ -1481,11 +1495,11 @@ cpususpend_handler(void) } /* Wait for resume */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); /* Restore CR3 and enable interrupts */ load_cr3(cr3); @@ -1510,158 +1524,6 @@ release_aps(void *dummy __unused) } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); -static int -sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) -{ - cpumask_t mask; - int error; - - 
mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); - if (error || !req->newptr) - return (error); - - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if (! hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; - - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); - hlt_cpus_mask = mask; - return (error); -} -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", - "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); - -static int -sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) -{ - int disable, error; - - disable = hlt_logical_cpus; - error = sysctl_handle_int(oidp, &disable, 0, req); - if (error || !req->newptr) - return (error); - - if (disable) - hlt_cpus_mask |= logical_cpus_mask; - else - hlt_cpus_mask &= ~logical_cpus_mask; - - if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hlt_logical_cpus = disable; - return (error); -} - -static int -sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) -{ - int allowed, error; - - allowed = hyperthreading_allowed; - error = sysctl_handle_int(oidp, &allowed, 0, req); - if (error || !req->newptr) - return (error); - -#ifdef SCHED_ULE - /* - * SCHED_ULE doesn't allow enabling/disabling HT cores at - * run-time. 
- */ - if (allowed != hyperthreading_allowed) - return (ENOTSUP); - return (error); -#endif - - if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; - else - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hyperthreading_allowed = allowed; - return (error); -} - -static void -cpu_hlt_setup(void *dummy __unused) -{ - - if (logical_cpus_mask != 0) { - TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", - &hlt_logical_cpus); - sysctl_ctx_init(&logical_cpu_clist); - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, - sysctl_hlt_logical_cpus, "IU", ""); - SYSCTL_ADD_UINT(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, - &logical_cpus_mask, 0, ""); - - if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; - - /* - * If necessary for security purposes, force - * hyperthreading off, regardless of the value - * of hlt_logical_cpus. - */ - if (hyperthreading_cpus_mask) { - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hyperthreading_allowed, "IU", ""); - if (! 
hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - } - } -} -SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); - -int -mp_grab_cpu_hlt(void) -{ - cpumask_t mask; -#ifdef MP_WATCHDOG - u_int cpuid; -#endif - int retval; - - mask = PCPU_GET(cpumask); -#ifdef MP_WATCHDOG - cpuid = PCPU_GET(cpuid); - ap_watchdog(cpuid); -#endif - - retval = 0; - while (mask & hlt_cpus_mask) { - retval = 1; - __asm __volatile("sti; hlt" : : : "memory"); - } - return (retval); -} - #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index c9ff9bc..025ca5f 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> +#else +#include <sys/cpuset.h> #endif #include <vm/vm.h> @@ -581,7 +583,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* @@ -923,19 +925,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -943,23 
+946,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -967,19 +970,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -995,8 +999,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t 
*pde; pd_entry_t newpde; @@ -1007,8 +1011,12 @@ pmap_update_pde_action(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1016,8 +1024,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1032,26 +1044,28 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1065,7 +1079,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1074,7 +1088,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); 
} @@ -1083,7 +1097,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1099,7 +1113,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1607,7 +1621,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1649,7 +1663,7 @@ pmap_pinit(pmap_t pmap) pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -5087,11 +5101,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #ifdef SMP - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~PCPU_GET(cpumask); - pmap->pm_active |= PCPU_GET(cpumask); + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4); td->td_pcb->pcb_cr3 = cr3; diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 166bde1..2a9dd7a 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/pioctl.h> #include <sys/proc.h> 
+#include <sys/sched.h> #include <sys/sf_buf.h> #include <sys/smp.h> #include <sys/sysctl.h> @@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu.h> #include <machine/md_var.h> #include <machine/pcb.h> +#include <machine/smp.h> #include <machine/specialreg.h> #include <machine/tss.h> @@ -512,13 +514,15 @@ cpu_set_user_tls(struct thread *td, void *tls_base) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) { ia32_pause(); ; /* Wait for other cpu to see that we've started */ } - stop_cpus((1<<cpu_reset_proxyid)); + CPU_SETOF(cpu_reset_proxyid, &tcrp); + stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); @@ -529,24 +533,28 @@ void cpu_reset() { #ifdef SMP - cpumask_t map; + cpuset_t map; u_int cnt; if (smp_active) { - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map != 0) { + sched_pin(); + map = PCPU_GET(other_cpus); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); + sched_unpin(); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. 
*/ - atomic_store_rel_int(&started_cpus, 1 << 0); + CPU_SETOF(0, &started_cpus); + wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) { @@ -561,7 +569,8 @@ cpu_reset() while (1) ia32_pause(); /* NOTREACHED */ - } + } else + sched_unpin(); DELAY(1000000); } diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 1117f15..7c42038 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -296,6 +296,7 @@ options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) +device xhci # XHCI PCI->USB interface (USB 3.0) device usb # USB Bus (required) #device udbp # USB Double Bulk Pipe devices (needs netgraph) device uhid # "Human Interface Devices" @@ -343,3 +344,11 @@ device dcons_crom # Configuration ROM for dcons device bvmconsole # brain dead simple bvm console device bvmdebug # brain dead simple bvm gdb pipe device mptable + +# Sound support +device sound # Generic sound driver (required) +device snd_es137x # Ensoniq AudioPCI ES137x +device snd_hda # Intel High Definition Audio +device snd_ich # Intel, NVidia and other ICH AC'97 Audio +device snd_uaudio # USB Audio +device snd_via8233 # VIA VT8233x Audio diff --git a/sys/amd64/ia32/ia32_sigtramp.S b/sys/amd64/ia32/ia32_sigtramp.S index 7d64470..710834c 100644 --- a/sys/amd64/ia32/ia32_sigtramp.S +++ b/sys/amd64/ia32/ia32_sigtramp.S @@ -79,8 +79,20 @@ ia32_osigcode: jmp 1b +/* + * The lcall $7,$0 emulator cannot use the call gate that does an + * inter-privilege transition. The reason is that the call gate + * does not disable interrupts, and, before the swapgs is + * executed, we would have a window where the ring 0 code is + * executed with the wrong gsbase. + * + * Instead, reflect the lcall $7,$0 back to ring 3 trampoline + * which sets up the frame for int $0x80. 
+ */ ALIGN_TEXT lcall_tramp: + cmpl $SYS_vfork,%eax + je 2f pushl %ebp movl %esp,%ebp pushl 0x24(%ebp) /* arg 6 */ @@ -91,8 +103,19 @@ lcall_tramp: pushl 0x10(%ebp) /* arg 1 */ pushl 0xc(%ebp) /* gap */ int $0x80 - leave + leavel +1: lretl +2: + /* + * vfork handling is special and relies on the libc stub saving + * the return ip in %ecx. If vfork failed, then there is no + * child which can corrupt the frame created by call gate. + */ + int $0x80 + jb 1b + addl $8,%esp + jmpl *%ecx #endif ALIGN_TEXT diff --git a/sys/amd64/include/_types.h b/sys/amd64/include/_types.h index 89d2e86..13dc3ea 100644 --- a/sys/amd64/include/_types.h +++ b/sys/amd64/include/_types.h @@ -61,7 +61,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 7a62851..1b8108a 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -152,6 +152,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -251,7 +252,7 @@ struct pmap { struct mtx pm_mtx; pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ vm_page_t pm_root; /* spare page table pages */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index ec107f9..de686b7 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -63,17 +63,16 @@ void ipi_all_but_self(u_int ipi); void ipi_bitmap_handler(struct trapframe frame); void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void 
ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); -int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #endif /* !LOCORE */ #endif /* SMP */ diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 0f4c356..26646fb 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -111,7 +111,7 @@ struct vlapic *vm_lapic(struct vm *vm, int cpu); int vm_get_capability(struct vm *vm, int vcpu, int type, int *val); int vm_set_capability(struct vm *vm, int vcpu, int type, int val); void vm_activate_cpu(struct vm *vm, int vcpu); -cpumask_t vm_active_cpus(struct vm *vm); +cpuset_t vm_active_cpus(struct vm *vm); /* * Return 1 if device indicated by bus/slot/func is supposed to be a @@ -136,12 +136,6 @@ vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) return (vm_get_run_state(vm, vcpu, hostcpu) == VCPU_RUNNING); } -static cpumask_t __inline -vcpu_mask(int vcpuid) -{ - return ((cpumask_t)1 << vcpuid); -} - #endif /* KERNEL */ #define VM_MAXCPU 8 /* maximum virtual cpus */ diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c index 41e937a..6844cc0 100644 --- a/sys/amd64/vmm/amd/amdv.c +++ b/sys/amd64/vmm/amd/amdv.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/errno.h> +#include <sys/smp.h> #include <machine/vmm.h> #include "io/iommu.h" diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 02db4bd..fcb36ad 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -36,6 +36,7 @@ 
__FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/pciio.h> #include <sys/rman.h> +#include <sys/smp.h> #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index c0d82dd..0a14127 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/systm.h> +#include <sys/smp.h> #include <machine/clock.h> #include <x86/apicreg.h> @@ -439,12 +440,11 @@ static int lapic_process_icr(struct vlapic *vlapic, uint64_t icrval) { int i; - cpumask_t dmask, thiscpumask; + cpuset_t dmask; uint32_t dest, vec, mode; - thiscpumask = vcpu_mask(vlapic->vcpuid); + CPU_ZERO(&dmask); - dmask = 0; dest = icrval >> 32; vec = icrval & APIC_VECTOR_MASK; mode = icrval & APIC_DELMODE_MASK; @@ -452,26 +452,27 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval) if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { switch (icrval & APIC_DEST_MASK) { case APIC_DEST_DESTFLD: - dmask = vcpu_mask(dest); + CPU_SETOF(dest, &dmask); break; case APIC_DEST_SELF: - dmask = thiscpumask; + CPU_SETOF(vlapic->vcpuid, &dmask); break; case APIC_DEST_ALLISELF: dmask = vm_active_cpus(vlapic->vm); break; case APIC_DEST_ALLESELF: - dmask = vm_active_cpus(vlapic->vm) & ~thiscpumask; + dmask = vm_active_cpus(vlapic->vm); + CPU_CLR(vlapic->vcpuid, &dmask); break; } - for (i = 0; i < VM_MAXCPU; i++) { - if (dmask & vcpu_mask(i)) { - if (mode == APIC_DELMODE_FIXED) - lapic_set_intr(vlapic->vm, i, vec); - else - vm_inject_nmi(vlapic->vm, i); - } + while ((i = cpusetobj_ffs(&dmask)) != 0) { + i--; + CPU_CLR(i, &dmask); + if (mode == APIC_DELMODE_FIXED) + lapic_set_intr(vlapic->vm, i, vec); + else + vm_inject_nmi(vlapic->vm, i); } return (0); /* handled completely in the kernel */ diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index f984138..9d32ccc 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -97,11 
+97,11 @@ struct vm { char name[VM_MAX_NAMELEN]; /* - * Mask of active vcpus. + * Set of active vcpus. * An active vcpu is one that has been started implicitly (BSP) or * explicitly (AP) by sending it a startup ipi. */ - cpumask_t active_cpus; + cpuset_t active_cpus; }; static struct vmm_ops *ops; @@ -720,10 +720,10 @@ vm_activate_cpu(struct vm *vm, int vcpuid) { if (vcpuid >= 0 && vcpuid < VM_MAXCPU) - vm->active_cpus |= vcpu_mask(vcpuid); + CPU_SET(vcpuid, &vm->active_cpus); } -cpumask_t +cpuset_t vm_active_cpus(struct vm *vm) { diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 8704fcf..4aca087 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/smp.h> #include <machine/vmm.h> #include "vmm_ipi.h" diff --git a/sys/amd64/vmm/vmm_msr.c b/sys/amd64/vmm/vmm_msr.c index 5317ca0..99ac293 100644 --- a/sys/amd64/vmm/vmm_msr.c +++ b/sys/amd64/vmm/vmm_msr.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/smp.h> #include <machine/specialreg.h> #include <x86/apicreg.h> diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c index e6f5c48..ae60979 100644 --- a/sys/amd64/vmm/vmm_stat.c +++ b/sys/amd64/vmm/vmm_stat.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/systm.h> #include <sys/malloc.h> +#include <sys/smp.h> #include <machine/vmm.h> #include "vmm_stat.h" diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index 087a744..cecf363 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -2395,7 +2395,7 @@ pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt cpu_cpwait(); PMAP_LOCK_INIT(kernel_pmap); - kernel_pmap->pm_active = -1; + CPU_FILL(&kernel_pmap->pm_active); kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; TAILQ_INIT(&kernel_pmap->pm_pvlist); @@ -3826,7 +3826,7 @@ pmap_pinit(pmap_t pmap) 
pmap_alloc_l1(pmap); bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); diff --git a/sys/arm/include/_types.h b/sys/arm/include/_types.h index 48dd2a7..d8386f3 100644 --- a/sys/arm/include/_types.h +++ b/sys/arm/include/_types.h @@ -67,7 +67,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __uint32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef double __double_t; typedef double __float_t; diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index 701390a..3d63432 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -62,6 +62,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -134,7 +135,7 @@ struct pmap { struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; pd_entry_t *pm_pdir; /* KVA of page directory */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ }; diff --git a/sys/boot/common/Makefile.inc b/sys/boot/common/Makefile.inc index 9893278..be6c3301 100644 --- a/sys/boot/common/Makefile.inc +++ b/sys/boot/common/Makefile.inc @@ -44,8 +44,15 @@ SRCS+= pnp.c # Forth interpreter .if defined(BOOT_FORTH) SRCS+= interp_forth.c +MAN+= ../forth/beastie.4th.8 +MAN+= ../forth/brand.4th.8 +MAN+= ../forth/check-password.4th.8 +MAN+= ../forth/color.4th.8 +MAN+= ../forth/delay.4th.8 MAN+= ../forth/loader.conf.5 MAN+= ../forth/loader.4th.8 +MAN+= ../forth/menu.4th.8 +MAN+= ../forth/version.4th.8 .endif .if defined(BOOT_PROMPT_123) diff --git a/sys/boot/common/load_elf_obj.c b/sys/boot/common/load_elf_obj.c index dcd71ef..54d9b10 100644 --- a/sys/boot/common/load_elf_obj.c +++ b/sys/boot/common/load_elf_obj.c @@ -196,7 
+196,7 @@ static int __elfN(obj_loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) { Elf_Ehdr *hdr; - Elf_Shdr *shdr; + Elf_Shdr *shdr, *cshdr, *lshdr; vm_offset_t firstaddr, lastaddr; int i, nsym, res, ret, shdrbytes, symstrindex; @@ -294,12 +294,35 @@ __elfN(obj_loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) /* Clear the whole area, including bss regions. */ kern_bzero(firstaddr, lastaddr - firstaddr); - /* Now read it all in. */ - for (i = 0; i < hdr->e_shnum; i++) { - if (shdr[i].sh_addr == 0 || shdr[i].sh_type == SHT_NOBITS) - continue; - if (kern_pread(ef->fd, (vm_offset_t)shdr[i].sh_addr, - shdr[i].sh_size, (off_t)shdr[i].sh_offset) != 0) { + /* Figure section with the lowest file offset we haven't loaded yet. */ + for (cshdr = NULL; /* none */; /* none */) + { + /* + * Find next section to load. The complexity of this loop is + * O(n^2), but with the number of sections being typically + * small, we do not care. + */ + lshdr = cshdr; + + for (i = 0; i < hdr->e_shnum; i++) { + if (shdr[i].sh_addr == 0 || + shdr[i].sh_type == SHT_NOBITS) + continue; + /* Skip sections that were loaded already. */ + if (lshdr != NULL && + lshdr->sh_offset >= shdr[i].sh_offset) + continue; + /* Find section with smallest offset. */ + if (cshdr == lshdr || + cshdr->sh_offset > shdr[i].sh_offset) + cshdr = &shdr[i]; + } + + if (cshdr == lshdr) + break; + + if (kern_pread(ef->fd, (vm_offset_t)cshdr->sh_addr, + cshdr->sh_size, (off_t)cshdr->sh_offset) != 0) { printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_obj_loadimage: read failed\n"); goto out; diff --git a/sys/boot/forth/beastie.4th b/sys/boot/forth/beastie.4th index 1130ed0..75d6e02 100644 --- a/sys/boot/forth/beastie.4th +++ b/sys/boot/forth/beastie.4th @@ -1,7 +1,8 @@ \ Copyright (c) 2003 Scott Long <scottl@freebsd.org> \ Copyright (c) 2003 Aleksander Fafula <alex@fafula.com> +\ Copyright (c) 2006-2011 Devin Teske <devinteske@hotmail.com> \ All rights reserved. 
-\ +\ \ Redistribution and use in source and binary forms, with or without \ modification, are permitted provided that the following conditions \ are met: @@ -10,7 +11,7 @@ \ 2. Redistributions in binary form must reproduce the above copyright \ notice, this list of conditions and the following disclaimer in the \ documentation and/or other materials provided with the distribution. -\ +\ \ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND \ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE \ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -22,35 +23,24 @@ \ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY \ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF \ SUCH DAMAGE. -\ +\ \ $FreeBSD$ marker task-beastie.4th -include /boot/screen.4th -include /boot/frames.4th +include /boot/color.4th +include /boot/delay.4th -hide +variable logoX +variable logoY -variable menuidx -variable menubllt -variable menuX -variable menuY -variable promptwidth +\ Initialize logo placement to defaults +46 logoX ! +4 logoY ! -variable bootkey -variable bootacpikey -variable bootsafekey -variable bootverbosekey -variable bootsinglekey -variable escapekey -variable rebootkey +: beastie-logo ( x y -- ) \ color BSD mascot (19 rows x 34 columns) -46 constant dot - -\ The BSD Daemon. He is 19 rows high and 34 columns wide -: beastie-logo ( x y -- ) -2dup at-xy ." [1;31m, ," 1+ +2dup at-xy ." [31m, ," 1+ 2dup at-xy ." /( )`" 1+ 2dup at-xy ." \ \___ / |" 1+ 2dup at-xy ." /- [37m_[31m `-/ '" 1+ @@ -59,7 +49,7 @@ variable rebootkey 2dup at-xy ." [34mO O [37m) [31m/ |" 1+ 2dup at-xy ." [37m`-^--'[31m`< '" 1+ 2dup at-xy ." (_.) _ ) /" 1+ -2dup at-xy ." `.___/` / " 1+ +2dup at-xy ." `.___/` /" 1+ 2dup at-xy ." `-----' /" 1+ 2dup at-xy ." [33m<----.[31m __ / __ \" 1+ 2dup at-xy ." [33m<----|====[31mO)))[33m==[31m) \) /[33m====|" 1+ @@ -68,173 +58,201 @@ variable rebootkey 2dup at-xy ." 
\ / /\" 1+ 2dup at-xy ." [36m______[31m( (_ / \______/" 1+ 2dup at-xy ." [36m,' ,-----' |" 1+ -at-xy ." `--{__________) [0m" + at-xy ." `--{__________)[37m" + + \ Put the cursor back at the bottom + 0 25 at-xy ; -: beastiebw-logo ( x y -- ) - 2dup at-xy ." , ," 1+ - 2dup at-xy ." /( )`" 1+ - 2dup at-xy ." \ \___ / |" 1+ - 2dup at-xy ." /- _ `-/ '" 1+ - 2dup at-xy ." (/\/ \ \ /\" 1+ - 2dup at-xy ." / / | ` \" 1+ - 2dup at-xy ." O O ) / |" 1+ - 2dup at-xy ." `-^--'`< '" 1+ - 2dup at-xy ." (_.) _ ) /" 1+ - 2dup at-xy ." `.___/` /" 1+ - 2dup at-xy ." `-----' /" 1+ - 2dup at-xy ." <----. __ / __ \" 1+ - 2dup at-xy ." <----|====O)))==) \) /====" 1+ - 2dup at-xy ." <----' `--' `.__,' \" 1+ - 2dup at-xy ." | |" 1+ - 2dup at-xy ." \ / /\" 1+ - 2dup at-xy ." ______( (_ / \______/" 1+ - 2dup at-xy ." ,' ,-----' |" 1+ - at-xy ." `--{__________)" +: beastiebw-logo ( x y -- ) \ B/W BSD mascot (19 rows x 34 columns) + + 2dup at-xy ." , ," 1+ + 2dup at-xy ." /( )`" 1+ + 2dup at-xy ." \ \___ / |" 1+ + 2dup at-xy ." /- _ `-/ '" 1+ + 2dup at-xy ." (/\/ \ \ /\" 1+ + 2dup at-xy ." / / | ` \" 1+ + 2dup at-xy ." O O ) / |" 1+ + 2dup at-xy ." `-^--'`< '" 1+ + 2dup at-xy ." (_.) _ ) /" 1+ + 2dup at-xy ." `.___/` /" 1+ + 2dup at-xy ." `-----' /" 1+ + 2dup at-xy ." <----. __ / __ \" 1+ + 2dup at-xy ." <----|====O)))==) \) /====|" 1+ + 2dup at-xy ." <----' `--' `.__,' \" 1+ + 2dup at-xy ." | |" 1+ + 2dup at-xy ." \ / /\" 1+ + 2dup at-xy ." ______( (_ / \______/" 1+ + 2dup at-xy ." ,' ,-----' |" 1+ + at-xy ." `--{__________)" + + \ Put the cursor back at the bottom + 0 25 at-xy ; -: fbsdbw-logo ( x y -- ) - 2dup at-xy ." ______" 1+ - 2dup at-xy ." | ____| __ ___ ___ " 1+ - 2dup at-xy ." | |__ | '__/ _ \/ _ \" 1+ - 2dup at-xy ." | __|| | | __/ __/" 1+ - 2dup at-xy ." | | | | | | |" 1+ - 2dup at-xy ." |_| |_| \___|\___|" 1+ - 2dup at-xy ." ____ _____ _____" 1+ - 2dup at-xy ." | _ \ / ____| __ \" 1+ - 2dup at-xy ." | |_) | (___ | | | |" 1+ - 2dup at-xy ." | _ < \___ \| | | |" 1+ - 2dup at-xy ." 
| |_) |____) | |__| |" 1+ - 2dup at-xy ." | | | |" 1+ - at-xy ." |____/|_____/|_____/" +: fbsdbw-logo ( x y -- ) \ "FreeBSD" logo in B/W (13 rows x 21 columns) + + \ We used to use the beastie himself as our default... until the + \ eventual complaint derided his reign of the advanced boot-menu. + \ + \ This is the replacement of beastie to satiate the haters of our + \ beloved helper-daemon (ready to track down and spear bugs with + \ his trident and sporty sneakers; see above). + \ + \ Since we merely just changed the default and not the default- + \ location, below is an adjustment to the passed-in coordinates, + \ forever influenced by the proper location of beastie himself + \ kept as the default loader_logo_x/loader_logo_y values. + \ + 5 + swap 6 + swap + + 2dup at-xy ." ______" 1+ + 2dup at-xy ." | ____| __ ___ ___ " 1+ + 2dup at-xy ." | |__ | '__/ _ \/ _ \" 1+ + 2dup at-xy ." | __|| | | __/ __/" 1+ + 2dup at-xy ." | | | | | | |" 1+ + 2dup at-xy ." |_| |_| \___|\___|" 1+ + 2dup at-xy ." ____ _____ _____" 1+ + 2dup at-xy ." | _ \ / ____| __ \" 1+ + 2dup at-xy ." | |_) | (___ | | | |" 1+ + 2dup at-xy ." | _ < \___ \| | | |" 1+ + 2dup at-xy ." | |_) |____) | |__| |" 1+ + 2dup at-xy ." | | | |" 1+ + at-xy ." |____/|_____/|_____/" + + \ Put the cursor back at the bottom + 0 25 at-xy ; -: print-logo ( x y -- ) - s" loader_logo" getenv - dup -1 = if - drop - fbsdbw-logo - exit - then - 2dup s" fbsdbw" compare-insensitive 0= if - 2drop - fbsdbw-logo - exit - then - 2dup s" beastiebw" compare-insensitive 0= if - 2drop - beastiebw-logo - exit - then - 2dup s" beastie" compare-insensitive 0= if - 2drop - beastie-logo - exit - then - 2dup s" none" compare-insensitive 0= if - 2drop - \ no logo - exit - then - 2drop - fbsdbw-logo +: orb-logo ( x y -- ) \ color Orb mascot (15 rows x 30 columns) + + 3 + \ beastie adjustment (see `fbsdbw-logo' comments above) + + 2dup at-xy ." [31m``` [31;1m`[31m" 1+ + 2dup at-xy ." s` `.....---...[31;1m....--.``` -/[31m" 1+ + 2dup at-xy ." 
+o .--` [31;1m/y:` +.[31m" 1+ + 2dup at-xy ." yo`:. [31;1m:o `+-[31m" 1+ + 2dup at-xy ." y/ [31;1m-/` -o/[31m" 1+ + 2dup at-xy ." .- [31;1m::/sy+:.[31m" 1+ + 2dup at-xy ." / [31;1m`-- /[31m" 1+ + 2dup at-xy ." `: [31;1m:`[31m" 1+ + 2dup at-xy ." `: [31;1m:`[31m" 1+ + 2dup at-xy ." / [31;1m/[31m" 1+ + 2dup at-xy ." .- [31;1m-.[31m" 1+ + 2dup at-xy ." -- [31;1m-.[31m" 1+ + 2dup at-xy ." `:` [31;1m`:`" 1+ + 2dup at-xy ." [31;1m.-- `--." 1+ + at-xy ." .---.....----.[37m" + + \ Put the cursor back at the bottom + 0 25 at-xy ; -: acpipresent? ( -- flag ) - s" hint.acpi.0.rsdp" getenv - dup -1 = if - drop false exit - then - 2drop - true +: orbbw-logo ( x y -- ) \ B/W Orb mascot (15 rows x 32 columns) + + 3 + \ beastie adjustment (see `fbsdbw-logo' comments above) + + 2dup at-xy ." ``` `" 1+ + 2dup at-xy ." s` `.....---.......--.``` -/" 1+ + 2dup at-xy ." +o .--` /y:` +." 1+ + 2dup at-xy ." yo`:. :o `+-" 1+ + 2dup at-xy ." y/ -/` -o/" 1+ + 2dup at-xy ." .- ::/sy+:." 1+ + 2dup at-xy ." / `-- /" 1+ + 2dup at-xy ." `: :`" 1+ + 2dup at-xy ." `: :`" 1+ + 2dup at-xy ." / /" 1+ + 2dup at-xy ." .- -." 1+ + 2dup at-xy ." -- -." 1+ + 2dup at-xy ." `:` `:`" 1+ + 2dup at-xy ." .-- `--." 1+ + at-xy ." .---.....----." + + \ Put the cursor back at the bottom + 0 25 at-xy ; -: acpienabled? ( -- flag ) - s" hint.acpi.0.disabled" getenv - dup -1 <> if - s" 0" compare 0<> if - false exit - then +\ This function draws any number of beastie logos at (loader_logo_x, +\ loader_logo_y) if defined, else (46,4) (to the right of the menu). To choose +\ your beastie, set the variable `loader_logo' to the respective logo name. 
+\ +\ Currently available: +\ +\ NAME DESCRIPTION +\ beastie Color ``Helper Daemon'' mascot (19 rows x 34 columns) +\ beastiebw B/W ``Helper Daemon'' mascot (19 rows x 34 columns) +\ fbsdbw "FreeBSD" logo in B/W (13 rows x 21 columns) +\ orb Color ``Orb'' mascot (15 rows x 30 columns) +\ orbbw B/W ``Orb'' mascot (15 rows x 32 columns) (default) +\ +\ NOTE: Setting `loader_logo' to an undefined value (such as "none") will +\ prevent beastie from being drawn. +\ +: draw-beastie ( -- ) \ at (loader_logo_x,loader_logo_y), else (46,4) + + s" loader_logo_x" getenv dup -1 <> if + ?number 1 = if logoX ! then else drop then - true -; - -: printmenuitem ( -- n ) - menuidx @ - 1+ dup - menuidx ! - menuY @ + dup menuX @ swap at-xy - menuidx @ . - menuX @ 1+ swap at-xy - menubllt @ emit - menuidx @ 48 + -; - -: beastie-menu ( -- ) - 0 menuidx ! - dot menubllt ! - 8 menuY ! - 5 menuX ! - clear - 46 4 print-logo - 42 20 2 2 box - 13 6 at-xy ." Welcome to FreeBSD!" - printmenuitem ." Boot FreeBSD [default]" bootkey ! - s" arch-i386" environment? if + s" loader_logo_y" getenv dup -1 <> if + ?number 1 = if logoY ! then + else drop - acpipresent? if - printmenuitem ." Boot FreeBSD with ACPI " bootacpikey ! - acpienabled? if - ." disabled" - else - ." enabled" - then + then + + s" loader_logo" getenv dup -1 = if + logoX @ logoY @ + loader_color? if + orb-logo else - menuidx @ - 1+ - menuidx ! - -2 bootacpikey ! + orbbw-logo then - else - -2 bootacpikey ! + drop exit then - printmenuitem ." Boot FreeBSD in Safe Mode" bootsafekey ! - printmenuitem ." Boot FreeBSD in single user mode" bootsinglekey ! - printmenuitem ." Boot FreeBSD with verbose logging" bootverbosekey ! - printmenuitem ." Escape to loader prompt" escapekey ! - printmenuitem ." Reboot" rebootkey ! - menuX @ 20 at-xy - ." Select option, [Enter] for default" - menuX @ 21 at-xy - s" or [Space] to pause timer " dup 2 - promptwidth ! 
- type -; -: tkey - seconds + - begin 1 while - over 0<> if - dup seconds u< if - drop - -1 - exit - then - menuX @ promptwidth @ + 21 at-xy dup seconds - . - then - key? if - drop - key - exit - then - 50 ms - repeat + 2dup s" beastie" compare-insensitive 0= if + logoX @ logoY @ beastie-logo + 2drop exit + then + 2dup s" beastiebw" compare-insensitive 0= if + logoX @ logoY @ beastiebw-logo + 2drop exit + then + 2dup s" fbsdbw" compare-insensitive 0= if + logoX @ logoY @ fbsdbw-logo + 2drop exit + then + 2dup s" orb" compare-insensitive 0= if + logoX @ logoY @ orb-logo + 2drop exit + then + 2dup s" orbbw" compare-insensitive 0= if + logoX @ logoY @ orbbw-logo + 2drop exit + then + + 2drop ; -set-current +: clear-beastie ( -- ) \ clears beastie from the screen + logoX @ logoY @ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 1+ 2dup at-xy 34 spaces 1+ + 2dup at-xy 34 spaces 2drop + + \ Put the cursor back at the bottom + 0 25 at-xy +; -: beastie-start +: beastie-start ( -- ) \ starts the menu s" beastie_disable" getenv dup -1 <> if s" YES" compare-insensitive 0= if @@ -243,62 +261,15 @@ set-current else drop then - beastie-menu - s" autoboot_delay" getenv - dup -1 = if - drop - 10 + + s" loader_delay" getenv + -1 = if + s" include /boot/menu.rc" evaluate else - 2dup s" -1" compare 0= if - 0 boot - then - 0 s>d 2swap >number 2drop drop + drop + ." 
Loading Menu (Ctrl-C to Abort)" cr + s" set delay_command='include /boot/menu.rc'" evaluate + s" set delay_showdots" evaluate + delay_execute then - begin - dup tkey - 0 25 at-xy - dup 32 = if nip 0 swap then - dup -1 = if 0 boot then - dup 13 = if 0 boot then - dup bootkey @ = if 0 boot then - dup bootacpikey @ = if - acpienabled? if - s" 1" s" hint.acpi.0.disabled" setenv - s" 1" s" loader.acpi_disabled_by_user" setenv - else - s" 0" s" hint.acpi.0.disabled" setenv - then - 0 boot - then - dup bootsafekey @ = if - s" arch-i386" environment? if - drop - s" 1" s" hint.acpi.0.disabled" setenv - s" 1" s" loader.acpi_disabled_by_user" setenv - s" 1" s" hint.apic.0.disabled" setenv - then - s" 0" s" hw.ata.ata_dma" setenv - s" 0" s" hw.ata.atapi_dma" setenv - s" 0" s" hw.ata.wc" setenv - s" 0" s" hw.eisa_slots" setenv - s" 1" s" hint.kbdmux.0.disabled" setenv - 0 boot - then - dup bootverbosekey @ = if - s" YES" s" boot_verbose" setenv - 0 boot - then - dup bootsinglekey @ = if - s" YES" s" boot_single" setenv - 0 boot - then - dup escapekey @ = if - 2drop - s" NO" s" autoboot_delay" setenv - exit - then - rebootkey @ = if 0 reboot then - again ; - -previous diff --git a/sys/boot/forth/beastie.4th.8 b/sys/boot/forth/beastie.4th.8 new file mode 100644 index 0000000..5108769 --- /dev/null +++ b/sys/boot/forth/beastie.4th.8 @@ -0,0 +1,171 @@ +.\" Copyright (c) 2011 Devin Teske +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 16, 2011 +.Dt BEASTIE.4TH 8 +.Os +.Sh NAME +.Nm beastie.4th +.Nd FreeBSD ASCII art boot module. +.Sh DESCRIPTION +The file that goes by the name of +.Nm +is a set of commands designed to draw the ASCII art FreeBSD mascot +.Nd known simply as +.Ic beastie +.Nd to the right of the boot loader menu. +The commands of +.Nm +by themselves are not enough for most uses. +Please refer to the +examples below for the most common situations, and to +.Xr loader 8 +for additional commands. +.Pp +Before using any of the commands provided in +.Nm , +it must be included +through the command: +.Pp +.Dl include beastie.4th +.Pp +This line is present in the default +.Pa /boot/loader.rc +file, so it is not needed (and should not be re-issued) in a normal setup. +.Pp +The commands provided by it are: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic draw-beastie +Draws the FreeBSD logo. +.Pp +The logo that is drawn is configured by setting the +.Ic loader_logo +variable in +.Xr loader.conf 5 +to one of +.Dq Li beastie , +.Dq Li beastiebw , +.Dq Li fbsdbw , +.Dq Li orb , +and +.Dq Li orbbw +(the default). 
+.Pp +The position of the logo can be configured by setting the +.Ic loader_logo_x +and +.Ic loader_logo_y +variables in +.Xr loader.conf 5 . +The default values are 46 (x) and 4 (y). +.Pp +.It Ic clear-beastie +Clears the screen of beastie. +.Pp +.It Ic beastie-start +Initializes the interactive boot loader menu. +.Pp +The +.Ic loader_delay +variable can be configured in +.Xr loader.conf 5 +to the number of seconds you would like to delay loading the boot menu. +During the delay the user can press Ctrl-C to fall back to autoboot or ENTER +to proceed. +The default behavior is to not delay. +.El +.Pp +The environment variables that affect its behavior are: +.Bl -tag -width bootfile -offset indent +.It Va loader_logo +Selects the desired logo in the beastie boot menu. Possible values are: +.Dq Li fbsdbw , +.Dq Li beastie , +.Dq Li beastiebw , +.Dq Li orb , +.Dq Li orbbw +(default), and +.Dq Li none . +.It Va loader_logo_x +Sets the desired column position of the logo. Default is 46. +.It Va loader_logo_y +Sets the desired row position of the logo. Default is 4. +.It Va beastie_disable +If set to +.Dq YES , +the beastie boot menu will be skipped. +.It Va loader_delay +If set to a number higher than zero, introduces a delay before starting the +beastie boot menu. During the delay the user can press either Ctrl-C to skip +the menu or ENTER to proceed to the menu. The default is to not delay when +loading the menu. +.El +.Sh FILES +.Bl -tag -width /boot/loader.4th -compact +.It Pa /boot/loader +The +.Xr loader 8 . +.It Pa /boot/beastie.4th +.Nm +itself. +.It Pa /boot/loader.rc +.Xr loader 8 +bootstrapping script. 
+.El +.Sh EXAMPLES +Standard i386 +.Pa /boot/loader.rc : +.Pp +.Bd -literal -offset indent -compact +include /boot/beastie.4th +beastie-start +.Ed +.Pp +Set a different logo in +.Xr loader.conf 5 : +.Pp +.Bd -literal -offset indent -compact +loader_logo="beastie" +.Ed +.Sh SEE ALSO +.Xr loader.conf 5 , +.Xr loader 8 , +.Xr loader.4th 8 +.Sh HISTORY +The +.Nm +set of commands first appeared in +.Fx 5.1 . +.Sh AUTHORS +The +.Nm +set of commands was written by +.An -nosplit +.An Scott Long Aq scottl@FreeBSD.org , +.An Aleksander Fafula Aq alex@fafula.com +and +.An Devin Teske Aq devinteske@hotmail.com . diff --git a/sys/boot/forth/brand.4th b/sys/boot/forth/brand.4th new file mode 100644 index 0000000..bc64174 --- /dev/null +++ b/sys/boot/forth/brand.4th @@ -0,0 +1,91 @@ +\ Copyright (c) 2006-2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +marker task-brand.4th + +variable brandX +variable brandY + +\ Initialize logo placement +2 brandX ! +1 brandY ! + +: fbsd-logo ( x y -- ) \ "FreeBSD" [wide] logo in B/W (7 rows x 42 columns) + + 2dup at-xy ." ______ ____ _____ _____ " 1+ + 2dup at-xy ." | ____| | _ \ / ____| __ \ " 1+ + 2dup at-xy ." | |___ _ __ ___ ___ | |_) | (___ | | | |" 1+ + 2dup at-xy ." | ___| '__/ _ \/ _ \| _ < \___ \| | | |" 1+ + 2dup at-xy ." | | | | | __/ __/| |_) |____) | |__| |" 1+ + 2dup at-xy ." | | | | | | || | | |" 1+ + at-xy ." |_| |_| \___|\___||____/|_____/|_____/ " + + \ Put the cursor back at the bottom + 0 25 at-xy +; + +\ This function draws any number of company logos at (loader_brand_x, +\ loader_brand_y) if defined, or (2,1) (top-left) if not defined. To choose +\ your logo, set the variable `loader_brand' to the respective logo name. +\ +\ Currently available: +\ +\ NAME DESCRIPTION +\ fbsd FreeBSD logo +\ +\ NOTE: Setting `loader_brand' to an undefined value (such as "none") will +\ prevent any brand from being drawn. +\ +: draw-brand ( -- ) + + s" loader_brand_x" getenv dup -1 <> if + ?number 1 = if + brandX ! + then + else + drop + then + + s" loader_brand_y" getenv dup -1 <> if + ?number 1 = if + brandY ! 
+ then + else + drop + then + + s" loader_brand" getenv dup -1 = if + brandX @ brandY @ fbsd-logo + drop exit + then + + 2dup s" fbsd" compare-insensitive 0= if + brandX @ brandY @ fbsd-logo + 2drop exit + then + + 2drop +; diff --git a/sys/boot/forth/brand.4th.8 b/sys/boot/forth/brand.4th.8 new file mode 100644 index 0000000..1a1cc84 --- /dev/null +++ b/sys/boot/forth/brand.4th.8 @@ -0,0 +1,125 @@ +.\" Copyright (c) 2011 Devin Teske +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 18, 2011 +.Dt BRAND.4TH 8 +.Os +.Sh NAME +.Nm brand.4th +.Nd FreeBSD ASCII art boot module. 
+.Sh DESCRIPTION +The file that goes by the name of +.Nm +is a set of commands designed to draw the ASCII art BSD brand above the boot +loader menu. +The commands of +.Nm +by themselves are not enough for most uses. +Please refer to the +examples below for the most common situations, and to +.Xr loader 8 +for additional commands. +.Pp +Before using any of the commands provided in +.Nm , +it must be included +through the command: +.Pp +.Dl include brand.4th +.Pp +This line is present in the default +.Pa /boot/menu.rc +file, so it is not needed (and should not be re-issued) in a normal setup. +.Pp +The commands provided by it are: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic draw-brand +Draws the BSD brand. +.Pp +The brand that is drawn is configured by setting the +.Ic loader_brand +variable in +.Xr loader.conf 5 +to one of +.Dq Li fbsd +(the default) or +.Dq Li none . +.Pp +The position of the logo can be configured by setting the +.Ic loader_brand_x +and +.Ic loader_brand_y +variables in +.Xr loader.conf 5 . +The default values are 2 (x) and 1 (y). +.El +.Pp +The environment variables that affect its behavior are: +.Bl -tag -width bootfile -offset indent +.It Va loader_brand +Selects the desired brand in the beastie boot menu. Possible values are: +.Dq Li fbsd +(default) or +.Dq Li none . +.It Va loader_brand_x +Sets the desired column position of the brand. Default is 2. +.It Va loader_brand_y +Sets the desired row position of the brand. Default is 1. +.El +.Sh FILES +.Bl -tag -width /boot/loader.4th -compact +.It Pa /boot/loader +The +.Xr loader 8 . +.It Pa /boot/brand.4th +.Nm +itself. +.It Pa /boot/loader.rc +.Xr loader 8 +bootstrapping script. +.El +.Sh EXAMPLES +Set FreeBSD brand in +.Xr loader.conf 5 : +.Pp +.Bd -literal -offset indent -compact +loader_brand="fbsd" +.Ed +.Sh SEE ALSO +.Xr loader.conf 5 , +.Xr loader 8 +.Sh HISTORY +The +.Nm +set of commands first appeared in +.Fx 9.0 .
+.Sh AUTHORS +The +.Nm +set of commands was written by +.An -nosplit +.An Devin Teske Aq devinteske@hotmail.com . diff --git a/sys/boot/forth/check-password.4th b/sys/boot/forth/check-password.4th new file mode 100644 index 0000000..0a1fa5d --- /dev/null +++ b/sys/boot/forth/check-password.4th @@ -0,0 +1,156 @@ +\ Copyright (c) 2006-2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. 
+\ +\ $FreeBSD$ + +marker task-check-password.4th + +include /boot/screen.4th + +13 constant enter_key \ The decimal ASCII value for Enter key +8 constant bs_key \ The decimal ASCII value for Backspace key +16 constant readmax \ Maximum number of characters for the password + +variable readX \ Current X offset (column)(used by read) +variable read-start \ Starting X offset (column)(used by read) + +create readval 16 allot \ input obtained (maximum 16 characters) +variable readlen \ input length + +\ This function blocks program flow (loops forever) until a key is pressed. +\ The key that was pressed is added to the top of the stack in the form of its +\ decimal ASCII representation. Note: the stack cannot be empty when this +\ function starts or an underflow exception will occur. Simplest way to prevent +\ this is to pass 0 as a stack parameter (ie. `0 sgetkey'). This function is +\ called by the read function. You need not call it directly. NOTE: arrow keys +\ show as 0 on the stack +\ +: sgetkey ( -- ) + + begin \ Loop forever + key? if \ Was a key pressed? (see loader(8)) + + drop \ Remove stack-cruft + key \ Get the key that was pressed + + \ Check key pressed (see loader(8)) and input limit + dup 0<> if ( and ) readlen @ readmax < if + + \ Echo an asterisk (unless Backspace/Enter) + dup bs_key <> if ( and ) dup enter_key <> if + ." *" \ Echo an asterisk + then then + + exit \ Exit from the function + then then + + \ Always allow Backspace and Enter + dup bs_key = if exit then + dup enter_key = if exit then + + then + 50 ms \ Sleep for 50 milliseconds (see loader(8)) + again +; + +: read ( -- String prompt ) + + 0 25 at-xy \ Move the cursor to the bottom-left + dup 1+ read-start ! \ Store X offset after the prompt + read-start @ readX ! \ copy value to the current X offset + 0 readlen ! 
\ Initialize the read length + type \ Print the prompt + + begin \ Loop forever + + 0 sgetkey \ Block here, waiting for a key to be pressed + + \ We are not going to echo the password to the screen (for + \ security reasons). If Enter is pressed, we process the + \ password, otherwise augment the key to a string. + + \ If the key that was entered was not Enter, advance + dup enter_key <> if + readX @ 1+ readX ! \ Advance the column + readlen @ 1+ readlen ! \ Increment input length + then + + \ Handle backspacing + dup bs_key = if + readX @ 2 - readX ! \ Set new cursor position + readlen @ 2 - readlen ! \ Decrement input length + + \ Don't move behind starting position + readX @ read-start @ < if + read-start @ readX ! + then + readlen @ 0< if + 0 readlen ! + then + + \ Reposition cursor and erase character + readX @ 25 at-xy 1 spaces readX @ 25 at-xy + then + + dup enter_key = if + drop \ Clean up stack cruft + 10 emit \ Echo new line + exit + then + + \ If not Backspace or Enter, store the character + dup bs_key <> if ( and ) dup enter_key <> if + + \ store the character in our buffer + dup readval readlen @ 1- + c! + + then then + + drop \ drop the last key that was entered + + again \ Enter was not pressed; repeat +; + +: check-password ( -- ) + + \ Exit if a password was not set + s" password" getenv dup -1 = if + drop exit + then + + begin \ Loop as long as it takes to get the right password + + s" Password: " \ Output a prompt for a password + read \ Read the user's input until Enter + + 2dup readval readlen @ compare 0= if + 2drop exit \ Correct password + then + + \ Bad Password + 3000 ms + ." loader: incorrect password" 10 emit + + again \ Not the right password; repeat +; diff --git a/sys/boot/forth/check-password.4th.8 b/sys/boot/forth/check-password.4th.8 new file mode 100644 index 0000000..ec2323e --- /dev/null +++ b/sys/boot/forth/check-password.4th.8 @@ -0,0 +1,123 @@ +.\" Copyright (c) 2011 Devin Teske +.\" All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 18, 2011 +.Dt CHECK-PASSWORD.4TH 8 +.Os +.Sh NAME +.Nm check-password.4th +.Nd FreeBSD password-checking boot module. +.Sh DESCRIPTION +The file that goes by the name of +.Nm +is a set of commands designed to prevent booting without the proper password. +The commands of +.Nm +by themselves are not enough for most uses. +Please refer to the +examples below for the most common situations, and to +.Xr loader 8 +for additional commands. 
+.Pp +Before using any of the commands provided in +.Nm , +it must be included +through the command: +.Pp +.Dl include check-password.4th +.Pp +This line is present in +.Pa /boot/loader.4th +file, so it is not needed (and should not be re-issued) in a normal setup. +.Pp +The commands provided by it are: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic check-password +Once called, the user cannot continue until the correct password is entered. +If the user enters the correct password the function returns. +.Pp +The password that is required is configured by setting the +.Ic password +variable in +.Xr loader.conf 5 . +.Pp +Subsequent calls after a successful password +has been entered will not cause reprompting +\(em the function will silently return. +.El +.Pp +The environment variables that affect its behavior are: +.Bl -tag -width bootfile -offset indent +.It Va password +Sets the password (up to 16 characters long) that is required by +.Ic check-password +to be entered before the system is allowed to boot. If unset (default) or NULL, +.Ic check-password +will silently abort. +.El +.Sh FILES +.Bl -tag -width /boot/loader.4th -compact +.It Pa /boot/loader +The +.Xr loader 8 . +.It Pa /boot/check-password.4th +.Nm +itself. +.It Pa /boot/loader.rc +.Xr loader 8 +bootstrapping script. +.El +.Sh EXAMPLES +Standard i386 +.Pa /boot/loader.rc : +.Pp +.Bd -literal -offset indent -compact +include /boot/loader.4th +check-password +.Ed +.Pp +Set a password in +.Xr loader.conf 5 : +.Pp +.Bd -literal -offset indent -compact +password="abc123" +.Ed +.Sh SEE ALSO +.Xr loader.conf 5 , +.Xr loader 8 , +.Xr loader.4th 8 +.Sh HISTORY +The +.Nm +set of commands first appeared in +.Fx 9.0 . +.Sh AUTHORS +The +.Nm +set of commands was written by +.An -nosplit +.An Devin Teske Aq devinteske@hotmail.com .
diff --git a/sys/boot/forth/color.4th b/sys/boot/forth/color.4th new file mode 100644 index 0000000..4d43593 --- /dev/null +++ b/sys/boot/forth/color.4th @@ -0,0 +1,48 @@ +\ Copyright (c) 2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +marker task-color.4th + +\ This function returns TRUE if the `loader_color' environment variable is set +\ to YES, yes, or 1. Otherwise, FALSE is returned. +\ +: loader_color? 
( -- N ) + + s" loader_color" getenv dup -1 <> if + + 2dup s" YES" compare-insensitive 0= if + 2drop + TRUE exit + then + 2dup s" 1" compare 0= if + 2drop + TRUE exit + then + drop + then + + drop FALSE exit +; diff --git a/sys/boot/forth/color.4th.8 b/sys/boot/forth/color.4th.8 new file mode 100644 index 0000000..5a734dd --- /dev/null +++ b/sys/boot/forth/color.4th.8 @@ -0,0 +1,117 @@ +.\" Copyright (c) 2011 Devin Teske +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 18, 2011 +.Dt COLOR.4TH 8 +.Os +.Sh NAME +.Nm color.4th +.Nd FreeBSD color-detection boot module. 
+.Sh DESCRIPTION +The file that goes by the name of +.Nm +is a set of commands designed to simplify color logic. +The commands of +.Nm +by themselves are not enough for most uses. +Please refer to the +examples below for the most common situations, and to +.Xr loader 8 +for additional commands. +.Pp +Before using any of the commands provided in +.Nm , +it must be included +through the command: +.Pp +.Dl include color.4th +.Pp +This line is present in +.Pa /boot/beastie.4th +file, so it is not needed (and should not be re-issued) in a normal setup. +.Pp +The commands provided by it are: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic loader_color? +Returns TRUE if the +.Ic loader_color +environment variable is set to +.Dq YES +(case-insensitive) or +.Dq 1 . +Otherwise returns FALSE. +.El +.Pp +The environment variables that affect its behavior are: +.Bl -tag -width bootfile -offset indent +.It Va loader_color +If set to +.Dq YES +(case-insensitive) or +.Dq 1 , +causes +.Ic loader_color? +to return TRUE, indicating to many other modules that color should be used +whenever/wherever possible. +.El +.Sh FILES +.Bl -tag -width /boot/loader.4th -compact +.It Pa /boot/loader +The +.Xr loader 8 . +.It Pa /boot/color.4th +.Nm +itself. +.It Pa /boot/loader.rc +.Xr loader 8 +bootstrapping script. +.El +.Sh EXAMPLES +Standard i386 +.Pa /boot/loader.rc : +.Pp +Use color where applicable: +.Pp +.Bd -literal -offset indent -compact +loader_color="YES" +.Ed +.Sh SEE ALSO +.Xr loader.conf 5 , +.Xr loader 8 , +.Xr beastie.4th 8 , +.Xr loader.4th 8 +.Sh HISTORY +The +.Nm +set of commands first appeared in +.Fx 9.0 . +.Sh AUTHORS +The +.Nm +set of commands was written by +.An -nosplit +.An Devin Teske Aq devinteske@hotmail.com .
diff --git a/sys/boot/forth/delay.4th b/sys/boot/forth/delay.4th new file mode 100644 index 0000000..3068e65 --- /dev/null +++ b/sys/boot/forth/delay.4th @@ -0,0 +1,112 @@ +\ Copyright (c) 2008-2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. 
+\ +\ $FreeBSD$ + +marker task-delay.4th + +2 constant delay_default \ Default delay (in seconds) +3 constant etx_key \ End-of-Text character produced by Ctrl+C +13 constant enter_key \ Carriage-Return character produced by ENTER +27 constant esc_key \ Escape character produced by ESC or Ctrl+[ + +variable delay_tstart \ state variable used for delay timing +variable delay_delay \ determined configurable delay duration +variable delay_cancelled \ state variable for user cancellation +variable delay_showdots \ whether to continually print dots while waiting + +: delay_execute ( -- ) + + \ make sure that we have a command to execute + s" delay_command" getenv dup -1 = if + drop exit + then + + \ read custom time-duration (if set) + s" loader_delay" getenv dup -1 = if + drop \ no custom duration (remove dup'd bunk -1) + delay_default \ use default setting (replacing bunk -1) + else + \ make sure custom duration is a number + ?number 0= if + delay_default \ use default if otherwise + then + then + + \ initialize state variables + delay_delay ! \ stored value is on the stack from above + seconds delay_tstart ! \ store the time we started + 0 delay_cancelled ! \ boolean flag indicating user-cancelled event + + false delay_showdots ! \ reset to zero and read from environment + s" delay_showdots" getenv dup -1 <> if + 2drop \ don't need the value, just existence + true delay_showdots ! + else + drop + then + + \ Loop until we have exceeded the desired time duration + begin + 25 ms \ sleep for 25 milliseconds (40 iterations/sec) + + \ throw some dots up on the screen if desired + delay_showdots @ if + ." ." \ dots visually aid in the perception of time + then + + \ was a key depressed? + key? if + key \ obtain ASCII value for keystroke + dup enter_key = if + -1 delay_delay ! \ break loop + then + dup etx_key = swap esc_key = OR if + -1 delay_delay ! \ break loop + -1 delay_cancelled ! 
\ set cancelled flag + then + then + + \ keep looping only while a positive duration has not yet been exceeded; + \ NOTE(review): a zero or negative duration (incl. the -1 set above) ends the loop at once -- confirm intent + delay_delay @ 0> if + \ calculate elapsed time + seconds delay_tstart @ - delay_delay @ > + else + -1 \ break loop + then + until + + \ if we were throwing up dots, throw up a line-break + delay_showdots @ if + cr + then + + \ did the user press either Ctrl-C or Escape? + delay_cancelled @ if + 2drop \ we don't need the command string anymore + else + evaluate \ evaluate/execute the command string + then +; diff --git a/sys/boot/forth/delay.4th.8 b/sys/boot/forth/delay.4th.8 new file mode 100644 index 0000000..3fe5b5b --- /dev/null +++ b/sys/boot/forth/delay.4th.8 @@ -0,0 +1,126 @@ +.\" Copyright (c) 2011 Devin Teske +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 18, 2011 +.Dt DELAY.4TH 8 +.Os +.Sh NAME +.Nm delay.4th +.Nd FreeBSD debugging boot module. +.Sh DESCRIPTION +The file that goes by the name of +.Nm +is a set of commands designed to add debugging capabilities to +.Xr loader 8 . +The commands of +.Nm +by themselves are not enough for most uses. +Please refer to the +examples below for the most common situations, and to +.Xr loader 8 +for additional commands. +.Pp +Before using any of the commands provided in +.Nm , +it must be included +through the command: +.Pp +.Dl include delay.4th +.Pp +This line is present in +.Pa /boot/beastie.4th +file, so it is not needed (and should not be re-issued) in a normal setup. +.Pp +The commands provided by it are: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic delay_execute +Executes the [string] procedure stored in the +.Ic delay_command +environment variable after +.Ic loader_delay +seconds. +.Pp +If the optional +.Ic delay_showdots +environment variable is set, a continuous series of dots is printed. +.Pp +During the duration, the user can either press Ctrl-C (or Esc) to abort or +ENTER to proceed immediately. +.El +.Pp +The environment variables that affect its behavior are: +.Bl -tag -width bootfile -offset indent +.It Va delay_command +The command to be executed by +.Ic delay_execute . +.It Va loader_delay +The duration (in seconds) to delay before executing +.Ic delay_command .
+.It Va delay_showdots +If set, will cause +.Ic delay_execute +to print a continuous series of dots during the delay duration. +.El +.Sh FILES +.Bl -tag -width /boot/loader.4th -compact +.It Pa /boot/loader +The +.Xr loader 8 . +.It Pa /boot/delay.4th +.Nm +itself. +.It Pa /boot/loader.rc +.Xr loader 8 +bootstrapping script. +.El +.Sh EXAMPLES +Introducing a 5-second delay before including another file from +.Pa /boot/loader.rc : +.Pp +.Bd -literal -offset indent -compact +include /boot/delay.4th +set delay_command="include /boot/other.4th" +set delay_showdots +set loader_delay=5 +delay_execute +.Ed +.Sh SEE ALSO +.Xr loader.conf 5 , +.Xr loader 8 , +.Xr beastie.4th 8 , +.Xr loader.4th 8 +.Sh HISTORY +The +.Nm +set of commands first appeared in +.Fx 9.0 . +.Sh AUTHORS +The +.Nm +set of commands was written by +.An -nosplit +.An Devin Teske Aq devinteske@hotmail.com . diff --git a/sys/boot/forth/loader.4th b/sys/boot/forth/loader.4th index 7b22b6d..c765147 100644 --- a/sys/boot/forth/loader.4th +++ b/sys/boot/forth/loader.4th @@ -89,30 +89,7 @@ builtin: boot-conf only forth definitions also support-functions -\ ***** check-password -\ -\ If a password was defined, execute autoboot and ask for -\ password if autoboot returns. -\ Do not exit unless the right password is given. - -: check-password - password .addr @ if - 0 autoboot - false >r - begin - bell emit bell emit - ." Password: " - password .len @ read-password - dup password .len @ = if - 2dup password .addr @ password .len @ - compare 0= if r> drop true >r then - then - drop free drop - r@ - until - r> drop - then -; +include /boot/check-password.4th \ ***** start \ diff --git a/sys/boot/forth/loader.conf.5 b/sys/boot/forth/loader.conf.5 index 6abb7ea..c8c61c6 100644 --- a/sys/boot/forth/loader.conf.5 +++ b/sys/boot/forth/loader.conf.5 @@ -215,14 +215,20 @@ be displayed. If set to .Dq YES , the beastie boot menu will be skipped. 
-.It Va loader_logo Pq Dq Li fbsdbw +.It Va loader_logo Pq Dq Li orbbw Selects a desired logo in the beastie boot menu. Possible values are: +.Dq Li orbbw , +.Dq Li orb , .Dq Li fbsdbw , .Dq Li beastiebw , .Dq Li beastie , and .Dq Li none . +.It Va loader_color +If set to +.Dq YES , +the beastie boot menu will be displayed using ANSI coloring where possible. .El .Sh FILES .Bl -tag -width /boot/defaults/loader.conf -compact diff --git a/sys/boot/forth/menu-commands.4th b/sys/boot/forth/menu-commands.4th new file mode 100644 index 0000000..828a148 --- /dev/null +++ b/sys/boot/forth/menu-commands.4th @@ -0,0 +1,190 @@ +\ Copyright (c) 2006-2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. 
+\ +\ $FreeBSD$ + +marker task-menu-commands.4th + +: acpi_enable ( -- ) + s" set acpi_load=YES" evaluate \ XXX deprecated but harmless + s" set hint.acpi.0.disabled=0" evaluate + s" loader.acpi_disabled_by_user" unsetenv +; + +: acpi_disable ( -- ) + s" acpi_load" unsetenv \ XXX deprecated but harmless + s" set hint.acpi.0.disabled=1" evaluate + s" set loader.acpi_disabled_by_user=1" evaluate +; + +: toggle_acpi ( N -- N TRUE ) + + \ Make changes effective _before_ calling menu-redraw + + acpienabled? if + acpi_disable + else + acpi_enable + then + + menu-redraw + + TRUE \ loop menu again +; + +: toggle_safemode ( N -- N TRUE ) + toggle_menuitem + + \ Now we're going to make the change effective + + s" toggle_stateN @" \ base name of toggle state var + -rot 2dup 12 + c! rot \ replace 'N' with ASCII numeral + + evaluate 0= if + s" hint.apic.0.disabled" unsetenv + s" hw.ata.ata_dma" unsetenv + s" hw.ata.atapi_dma" unsetenv + s" hw.ata.wc" unsetenv + s" hw.eisa_slots" unsetenv + s" hint.kbdmux.0.disabled" unsetenv + else + \ + \ Toggle ACPI elements if necessary + \ + acpipresent? if acpienabled? if + menuacpi @ dup 0<> if + toggle_menuitem ( N -- N ) + then + drop + acpi_disable + then then + + s" set hint.apic.0.disabled=1" evaluate + s" set hw.ata.ata_dma=0" evaluate + s" set hw.ata.atapi_dma=0" evaluate + s" set hw.ata.wc=0" evaluate + s" set hw.eisa_slots=0" evaluate + s" set hint.kbdmux.0.disabled=1" evaluate + then + + menu-redraw + + TRUE \ loop menu again +; + +: toggle_singleuser ( N -- N TRUE ) + toggle_menuitem + menu-redraw + + \ Now we're going to make the change effective + + s" toggle_stateN @" \ base name of toggle state var + -rot 2dup 12 + c!
rot \ replace 'N' with ASCII numeral + + evaluate 0= if + s" boot_single" unsetenv + else + s" set boot_single=YES" evaluate + then + + TRUE \ loop menu again +; + +: toggle_verbose ( N -- N TRUE ) + toggle_menuitem + menu-redraw + + \ Now we're going to make the change effective + + s" toggle_stateN @" \ base name of toggle state var + -rot 2dup 12 + c! rot \ replace 'N' with ASCII numeral + + evaluate 0= if + s" boot_verbose" unsetenv + else + s" set boot_verbose=YES" evaluate + then + + TRUE \ loop menu again +; + +: goto_prompt ( N -- N FALSE ) + + s" set autoboot_delay=NO" evaluate + + cr + ." To get back to the menu, type `menu' and press ENTER" cr + ." or type `boot' and press ENTER to start FreeBSD." cr + cr + + FALSE \ exit the menu +; + +: cycle_kernel ( N -- N TRUE ) + cycle_menuitem + menu-redraw + + \ Now we're going to make the change effective + + s" cycle_stateN" \ base name of array state var + -rot 2dup 11 + c! rot \ replace 'N' with ASCII numeral + evaluate \ translate name into address + @ \ dereference address into value + 48 + \ convert to ASCII numeral + + \ Since we are [in this file] going to override the standard `boot' + \ routine with a custom one, you should know that we use $kernel + \ when referencing the desired kernel. Set $kernel below. + + s" set kernel=${kernel_prefix}${kernel[N]}${kernel_suffix}" + \ command to assemble full kernel-path + -rot tuck 36 + c! swap \ replace 'N' with array index value + evaluate \ sets $kernel to full kernel-path + + TRUE \ loop menu again +; + +: cycle_root ( N -- N TRUE ) + cycle_menuitem + menu-redraw + + \ Now we're going to make the change effective + + s" cycle_stateN" \ base name of array state var + -rot 2dup 11 + c!
rot \ replace 'N' with ASCII numeral + evaluate \ translate name into address + @ \ dereference address into value + 48 + \ convert to ASCII numeral + + \ Since we are [in this file] going to override the standard `boot' + \ routine with a custom one, you should know that we use $root when + \ booting. Set $root below. + + s" set root=${root_prefix}${root[N]}${root_suffix}" + \ command to assemble full root-path (prefix + root[N] + suffix) + -rot tuck 30 + c! swap \ replace 'N' with array index value + evaluate \ sets $root to full root-path + + TRUE \ loop menu again +; diff --git a/sys/boot/forth/menu.4th b/sys/boot/forth/menu.4th new file mode 100644 index 0000000..110ec1c --- /dev/null +++ b/sys/boot/forth/menu.4th @@ -0,0 +1,971 @@ +\ Copyright (c) 2003 Scott Long <scottl@freebsd.org> +\ Copyright (c) 2003 Aleksander Fafula <alex@fafula.com> +\ Copyright (c) 2006-2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +marker task-menu.4th + +\ Frame drawing +include /boot/frames.4th + +f_double \ Set frames to double (see frames.4th). Replace with + \ f_single if you want single frames. +46 constant dot \ ASCII definition of a period (in decimal) + + 4 constant menu_timeout_default_x \ default column position of timeout +23 constant menu_timeout_default_y \ default row position of timeout msg +10 constant menu_timeout_default \ default timeout (in seconds) + +\ Customize the following values with care + + 1 constant menu_start \ Numerical prefix of first menu item +dot constant bullet \ Menu bullet (appears after numerical prefix) + 5 constant menu_x \ Row position of the menu (from the top) + 10 constant menu_y \ Column position of the menu (from left side) + +\ Menu Appearance +variable menuidx \ Menu item stack for number prefixes +variable menurow \ Menu item stack for positioning +variable menubllt \ Menu item bullet + +\ Menu Positioning +variable menuX \ Menu X offset (columns) +variable menuY \ Menu Y offset (rows) + +\ Menu-item key association/detection +variable menukey1 +variable menukey2 +variable menukey3 +variable menukey4 +variable menukey5 +variable menukey6 +variable menukey7 +variable menukey8 +variable menureboot +variable menurebootadded +variable menuacpi +variable menuoptions + +\ Menu timer [count-down] variables +variable menu_timeout_enabled \ timeout state (internal use only) +variable menu_time \ variable for 
tracking the passage of time +variable menu_timeout \ determined configurable delay duration +variable menu_timeout_x \ column position of timeout message +variable menu_timeout_y \ row position of timeout message + +\ Boolean option status variables +variable toggle_state1 +variable toggle_state2 +variable toggle_state3 +variable toggle_state4 +variable toggle_state5 +variable toggle_state6 +variable toggle_state7 +variable toggle_state8 + +\ Array option status variables +variable cycle_state1 +variable cycle_state2 +variable cycle_state3 +variable cycle_state4 +variable cycle_state5 +variable cycle_state6 +variable cycle_state7 +variable cycle_state8 + +\ Containers for storing the initial caption text +create init_text1 255 allot +create init_text2 255 allot +create init_text3 255 allot +create init_text4 255 allot +create init_text5 255 allot +create init_text6 255 allot +create init_text7 255 allot +create init_text8 255 allot + +: arch-i386? ( -- BOOL ) \ Returns TRUE (-1) on i386, FALSE (0) otherwise. + s" arch-i386" environment? dup if + drop + then +; + +\ This function prints a menu item at menuX (row) and menuY (column), returns +\ the incremental decimal ASCII value associated with the menu item, and +\ increments the cursor position to the next row for the creation of the next +\ menu item. This function is called by the menu-create function. You need not +\ call it directly. +\ +: printmenuitem ( menu_item_str -- ascii_keycode ) + + menurow dup @ 1+ swap ! ( increment menurow ) + menuidx dup @ 1+ swap ! ( increment menuidx ) + + \ Calculate the menuitem row position + menurow @ menuY @ + + + \ Position the cursor at the menuitem position + dup menuX @ swap at-xy + + \ Print the value of menuidx + loader_color? if + ." [1m" + then + menuidx @ . + loader_color? if + ." 
[37m" + then + + \ Move the cursor forward 1 column + dup menuX @ 1+ swap at-xy + + menubllt @ emit \ Print the menu bullet using the emit function + + \ Move the cursor to the 3rd column from the current position + \ to allow for a space between the numerical prefix and the + \ text caption + menuX @ 3 + swap at-xy + + \ Print the menu caption (we expect a string to be on the stack + \ prior to invoking this function) + type + + \ Here we will add the ASCII decimal of the numerical prefix + \ to the stack (decimal ASCII for `1' is 49) as a "return value" + menuidx @ 48 + +; + +: toggle_menuitem ( N -- N ) \ toggles caption text and internal menuitem state + + \ ASCII numeral equal to user-selected menu item must be on the stack. + \ We do not modify the stack, so the ASCII numeral is left on top. + + s" init_textN" \ base name of buffer + -rot 2dup 9 + c! rot \ replace 'N' with ASCII num + + evaluate c@ 0= if + \ NOTE: no need to check toggle_stateN since the first time we + \ are called, we will populate init_textN. Further, we don't + \ need to test whether menu_caption[x] (ansi_caption[x] when + \ loader_color=1) is available since we would not have been + \ called if the caption was NULL. + + \ base name of environment variable + loader_color? if + s" ansi_caption[x]" + else + s" menu_caption[x]" + then + -rot 2dup 13 + c! rot \ replace 'x' with ASCII numeral + + getenv dup -1 <> if + + s" init_textN" \ base name of buffer + 4 pick \ copy ASCII num to top + rot tuck 9 + c! swap \ replace 'N' with ASCII num + evaluate + + \ now we have the buffer c-addr on top + \ ( followed by c-addr/u of current caption ) + + \ Copy the current caption into our buffer + 2dup c! -rot \ store strlen at first byte + begin + rot 1+ \ bring alt addr to top and increment + -rot -rot \ bring buffer addr to top + 2dup c@ swap c! 
\ copy current character + 1+ \ increment buffer addr + rot 1- \ bring buffer len to top and decrement + dup 0= \ exit loop if buffer len is zero + until + 2drop \ buffer len/addr + drop \ alt addr + + else + drop + then + then + + \ Now we are certain to have init_textN populated with the initial + \ value of menu_caption[x] (ansi_caption[x] with loader_color enabled). + \ We can now use init_textN as the untoggled caption and + \ toggled_text[x] (toggled_ansi[x] with loader_color enabled) as the + \ toggled caption and store the appropriate value into menu_caption[x] + \ (again, ansi_caption[x] with loader_color enabled). Last, we'll + \ negate the toggled state so that we reverse the flow on subsequent + \ calls. + + s" toggle_stateN @" \ base name of toggle state var + -rot 2dup 12 + c! rot \ replace 'N' with ASCII numeral + + evaluate 0= if + \ state is OFF, toggle to ON + + \ base name of toggled text var + loader_color? if + s" toggled_ansi[x]" + else + s" toggled_text[x]" + then + -rot 2dup 13 + c! rot \ replace 'x' with ASCII num + + getenv dup -1 <> if + \ Assign toggled text to menu caption + + \ base name of caption var + loader_color? if + s" ansi_caption[x]" + else + s" menu_caption[x]" + then + 4 pick \ copy ASCII num to top + rot tuck 13 + c! swap \ replace 'x' with ASCII num + + setenv \ set new caption + else + \ No toggled text, keep the same caption + + drop + then + + true \ new value of toggle state var (to be stored later) + else + \ state is ON, toggle to OFF + + s" init_textN" \ base name of initial text buffer + -rot 2dup 9 + c! rot \ replace 'N' with ASCII numeral + evaluate \ convert string to c-addr + count \ convert c-addr to c-addr/u + + \ base name of caption var + loader_color? if + s" ansi_caption[x]" + else + s" menu_caption[x]" + then + 4 pick \ copy ASCII num to top + rot tuck 13 + c! 
swap \ replace 'x' with ASCII numeral + + setenv \ set new caption + false \ new value of toggle state var (to be stored below) + then + + \ now we'll store the new toggle state (on top of stack) + s" toggle_stateN" \ base name of toggle state var + 3 pick \ copy ASCII numeral to top + rot tuck 12 + c! swap \ replace 'N' with ASCII numeral + evaluate \ convert string to addr + ! \ store new value +; + +: cycle_menuitem ( N -- N ) \ cycles through array of choices for a menuitem + + \ ASCII numeral equal to user-selected menu item must be on the stack. + \ We do not modify the stack, so the ASCII numeral is left on top. + + s" cycle_stateN" \ base name of array state var + -rot 2dup 11 + c! rot \ replace 'N' with ASCII numeral + + evaluate \ we now have a pointer to the proper variable + dup @ \ resolve the pointer (but leave it on the stack) + 1+ \ increment the value + + \ Before assigning the (incremented) value back to the pointer, + \ let's test for the existence of this particular array element. + \ If the element exists, we'll store index value and move on. + \ Otherwise, we'll loop around to zero and store that. + + dup 48 + \ duplicate Array index and convert to ASCII numeral + + \ base name of array caption text + loader_color? if + s" ansi_caption[x][y]" + else + s" menu_caption[x][y]" + then + -rot tuck 16 + c! swap \ replace 'y' with Array index + 4 pick rot tuck 13 + c! swap \ replace 'x' with menu choice + + \ Now test for the existence of our incremented array index in the + \ form of $menu_caption[x][y] ($ansi_caption[x][y] with loader_color + \ enabled) as set in loader.rc(5), et. al. + + getenv dup -1 = if + \ No caption set for this array index. Loop back to zero. + + drop ( getenv cruft ) + drop ( incremented array index ) + 0 ( new array index that will be stored later ) + + \ base name of caption var + loader_color? if + s" ansi_caption[x][0]" + else + s" menu_caption[x][0]" + then + 4 pick rot tuck 13 + c! 
swap \ replace 'x' with menu choice + + getenv dup -1 = if + \ This is highly unlikely to occur, but to make + \ sure that things move along smoothly, allocate + \ a temporary NULL string + + s" " + then + then + + \ At this point, we should have the following on the stack (in order, + \ from bottom to top): + \ + \ N - Ascii numeral representing the menu choice (inherited) + \ Addr - address of our internal cycle_stateN variable + \ N - zero-based number we intend to store to the above + \ C-Addr - string value we intend to store to menu_caption[x] + \ (or ansi_caption[x] with loader_color enabled) + \ + \ Let's perform what we need to with the above. + + \ base name of menuitem caption var + loader_color? if + s" ansi_caption[x]" + else + s" menu_caption[x]" + then + 6 pick rot tuck 13 + c! swap \ replace 'x' with menu choice + setenv \ set the new caption + + swap ! \ update array state variable +; + +: acpipresent? ( -- flag ) \ Returns TRUE if ACPI is present, FALSE otherwise + s" hint.acpi.0.rsdp" getenv + dup -1 = if + drop false exit + then + 2drop + true +; + +: acpienabled? ( -- flag ) \ Returns TRUE if ACPI is enabled, FALSE otherwise + s" hint.acpi.0.disabled" getenv + dup -1 <> if + s" 0" compare 0<> if + false exit + then + else + drop + then + true +; + +\ This function prints the appropriate menuitem basename to the stack if an +\ ACPI option is to be presented to the user, otherwise returns -1. Used +\ internally by menu-create, you need not (nor should you) call this directly. +\ +: acpimenuitem ( -- C-Addr | -1 ) + + arch-i386? if + acpipresent? if + acpienabled? if + loader_color? if + s" toggled_ansi[x]" + else + s" toggled_text[x]" + then + else + loader_color? if + s" ansi_caption[x]" + else + s" menu_caption[x]" + then + then + else + menuidx dup @ 1+ swap ! ( increment menuidx ) + -1 + then + else + -1 + then +; + +\ This function creates the list of menu items. This function is called by the +\ menu-display function. 
You need not be call it directly. +\ +: menu-create ( -- ) + + \ Print the frame caption at (x,y) + s" loader_menu_title" getenv dup -1 = if + drop s" Welcome to FreeBSD" + then + 24 over 2 / - 9 at-xy type + + \ Print our menu options with respective key/variable associations. + \ `printmenuitem' ends by adding the decimal ASCII value for the + \ numerical prefix to the stack. We store the value left on the stack + \ to the key binding variable for later testing against a character + \ captured by the `getkey' function. + + \ Note that any menu item beyond 9 will have a numerical prefix on the + \ screen consisting of the first digit (ie. 1 for the tenth menu item) + \ and the key required to activate that menu item will be the decimal + \ ASCII of 48 plus the menu item (ie. 58 for the tenth item, aka. `:') + \ which is misleading and not desirable. + \ + \ Thus, we do not allow more than 8 configurable items on the menu + \ (with "Reboot" as the optional ninth and highest numbered item). + + \ + \ Initialize the ACPI option status. + \ + 0 menuacpi ! + s" menu_acpi" getenv -1 <> if + c@ dup 48 > over 57 < and if ( '1' <= c1 <= '8' ) + menuacpi ! + arch-i386? if acpipresent? if + \ + \ Set menu toggle state to active state + \ (required by generic toggle_menuitem) + \ + menuacpi @ + s" acpienabled? toggle_stateN !" + -rot tuck 25 + c! swap + evaluate + then then + else + drop + then + then + + \ + \ Initialize the menu_options visual separator. + \ + 0 menuoptions ! + s" menu_options" getenv -1 <> if + c@ dup 48 > over 57 < and if ( '1' <= c1 <= '8' ) + menuoptions ! + else + drop + then + then + + \ Initialize "Reboot" menu state variable (prevents double-entry) + false menurebootadded ! + + 49 \ Iterator start (loop range 49 to 56; ASCII '1' to '8') + begin + \ If the "Options:" separator, print it. + dup menuoptions @ = if + \ Optionally add a reboot option to the menu + s" menu_reboot" getenv -1 <> if + drop + s" Reboot" printmenuitem menureboot ! 
+ true menurebootadded ! + then + + menuX @ + menurow @ 2 + menurow ! + menurow @ menuY @ + + at-xy + ." Options:" + then + + \ If this is the ACPI menu option, act accordingly. + dup menuacpi @ = if + acpimenuitem ( -- C-Addr | -1 ) + else + loader_color? if + s" ansi_caption[x]" + else + s" menu_caption[x]" + then + then + + ( C-Addr | -1 ) + dup -1 <> if + \ replace 'x' with current iteration + -rot 2dup 13 + c! rot + + \ test for environment variable + getenv dup -1 <> if + printmenuitem ( C-Addr -- N ) + + s" menukeyN !" \ generate cmd to store result + -rot 2dup 7 + c! rot + + evaluate + else + drop + then + else + drop + + s" menu_command[x]" + -rot 2dup 13 + c! rot ( replace 'x' ) + unsetenv + then + + 1+ dup 56 > \ add 1 to iterator, continue if less than 57 + until + drop \ iterator + + \ Optionally add a reboot option to the menu + menurebootadded @ true <> if + s" menu_reboot" getenv -1 <> if + drop \ no need for the value + s" Reboot" \ menu caption (required by printmenuitem) + + printmenuitem + menureboot ! + else + 0 menureboot ! + then + then +; + +\ Takes a single integer on the stack and updates the timeout display. The +\ integer must be between 0 and 9 (we will only update a single digit in the +\ source message). +\ +: menu-timeout-update ( N -- ) + + dup 9 > if ( N N 9 -- N ) + drop ( N -- ) + 9 ( maximum: -- N ) + then + + dup 0 < if ( N N 0 -- N ) + drop ( N -- ) + 0 ( minimum: -- N ) + then + + 48 + ( convert single-digit numeral to ASCII: N 48 -- N ) + + s" Autoboot in N seconds. [Space] to pause" ( N -- N Addr C ) + + 2 pick 48 - 0> if ( N Addr C N 48 -- N Addr C ) + + \ Modify 'N' (Addr+12) above to reflect time-left + + -rot ( N Addr C -- C N Addr ) + tuck ( C N Addr -- C Addr N Addr ) + 12 + ( C Addr N Addr -- C Addr N Addr2 ) + c! 
( C Addr N Addr2 -- C Addr ) + swap ( C Addr -- Addr C ) + + menu_timeout_x @ + menu_timeout_y @ + at-xy ( position cursor: Addr C N N -- Addr C ) + + type ( print message: Addr C -- ) + + else ( N Addr C N -- N Addr C ) + + menu_timeout_x @ + menu_timeout_y @ + at-xy ( position cursor: N Addr C N N -- N Addr C ) + + spaces ( erase message: N Addr C -- N Addr ) + 2drop ( N Addr -- ) + + then + + 0 25 at-xy ( position cursor back at bottom-left ) +; + +\ This function blocks program flow (loops forever) until a key is pressed. +\ The key that was pressed is added to the top of the stack in the form of its +\ decimal ASCII representation. This function is called by the menu-display +\ function. You need not call it directly. +\ +: getkey ( -- ascii_keycode ) + + begin \ loop forever + + menu_timeout_enabled @ 1 = if + ( -- ) + seconds ( get current time: -- N ) + dup menu_time @ <> if ( has time elapsed?: N N N -- N ) + + \ At least 1 second has elapsed since last loop + \ so we will decrement our "timeout" (really a + \ counter, insuring that we do not proceed too + \ fast) and update our timeout display. + + menu_time ! ( update time record: N -- ) + menu_timeout @ ( "time" remaining: -- N ) + dup 0> if ( greater than 0?: N N 0 -- N ) + 1- ( decrement counter: N -- N ) + dup menu_timeout ! + ( re-assign: N N Addr -- N ) + then + ( -- N ) + + dup 0= swap 0< or if ( N <= 0?: N N -- ) + \ halt the timer + 0 menu_timeout ! ( 0 Addr -- ) + 0 menu_timeout_enabled ! ( 0 Addr -- ) + then + + \ update the timer display ( N -- ) + menu_timeout @ menu-timeout-update + + menu_timeout @ 0= if + \ We've reached the end of the timeout + \ (user did not cancel by pressing ANY + \ key) + + s" menu_timeout_command" getenv dup + -1 = if + drop \ clean-up + else + evaluate + then + then + + else ( -- N ) + \ No [detectable] time has elapsed (in seconds) + drop ( N -- ) + then + ( -- ) + then + + key? if \ Was a key pressed? 
(see loader(8)) + + \ An actual key was pressed (if the timeout is running, + \ kill it regardless of which key was pressed) + menu_timeout @ 0<> if + 0 menu_timeout ! + 0 menu_timeout_enabled ! + + \ clear screen of timeout message + 0 menu-timeout-update + then + + \ get the key that was pressed and exit (if we + \ get a non-zero ASCII code) + key dup 0<> if + exit + else + drop + then + then + 50 ms \ sleep for 50 milliseconds (see loader(8)) + + again +; + +: menu-erase ( -- ) \ Erases menu and resets positioning variable to positon 1. + + \ Clear the screen area associated with the interactive menu + menuX @ menuY @ + 2dup at-xy 38 spaces 1+ 2dup at-xy 38 spaces 1+ + 2dup at-xy 38 spaces 1+ 2dup at-xy 38 spaces 1+ + 2dup at-xy 38 spaces 1+ 2dup at-xy 38 spaces 1+ + 2dup at-xy 38 spaces 1+ 2dup at-xy 38 spaces 1+ + 2dup at-xy 38 spaces 1+ 2dup at-xy 38 spaces 1+ + 2dup at-xy 38 spaces 1+ 2dup at-xy 38 spaces + 2drop + + \ Reset the starting index and position for the menu + menu_start 1- menuidx ! + 0 menurow ! +; + +\ Erase and redraw the menu. Useful if you change a caption and want to +\ update the menu to reflect the new value. +\ +: menu-redraw ( -- ) + menu-erase + menu-create +; + +\ This function initializes the menu. Call this from your `loader.rc' file +\ before calling any other menu-related functions. +\ +: menu-init ( -- ) + menu_start + 1- menuidx ! \ Initialize the starting index for the menu + 0 menurow ! \ Initialize the starting position for the menu + 42 13 2 9 box \ Draw frame (w,h,x,y) + 0 25 at-xy \ Move cursor to the bottom for output +; + +\ Main function. Call this from your `loader.rc' file. +\ +: menu-display ( -- ) + + 0 menu_timeout_enabled ! \ start with automatic timeout disabled + + \ check indication that automatic execution after delay is requested + s" menu_timeout_command" getenv -1 <> if ( Addr C -1 -- | Addr ) + drop ( just testing existence right now: Addr -- ) + + \ initialize state variables + seconds menu_time ! 
( store the time we started ) + 1 menu_timeout_enabled ! ( enable automatic timeout ) + + \ read custom time-duration (if set) + s" autoboot_delay" getenv dup -1 = if + drop \ no custom duration (remove dup'd bunk -1) + menu_timeout_default \ use default setting + else + 2dup ?number 0= if ( if not a number ) + \ disable timeout if "NO", else use default + s" NO" compare-insensitive 0= if + 0 menu_timeout_enabled ! + 0 ( assigned to menu_timeout below ) + else + menu_timeout_default + then + else + -rot 2drop + + \ disable timeout if less than zero + dup 0< if + drop + 0 menu_timeout_enabled ! + 0 ( assigned to menu_timeout below ) + then + then + then + menu_timeout ! ( store value on stack from above ) + + menu_timeout_enabled @ 1 = if + \ read custom column position (if set) + s" loader_menu_timeout_x" getenv dup -1 = if + drop \ no custom column position + menu_timeout_default_x \ use default setting + else + \ make sure custom position is a number + ?number 0= if + menu_timeout_default_x \ or use default + then + then + menu_timeout_x ! ( store value on stack from above ) + + \ read custom row position (if set) + s" loader_menu_timeout_y" getenv dup -1 = if + drop \ no custom row position + menu_timeout_default_y \ use default setting + else + \ make sure custom position is a number + ?number 0= if + menu_timeout_default_y \ or use default + then + then + menu_timeout_y ! 
( store value on stack from above ) + then + then + + menu-create + + begin \ Loop forever + + 0 25 at-xy \ Move cursor to the bottom for output + getkey \ Block here, waiting for a key to be pressed + + dup -1 = if + drop exit \ Caught abort (abnormal return) + then + + \ Boot if the user pressed Enter/Ctrl-M (13) or + \ Ctrl-Enter/Ctrl-J (10) + dup over 13 = swap 10 = or if + drop ( no longer needed ) + s" boot" evaluate + exit ( pedantic; never reached ) + then + + \ Evaluate the decimal ASCII value against known menu item + \ key associations and act accordingly + + 49 \ Iterator start (loop range 49 to 56; ASCII '1' to '8') + begin + s" menukeyN @" + + \ replace 'N' with current iteration + -rot 2dup 7 + c! rot + + evaluate rot tuck = if + + \ Adjust for missing ACPI menuitem on non-i386 + arch-i386? true <> menuacpi @ 0<> and if + menuacpi @ over 2dup < -rot = or + over 58 < and if + ( key >= menuacpi && key < 58: N -- N ) + 1+ + then + then + + \ base env name for the value (x is a number) + s" menu_command[x]" + + \ Copy ASCII number to string at offset 13 + -rot 2dup 13 + c! rot + + \ Test for the environment variable + getenv dup -1 <> if + \ Execute the stored procedure + evaluate + + \ We expect there to be a non-zero + \ value left on the stack after + \ executing the stored procedure. + \ If so, continue to run, else exit. + + 0= if + drop \ key pressed + drop \ loop iterator + exit + else + swap \ need iterator on top + then + then + + \ Re-adjust for missing ACPI menuitem + arch-i386? true <> menuacpi @ 0<> and if + swap + menuacpi @ 1+ over 2dup < -rot = or + over 59 < and if + 1- + then + swap + then + else + swap \ need iterator on top + then + + \ + \ Check for menu keycode shortcut(s) + \ + s" menu_keycode[x]" + -rot 2dup 13 + c! rot + getenv dup -1 = if + drop + else + ?number 0<> if + rot tuck = if + swap + s" menu_command[x]" + -rot 2dup 13 + c! 
rot + getenv dup -1 <> if + evaluate + 0= if + 2drop + exit + then + else + drop + then + else + swap + then + then + then + + 1+ dup 56 > \ increment iterator + \ continue if less than 57 + until + drop \ loop iterator + + menureboot @ = if 0 reboot then + + again \ Non-operational key was pressed; repeat +; + +\ This function unsets all the possible environment variables associated with +\ creating the interactive menu. Call this when you want to clear the menu +\ area in preparation for another menu. +\ +: menu-clear ( -- ) + + 49 \ Iterator start (loop range 49 to 56; ASCII '1' to '8') + begin + \ basename for caption variable + loader_color? if + s" ansi_caption[x]" + else + s" menu_caption[x]" + then + -rot 2dup 13 + c! rot \ replace 'x' with current iteration + unsetenv \ not erroneous to unset unknown var + + s" 0 menukeyN !" \ basename for key association var + -rot 2dup 9 + c! rot \ replace 'N' with current iteration + evaluate \ assign zero (0) to key assoc. var + + 1+ dup 56 > \ increment, continue if less than 57 + until + drop \ iterator + + \ clear the "Reboot" menu option flag + s" menu_reboot" unsetenv + 0 menureboot ! + + \ clear the ACPI menu option flag + s" menu_acpi" unsetenv + 0 menuacpi ! + + \ clear the "Options" menu separator flag + s" menu_options" unsetenv + 0 menuoptions ! + + menu-erase +; + +\ Assign configuration values +bullet menubllt ! +10 menuY ! +5 menuX ! + +\ Initialize our boolean state variables +0 toggle_state1 ! +0 toggle_state2 ! +0 toggle_state3 ! +0 toggle_state4 ! +0 toggle_state5 ! +0 toggle_state6 ! +0 toggle_state7 ! +0 toggle_state8 ! + +\ Initialize our array state variables +0 cycle_state1 ! +0 cycle_state2 ! +0 cycle_state3 ! +0 cycle_state4 ! +0 cycle_state5 ! +0 cycle_state6 ! +0 cycle_state7 ! +0 cycle_state8 ! + +\ Initialize string containers +0 init_text1 c! +0 init_text2 c! +0 init_text3 c! +0 init_text4 c! +0 init_text5 c! +0 init_text6 c! +0 init_text7 c! +0 init_text8 c! 
diff --git a/sys/boot/forth/menu.4th.8 b/sys/boot/forth/menu.4th.8 new file mode 100644 index 0000000..45388f5 --- /dev/null +++ b/sys/boot/forth/menu.4th.8 @@ -0,0 +1,307 @@ +.\" Copyright (c) 2011 Devin Teske +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 18, 2011 +.Dt MENU.4TH 8 +.Os +.Sh NAME +.Nm menu.4th +.Nd FreeBSD dynamic menu boot module. +.Sh DESCRIPTION +The file that goes by the name of +.Nm +is a set of commands designed to display a dynamic menu system managed through +a system of carefully named environment variables. +The commands of +.Nm +by themselves are not enough for most uses. 
+Please refer to the +examples below for the most common situations, and to +.Xr loader 8 +for additional commands. +.Pp +Before using any of the commands provided in +.Nm , +it must be included +through the command: +.Pp +.Dl include menu.4th +.Pp +This line is present in the default +.Pa /boot/menu.rc +file, so it is not needed (and should not be re-issued) in a normal setup. +.Pp +The commands provided by it are: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic menu-init +Draws the menu bounding box and initializes some internal state variables. +This should be called before any other menu-related functions. +.It Ic menu-display +Displays the menu (configured via the below documented environment variables) +and blocks on keyboard input, awaiting user action. +.It Ic menu-erase +Clears the screen area within the menu bounding box. +.It Ic menu-redraw +Calls +.Ic menu-erase +and then redraws the menu. +.It Ic menu-clear +Unsets all possible environment variables used +to configure the menu and then calls +.Ic menu-erase . +.El +.Pp +The environment variables that affect its behavior are: +.Bl -tag -width bootfile -offset indent +.It Va loader_color +If set to +.Dq Li YES +(case-insensitive) or +.Dq Li 1 , +causes the menu to be displayed in color wherever possible. This includes the +use of ANSI bold for numbers appearing to the left of menuitems and the use of +special +.Dq Li ansi +variables described below. +.It Va autoboot_delay +Number of seconds +.Ic menu-display +will wait before executing +.Va menu_timeout_command +.Ic ( boot +by default) unless a key is pressed. +If set to +.Dq Li NO +(case-insensitive) or +.Dq Li -1 , +.Ic menu-display +will wait for user input and never execute +.Ic menu_timeout_command . +Default is +.Dq Li 10 . +See +.Xr loader 8 +for additional information. 
+.It Va menu_timeout_command +The command to be executed after +.Va autoboot_delay +seconds if a key is not pressed. The default is +.Ic boot . 
+.It Va loader_menu_timeout_x +Sets the desired column position of the timeout countdown text. Default is 4. +.It Va loader_menu_timeout_y +Sets the desired row position of the timeout countdown text. Default is 23. +.It Va loader_menu_title +The text to display centered above the menu. Default is +.Dq Li "Welcome to FreeBSD" . +.It Va menu_caption[x] +The text to be displayed for the numbered menuitem +.Dq Li x . +.It Va menu_command[x] +The command to be executed when the number associated with menuitem +.Dq Li x +is pressed. See the list of included FICL words below for some ideas. +.It Va menu_keycode[x] +An optional decimal ASCII keycode to be associated with menuitem +.Dq Li x . +When pressed, will cause the execution of +.Va menu_command[x] . +.It Va ansi_caption[x] +If +.Va loader_color +is set, use this caption for menuitem +.Dq Li x +instead of +.Va menu_caption[x] . +.It Va toggled_text[x] +For menuitems where +.Va menu_command[x] +is set to +.Dq Li toggle_menuitem +(or a derivative thereof), the text displayed +will toggle between this and +.Va menu_caption[x] . +.It Va toggled_ansi[x] +Like +.Va toggled_text[x] +except used when +.Va loader_color +is enabled. +.It Va menu_caption[x][y] +For menuitems where +.Va menu_command[x] +is set to +.Dq Li cycle_menuitem +(or a derivative thereof), the text displayed will cycle between this and other +.Va menu_caption[x][y] +entries. +.It Va ansi_caption[x][y] +Like +.Va menu_caption[x][y] +except used when +.Va loader_color +is enabled. +.It Va menu_acpi +When set to a number +.Dq Li x +associated with a given menuitem, that menuitem will only appear when +running on i386-compatible hardware, +.Va hint.acpi.0.rsdp +is set (indicating the presence of hardware ACPI support as detected by +.Xr loader 8 ) , +and +.Va hint.acpi.0.disabled +is not set. 
+On non-i386 hardware, menuitems configured after the +.Dq Li menu_acpi +menuitem will use a lower number (to compensate for the missing ACPI menuitem) +but continue to function as expected. +On i386-compatible hardware lacking ACPI support (as detected by +.Xr loader 8 ) , +subsequent menuitems will retain their associated numbers. +.It Va hint.acpi.0.rsdp +Set automatically by +.Xr loader 8 +on i386-compatible hardware when ACPI support is detected at boot time. +Affects the display of the +.Dq Li menu_acpi +menuitem (if configured). +.It Va hint.acpi.0.disabled +Affects the display of the +.Va menu_acpi +menuitem. If set to a value other than 0, the menuitem will display +.Va menu_caption[x] +.Va ( ansi_caption[x] +if +.Va loader_color +is set), otherwise +.Va toggled_text[x] +.Va ( toggled_ansi[x] +if +.Va loader_color +is set). +.It Va menu_options +When set to a number +.Dq Li x , +a single blank line and an +.Dq Li Options +header are inserted between +.Va menu_caption[x-1] +and +.Va menu_caption[x] +(if configured). +.It Va menu_reboot +If set, adds a built-in +.Dq Li Reboot +menuitem after the last configured menuitem. If +.Va menu_options +is configured, the +.Dq Li Reboot +menuitem will be inserted before the +.Dq Options +separator. +.El +.Pp +In addition, it provides the following FICL words: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic arch-i386? ( -- BOOL ) +Returns true (-1) on i386 and false (0) otherwise. +.It Ic acpipresent? ( -- BOOL ) +Returns true (-1) if ACPI is present and false (0) otherwise. +.It Ic acpienabled? ( -- BOOL ) +Returns true (-1) if ACPI is enabled and false (0) otherwise. +.It Ic toggle_menuitem ( N -- N ) +Toggles menuitem +.Dq Li N +between +.Va menu_caption[x] +and +.Va toggled_text[x] +(where +.Dq Li N +represents the ASCII decimal value for +.Dq Li x ) . 
+.It Ic cycle_menuitem ( N -- N ) +Cycles menuitem +.Dq Li N +between +.Va menu_caption[x][y] +entries (where +.Dq Li N +represents the ASCII decimal value for +.Dq Li x ) . +.El +.Pp +For all values of +.Dq Li x +above, use any number from 1 through 9. Double-digit numbers are not +currently supported. +.Sh FILES +.Bl -tag -width /boot/loader.4th -compact +.It Pa /boot/loader +The +.Xr loader 8 . +.It Pa /boot/menu.4th +.Nm +itself. +.It Pa /boot/loader.rc +.Xr loader 8 +bootstrapping script. +.El +.Sh EXAMPLES +A simple boot menu: +.Pp +.Bd -literal -offset indent -compact +include /boot/menu.4th +menu-init +set menu_caption[1]="Boot" +set menu_command[1]="boot" +set menu_options=2 +set menu_caption[2]="Option: NO" +set toggled_text[2]="Option: YES" +set menu_command[2]="toggle_menuitem" +set menu_timeout_command="boot" +set menu_reboot +menu-display +.Ed +.Sh SEE ALSO +.Xr loader.conf 5 , +.Xr loader 8 , +.Xr loader.4th 8 , +.Xr beastie.4th 8 +.Sh HISTORY +The +.Nm +set of commands first appeared in +.Fx 9.0 . +.Sh AUTHORS +The +.Nm +set of commands was written by +.An -nosplit +.An Devin Teske Aq devinteske@hotmail.com . 
diff --git a/sys/boot/forth/menu.rc b/sys/boot/forth/menu.rc new file mode 100644 index 0000000..d8af4b2 --- /dev/null +++ b/sys/boot/forth/menu.rc @@ -0,0 +1,76 @@ +\ Menu.rc +\ $FreeBSD$ +\ +\ Load required Forth modules +include /boot/version.4th +include /boot/brand.4th +include /boot/menu.4th +include /boot/menu-commands.4th +include /boot/shortcuts.4th + +\ Screen prep +clear \ clear the screen (see `screen.4th') +print_version \ print version string (bottom-right; see `version.4th') +draw-beastie \ draw freebsd mascot (on right; see `beastie.4th') +draw-brand \ draw the FreeBSD title (top-left; see `brand.4th') +menu-init \ initialize the menu area (see `menu.4th') + +\ Initialize main menu constructs (see `menu.4th') +\ NOTE: To use the `ansi' variants, add `loader_color=1' to loader.conf(5) + +set menu_caption[1]="Boot [ENTER]" +set menu_command[1]="boot" +set ansi_caption[1]="[1mB[37moot [1m[ENTER][37m" +set menu_keycode[1]="98" + +set menu_caption[2]="[Esc]ape to loader prompt" +set menu_command[2]="goto_prompt" +set menu_keycode[2]="27" +set ansi_caption[2]="[1mEsc[37mape to loader prompt" + +\ Enable built-in "Reboot" trailing menuitem +\ NOTE: appears before menu_options if configured +\ +set menu_reboot + +\ Enable "Options:" separator. When set to a numerical value (1-8), a visual +\ separator is inserted before that menuitem number. 
+\ +set menu_options=4 + +set menu_caption[4]="[A]CPI Support: Disabled" +set toggled_text[4]="[A]CPI Support: Enabled" +set menu_command[4]="toggle_acpi" +set menu_keycode[4]="97" +set menu_acpi=4 +set ansi_caption[4]="[1mA[37mCPI Support: [34;1mDisabled[37m" +set toggled_ansi[4]="[1mA[37mCPI Support: [32mEnabled[37m" + +set menu_caption[5]="Boot Safe [M]ode: NO" +set toggled_text[5]="Boot Safe [M]ode: YES" +set menu_command[5]="toggle_safemode" +set menu_keycode[5]="109" +set ansi_caption[5]="Boot Safe [1mM[37mode: [34;1mNO[37m" +set toggled_ansi[5]="Boot Safe [1mM[37mode: [32mYES[37m" + +set menu_caption[6]="Boot [S]ingle User: NO" +set toggled_text[6]="Boot [S]ingle User: YES" +set menu_command[6]="toggle_singleuser" +set menu_keycode[6]="115" +set ansi_caption[6]="Boot [1mS[37mingle User: [34;1mNO[37m" +set toggled_ansi[6]="Boot [1mS[37mingle User: [32mYES[37m" + +set menu_caption[7]="Boot [V]erbose: NO" +set toggled_text[7]="Boot [V]erbose: YES" +set menu_command[7]="toggle_verbose" +set menu_keycode[7]="118" +set ansi_caption[7]="Boot [1mV[37merbose: [34;1mNO[37m" +set toggled_ansi[7]="Boot [1mV[37merbose: [32mYES[37m" + +\ Enable automatic booting (add ``autoboot_delay=N'' to loader.conf(5) to +\ customize the timeout; default is 10-seconds) +\ +set menu_timeout_command="boot" + +\ Display the main menu (see `menu.4th') +menu-display diff --git a/sys/boot/forth/shortcuts.4th b/sys/boot/forth/shortcuts.4th new file mode 100644 index 0000000..55a369b --- /dev/null +++ b/sys/boot/forth/shortcuts.4th @@ -0,0 +1,50 @@ +\ Copyright (c) 2008-2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. 
Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +\ FICL words intended to be used as shortcuts for carrying out common tasks or +\ producing common results. Generally, words defined here are simply groupings +\ of other custom words that pull from multiple libraries (for example, if you +\ want to define a custom word that uses words defined in three different +\ libraries, this is a good place to define such a word). +\ +\ This script should be included after you have included any/all other +\ libraries. This will prevent calling a word defined here before any required +\ words have been defined. + +marker task-shortcuts.4th + +\ This "shortcut" word will not be used directly, but is defined here to +\ offer the user a quick way to get back into the interactive PXE menu +\ after they have escaped to the shell (perhaps by accident). 
+\ +: menu ( -- ) + clear \ Clear the screen (in screen.4th) + print_version \ print version string (bottom-right; see version.4th) + draw-beastie \ Draw FreeBSD logo at right (in beastie.4th) + draw-brand \ Draw FIS logo at top (in brand.4th) + menu-init \ Initialize menu and draw bounding box (in menu.4th) + menu-display \ Launch interactive menu (in menu.4th) +; diff --git a/sys/boot/forth/support.4th b/sys/boot/forth/support.4th index 5484e06..3dbeae8 100644 --- a/sys/boot/forth/support.4th +++ b/sys/boot/forth/support.4th @@ -54,7 +54,6 @@ \ Exported global variables; \ \ string conf_files configuration files to be loaded -\ string password password \ cell modules_options pointer to first module information \ value verbose? indicates if user wants a verbose loading \ value any_conf_read? indicates if a conf file was succesfully read @@ -164,7 +163,6 @@ structure: file_metadata string conf_files string nextboot_conf_file -string password create module_options sizeof module.next allot 0 module_options ! create last_module_option sizeof module.next allot 0 last_module_option ! 0 value verbose? @@ -610,8 +608,6 @@ only forth also support-functions also file-processing definitions also : execute? s" exec" assignment_type? ; -: password? s" password" assignment_type? ; - : module_load? load_module_suffix suffix_type? ; : module_loadname? module_loadname_suffix suffix_type? ; @@ -752,10 +748,6 @@ only forth also support-functions also file-processing definitions also ['] evaluate catch if EEXEC throw then ; -: set_password - value_buffer strget unquote password string= -; - : process_assignment name_buffer .len @ 0= if exit then loader_conf_files? if set_conf_files exit then @@ -763,7 +755,6 @@ only forth also support-functions also file-processing definitions also nextboot_conf? if set_nextboot_conf exit then verbose_flag? if set_verbose exit then execute? if execute_command exit then - password? if set_password exit then module_load? 
if set_module_flag exit then module_loadname? if set_module_loadname exit then module_type? if set_module_type exit then @@ -1532,30 +1523,6 @@ also builtins ?dup 0= if ['] load_modules catch then ; -\ read and store only as many bytes as we need, drop the extra -: read-password { size | buf len -- } - size allocate if ENOMEM throw then - to buf - 0 to len - begin - key - dup backspace = if - drop - len if - backspace emit bl emit backspace emit - len 1 - to len - else - bell emit - then - else - dup <cr> = if cr drop buf len exit then - [char] * emit - len size < if buf len chars + c! else drop then - len 1+ to len - then - again -; - \ Go back to straight forth vocabulary only forth also definitions diff --git a/sys/boot/forth/version.4th b/sys/boot/forth/version.4th new file mode 100644 index 0000000..c59f825 --- /dev/null +++ b/sys/boot/forth/version.4th @@ -0,0 +1,60 @@ +\ Copyright (c) 2006-2011 Devin Teske <devinteske@hotmail.com> +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +marker task-version.4th + +variable versionX +variable versionY + +\ Initialize text placement to defaults +80 versionX ! \ NOTE: this is the ending column (text is right-justified) +24 versionY ! + +: print_version ( -- ) + + \ Get the text placement position (if set) + s" loader_version_x" getenv dup -1 <> if + ?number drop versionX ! -1 + then drop + s" loader_version_y" getenv dup -1 <> if + ?number drop versionY ! -1 + then drop + + \ Exit if a version was not set + s" loader_version" getenv dup -1 = if + drop exit + then + + \ Right justify the text + dup versionX @ swap - versionY @ at-xy + + \ Print the version (optionally in cyan) + loader_color? if + ." [36m" type ." [37m" + else + type + then +; diff --git a/sys/boot/forth/version.4th.8 b/sys/boot/forth/version.4th.8 new file mode 100644 index 0000000..fff2268 --- /dev/null +++ b/sys/boot/forth/version.4th.8 @@ -0,0 +1,126 @@ +.\" Copyright (c) 2011 Devin Teske +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 19, 2011 +.Dt VERSION.4TH 8 +.Os +.Sh NAME +.Nm version.4th +.Nd FreeBSD version string boot module. +.Sh DESCRIPTION +The file that goes by the name of +.Nm +is a set of commands designed to draw the boot loader +version at the bottom-right of the screen. +The commands of +.Nm +by themselves are not enough for most uses. +Please refer to the +examples below for the most common situations, and to +.Xr loader 8 +for additional commands. +.Pp +Before using any of the commands provided in +.Nm , +it must be included +through the command: +.Pp +.Dl include version.4th +.Pp +This line is present in the default +.Pa /boot/menu.rc +file, so it is not needed (and should not be re-issued) in a normal setup. 
+.Pp +The commands provided by it are: +.Pp +.Bl -tag -width disable-module_module -compact -offset indent +.It Ic print_version +Prints the contents of the +.Va loader_version +environment variable right-justified at the column +.Va loader_version_x +and row +.Va loader_version_y . +.El +.Pp +The environment variables that affect its behavior are: +.Bl -tag -width bootfile -offset indent +.It Va loader_version +Set automatically by +.Xr loader 8 , +but you can override it by setting it in +.Xr loader.conf 5 . +This should be the version of the boot loader used. +.It Va loader_version_x +Sets the desired ending column position of +.Va loader_version . +Default is 80. +.It Va loader_version_y +Sets the desired ending row position of +.Va loader_version . +Default is 24. +.It Va loader_color +If set to +.Dq Li YES +(case-insensitive) or +.Dq Li 1 , +causes the version to be printed in ANSI Cyan. +.El +.Sh FILES +.Bl -tag -width /boot/loader.4th -compact +.It Pa /boot/loader +The +.Xr loader 8 . +.It Pa /boot/version.4th +.Nm +itself. +.It Pa /boot/loader.rc +.Xr loader 8 +bootstrapping script. +.El +.Sh EXAMPLES +Override +.Xr loader 8 +version in +.Xr loader.conf 5 : +.Pp +.Bd -literal -offset indent -compact +loader_version="loader 1.1" +.Ed +.Sh SEE ALSO +.Xr loader.conf 5 , +.Xr loader 8 +.Sh HISTORY +The +.Nm +set of commands first appeared in +.Fx 9.0 . +.Sh AUTHORS +The +.Nm +set of commands was written by +.An -nosplit +.An Devin Teske Aq devinteske@hotmail.com . 
diff --git a/sys/boot/i386/libi386/biosacpi.c b/sys/boot/i386/libi386/biosacpi.c index f511eb7..ff8b1ca 100644 --- a/sys/boot/i386/libi386/biosacpi.c +++ b/sys/boot/i386/libi386/biosacpi.c @@ -61,7 +61,7 @@ biosacpi_detect(void) return; /* export values from the RSDP */ - sprintf(buf, "%p", VTOP(rsdp)); + sprintf(buf, "%u", VTOP(rsdp)); setenv("hint.acpi.0.rsdp", buf, 1); revision = rsdp->Revision; if (revision == 0) diff --git a/sys/boot/i386/loader/Makefile b/sys/boot/i386/loader/Makefile index b834ea0..f4babff 100644 --- a/sys/boot/i386/loader/Makefile +++ b/sys/boot/i386/loader/Makefile @@ -104,11 +104,16 @@ FILESMODE_${LOADER}= ${BINMODE} -b .PATH: ${.CURDIR}/../../forth FILES+= loader.help loader.4th support.4th loader.conf FILES+= screen.4th frames.4th beastie.4th +FILES+= brand.4th check-password.4th color.4th delay.4th +FILES+= menu.4th menu-commands.4th shortcuts.4th version.4th FILESDIR_loader.conf= /boot/defaults .if !exists(${DESTDIR}/boot/loader.rc) FILES+= loader.rc .endif +.if !exists(${DESTDIR}/boot/menu.rc) +FILES+= menu.rc +.endif .endif # XXX crt0.o needs to be first for pxeboot(8) to work diff --git a/sys/boot/i386/zfsboot/Makefile b/sys/boot/i386/zfsboot/Makefile index 06ff863..65df86f 100644 --- a/sys/boot/i386/zfsboot/Makefile +++ b/sys/boot/i386/zfsboot/Makefile @@ -15,7 +15,7 @@ ORG1= 0x7c00 ORG2= 0x2000 CFLAGS= -DBOOTPROG=\"zfsboot\" \ - -Os -g \ + -Os \ -fno-guess-branch-probability \ -fomit-frame-pointer \ -fno-unit-at-a-time \ diff --git a/sys/boot/ia64/common/Makefile b/sys/boot/ia64/common/Makefile index 3204edc..d90898f 100644 --- a/sys/boot/ia64/common/Makefile +++ b/sys/boot/ia64/common/Makefile @@ -6,7 +6,7 @@ MK_SSP= no LIB= ia64 INTERNALLIB= -SRCS= autoload.c bootinfo.c copy.c devicename.c exec.c +SRCS= autoload.c bootinfo.c copy.c devicename.c exec.c icache.c CFLAGS+= -I${.CURDIR}/../../efi/include CFLAGS+= -I${.CURDIR}/../../efi/include/${MACHINE_CPUARCH} @@ -33,9 +33,15 @@ loader.help: help.common .PATH: 
${.CURDIR}/../../forth FILES+= loader.4th support.4th loader.conf +FILES+= screen.4th frames.4th +FILES+= beastie.4th brand.4th check-password.4th color.4th delay.4th +FILES+= menu.4th menu-commands.4th shortcuts.4th version.4th .if !exists(${DESTDIR}/boot/loader.rc) FILES+= loader.rc .endif +.if !exists(${DESTDIR}/boot/menu.rc) +FILES+= menu.rc +.endif FILESDIR_loader.conf= /boot/defaults .include <bsd.lib.mk> diff --git a/sys/boot/ia64/common/exec.c b/sys/boot/ia64/common/exec.c index dd9c9ba..65886fa 100644 --- a/sys/boot/ia64/common/exec.c +++ b/sys/boot/ia64/common/exec.c @@ -258,6 +258,8 @@ ia64_loadseg(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta) if (ph->p_flags & PF_X) { ia64_text_start = ph->p_vaddr + delta; ia64_text_size = ph->p_memsz; + + ia64_sync_icache(ia64_text_start, ia64_text_size); } else { ia64_data_start = ph->p_vaddr + delta; ia64_data_size = ph->p_memsz; diff --git a/sys/boot/ia64/common/icache.c b/sys/boot/ia64/common/icache.c new file mode 100644 index 0000000..77a35d7 --- /dev/null +++ b/sys/boot/ia64/common/icache.c @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2011 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <stand.h> +#include <machine/ia64_cpu.h> + +#include "libia64.h" + +void +ia64_sync_icache(vm_offset_t va, size_t sz) +{ + uintptr_t pa; + size_t cnt, max; + + while (sz > 0) { + max = sz; + pa = (uintptr_t)ia64_va2pa(va, &max); + for (cnt = 0; cnt < max; cnt += 32) + ia64_fc_i(pa + cnt); + ia64_sync_i(); + va += max; + sz -= max; + } + ia64_srlz_i(); +} diff --git a/sys/boot/ia64/common/libia64.h b/sys/boot/ia64/common/libia64.h index 29912f5..4bc7638 100644 --- a/sys/boot/ia64/common/libia64.h +++ b/sys/boot/ia64/common/libia64.h @@ -64,6 +64,7 @@ void ia64_loadseg(void *, void *, uint64_t); ssize_t ia64_copyin(const void *, vm_offset_t, size_t); ssize_t ia64_copyout(vm_offset_t, void *, size_t); +void ia64_sync_icache(vm_offset_t, size_t); ssize_t ia64_readin(int, vm_offset_t, size_t); void *ia64_va2pa(vm_offset_t, size_t *); diff --git a/sys/boot/ia64/efi/efimd.c b/sys/boot/ia64/efi/efimd.c index 0f7f02a..0b29e12 100644 --- a/sys/boot/ia64/efi/efimd.c +++ b/sys/boot/ia64/efi/efimd.c @@ -230,3 +230,35 @@ ia64_platform_enter(const char *kernel) return (0); } + +COMMAND_SET(pbvm, "pbvm", "show PBVM details", command_pbvm); + +static int +command_pbvm(int argc, char *argv[]) +{ + uint64_t limit, pg, start; + u_int idx; + + printf("Page table @ %p, size %x\n", ia64_pgtbl, ia64_pgtblsz); + + if (ia64_pgtbl == NULL) + return (0); + + limit = ~0; + start = ~0; + idx = 0; + while (ia64_pgtbl[idx] 
!= 0) { + pg = ia64_pgtbl[idx]; + if (pg != limit) { + if (start != ~0) + printf("%#lx-%#lx\n", start, limit); + start = pg; + } + limit = pg + IA64_PBVM_PAGE_SIZE; + idx++; + } + if (start != ~0) + printf("%#lx-%#lx\n", start, limit); + + return (0); +} diff --git a/sys/boot/ia64/efi/main.c b/sys/boot/ia64/efi/main.c index 485a26d..ec12b42 100644 --- a/sys/boot/ia64/efi/main.c +++ b/sys/boot/ia64/efi/main.c @@ -153,9 +153,7 @@ main(int argc, CHAR16 *argv[]) */ cons_probe(); - printf("\n"); - printf("%s, Revision %s\n", bootprog_name, bootprog_rev); - printf("(%s, %s)\n", bootprog_maker, bootprog_date); + printf("\n%s, Revision %s\n", bootprog_name, bootprog_rev); find_pal_proc(); @@ -214,6 +212,18 @@ static int command_quit(int argc, char *argv[]) { exit(0); + /* NOTREACHED */ + return (CMD_OK); +} + +COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); + +static int +command_reboot(int argc, char *argv[]) +{ + + RS->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, NULL); + /* NOTREACHED */ return (CMD_OK); } @@ -585,3 +595,24 @@ command_hcdp(int argc, char *argv[]) printf("<EOT>\n"); return (CMD_OK); } + +COMMAND_SET(about, "about", "about the loader", command_about); + +extern uint64_t _start_plabel[]; + +static int +command_about(int argc, char *argv[]) +{ + EFI_LOADED_IMAGE *img; + + printf("%s\n", bootprog_name); + printf("revision %s\n", bootprog_rev); + printf("built by %s\n", bootprog_maker); + printf("built on %s\n", bootprog_date); + + printf("\n"); + + BS->HandleProtocol(IH, &imgid, (VOID**)&img); + printf("image loaded at %p\n", img->ImageBase); + printf("entry at %#lx (%#lx)\n", _start_plabel[0], _start_plabel[1]); +} diff --git a/sys/boot/ia64/efi/version b/sys/boot/ia64/efi/version index 3a947c8..17d14ea 100644 --- a/sys/boot/ia64/efi/version +++ b/sys/boot/ia64/efi/version @@ -3,6 +3,8 @@ $FreeBSD$ NOTE ANY CHANGES YOU MAKE TO THE BOOTBLOCKS HERE. The format of this file is important. Make sure the current version number is on line 6. 
+3.1: Add the about, reboot and pbvm commands. + I-cache coherency is maintained. 3.0: Add support for PBVM. 2.2: Create direct mapping based on start address instead of mapping first 256M. diff --git a/sys/boot/pc98/loader/Makefile b/sys/boot/pc98/loader/Makefile index e1f47cd..d289cce 100644 --- a/sys/boot/pc98/loader/Makefile +++ b/sys/boot/pc98/loader/Makefile @@ -82,6 +82,8 @@ loader.help: help.common help.pc98 .PATH: ${.CURDIR}/../../forth FILES= loader loader.help loader.4th support.4th loader.conf FILES+= screen.4th frames.4th beastie.4th +FILES+= brand.4th check-password.4th color.4th delay.4th +FILES+= menu.4th menu-commands.4th shortcuts.4th version.4th # XXX INSTALLFLAGS_loader= -b FILESMODE_loader= ${BINMODE} -b FILESDIR_loader.conf= /boot/defaults @@ -90,6 +92,10 @@ FILESDIR_loader.conf= /boot/defaults FILES+= ${.CURDIR}/../../i386/loader/loader.rc .endif +.if !exists(${DESTDIR}/boot/menu.rc) +FILES+= menu.rc +.endif + # XXX crt0.o needs to be first for pxeboot(8) to work OBJS= ${BTXCRT} diff --git a/sys/boot/powerpc/ofw/Makefile b/sys/boot/powerpc/ofw/Makefile index 776f98e..079f552 100644 --- a/sys/boot/powerpc/ofw/Makefile +++ b/sys/boot/powerpc/ofw/Makefile @@ -103,10 +103,17 @@ loader.help: help.common help.ofw .PATH: ${.CURDIR}/../../forth FILES= loader.help loader.4th support.4th loader.conf +FILES+= screen.4th frames.4th +FILES+= beastie.4th brand.4th check-password.4th color.4th delay.4th +FILES+= menu.4th menu-commands.4th shortcuts.4th version.4th FILESDIR_loader.conf= /boot/defaults .if !exists(${DESTDIR}/boot/loader.rc) FILES+= loader.rc .endif +.if !exists(${DESTDIR}/boot/menu.rc) +FILES+= menu.rc +.endif + .include <bsd.prog.mk> diff --git a/sys/boot/powerpc/ps3/Makefile b/sys/boot/powerpc/ps3/Makefile index b3a37be..b05470b 100644 --- a/sys/boot/powerpc/ps3/Makefile +++ b/sys/boot/powerpc/ps3/Makefile @@ -113,10 +113,17 @@ loader.help: help.common help.ps3 .PATH: ${.CURDIR}/../../forth FILES= loader.help loader.4th support.4th 
loader.conf +FILES+= screen.4th frames.4th +FILES+= beastie.4th brand.4th check-password.4th color.4th delay.4th +FILES+= menu.4th menu-commands.4th shortcuts.4th version.4th FILESDIR_loader.conf= /boot/defaults .if !exists(${DESTDIR}/boot/loader.rc) FILES+= loader.rc .endif +.if !exists(${DESTDIR}/boot/menu.rc) +FILES+= menu.rc +.endif + .include <bsd.prog.mk> diff --git a/sys/boot/sparc64/loader/Makefile b/sys/boot/sparc64/loader/Makefile index 46c6baa..d32fbab 100644 --- a/sys/boot/sparc64/loader/Makefile +++ b/sys/boot/sparc64/loader/Makefile @@ -84,10 +84,17 @@ loader.help: help.common help.sparc64 .PATH: ${.CURDIR}/../../forth FILES= loader.help loader.4th support.4th loader.conf +FILES+= screen.4th frames.4th +FILES+= beastie.4th brand.4th check-password.4th color.4th delay.4th +FILES+= menu.4th menu-commands.4th shortcuts.4th version.4th FILESDIR_loader.conf= /boot/defaults .if !exists(${DESTDIR}/boot/loader.rc) FILES+= loader.rc .endif +.if !exists(${DESTDIR}/boot/menu.rc) +FILES+= menu.rc +.endif + .include <bsd.prog.mk> diff --git a/sys/cam/ata/ata_all.c b/sys/cam/ata/ata_all.c index 3737e8f..560eef4 100644 --- a/sys/cam/ata/ata_all.c +++ b/sys/cam/ata/ata_all.c @@ -270,6 +270,7 @@ ata_print_ident(struct ata_params *ident_data) sizeof(revision)); printf("<%s %s> %s-%d", product, revision, + (ident_data->config == ATA_PROTO_CFA) ? "CFA" : (ident_data->config & ATA_PROTO_ATAPI) ? 
"ATAPI" : "ATA", ata_version(ident_data->version_major)); if (ident_data->satacapabilities && ident_data->satacapabilities != 0xffff) { diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 128bb8b..ed0dbef 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -89,7 +89,8 @@ typedef enum { } ada_flags; typedef enum { - ADA_Q_NONE = 0x00 + ADA_Q_NONE = 0x00, + ADA_Q_4K = 0x01, } ada_quirks; typedef enum { @@ -113,11 +114,12 @@ struct disk_params { u_int64_t sectors; /* Total number sectors */ }; -#define TRIM_MAX_BLOCKS 4 -#define TRIM_MAX_RANGES TRIM_MAX_BLOCKS * 64 +#define TRIM_MAX_BLOCKS 8 +#define TRIM_MAX_RANGES (TRIM_MAX_BLOCKS * 64) +#define TRIM_MAX_BIOS (TRIM_MAX_RANGES * 4) struct trim_request { uint8_t data[TRIM_MAX_RANGES * 8]; - struct bio *bps[TRIM_MAX_RANGES]; + struct bio *bps[TRIM_MAX_BIOS]; }; struct ada_softc { @@ -154,6 +156,86 @@ struct ada_quirk_entry { static struct ada_quirk_entry ada_quirk_table[] = { { + /* Hitachi Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Hitachi H??????????E3*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* Samsung Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "SAMSUNG HD204UI*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* Seagate Barracuda Green Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST????DL*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* Seagate Momentus Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9500423AS*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* Seagate Momentus Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9500424AS*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* Seagate Momentus Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9750420AS*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* Seagate Momentus Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9750422AS*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* Seagate Momentus Thin Advanced Format 
(4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST???LT*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Caviar Green Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????RS*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Caviar Green Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????RX*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Caviar Green Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????RS*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Caviar Green Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????RX*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Scorpio Black Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD???PKT*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Scorpio Black Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD?????PKT*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Scorpio Blue Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD???PVT*", "*" }, + /*quirks*/ADA_Q_4K + }, + { + /* WDC Scorpio Blue Advanced Format (4k) drives */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD?????PVT*", "*" }, + /*quirks*/ADA_Q_4K + }, + { /* Default */ { T_ANY, SIP_MEDIA_REMOVABLE|SIP_MEDIA_FIXED, @@ -730,6 +812,25 @@ adasysctlinit(void *context, int pending) cam_periph_release(periph); } +static int +adagetattr(struct bio *bp) +{ + int ret = -1; + struct cam_periph *periph; + + if (bp->bio_disk == NULL || bp->bio_disk->d_drv1 == NULL) + return ENXIO; + periph = (struct cam_periph *)bp->bio_disk->d_drv1; + if (periph->path == NULL) + return ENXIO; + + ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, + periph->path); + if (ret == 0) + bp->bio_completed = bp->bio_length; + return ret; +} + static cam_status adaregister(struct cam_periph *periph, void *arg) { @@ -740,7 +841,7 @@ adaregister(struct cam_periph *periph, void *arg) struct disk_params *dp; 
caddr_t match; u_int maxio; - int legacy_id; + int legacy_id, quirks; cgd = (struct ccb_getdev *)arg; if (periph == NULL) { @@ -815,6 +916,11 @@ adaregister(struct cam_periph *periph, void *arg) */ (void)cam_periph_hold(periph, PRIBIO); mtx_unlock(periph->sim->mtx); + snprintf(announce_buf, sizeof(announce_buf), + "kern.cam.ada.%d.quirks", periph->unit_number); + quirks = softc->quirks; + TUNABLE_INT_FETCH(announce_buf, &quirks); + softc->quirks = quirks; softc->write_cache = -1; snprintf(announce_buf, sizeof(announce_buf), "kern.cam.ada.%d.write_cache", periph->unit_number); @@ -830,6 +936,7 @@ adaregister(struct cam_periph *periph, void *arg) softc->disk->d_open = adaopen; softc->disk->d_close = adaclose; softc->disk->d_strategy = adastrategy; + softc->disk->d_getattr = adagetattr; softc->disk->d_dump = adadump; softc->disk->d_name = "ada"; softc->disk->d_drv1 = periph; @@ -851,8 +958,6 @@ adaregister(struct cam_periph *periph, void *arg) ((softc->flags & ADA_FLAG_CAN_CFA) && !(softc->flags & ADA_FLAG_CAN_48BIT))) softc->disk->d_flags |= DISKFLAG_CANDELETE; - strlcpy(softc->disk->d_ident, cgd->serial_num, - MIN(sizeof(softc->disk->d_ident), cgd->serial_num_len + 1)); strlcpy(softc->disk->d_descr, cgd->ident_data.model, MIN(sizeof(softc->disk->d_descr), sizeof(cgd->ident_data.model))); softc->disk->d_hba_vendor = cpi.hba_vendor; @@ -870,6 +975,9 @@ adaregister(struct cam_periph *periph, void *arg) softc->disk->d_stripeoffset = (softc->disk->d_stripesize - ata_logical_sector_offset(&cgd->ident_data)) % softc->disk->d_stripesize; + } else if (softc->quirks & ADA_Q_4K) { + softc->disk->d_stripesize = 4096; + softc->disk->d_stripeoffset = 0; } softc->disk->d_fwsectors = softc->params.secs_per_track; softc->disk->d_fwheads = softc->params.heads; @@ -978,7 +1086,8 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) (bp = bioq_first(&softc->trim_queue)) != 0) { struct trim_request *req = &softc->trim_req; struct bio *bp1; - int bps = 0, ranges = 0; + uint64_t 
lastlba = (uint64_t)-1; + int bps = 0, c, lastcount = 0, off, ranges = 0; softc->trim_running = 1; bzero(req, sizeof(*req)); @@ -989,10 +1098,22 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) softc->params.secsize; bioq_remove(&softc->trim_queue, bp1); - while (count > 0) { - int c = min(count, 0xffff); - int off = ranges * 8; + /* Try to extend the previous range. */ + if (lba == lastlba) { + c = min(count, 0xffff - lastcount); + lastcount += c; + off = (ranges - 1) * 8; + req->data[off + 6] = lastcount & 0xff; + req->data[off + 7] = + (lastcount >> 8) & 0xff; + count -= c; + lba += c; + } + + while (count > 0) { + c = min(count, 0xffff); + off = ranges * 8; req->data[off + 0] = lba & 0xff; req->data[off + 1] = (lba >> 8) & 0xff; req->data[off + 2] = (lba >> 16) & 0xff; @@ -1003,11 +1124,14 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) req->data[off + 7] = (c >> 8) & 0xff; lba += c; count -= c; + lastcount = c; ranges++; } + lastlba = lba; req->bps[bps++] = bp1; bp1 = bioq_first(&softc->trim_queue); - if (bp1 == NULL || + if (bps >= TRIM_MAX_BIOS || + bp1 == NULL || bp1->bio_bcount / softc->params.secsize > (softc->trim_max_ranges - ranges) * 0xffff) break; @@ -1281,8 +1405,7 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) (struct trim_request *)ataio->data_ptr; int i; - for (i = 1; i < softc->trim_max_ranges && - req->bps[i]; i++) { + for (i = 1; i < TRIM_MAX_BIOS && req->bps[i]; i++) { struct bio *bp1 = req->bps[i]; bp1->bio_resid = bp->bio_resid; diff --git a/sys/cam/ata/ata_xpt.c b/sys/cam/ata/ata_xpt.c index 9e236a4..d02b36f 100644 --- a/sys/cam/ata/ata_xpt.c +++ b/sys/cam/ata/ata_xpt.c @@ -1583,12 +1583,14 @@ ata_device_transport(struct cam_path *path) cts.proto_specific.valid = 0; if (ident_buf) { if (path->device->transport == XPORT_ATA) { - cts.xport_specific.ata.atapi = + cts.xport_specific.ata.atapi = + (ident_buf->config == ATA_PROTO_CFA) ? 0 : ((ident_buf->config & ATA_PROTO_MASK) == ATA_PROTO_ATAPI_16) ? 
16 : ((ident_buf->config & ATA_PROTO_MASK) == ATA_PROTO_ATAPI_12) ? 12 : 0; cts.xport_specific.ata.valid = CTS_ATA_VALID_ATAPI; } else { - cts.xport_specific.sata.atapi = + cts.xport_specific.sata.atapi = + (ident_buf->config == ATA_PROTO_CFA) ? 0 : ((ident_buf->config & ATA_PROTO_MASK) == ATA_PROTO_ATAPI_16) ? 16 : ((ident_buf->config & ATA_PROTO_MASK) == ATA_PROTO_ATAPI_12) ? 12 : 0; cts.xport_specific.sata.valid = CTS_SATA_VALID_ATAPI; @@ -1638,7 +1640,9 @@ ata_action(union ccb *start_ccb) uint16_t p = device->ident_data.config & ATA_PROTO_MASK; - maxlen = (p == ATA_PROTO_ATAPI_16) ? 16 : + maxlen = + (device->ident_data.config == ATA_PROTO_CFA) ? 0 : + (p == ATA_PROTO_ATAPI_16) ? 16 : (p == ATA_PROTO_ATAPI_12) ? 12 : 0; } if (start_ccb->csio.cdb_len > maxlen) { diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h index 981a5ed..ed2a890 100644 --- a/sys/cam/cam_ccb.h +++ b/sys/cam/cam_ccb.h @@ -144,8 +144,8 @@ typedef enum { /* Device statistics (error counts, etc.) */ XPT_FREEZE_QUEUE = 0x0d, /* Freeze device queue */ - XPT_GDEV_ADVINFO = 0x0e, - /* Advanced device information */ + XPT_DEV_ADVINFO = 0x0e, + /* Get/Set Device advanced information */ /* SCSI Control Functions: 0x10->0x1F */ XPT_ABORT = 0x10, /* Abort the specified CCB */ @@ -391,15 +391,24 @@ typedef enum { DEV_MATCH_TARGET = 0x002, DEV_MATCH_LUN = 0x004, DEV_MATCH_INQUIRY = 0x008, + DEV_MATCH_DEVID = 0x010, DEV_MATCH_ANY = 0x00f } dev_pattern_flags; +struct device_id_match_pattern { + uint8_t id_len; + uint8_t id[256]; +}; + struct device_match_pattern { - path_id_t path_id; - target_id_t target_id; - lun_id_t target_lun; - struct scsi_static_inquiry_pattern inq_pat; - dev_pattern_flags flags; + path_id_t path_id; + target_id_t target_id; + lun_id_t target_lun; + dev_pattern_flags flags; + union { + struct scsi_static_inquiry_pattern inq_pat; + struct device_id_match_pattern devid_pat; + } data; }; typedef enum { @@ -745,6 +754,7 @@ struct ccb_relsim { * Definitions for the asynchronous callback 
CCB fields. */ typedef enum { + AC_ADVINFO_CHANGED = 0x2000,/* Advanced info might have changed */ AC_CONTRACT = 0x1000,/* A contractual callback */ AC_GETDEV_CHANGED = 0x800,/* Getdev info might have changed */ AC_INQ_CHANGED = 0x400,/* Inquiry info might have changed */ @@ -1094,19 +1104,20 @@ struct ccb_eng_exec { /* This structure must match SCSIIO size */ #define XPT_CCB_INVALID -1 /* for signaling a bad CCB to free */ /* - * CCB for getting advanced device information. This operates in a fashion + * CCB for working with advanced device information. This operates in a fashion * similar to XPT_GDEV_TYPE. Specify the target in ccb_h, the buffer * type requested, and provide a buffer size/buffer to write to. If the - * buffer is too small, the handler will set GDEVAI_FLAG_MORE. + * buffer is too small, provsiz will be larger than bufsiz. */ -struct ccb_getdev_advinfo { +struct ccb_dev_advinfo { struct ccb_hdr ccb_h; uint32_t flags; -#define CGDAI_FLAG_TRANSPORT 0x1 -#define CGDAI_FLAG_PROTO 0x2 +#define CDAI_FLAG_STORE 0x1 /* If set, action becomes store */ uint32_t buftype; /* IN: Type of data being requested */ /* NB: buftype is interpreted on a per-transport basis */ -#define CGDAI_TYPE_SCSI_DEVID 1 +#define CDAI_TYPE_SCSI_DEVID 1 +#define CDAI_TYPE_SERIAL_NUM 2 +#define CDAI_TYPE_PHYS_PATH 3 off_t bufsiz; /* IN: Size of external buffer */ #define CAM_SCSI_DEVID_MAXLEN 65536 /* length in buffer is an uint16_t */ off_t provsiz; /* OUT: Size required/used */ @@ -1151,7 +1162,7 @@ union ccb { struct ccb_rescan crcn; struct ccb_debug cdbg; struct ccb_ataio ataio; - struct ccb_getdev_advinfo cgdai; + struct ccb_dev_advinfo cdai; }; __BEGIN_DECLS diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index dd51bca..f630772 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/devicestat.h> #include <sys/bus.h> +#include <sys/sbuf.h> #include <vm/vm.h> #include <vm/vm_extern.h> @@ 
-303,6 +304,38 @@ cam_periph_find(struct cam_path *path, char *name) return(NULL); } +/* + * Append to sb a comma-separated list of the names (driver name followed by + * unit number) of every peripheral whose path compares equal to the + * specified path, and return the number of peripherals listed. + */ +int +cam_periph_list(struct cam_path *path, struct sbuf *sb) +{ + struct periph_driver **p_drv; + struct cam_periph *periph; + int count; + + count = 0; + xpt_lock_buses(); + for (p_drv = periph_drivers; *p_drv != NULL; p_drv++) { + + TAILQ_FOREACH(periph, &(*p_drv)->units, unit_links) { + if (xpt_path_comp(periph->path, path) != 0) + continue; + + if (sbuf_len(sb) != 0) + sbuf_cat(sb, ","); + + sbuf_printf(sb, "%s%d", periph->periph_name, + periph->unit_number); + count++; + } + } + xpt_unlock_buses(); + return (count); +} + cam_status cam_periph_acquire(struct cam_periph *periph) { @@ -654,12 +687,12 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo) dirs[1] = CAM_DIR_IN; numbufs = 2; break; - case XPT_GDEV_ADVINFO: - if (ccb->cgdai.bufsiz == 0) + case XPT_DEV_ADVINFO: + if (ccb->cdai.bufsiz == 0) return (0); - data_ptrs[0] = (uint8_t **)&ccb->cgdai.buf; - lengths[0] = ccb->cgdai.bufsiz; + data_ptrs[0] = (uint8_t **)&ccb->cdai.buf; + lengths[0] = ccb->cdai.bufsiz; dirs[0] = CAM_DIR_IN; numbufs = 1; @@ -813,9 +846,9 @@ cam_periph_unmapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo) data_ptrs[0] = &ccb->smpio.smp_request; data_ptrs[1] = &ccb->smpio.smp_response; break; - case XPT_GDEV_ADVINFO: + case XPT_DEV_ADVINFO: numbufs = min(mapinfo->num_bufs_used, 1); - data_ptrs[0] = (uint8_t **)&ccb->cgdai.buf; + data_ptrs[0] = (uint8_t **)&ccb->cdai.buf; break; default: /* allow ourselves to be swapped once again */ diff --git a/sys/cam/cam_periph.h b/sys/cam/cam_periph.h index 33e9f75..58bfd7b 100644 --- a/sys/cam/cam_periph.h +++ b/sys/cam/cam_periph.h @@ -142,6 +142,7 @@ cam_status cam_periph_alloc(periph_ctor_t *periph_ctor, char *name, 
cam_periph_type type, struct cam_path *, ac_callback_t *, ac_code, void *arg); struct cam_periph *cam_periph_find(struct cam_path *path, char *name); +int cam_periph_list(struct cam_path *, struct sbuf *); cam_status cam_periph_acquire(struct cam_periph *periph); void cam_periph_release(struct cam_periph *periph); void cam_periph_release_locked(struct cam_periph *periph); @@ -200,5 +201,12 @@ cam_periph_owned(struct cam_periph *periph) return (mtx_owned(periph->sim->mtx)); } +static __inline int +cam_periph_sleep(struct cam_periph *periph, void *chan, int priority, + const char *wmesg, int timo) +{ + return (msleep(chan, periph->sim->mtx, priority, wmesg, timo)); +} + #endif /* _KERNEL */ #endif /* _CAM_CAM_PERIPH_H */ diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c index 1ce205d..f234076 100644 --- a/sys/cam/cam_xpt.c +++ b/sys/cam/cam_xpt.c @@ -287,9 +287,6 @@ static xpt_targetfunc_t xptdeftargetfunc; static xpt_devicefunc_t xptdefdevicefunc; static xpt_periphfunc_t xptdefperiphfunc; static void xpt_finishconfig_task(void *context, int pending); -static int xpt_for_all_busses(xpt_busfunc_t *tr_func, void *arg); -static int xpt_for_all_devices(xpt_devicefunc_t *tr_func, - void *arg); static void xpt_dev_async_default(u_int32_t async_code, struct cam_eb *bus, struct cam_et *target, @@ -1105,6 +1102,44 @@ xpt_announce_periph(struct cam_periph *periph, char *announce_string) periph->unit_number, announce_string); } +int +xpt_getattr(char *buf, size_t len, const char *attr, struct cam_path *path) +{ + int ret = -1; + struct ccb_dev_advinfo cdai; + + memset(&cdai, 0, sizeof(cdai)); + xpt_setup_ccb(&cdai.ccb_h, path, CAM_PRIORITY_NORMAL); + cdai.ccb_h.func_code = XPT_DEV_ADVINFO; + cdai.bufsiz = len; + + if (!strcmp(attr, "GEOM::ident")) + cdai.buftype = CDAI_TYPE_SERIAL_NUM; + else if (!strcmp(attr, "GEOM::physpath")) + cdai.buftype = CDAI_TYPE_PHYS_PATH; + else + goto out; + + cdai.buf = malloc(cdai.bufsiz, M_CAMXPT, M_NOWAIT|M_ZERO); + if (cdai.buf == NULL) { + 
ret = ENOMEM; + goto out; + } + xpt_action((union ccb *)&cdai); /* can only be synchronous */ + if ((cdai.ccb_h.status & CAM_DEV_QFRZN) != 0) + cam_release_devq(cdai.ccb_h.path, 0, 0, 0, FALSE); + if (cdai.provsiz == 0) + goto out; + ret = 0; + if (strlcpy(buf, cdai.buf, len) >= len) + ret = EFAULT; + +out: + if (cdai.buf != NULL) + free(cdai.buf, M_CAMXPT); + return ret; +} + static dev_match_ret xptbusmatch(struct dev_match_pattern *patterns, u_int num_patterns, struct cam_eb *bus) @@ -1241,6 +1276,7 @@ xptdevicematch(struct dev_match_pattern *patterns, u_int num_patterns, for (i = 0; i < num_patterns; i++) { struct device_match_pattern *cur_pattern; + struct scsi_vpd_device_id *device_id_page; /* * If the pattern in question isn't for a device node, we @@ -1255,22 +1291,17 @@ xptdevicematch(struct dev_match_pattern *patterns, u_int num_patterns, cur_pattern = &patterns[i].pattern.device_pattern; + /* Error out if mutually exclusive options are specified. */ + if ((cur_pattern->flags & (DEV_MATCH_INQUIRY|DEV_MATCH_DEVID)) + == (DEV_MATCH_INQUIRY|DEV_MATCH_DEVID)) + return(DM_RET_ERROR); + /* * If they want to match any device node, we give them any * device node. */ - if (cur_pattern->flags == DEV_MATCH_ANY) { - /* set the copy flag */ - retval |= DM_RET_COPY; - - - /* - * If we've already decided on an action, go ahead - * and return. - */ - if ((retval & DM_RET_ACTION_MASK) != DM_RET_NONE) - return(retval); - } + if (cur_pattern->flags == DEV_MATCH_ANY) + goto copy_dev_node; /* * Not sure why someone would do this... 
@@ -1292,11 +1323,22 @@ xptdevicematch(struct dev_match_pattern *patterns, u_int num_patterns, if (((cur_pattern->flags & DEV_MATCH_INQUIRY) != 0) && (cam_quirkmatch((caddr_t)&device->inq_data, - (caddr_t)&cur_pattern->inq_pat, - 1, sizeof(cur_pattern->inq_pat), + (caddr_t)&cur_pattern->data.inq_pat, + 1, sizeof(cur_pattern->data.inq_pat), scsi_static_inquiry_match) == NULL)) continue; + device_id_page = (struct scsi_vpd_device_id *)device->device_id; + if (((cur_pattern->flags & DEV_MATCH_DEVID) != 0) + && (device->device_id_len < SVPD_DEVICE_ID_HDR_LEN + || scsi_devid_match((uint8_t *)device_id_page->desc_list, + device->device_id_len + - SVPD_DEVICE_ID_HDR_LEN, + cur_pattern->data.devid_pat.id, + cur_pattern->data.devid_pat.id_len) != 0)) + continue; + +copy_dev_node: /* * If we get to this point, the user definitely wants * information on this device. So tell the caller to copy @@ -2889,6 +2931,8 @@ xpt_action_default(union ccb *start_ccb) case XPT_TERM_IO: case XPT_ENG_INQ: /* XXX Implement */ + printf("%s: CCB type %#x not supported\n", __func__, + start_ccb->ccb_h.func_code); start_ccb->ccb_h.status = CAM_PROVIDE_FAIL; if (start_ccb->ccb_h.func_code & XPT_FC_DEV_QUEUED) { xpt_done(start_ccb); @@ -3528,16 +3572,12 @@ xpt_path_string(struct cam_path *path, char *str, size_t str_len) path_id_t xpt_path_path_id(struct cam_path *path) { - mtx_assert(path->bus->sim->mtx, MA_OWNED); - return(path->bus->path_id); } target_id_t xpt_path_target_id(struct cam_path *path) { - mtx_assert(path->bus->sim->mtx, MA_OWNED); - if (path->target != NULL) return (path->target->target_id); else @@ -3547,8 +3587,6 @@ xpt_path_target_id(struct cam_path *path) lun_id_t xpt_path_lun_id(struct cam_path *path) { - mtx_assert(path->bus->sim->mtx, MA_OWNED); - if (path->device != NULL) return (path->device->lun_id); else @@ -4242,7 +4280,8 @@ xpt_alloc_target(struct cam_eb *bus, target_id_t target_id) { struct cam_et *target; - target = (struct cam_et *)malloc(sizeof(*target), M_CAMXPT, 
M_NOWAIT); + target = (struct cam_et *)malloc(sizeof(*target), M_CAMXPT, + M_NOWAIT|M_ZERO); if (target != NULL) { struct cam_et *cur_target; @@ -4330,7 +4369,7 @@ xpt_alloc_device(struct cam_eb *bus, struct cam_et *target, lun_id_t lun_id) device = NULL; } else { device = (struct cam_ed *)malloc(sizeof(*device), - M_CAMXPT, M_NOWAIT); + M_CAMXPT, M_NOWAIT|M_ZERO); } if (device != NULL) { @@ -4676,27 +4715,29 @@ xpt_register_async(int event, ac_callback_t *cbfunc, void *cbarg, csa.callback_arg = cbarg; xpt_action((union ccb *)&csa); status = csa.ccb_h.status; + if (xptpath) { xpt_free_path(path); mtx_unlock(&xsoftc.xpt_lock); + } - if ((status == CAM_REQ_CMP) && - (csa.event_enable & AC_FOUND_DEVICE)) { - /* - * Get this peripheral up to date with all - * the currently existing devices. - */ - xpt_for_all_devices(xptsetasyncfunc, &csa); - } - if ((status == CAM_REQ_CMP) && - (csa.event_enable & AC_PATH_REGISTERED)) { - /* - * Get this peripheral up to date with all - * the currently existing busses. - */ - xpt_for_all_busses(xptsetasyncbusfunc, &csa); - } + if ((status == CAM_REQ_CMP) && + (csa.event_enable & AC_FOUND_DEVICE)) { + /* + * Get this peripheral up to date with all + * the currently existing devices. + */ + xpt_for_all_devices(xptsetasyncfunc, &csa); } + if ((status == CAM_REQ_CMP) && + (csa.event_enable & AC_PATH_REGISTERED)) { + /* + * Get this peripheral up to date with all + * the currently existing busses. 
+ */ + xpt_for_all_busses(xptsetasyncbusfunc, &csa); + } + return (status); } @@ -4852,8 +4893,10 @@ camisr_runqueue(void *V_queue) if ((dev->flags & CAM_DEV_TAG_AFTER_COUNT) != 0 && (--dev->tag_delay_count == 0)) xpt_start_tags(ccb_h->path); - if (!device_is_send_queued(dev)) - xpt_schedule_dev_sendq(ccb_h->path->bus, dev); + if (!device_is_send_queued(dev)) { + runq = xpt_schedule_dev_sendq(ccb_h->path->bus, + dev); + } } if (ccb_h->status & CAM_RELEASE_SIMQ) { diff --git a/sys/cam/cam_xpt.h b/sys/cam/cam_xpt.h index 9355be4..f7d9b42 100644 --- a/sys/cam/cam_xpt.h +++ b/sys/cam/cam_xpt.h @@ -103,6 +103,8 @@ cam_status xpt_create_path_unlocked(struct cam_path **new_path_ptr, struct cam_periph *perph, path_id_t path_id, target_id_t target_id, lun_id_t lun_id); +int xpt_getattr(char *buf, size_t len, const char *attr, + struct cam_path *path); void xpt_free_path(struct cam_path *path); int xpt_path_comp(struct cam_path *path1, struct cam_path *path2); diff --git a/sys/cam/cam_xpt_internal.h b/sys/cam/cam_xpt_internal.h index f485e37..b6e8f66 100644 --- a/sys/cam/cam_xpt_internal.h +++ b/sys/cam/cam_xpt_internal.h @@ -97,6 +97,8 @@ struct cam_ed { uint8_t supported_vpds_len; uint32_t device_id_len; uint8_t *device_id; + uint8_t physpath_len; + uint8_t *physpath; /* physical path string form */ struct ata_params ident_data; u_int8_t inq_flags; /* * Current settings for inquiry flags. 
diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c index 7ededa1..7361c42 100644 --- a/sys/cam/scsi/scsi_all.c +++ b/sys/cam/scsi/scsi_all.c @@ -3552,32 +3552,63 @@ scsi_calc_syncparam(u_int period) return (period/400); } -uint8_t * -scsi_get_sas_addr(struct scsi_vpd_device_id *id, uint32_t len) +int +scsi_devid_is_naa_ieee_reg(uint8_t *bufp) { - uint8_t *bufp, *buf_end; struct scsi_vpd_id_descriptor *descr; struct scsi_vpd_id_naa_basic *naa; - bufp = buf_end = (uint8_t *)id; - bufp += SVPD_DEVICE_ID_HDR_LEN; - buf_end += len; - while (bufp < buf_end) { - descr = (struct scsi_vpd_id_descriptor *)bufp; - bufp += SVPD_DEVICE_ID_DESC_HDR_LEN; - /* Right now, we only care about SAS NAA IEEE Reg addrs */ - if (((descr->id_type & SVPD_ID_PIV) != 0) - && (descr->proto_codeset >> SVPD_ID_PROTO_SHIFT) == - SCSI_PROTO_SAS - && (descr->id_type & SVPD_ID_TYPE_MASK) == SVPD_ID_TYPE_NAA){ - naa = (struct scsi_vpd_id_naa_basic *)bufp; - if ((naa->naa >> 4) == SVPD_ID_NAA_IEEE_REG) - return bufp; - } - bufp += descr->length; + descr = (struct scsi_vpd_id_descriptor *)bufp; + naa = (struct scsi_vpd_id_naa_basic *)descr->identifier; + if ((descr->id_type & SVPD_ID_TYPE_MASK) != SVPD_ID_TYPE_NAA) + return 0; + if (descr->length < sizeof(struct scsi_vpd_id_naa_ieee_reg)) + return 0; + if ((naa->naa >> SVPD_ID_NAA_NAA_SHIFT) != SVPD_ID_NAA_IEEE_REG) + return 0; + return 1; +} + +int +scsi_devid_is_sas_target(uint8_t *bufp) +{ + struct scsi_vpd_id_descriptor *descr; + + descr = (struct scsi_vpd_id_descriptor *)bufp; + if (!scsi_devid_is_naa_ieee_reg(bufp)) + return 0; + if ((descr->id_type & SVPD_ID_PIV) == 0) /* proto field reserved */ + return 0; + if ((descr->proto_codeset >> SVPD_ID_PROTO_SHIFT) != SCSI_PROTO_SAS) + return 0; + return 1; +} + +uint8_t * +scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t page_len, + scsi_devid_checkfn_t ck_fn) +{ + struct scsi_vpd_id_descriptor *desc; + uint8_t *page_end; + uint8_t *desc_buf_end; + + page_end = (uint8_t *)id + 
page_len; + if (page_end < id->desc_list) + return (NULL); + + desc_buf_end = MIN(id->desc_list + scsi_2btoul(id->length), page_end); + + for (desc = (struct scsi_vpd_id_descriptor *)id->desc_list; + desc->identifier <= desc_buf_end + && desc->identifier + desc->length <= desc_buf_end; + desc = (struct scsi_vpd_id_descriptor *)(desc->identifier + + desc->length)) { + + if (ck_fn == NULL || ck_fn((uint8_t *)desc) != 0) + return (desc->identifier); } - return NULL; + return (NULL); } void @@ -4174,6 +4205,77 @@ scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, timeout); } +void +scsi_receive_diagnostic_results(struct ccb_scsiio *csio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, union ccb*), + uint8_t tag_action, int pcv, uint8_t page_code, + uint8_t *data_ptr, uint16_t allocation_length, + uint8_t sense_len, uint32_t timeout) +{ + struct scsi_receive_diag *scsi_cmd; + + scsi_cmd = (struct scsi_receive_diag *)&csio->cdb_io.cdb_bytes; + memset(scsi_cmd, 0, sizeof(*scsi_cmd)); + scsi_cmd->opcode = RECEIVE_DIAGNOSTIC; + if (pcv) { + scsi_cmd->byte2 |= SRD_PCV; + scsi_cmd->page_code = page_code; + } + scsi_ulto2b(allocation_length, scsi_cmd->length); + + cam_fill_csio(csio, + retries, + cbfcnp, + /*flags*/CAM_DIR_IN, + tag_action, + data_ptr, + allocation_length, + sense_len, + sizeof(*scsi_cmd), + timeout); +} + +void +scsi_send_diagnostic(struct ccb_scsiio *csio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, union ccb *), + uint8_t tag_action, int unit_offline, int device_offline, + int self_test, int page_format, int self_test_code, + uint8_t *data_ptr, uint16_t param_list_length, + uint8_t sense_len, uint32_t timeout) +{ + struct scsi_send_diag *scsi_cmd; + + scsi_cmd = (struct scsi_send_diag *)&csio->cdb_io.cdb_bytes; + memset(scsi_cmd, 0, sizeof(*scsi_cmd)); + scsi_cmd->opcode = SEND_DIAGNOSTIC; + + /* + * The default self-test mode control and specific test + * control are mutually exclusive. 
+ */ + if (self_test) + self_test_code = SSD_SELF_TEST_CODE_NONE; + + scsi_cmd->byte2 = ((self_test_code << SSD_SELF_TEST_CODE_SHIFT) + & SSD_SELF_TEST_CODE_MASK) + | (unit_offline ? SSD_UNITOFFL : 0) + | (device_offline ? SSD_DEVOFFL : 0) + | (self_test ? SSD_SELFTEST : 0) + | (page_format ? SSD_PF : 0); + scsi_ulto2b(param_list_length, scsi_cmd->length); + + cam_fill_csio(csio, + retries, + cbfcnp, + /*flags*/param_list_length ? CAM_DIR_OUT : CAM_DIR_NONE, + tag_action, + data_ptr, + param_list_length, + sense_len, + sizeof(*scsi_cmd), + timeout); +} + void scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), @@ -4206,7 +4308,6 @@ scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries, sense_len, sizeof(*scsi_cmd), timeout); - } @@ -4264,6 +4365,66 @@ scsi_static_inquiry_match(caddr_t inqbuffer, caddr_t table_entry) return (-1); } +/** + * Compare two buffers of vpd device descriptors for a match. + * + * \param lhs Pointer to first buffer of descriptors to compare. + * \param lhs_len The length of the first buffer. + * \param rhs Pointer to second buffer of descriptors to compare. + * \param rhs_len The length of the second buffer. + * + * \return 0 on a match, -1 otherwise. + * + * Treat rhs and lhs as arrays of vpd device id descriptors. Walk lhs matching + * against each element in rhs until all data are exhausted or we have found + * a match. + */ +int +scsi_devid_match(uint8_t *lhs, size_t lhs_len, uint8_t *rhs, size_t rhs_len) +{ + struct scsi_vpd_id_descriptor *lhs_id; + struct scsi_vpd_id_descriptor *lhs_last; + struct scsi_vpd_id_descriptor *rhs_last; + uint8_t *lhs_end; + uint8_t *rhs_end; + + lhs_end = lhs + lhs_len; + rhs_end = rhs + rhs_len; + + /* + * rhs_last and lhs_last are the last possible position of a valid + * descriptor assuming it had a zero length identifier. We use + * these variables to ensure we can safely dereference the length + * field in our loop termination tests. 
+ */ + lhs_last = (struct scsi_vpd_id_descriptor *) + (lhs_end - __offsetof(struct scsi_vpd_id_descriptor, identifier)); + rhs_last = (struct scsi_vpd_id_descriptor *) + (rhs_end - __offsetof(struct scsi_vpd_id_descriptor, identifier)); + + lhs_id = (struct scsi_vpd_id_descriptor *)lhs; + while (lhs_id <= lhs_last + && (lhs_id->identifier + lhs_id->length) <= lhs_end) { + struct scsi_vpd_id_descriptor *rhs_id; + + rhs_id = (struct scsi_vpd_id_descriptor *)rhs; + while (rhs_id <= rhs_last + && (rhs_id->identifier + rhs_id->length) <= rhs_end) { + + if (rhs_id->length == lhs_id->length + && memcmp(rhs_id->identifier, lhs_id->identifier, + rhs_id->length) == 0) + return (0); + + rhs_id = (struct scsi_vpd_id_descriptor *) + (rhs_id->identifier + rhs_id->length); + } + lhs_id = (struct scsi_vpd_id_descriptor *) + (lhs_id->identifier + lhs_id->length); + } + return (-1); +} + #ifdef _KERNEL static void init_scsi_delay(void) diff --git a/sys/cam/scsi/scsi_all.h b/sys/cam/scsi/scsi_all.h index 0a7a58f..93b11d5 100644 --- a/sys/cam/scsi/scsi_all.h +++ b/sys/cam/scsi/scsi_all.h @@ -115,6 +115,7 @@ struct scsi_request_sense { u_int8_t opcode; u_int8_t byte2; +#define SRS_DESC 0x01 u_int8_t unused[2]; u_int8_t length; u_int8_t control; @@ -128,17 +129,33 @@ struct scsi_test_unit_ready u_int8_t control; }; -struct scsi_send_diag -{ - u_int8_t opcode; - u_int8_t byte2; -#define SSD_UOL 0x01 -#define SSD_DOL 0x02 -#define SSD_SELFTEST 0x04 -#define SSD_PF 0x10 - u_int8_t unused[1]; - u_int8_t paramlen[2]; - u_int8_t control; +struct scsi_receive_diag { + uint8_t opcode; + uint8_t byte2; +#define SRD_PCV 0x01 + uint8_t page_code; + uint8_t length[2]; + uint8_t control; +}; + +struct scsi_send_diag { + uint8_t opcode; + uint8_t byte2; +#define SSD_UNITOFFL 0x01 +#define SSD_DEVOFFL 0x02 +#define SSD_SELFTEST 0x04 +#define SSD_PF 0x10 +#define SSD_SELF_TEST_CODE_MASK 0xE0 +#define SSD_SELF_TEST_CODE_SHIFT 5 +#define SSD_SELF_TEST_CODE_NONE 0x00 +#define SSD_SELF_TEST_CODE_BG_SHORT 
0x01 +#define SSD_SELF_TEST_CODE_BG_EXTENDED 0x02 +#define SSD_SELF_TEST_CODE_BG_ABORT 0x04 +#define SSD_SELF_TEST_CODE_FG_SHORT 0x05 +#define SSD_SELF_TEST_CODE_FG_EXTENDED 0x06 + uint8_t reserved; + uint8_t length[2]; + uint8_t control; }; struct scsi_sense @@ -894,11 +911,12 @@ struct scsi_vpd_id_naa_basic uint8_t naa : 4; uint8_t naa_desig : 4; */ +#define SVPD_ID_NAA_NAA_SHIFT 4 #define SVPD_ID_NAA_IEEE_EXT 0x02 #define SVPD_ID_NAA_LOCAL_REG 0x03 #define SVPD_ID_NAA_IEEE_REG 0x05 #define SVPD_ID_NAA_IEEE_REG_EXT 0x06 - uint8_t naa_data[0]; + uint8_t naa_data[]; }; struct scsi_vpd_id_naa_ieee_extended_id @@ -1322,7 +1340,12 @@ void scsi_print_inquiry(struct scsi_inquiry_data *inq_data); u_int scsi_calc_syncsrate(u_int period_factor); u_int scsi_calc_syncparam(u_int period); -uint8_t * scsi_get_sas_addr(struct scsi_vpd_device_id *id, uint32_t len); + +typedef int (*scsi_devid_checkfn_t)(uint8_t *); +int scsi_devid_is_naa_ieee_reg(uint8_t *bufp); +int scsi_devid_is_sas_target(uint8_t *bufp); +uint8_t * scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t len, + scsi_devid_checkfn_t ck_fn); void scsi_test_unit_ready(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, @@ -1439,6 +1462,22 @@ void scsi_synchronize_cache(struct ccb_scsiio *csio, u_int32_t begin_lba, u_int16_t lb_count, u_int8_t sense_len, u_int32_t timeout); +void scsi_receive_diagnostic_results(struct ccb_scsiio *csio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, + union ccb*), + uint8_t tag_action, int pcv, + uint8_t page_code, uint8_t *data_ptr, + uint16_t allocation_length, + uint8_t sense_len, uint32_t timeout); + +void scsi_send_diagnostic(struct ccb_scsiio *csio, u_int32_t retries, + void (*cbfcnp)(struct cam_periph *, union ccb *), + uint8_t tag_action, int unit_offline, + int device_offline, int self_test, int page_format, + int self_test_code, uint8_t *data_ptr, + uint16_t param_list_length, uint8_t sense_len, + uint32_t timeout); + void 
scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int readop, u_int8_t byte2, @@ -1455,6 +1494,8 @@ void scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries, int scsi_inquiry_match(caddr_t inqbuffer, caddr_t table_entry); int scsi_static_inquiry_match(caddr_t inqbuffer, caddr_t table_entry); +int scsi_devid_match(uint8_t *rhs, size_t rhs_len, + uint8_t *lhs, size_t lhs_len); static __inline void scsi_extract_sense(struct scsi_sense_data *sense, int *error_code, int *sense_key, diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 9729878..a436318 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <sys/eventhandler.h> #include <sys/malloc.h> #include <sys/cons.h> +#include <geom/geom.h> #include <geom/geom_disk.h> #endif /* _KERNEL */ @@ -727,7 +728,8 @@ daclose(struct disk *dp) softc = (struct da_softc *)periph->softc; - if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0) { + if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0 + && (softc->flags & DA_FLAG_PACK_INVALID) == 0) { union ccb *ccb; ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); @@ -932,6 +934,25 @@ dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t leng return (0); } +static int +dagetattr(struct bio *bp) +{ + int ret = -1; + struct cam_periph *periph; + + if (bp->bio_disk == NULL || bp->bio_disk->d_drv1 == NULL) + return ENXIO; + periph = (struct cam_periph *)bp->bio_disk->d_drv1; + if (periph->path == NULL) + return ENXIO; + + ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, + periph->path); + if (ret == 0) + bp->bio_completed = bp->bio_length; + return ret; +} + static void dainit(void) { @@ -977,7 +998,8 @@ daoninvalidate(struct cam_periph *periph) bioq_flush(&softc->bio_queue, NULL, ENXIO); disk_gone(softc->disk); - xpt_print(periph->path, "lost device\n"); + xpt_print(periph->path, "lost device - 
%d outstanding\n", + softc->outstanding_cmds); } static void @@ -1044,6 +1066,20 @@ daasync(void *callback_arg, u_int32_t code, && status != CAM_REQ_INPROG) printf("daasync: Unable to attach to new device " "due to status 0x%x\n", status); + return; + } + case AC_ADVINFO_CHANGED: + { + uintptr_t buftype; + + buftype = (uintptr_t)arg; + if (buftype == CDAI_TYPE_PHYS_PATH) { + struct da_softc *softc; + + softc = periph->softc; + disk_attr_changed(softc->disk, "GEOM::physpath", + M_NOWAIT); + } break; } case AC_SENT_BDR: @@ -1060,12 +1096,12 @@ daasync(void *callback_arg, u_int32_t code, softc->flags |= DA_FLAG_RETRY_UA; LIST_FOREACH(ccbh, &softc->pending_ccbs, periph_links.le) ccbh->ccb_state |= DA_CCB_RETRY_UA; - /* FALLTHROUGH*/ + break; } default: - cam_periph_async(periph, code, path, arg); break; } + cam_periph_async(periph, code, path, arg); } static void @@ -1231,17 +1267,6 @@ daregister(struct cam_periph *periph, void *arg) TASK_INIT(&softc->sysctl_task, 0, dasysctlinit, periph); /* - * Add async callbacks for bus reset and - * bus device reset calls. I don't bother - * checking if this fails as, in most cases, - * the system will function just fine without - * them and the only alternative would be to - * not attach the device on failure. - */ - xpt_register_async(AC_SENT_BDR | AC_BUS_RESET | AC_LOST_DEVICE, - daasync, periph, periph->path); - - /* * Take an exclusive refcount on the periph while dastart is called * to finish the probe. The reference will be dropped in dadone at * the end of probe. 
@@ -1301,6 +1326,7 @@ daregister(struct cam_periph *periph, void *arg) softc->disk->d_close = daclose; softc->disk->d_strategy = dastrategy; softc->disk->d_dump = dadump; + softc->disk->d_getattr = dagetattr; softc->disk->d_name = "da"; softc->disk->d_drv1 = periph; if (cpi.maxio == 0) @@ -1313,8 +1339,6 @@ daregister(struct cam_periph *periph, void *arg) softc->disk->d_flags = 0; if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0) softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE; - strlcpy(softc->disk->d_ident, cgd->serial_num, - MIN(sizeof(softc->disk->d_ident), cgd->serial_num_len + 1)); cam_strvis(softc->disk->d_descr, cgd->inq_data.vendor, sizeof(cgd->inq_data.vendor), sizeof(softc->disk->d_descr)); strlcat(softc->disk->d_descr, " ", sizeof(softc->disk->d_descr)); @@ -1328,6 +1352,25 @@ daregister(struct cam_periph *periph, void *arg) disk_create(softc->disk, DISK_VERSION); mtx_lock(periph->sim->mtx); + /* + * Add async callbacks for events of interest. + * I don't bother checking if this fails as, + * in most cases, the system will function just + * fine without them and the only alternative + * would be to not attach the device on failure. + */ + xpt_register_async(AC_SENT_BDR | AC_BUS_RESET + | AC_LOST_DEVICE | AC_ADVINFO_CHANGED, + daasync, periph, periph->path); + + /* + * Emit an attribute changed notification just in case + * physical path information arrived before our async + * event handler was registered, but after anyone attaching + * to our disk device polled it. 
+ */ + disk_attr_changed(softc->disk, "GEOM::physpath", M_NOWAIT); + xpt_schedule(periph, CAM_PRIORITY_DEV); return(CAM_REQ_CMP); @@ -1558,7 +1601,7 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { int error; int sf; - + if ((csio->ccb_h.ccb_state & DA_CCB_RETRY_UA) != 0) sf = SF_RETRY_UA; else @@ -1573,8 +1616,17 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) return; } if (error != 0) { + int queued_error; + + /* + * return all queued I/O with EIO, so that + * the client can retry these I/Os in the + * proper order should it attempt to recover. + */ + queued_error = EIO; - if (error == ENXIO) { + if (error == ENXIO + && (softc->flags & DA_FLAG_PACK_INVALID)== 0) { /* * Catastrophic error. Mark our pack as * invalid. @@ -1586,14 +1638,10 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) xpt_print(periph->path, "Invalidating pack\n"); softc->flags |= DA_FLAG_PACK_INVALID; + queued_error = ENXIO; } - - /* - * return all queued I/O with EIO, so that - * the client can retry these I/Os in the - * proper order should it attempt to recover. 
- */ - bioq_flush(&softc->bio_queue, NULL, EIO); + bioq_flush(&softc->bio_queue, NULL, + queued_error); bp->bio_error = error; bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; @@ -1626,6 +1674,11 @@ dadone(struct cam_periph *periph, union ccb *done_ccb) if (softc->outstanding_cmds == 0) softc->flags |= DA_FLAG_WENT_IDLE; + if ((softc->flags & DA_FLAG_PACK_INVALID) != 0) { + xpt_print(periph->path, "oustanding %d\n", + softc->outstanding_cmds); + } + biodone(bp); break; } diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c index e7ecb35..a124468 100644 --- a/sys/cam/scsi/scsi_pass.c +++ b/sys/cam/scsi/scsi_pass.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <sys/errno.h> #include <sys/devicestat.h> #include <sys/proc.h> +#include <sys/taskqueue.h> #include <cam/cam.h> #include <cam/cam_ccb.h> @@ -70,12 +71,14 @@ typedef enum { #define ccb_bp ppriv_ptr1 struct pass_softc { - pass_state state; - pass_flags flags; - u_int8_t pd_type; - union ccb saved_ccb; - struct devstat *device_stats; - struct cdev *dev; + pass_state state; + pass_flags flags; + u_int8_t pd_type; + union ccb saved_ccb; + struct devstat *device_stats; + struct cdev *dev; + struct cdev *alias_dev; + struct task add_physpath_task; }; @@ -88,6 +91,7 @@ static periph_ctor_t passregister; static periph_oninv_t passoninvalidate; static periph_dtor_t passcleanup; static periph_start_t passstart; +static void pass_add_physpath(void *context, int pending); static void passasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg); static void passdone(struct cam_periph *periph, @@ -168,17 +172,45 @@ passcleanup(struct cam_periph *periph) if (bootverbose) xpt_print(periph->path, "removing device entry\n"); devstat_remove_entry(softc->device_stats); + cam_periph_unlock(periph); + taskqueue_drain(taskqueue_thread, &softc->add_physpath_task); + /* * passcleanup() is indirectly a d_close method via passclose, * so using destroy_dev(9) directly can result in deadlock. 
*/ destroy_dev_sched(softc->dev); cam_periph_lock(periph); + free(softc, M_DEVBUF); } static void +pass_add_physpath(void *context, int pending) +{ + struct cam_periph *periph; + struct pass_softc *softc; + char *physpath; + + /* + * If we have one, create a devfs alias for our + * physical path. + */ + periph = context; + softc = periph->softc; + physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK); + if (xpt_getattr(physpath, MAXPATHLEN, + "GEOM::physpath", periph->path) == 0 + && strlen(physpath) != 0) { + + make_dev_physpath_alias(MAKEDEV_WAITOK, &softc->alias_dev, + softc->dev, softc->alias_dev, physpath); + } + free(physpath, M_DEVBUF); +} + +static void passasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg) { @@ -219,6 +251,20 @@ passasync(void *callback_arg, u_int32_t code, break; } + case AC_ADVINFO_CHANGED: + { + uintptr_t buftype; + + buftype = (uintptr_t)arg; + if (buftype == CDAI_TYPE_PHYS_PATH) { + struct pass_softc *softc; + + softc = (struct pass_softc *)periph->softc; + taskqueue_enqueue(taskqueue_thread, + &softc->add_physpath_task); + } + break; + } default: cam_periph_async(periph, code, path, arg); break; @@ -292,11 +338,22 @@ passregister(struct cam_periph *periph, void *arg) mtx_lock(periph->sim->mtx); softc->dev->si_drv1 = periph; + TASK_INIT(&softc->add_physpath_task, /*priority*/0, + pass_add_physpath, periph); + + /* + * See if physical path information is already available. + */ + taskqueue_enqueue(taskqueue_thread, &softc->add_physpath_task); + /* - * Add an async callback so that we get - * notified if this device goes away. + * Add an async callback so that we get notified if + * this device goes away or its physical path + * (stored in the advanced info data of the EDT) has + * changed. 
*/ - xpt_register_async(AC_LOST_DEVICE, passasync, periph, periph->path); + xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED, + passasync, periph, periph->path); if (bootverbose) xpt_announce_periph(periph, NULL); @@ -548,8 +605,8 @@ passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb) && ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE)) || (ccb->ccb_h.func_code == XPT_DEV_MATCH) || (ccb->ccb_h.func_code == XPT_SMP_IO) - || ((ccb->ccb_h.func_code == XPT_GDEV_ADVINFO) - && (ccb->cgdai.bufsiz > 0)))) { + || ((ccb->ccb_h.func_code == XPT_DEV_ADVINFO) + && (ccb->cdai.bufsiz > 0)))) { bzero(&mapinfo, sizeof(mapinfo)); diff --git a/sys/cam/scsi/scsi_ses.h b/sys/cam/scsi/scsi_ses.h index 9925c1f..a52d517 100644 --- a/sys/cam/scsi/scsi_ses.h +++ b/sys/cam/scsi/scsi_ses.h @@ -101,6 +101,7 @@ typedef struct { #define SESTYP_UPS 0x0b #define SESTYP_DISPLAY 0x0c #define SESTYP_KEYPAD 0x0d +#define SESTYP_ENCLOSURE 0x0e #define SESTYP_SCSIXVR 0x0f #define SESTYP_LANGUAGE 0x10 #define SESTYP_COMPORT 0x11 @@ -109,6 +110,9 @@ typedef struct { #define SESTYP_SCSI_TGT 0x14 #define SESTYP_SCSI_INI 0x15 #define SESTYP_SUBENC 0x16 +#define SESTYP_ARRAY 0x17 +#define SESTYP_SASEXPANDER 0x18 +#define SESTYP_SASCONNECTOR 0x19 /* * Overall Enclosure Status diff --git a/sys/cam/scsi/scsi_xpt.c b/sys/cam/scsi/scsi_xpt.c index 2a38128..1b507ca 100644 --- a/sys/cam/scsi/scsi_xpt.c +++ b/sys/cam/scsi/scsi_xpt.c @@ -542,6 +542,7 @@ static const int scsi_quirk_table_size = static cam_status proberegister(struct cam_periph *periph, void *arg); static void probeschedule(struct cam_periph *probe_periph); +static int device_has_vpd(struct cam_ed *device, uint8_t page_id); static void probestart(struct cam_periph *periph, union ccb *start_ccb); static void proberequestdefaultnegotiation(struct cam_periph *periph); static int proberequestbackoff(struct cam_periph *periph, @@ -1460,7 +1461,7 @@ probedone(struct cam_periph *periph, union ccb *done_ccb) path->device->device_id 
= (uint8_t *)devid; } } else if (cam_periph_error(done_ccb, 0, - SF_RETRY_UA|SF_NO_PRINT, + SF_RETRY_UA, &softc->saved_ccb) == ERESTART) { return; } else if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { @@ -1506,9 +1507,9 @@ probe_device_check: (u_int8_t *)malloc((serial_buf->length + 1), M_CAMXPT, M_NOWAIT); if (path->device->serial_num != NULL) { - bcopy(serial_buf->serial_num, - path->device->serial_num, - serial_buf->length); + memcpy(path->device->serial_num, + serial_buf->serial_num, + serial_buf->length); path->device->serial_num_len = serial_buf->length; path->device->serial_num[serial_buf->length] @@ -2433,28 +2434,77 @@ scsi_devise_transport(struct cam_path *path) } static void -scsi_getdev_advinfo(union ccb *start_ccb) +scsi_dev_advinfo(union ccb *start_ccb) { struct cam_ed *device; - struct ccb_getdev_advinfo *cgdai; + struct ccb_dev_advinfo *cdai; off_t amt; device = start_ccb->ccb_h.path->device; - cgdai = &start_ccb->cgdai; - switch(cgdai->buftype) { - case CGDAI_TYPE_SCSI_DEVID: - cgdai->provsiz = device->device_id_len; + cdai = &start_ccb->cdai; + switch(cdai->buftype) { + case CDAI_TYPE_SCSI_DEVID: + if (cdai->flags & CDAI_FLAG_STORE) + break; + cdai->provsiz = device->device_id_len; if (device->device_id_len == 0) break; amt = device->device_id_len; - if (cgdai->provsiz > cgdai->bufsiz) - amt = cgdai->bufsiz; - bcopy(device->device_id, cgdai->buf, amt); + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, device->device_id, amt); + break; + case CDAI_TYPE_SERIAL_NUM: + if (cdai->flags & CDAI_FLAG_STORE) + break; + cdai->provsiz = device->serial_num_len; + if (device->serial_num_len == 0) + break; + amt = device->serial_num_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, device->serial_num, amt); + break; + case CDAI_TYPE_PHYS_PATH: + if (cdai->flags & CDAI_FLAG_STORE) { + if (device->physpath != NULL) + free(device->physpath, M_CAMXPT); + device->physpath_len = cdai->bufsiz; + /* 
Clear existing buffer if zero length */ + if (cdai->bufsiz == 0) + break; + device->physpath = malloc(cdai->bufsiz, M_CAMXPT, M_NOWAIT); + if (device->physpath == NULL) { + start_ccb->ccb_h.status = CAM_REQ_ABORTED; + return; + } + memcpy(device->physpath, cdai->buf, cdai->bufsiz); + } else { + cdai->provsiz = device->physpath_len; + if (device->physpath_len == 0) + break; + amt = device->physpath_len; + if (cdai->provsiz > cdai->bufsiz) + amt = cdai->bufsiz; + memcpy(cdai->buf, device->physpath, amt); + } break; default: break; } start_ccb->ccb_h.status = CAM_REQ_CMP; + + if (cdai->flags & CDAI_FLAG_STORE) { + int owned; + + owned = mtx_owned(start_ccb->ccb_h.path->bus->sim->mtx); + if (owned == 0) + mtx_lock(start_ccb->ccb_h.path->bus->sim->mtx); + xpt_async(AC_ADVINFO_CHANGED, start_ccb->ccb_h.path, + (void *)(uintptr_t)cdai->buftype); + if (owned == 0) + mtx_unlock(start_ccb->ccb_h.path->bus->sim->mtx); + } } static void @@ -2486,9 +2536,9 @@ scsi_action(union ccb *start_ccb) (*(sim->sim_action))(sim, start_ccb); break; } - case XPT_GDEV_ADVINFO: + case XPT_DEV_ADVINFO: { - scsi_getdev_advinfo(start_ccb); + scsi_dev_advinfo(start_ccb); break; } default: diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris.c b/sys/cddl/compat/opensolaris/kern/opensolaris.c index 54118eb..640b2f3 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris.c @@ -40,6 +40,7 @@ cpu_core_t cpu_core[MAXCPU]; kmutex_t cpu_lock; solaris_cpu_t solaris_cpu[MAXCPU]; +int nsec_per_tick; /* * OpenSolaris subsystem initialisation. 
@@ -60,6 +61,8 @@ opensolaris_load(void *dummy) } mutex_init(&cpu_lock, "OpenSolaris CPU lock", MUTEX_DEFAULT, NULL); + + nsec_per_tick = NANOSEC / hz; } SYSINIT(opensolaris_register, SI_SUB_OPENSOLARIS, SI_ORDER_FIRST, opensolaris_load, NULL); diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_sysevent.c b/sys/cddl/compat/opensolaris/kern/opensolaris_sysevent.c index dea618c..c6ae497 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_sysevent.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_sysevent.c @@ -113,8 +113,10 @@ sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name, } break; default: +#if 0 printf("%s: type %d is not implemented\n", __func__, se_value->value_type); +#endif break; } @@ -286,8 +288,10 @@ log_sysevent(sysevent_t *evp, int flag, sysevent_id_t *eid) break; } default: +#if 0 printf("%s: type %d is not implemented\n", __func__, nvpair_type(elem)); +#endif break; } } diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c b/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c index 5a20488..a74f795 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c @@ -147,9 +147,7 @@ taskq_run_safe(void *arg, int pending __unused) { struct ostask *task = arg; - ASSERT(task->ost_magic == TASKQ_MAGIC); task->ost_func(task->ost_arg); - task->ost_magic = 0; } taskqid_t @@ -158,15 +156,12 @@ taskq_dispatch_safe(taskq_t *tq, task_func_t func, void *arg, u_int flags, { int prio; - ASSERT(task->ost_magic != TASKQ_MAGIC); - /* * If TQ_FRONT is given, we want higher priority for this task, so it * can go at the front of the queue. 
*/ prio = !!(flags & TQ_FRONT); - task->ost_magic = TASKQ_MAGIC; task->ost_func = func; task->ost_arg = arg; diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c index be9f4ec..a266eca 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -172,6 +172,11 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, */ mp->mnt_flag |= MNT_RDONLY; /* + * We don't want snapshots to allow access to vulnerable setuid + * programs, so we turn off setuid when mounting snapshots. + */ + mp->mnt_flag |= MNT_NOSUID; + /* * We don't want snapshots to be visible in regular * mount(8) and df(1) output. */ diff --git a/sys/cddl/compat/opensolaris/sys/atomic.h b/sys/cddl/compat/opensolaris/sys/atomic.h index af9cc5d..f34d77e 100644 --- a/sys/cddl/compat/opensolaris/sys/atomic.h +++ b/sys/cddl/compat/opensolaris/sys/atomic.h @@ -40,8 +40,6 @@ extern void atomic_add_64(volatile uint64_t *target, int64_t delta); extern void atomic_dec_64(volatile uint64_t *target); #endif -#ifndef __LP64__ -#endif #ifndef __sparc64__ extern uint32_t atomic_cas_32(volatile uint32_t *target, uint32_t cmp, uint32_t newval); diff --git a/sys/cddl/compat/opensolaris/sys/kstat.h b/sys/cddl/compat/opensolaris/sys/kstat.h index 9df4965..d73bd22 100644 --- a/sys/cddl/compat/opensolaris/sys/kstat.h +++ b/sys/cddl/compat/opensolaris/sys/kstat.h @@ -58,7 +58,7 @@ typedef struct kstat_named { } value; } kstat_named_t; -kstat_t *kstat_create(char *module, int instance, char *name, char *class, +kstat_t *kstat_create(char *module, int instance, char *name, char *cls, uchar_t type, ulong_t ndata, uchar_t flags); void kstat_install(kstat_t *ksp); void kstat_delete(kstat_t *ksp); diff --git a/sys/cddl/compat/opensolaris/sys/taskq.h b/sys/cddl/compat/opensolaris/sys/taskq.h index eedc4da..ffe70ca 100644 --- a/sys/cddl/compat/opensolaris/sys/taskq.h +++ 
b/sys/cddl/compat/opensolaris/sys/taskq.h @@ -35,7 +35,6 @@ struct ostask { struct task ost_task; task_func_t *ost_func; void *ost_arg; - int ost_magic; }; taskqid_t taskq_dispatch_safe(taskq_t *tq, task_func_t func, void *arg, diff --git a/sys/cddl/compat/opensolaris/sys/time.h b/sys/cddl/compat/opensolaris/sys/time.h index 05db50e..8e8a99d 100644 --- a/sys/cddl/compat/opensolaris/sys/time.h +++ b/sys/cddl/compat/opensolaris/sys/time.h @@ -62,8 +62,21 @@ gethrtime(void) { #define gethrestime(ts) getnanotime(ts) #define gethrtime_waitfree() gethrtime() -#define ddi_get_lbolt() ((gethrtime() * hz) / NANOSEC) -#define ddi_get_lbolt64() (int64_t)((gethrtime() * hz) / NANOSEC) +extern int nsec_per_tick; /* nanoseconds per clock tick */ + +static __inline int64_t +ddi_get_lbolt64(void) +{ + + return (gethrtime() / nsec_per_tick); +} + +static __inline clock_t +ddi_get_lbolt(void) +{ + + return (ddi_get_lbolt64()); +} #else diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c index 942636b..130c918 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c @@ -500,9 +500,11 @@ spa_history_log_version(spa_t *spa, history_internal_events_t event) utsname.nodename, utsname.release, utsname.version, utsname.machine); } +#if 0 cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", event == LOG_POOL_IMPORT ? "imported" : event == LOG_POOL_CREATE ? 
"created" : "accessed", (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif +#endif } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h index 8f189c6..5265222 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h @@ -37,8 +37,8 @@ extern "C" { typedef struct arc_buf_hdr arc_buf_hdr_t; typedef struct arc_buf arc_buf_t; -typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); -typedef int arc_evict_func_t(void *private); +typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv); +typedef int arc_evict_func_t(void *priv); /* generic arc_done_func_t's which you can use */ arc_done_func_t arc_bcopy_func; @@ -103,17 +103,17 @@ int arc_referenced(arc_buf_t *buf); #endif int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf, - arc_done_func_t *done, void *private, int priority, int zio_flags, + arc_done_func_t *done, void *priv, int priority, int zio_flags, uint32_t *arc_flags, const zbookmark_t *zb); int arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp, - arc_done_func_t *done, void *private, int priority, int flags, + arc_done_func_t *done, void *priv, int priority, int flags, uint32_t *arc_flags, const zbookmark_t *zb); zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, - arc_done_func_t *ready, arc_done_func_t *done, void *private, + arc_done_func_t *ready, arc_done_func_t *done, void *priv, int priority, int zio_flags, const zbookmark_t *zb); -void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private); +void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *priv); int arc_buf_evict(arc_buf_t *buf); void arc_flush(spa_t *spa); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/ddt.h 
b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/ddt.h index 9724d6e..405622b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/ddt.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/ddt.h @@ -169,15 +169,15 @@ typedef struct ddt_ops { #define DDT_NAMELEN 80 extern void ddt_object_name(ddt_t *ddt, enum ddt_type type, - enum ddt_class class, char *name); + enum ddt_class cls, char *name); extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type, - enum ddt_class class, uint64_t *walk, ddt_entry_t *dde); + enum ddt_class cls, uint64_t *walk, ddt_entry_t *dde); extern uint64_t ddt_object_count(ddt_t *ddt, enum ddt_type type, - enum ddt_class class); + enum ddt_class cls); extern int ddt_object_info(ddt_t *ddt, enum ddt_type type, - enum ddt_class class, dmu_object_info_t *); + enum ddt_class cls, dmu_object_info_t *); extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type, - enum ddt_class class); + enum ddt_class cls); extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg); @@ -235,7 +235,7 @@ extern void ddt_unload(spa_t *spa); extern void ddt_sync(spa_t *spa, uint64_t txg); extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde); extern int ddt_object_update(ddt_t *ddt, enum ddt_type type, - enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx); + enum ddt_class cls, ddt_entry_t *dde, dmu_tx_t *tx); extern const ddt_ops_t ddt_zap_ops; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h index 7d25bd7..57725b5 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h @@ -126,10 +126,10 @@ void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp); void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp); int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf, - arc_done_func_t 
*done, void *private, int priority, int zio_flags, + arc_done_func_t *done, void *priv, int priority, int zio_flags, uint32_t *arc_flags, const zbookmark_t *zb); int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp, - arc_done_func_t *done, void *private, int priority, int zio_flags, + arc_done_func_t *done, void *priv, int priority, int zio_flags, uint32_t *arc_flags, const zbookmark_t *zb); void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h index 23d48c8..9cdec18 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h @@ -655,7 +655,7 @@ extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt); /* error handling */ struct zbookmark; extern void spa_log_error(spa_t *spa, zio_t *zio); -extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, +extern void zfs_ereport_post(const char *cls, spa_t *spa, vdev_t *vd, zio_t *zio, uint64_t stateoroffset, uint64_t length); extern void zfs_post_remove(spa_t *spa, vdev_t *vd); extern void zfs_post_state_change(spa_t *spa, vdev_t *vd); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h index 63b9c57..f7e44aa 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -115,6 +115,87 @@ typedef enum drr_headertype { /* * zfs ioctl command structure */ +struct drr_begin { + uint64_t drr_magic; + uint64_t drr_versioninfo; /* was drr_version */ + uint64_t drr_creation_time; + dmu_objset_type_t drr_type; + uint32_t drr_flags; + uint64_t drr_toguid; + uint64_t drr_fromguid; + char drr_toname[MAXNAMELEN]; +}; + +struct drr_end { + zio_cksum_t 
drr_checksum; + uint64_t drr_toguid; +}; + +struct drr_object { + uint64_t drr_object; + dmu_object_type_t drr_type; + dmu_object_type_t drr_bonustype; + uint32_t drr_blksz; + uint32_t drr_bonuslen; + uint8_t drr_checksumtype; + uint8_t drr_compress; + uint8_t drr_pad[6]; + uint64_t drr_toguid; + /* bonus content follows */ +}; + +struct drr_freeobjects { + uint64_t drr_firstobj; + uint64_t drr_numobjs; + uint64_t drr_toguid; +}; + +struct drr_write { + uint64_t drr_object; + dmu_object_type_t drr_type; + uint32_t drr_pad; + uint64_t drr_offset; + uint64_t drr_length; + uint64_t drr_toguid; + uint8_t drr_checksumtype; + uint8_t drr_checksumflags; + uint8_t drr_pad2[6]; + ddt_key_t drr_key; /* deduplication key */ + /* content follows */ +}; + +struct drr_free { + uint64_t drr_object; + uint64_t drr_offset; + uint64_t drr_length; + uint64_t drr_toguid; +}; + +struct drr_write_byref { + /* where to put the data */ + uint64_t drr_object; + uint64_t drr_offset; + uint64_t drr_length; + uint64_t drr_toguid; + /* where to find the prior copy of the data */ + uint64_t drr_refguid; + uint64_t drr_refobject; + uint64_t drr_refoffset; + /* properties of the data */ + uint8_t drr_checksumtype; + uint8_t drr_checksumflags; + uint8_t drr_pad2[6]; + ddt_key_t drr_key; /* deduplication key */ +}; + +struct drr_spill { + uint64_t drr_object; + uint64_t drr_length; + uint64_t drr_toguid; + uint64_t drr_pad[4]; /* needed for crypto */ + /* spill data follows */ +}; + typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, @@ -123,79 +204,14 @@ typedef struct dmu_replay_record { } drr_type; uint32_t drr_payloadlen; union { - struct drr_begin { - uint64_t drr_magic; - uint64_t drr_versioninfo; /* was drr_version */ - uint64_t drr_creation_time; - dmu_objset_type_t drr_type; - uint32_t drr_flags; - uint64_t drr_toguid; - uint64_t drr_fromguid; - char drr_toname[MAXNAMELEN]; - } drr_begin; - struct drr_end { - zio_cksum_t drr_checksum; - uint64_t drr_toguid; - 
} drr_end; - struct drr_object { - uint64_t drr_object; - dmu_object_type_t drr_type; - dmu_object_type_t drr_bonustype; - uint32_t drr_blksz; - uint32_t drr_bonuslen; - uint8_t drr_checksumtype; - uint8_t drr_compress; - uint8_t drr_pad[6]; - uint64_t drr_toguid; - /* bonus content follows */ - } drr_object; - struct drr_freeobjects { - uint64_t drr_firstobj; - uint64_t drr_numobjs; - uint64_t drr_toguid; - } drr_freeobjects; - struct drr_write { - uint64_t drr_object; - dmu_object_type_t drr_type; - uint32_t drr_pad; - uint64_t drr_offset; - uint64_t drr_length; - uint64_t drr_toguid; - uint8_t drr_checksumtype; - uint8_t drr_checksumflags; - uint8_t drr_pad2[6]; - ddt_key_t drr_key; /* deduplication key */ - /* content follows */ - } drr_write; - struct drr_free { - uint64_t drr_object; - uint64_t drr_offset; - uint64_t drr_length; - uint64_t drr_toguid; - } drr_free; - struct drr_write_byref { - /* where to put the data */ - uint64_t drr_object; - uint64_t drr_offset; - uint64_t drr_length; - uint64_t drr_toguid; - /* where to find the prior copy of the data */ - uint64_t drr_refguid; - uint64_t drr_refobject; - uint64_t drr_refoffset; - /* properties of the data */ - uint8_t drr_checksumtype; - uint8_t drr_checksumflags; - uint8_t drr_pad2[6]; - ddt_key_t drr_key; /* deduplication key */ - } drr_write_byref; - struct drr_spill { - uint64_t drr_object; - uint64_t drr_length; - uint64_t drr_toguid; - uint64_t drr_pad[4]; /* needed for crypto */ - /* spill data follows */ - } drr_spill; + struct drr_begin drr_begin; + struct drr_end drr_end; + struct drr_object drr_object; + struct drr_freeobjects drr_freeobjects; + struct drr_write drr_write; + struct drr_free drr_free; + struct drr_write_byref drr_write_byref; + struct drr_spill drr_spill; } drr_u; } dmu_replay_record_t; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h index 355f560..4a4e843 100644 --- 
a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h @@ -421,28 +421,27 @@ struct zio { #ifdef _KERNEL /* FreeBSD only. */ - struct ostask io_task_issue; - struct ostask io_task_interrupt; + struct ostask io_task; #endif }; extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, - zio_done_func_t *done, void *private, enum zio_flag flags); + zio_done_func_t *done, void *priv, enum zio_flag flags); extern zio_t *zio_root(spa_t *spa, - zio_done_func_t *done, void *private, enum zio_flag flags); + zio_done_func_t *done, void *priv, enum zio_flag flags); extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data, - uint64_t size, zio_done_func_t *done, void *private, + uint64_t size, zio_done_func_t *done, void *priv, int priority, enum zio_flag flags, const zbookmark_t *zb); extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, uint64_t size, const zio_prop_t *zp, - zio_done_func_t *ready, zio_done_func_t *done, void *private, + zio_done_func_t *ready, zio_done_func_t *done, void *priv, int priority, enum zio_flag flags, const zbookmark_t *zb); extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, - void *data, uint64_t size, zio_done_func_t *done, void *private, + void *data, uint64_t size, zio_done_func_t *done, void *priv, int priority, enum zio_flag flags, zbookmark_t *zb); extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies); @@ -451,19 +450,20 @@ extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp); extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, - zio_done_func_t *done, void *private, enum zio_flag flags); + zio_done_func_t *done, void *priv, enum zio_flag flags); extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, - zio_done_func_t *done, void *private, int priority, enum zio_flag flags); + zio_done_func_t *done, void *priv, int 
priority, + enum zio_flag flags); extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, void *data, int checksum, - zio_done_func_t *done, void *private, int priority, enum zio_flag flags, + zio_done_func_t *done, void *priv, int priority, enum zio_flag flags, boolean_t labels); extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, void *data, int checksum, - zio_done_func_t *done, void *private, int priority, enum zio_flag flags, + zio_done_func_t *done, void *priv, int priority, enum zio_flag flags, boolean_t labels); extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, @@ -494,11 +494,11 @@ extern void zio_resubmit_stage_async(void *); extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, uint64_t offset, void *data, uint64_t size, int type, int priority, - enum zio_flag flags, zio_done_func_t *done, void *private); + enum zio_flag flags, zio_done_func_t *done, void *priv); extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size, int type, int priority, - enum zio_flag flags, zio_done_func_t *done, void *private); + enum zio_flag flags, zio_done_func_t *done, void *priv); extern void zio_vdev_io_bypass(zio_t *zio); extern void zio_vdev_io_reissue(zio_t *zio); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c index bae9071..6ff9339 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c @@ -239,15 +239,20 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, return (ENOENT); } if (dl == NULL) { + size_t namesize; + /* * Allocate a new dirlock and add it to the list. 
*/ - dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); + namesize = strlen(name) + 1; + dl = kmem_alloc(sizeof (zfs_dirlock_t) + namesize, + KM_SLEEP); cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); - dl->dl_name = name; + dl->dl_name = (char *)(dl + 1); + bcopy(name, dl->dl_name, namesize); dl->dl_sharecnt = 0; dl->dl_namelock = 0; - dl->dl_namesize = 0; + dl->dl_namesize = namesize; dl->dl_dzp = dzp; dl->dl_next = dzp->z_dirlocks; dzp->z_dirlocks = dl; @@ -264,20 +269,8 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, if (flag & ZHAVELOCK) dl->dl_namelock = 1; - if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { - /* - * We're the second shared reference to dl. Make a copy of - * dl_name in case the first thread goes away before we do. - * Note that we initialize the new name before storing its - * pointer into dl_name, because the first thread may load - * dl->dl_name at any time. He'll either see the old value, - * which is his, or the new shared copy; either is OK. 
- */ - dl->dl_namesize = strlen(dl->dl_name) + 1; - name = kmem_alloc(dl->dl_namesize, KM_SLEEP); - bcopy(dl->dl_name, name, dl->dl_namesize); - dl->dl_name = name; - } + if (flag & ZSHARED) + dl->dl_sharecnt++; mutex_exit(&dzp->z_lock); @@ -361,10 +354,8 @@ zfs_dirent_unlock(zfs_dirlock_t *dl) cv_broadcast(&dl->dl_cv); mutex_exit(&dzp->z_lock); - if (dl->dl_namesize != 0) - kmem_free(dl->dl_name, dl->dl_namesize); cv_destroy(&dl->dl_cv); - kmem_free(dl, sizeof (*dl)); + kmem_free(dl, sizeof (*dl) + dl->dl_namesize); } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index 5e968b5..9a04344 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -1068,19 +1068,9 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) spa_t *spa = zio->io_spa; zio_type_t t = zio->io_type; int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0); -#ifdef _KERNEL - struct ostask *task; -#endif ASSERT(q == ZIO_TASKQ_ISSUE || q == ZIO_TASKQ_INTERRUPT); -#ifdef _KERNEL - if (q == ZIO_TASKQ_ISSUE) - task = &zio->io_task_issue; - else /* if (q == ZIO_TASKQ_INTERRUPT) */ - task = &zio->io_task_interrupt; -#endif - /* * If we're a config writer or a probe, the normal issue and * interrupt threads may all be blocked waiting for the config lock. 
@@ -1105,7 +1095,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline) ASSERT3U(q, <, ZIO_TASKQ_TYPES); #ifdef _KERNEL (void) taskq_dispatch_safe(spa->spa_zio_taskq[t][q], - (task_func_t *)zio_execute, zio, flags, task); + (task_func_t *)zio_execute, zio, flags, &zio->io_task); #else (void) taskq_dispatch(spa->spa_zio_taskq[t][q], (task_func_t *)zio_execute, zio, flags); @@ -2904,7 +2894,7 @@ zio_done(zio_t *zio) (void) taskq_dispatch_safe( spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], (task_func_t *)zio_reexecute, zio, TQ_SLEEP, - &zio->io_task_issue); + &zio->io_task); #else (void) taskq_dispatch( spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/ctf_api.h b/sys/cddl/contrib/opensolaris/uts/common/sys/ctf_api.h index cd4caaa..b810f07 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/ctf_api.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/ctf_api.h @@ -65,7 +65,7 @@ typedef long ctf_id_t; * filling in ctf_sect_t structures and passing them to ctf_bufopen(): */ typedef struct ctf_sect { - char *cts_name; /* section name (if any) */ + const char *cts_name; /* section name (if any) */ ulong_t cts_type; /* section type (ELF SHT_... value) */ ulong_t cts_flags; /* section flags (ELF SHF_... 
value) */ #if defined(sun) diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c index 6f93663..9ba2fd3 100644 --- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c +++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c @@ -123,7 +123,9 @@ reprogram(cyb_arg_t arg __unused, hrtime_t exp) static void xcall(cyb_arg_t arg __unused, cpu_t *c, cyc_func_t func, void *param) { + cpuset_t cpus; - smp_rendezvous_cpus((cpumask_t)1 << c->cpuid, + CPU_SETOF(c->cpuid, &cpus); + smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, param); } diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c index a081f67..0b86eac 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c @@ -113,12 +113,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -374,7 +374,7 @@ dtrace_gethrtime_init(void *arg) { struct pcpu *pc; uint64_t tsc_f; - cpumask_t map; + cpuset_t map; int i; /* @@ -412,7 +412,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, NULL, dtrace_gethrtime_init_cpu, diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c index 2753ffc..412fc38 100644 --- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c @@ -30,6 +30,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/types.h> +#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/kmem.h> @@ -113,12 +114,12 @@ 
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -372,9 +373,9 @@ dtrace_gethrtime_init_cpu(void *arg) static void dtrace_gethrtime_init(void *arg) { + cpuset_t map; struct pcpu *pc; uint64_t tsc_f; - cpumask_t map; int i; /* @@ -412,7 +413,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, NULL, dtrace_gethrtime_init_cpu, diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 23985d3..5b6e895 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -1416,6 +1416,19 @@ freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) return (pwrite(td, &ap)); } +#ifdef COMPAT_43 +int +ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) +{ + struct lseek_args nuap; + + nuap.fd = uap->fd; + nuap.offset = uap->offset; + nuap.whence = uap->whence; + return (lseek(td, &nuap)); +} +#endif + int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { diff --git a/sys/compat/freebsd32/freebsd32_proto.h b/sys/compat/freebsd32/freebsd32_proto.h index bfc142d..c10c6a8 100644 --- a/sys/compat/freebsd32/freebsd32_proto.h +++ b/sys/compat/freebsd32/freebsd32_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 223166 2011-06-16 22:05:56Z kib */ #ifndef _FREEBSD32_SYSPROTO_H_ @@ -690,6 +690,11 @@ int freebsd32_posix_fallocate(struct thread *, struct freebsd32_posix_fallocate_ #if !defined(PAD64_REQUIRED) && defined(__powerpc__) #define PAD64_REQUIRED #endif +struct ofreebsd32_lseek_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset_l_[PADL_(int)]; int offset; char offset_r_[PADR_(int)]; + char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)]; +}; struct ofreebsd32_stat_args { char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; char ub_l_[PADL_(struct ostat32 *)]; struct ostat32 * ub; char ub_r_[PADR_(struct ostat32 *)]; @@ -747,6 +752,7 @@ struct ofreebsd32_getdirentries_args { #ifdef PAD64_REQUIRED #else #endif +int ofreebsd32_lseek(struct thread *, struct ofreebsd32_lseek_args *); int ofreebsd32_stat(struct thread *, struct ofreebsd32_stat_args *); int ofreebsd32_lstat(struct thread *, struct ofreebsd32_lstat_args *); int ofreebsd32_sigaction(struct thread *, struct ofreebsd32_sigaction_args *); @@ -923,6 +929,7 @@ int freebsd7_freebsd32_shmctl(struct thread *, struct freebsd7_freebsd32_shmctl_ #define FREEBSD32_SYS_AUE_freebsd32_wait4 AUE_WAIT4 #define FREEBSD32_SYS_AUE_freebsd4_freebsd32_getfsstat AUE_GETFSSTAT +#define FREEBSD32_SYS_AUE_ofreebsd32_lseek AUE_LSEEK #define FREEBSD32_SYS_AUE_freebsd32_recvmsg AUE_RECVMSG #define FREEBSD32_SYS_AUE_freebsd32_sendmsg AUE_SENDMSG #define FREEBSD32_SYS_AUE_freebsd32_recvfrom AUE_RECVFROM diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index 9fcbdf0..8903338 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 223166 2011-06-16 22:05:56Z kib */ #define FREEBSD32_SYS_syscall 0 @@ -25,7 +25,7 @@ #define FREEBSD32_SYS_chown 16 #define FREEBSD32_SYS_break 17 #define FREEBSD32_SYS_freebsd4_freebsd32_getfsstat 18 - /* 19 is obsolete olseek */ + /* 19 is old freebsd32_lseek */ #define FREEBSD32_SYS_getpid 20 #define FREEBSD32_SYS_mount 21 #define FREEBSD32_SYS_unmount 22 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 7e648e4..eac2385 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 223166 2011-06-16 22:05:56Z kib */ const char *freebsd32_syscallnames[] = { @@ -29,7 +29,7 @@ const char *freebsd32_syscallnames[] = { "chown", /* 16 = chown */ "break", /* 17 = break */ "compat4.freebsd32_getfsstat", /* 18 = freebsd4 freebsd32_getfsstat */ - "obs_olseek", /* 19 = obsolete olseek */ + "compat.freebsd32_lseek", /* 19 = old freebsd32_lseek */ "getpid", /* 20 = getpid */ "mount", /* 21 = mount */ "unmount", /* 22 = unmount */ diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 2c174c7..61d1448 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 223166 2011-06-16 22:05:56Z kib */ #include "opt_compat.h" @@ -66,7 +66,7 @@ struct sysent freebsd32_sysent[] = { { AS(chown_args), (sy_call_t *)chown, AUE_CHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 16 = chown */ { AS(obreak_args), (sy_call_t *)obreak, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 17 = break */ { compat4(AS(freebsd4_freebsd32_getfsstat_args),freebsd32_getfsstat), AUE_GETFSSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 18 = freebsd4 freebsd32_getfsstat */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 19 = obsolete olseek */ + { compat(AS(ofreebsd32_lseek_args),freebsd32_lseek), AUE_LSEEK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 19 = old freebsd32_lseek */ { 0, (sy_call_t *)getpid, AUE_GETPID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 20 = getpid */ { AS(mount_args), (sy_call_t *)mount, AUE_MOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 21 = mount */ { AS(unmount_args), (sy_call_t *)unmount, AUE_UMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 22 = unmount */ diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index d524f3c..3e19298 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -89,7 +89,8 @@ 18 AUE_GETFSSTAT COMPAT4 { int freebsd32_getfsstat( \ struct statfs32 *buf, long bufsize, \ int flags); } -19 AUE_LSEEK OBSOL olseek +19 AUE_LSEEK COMPAT { int freebsd32_lseek(int fd, int offset, \ + int whence); } 20 AUE_GETPID NOPROTO { pid_t getpid(void); } 21 AUE_MOUNT NOPROTO { int mount(char *type, char *path, \ int flags, caddr_t data); } diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index ddbd8b4..692c5a3 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -1049,6 +1049,15 @@ linprocfs_doproccmdline(PFS_FILL_ARGS) PROC_UNLOCK(p); return (ret); } + + /* + * Mimic linux behavior and pass only processes 
with usermode + * address space as valid. Return zero silently otherwise. + */ + if (p->p_vmspace == &vmspace0) { + PROC_UNLOCK(p); + return (0); + } if (p->p_args != NULL) { sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length); PROC_UNLOCK(p); @@ -1073,6 +1082,15 @@ linprocfs_doprocenviron(PFS_FILL_ARGS) PROC_UNLOCK(p); return (ret); } + + /* + * Mimic linux behavior and pass only processes with usermode + * address space as valid. Return zero silently otherwise. + */ + if (p->p_vmspace == &vmspace0) { + PROC_UNLOCK(p); + return (0); + } PROC_UNLOCK(p); ret = linprocfs_doargv(td, p, sb, ps_string_env); diff --git a/sys/conf/Makefile.arm b/sys/conf/Makefile.arm index 756945d..d099256 100644 --- a/sys/conf/Makefile.arm +++ b/sys/conf/Makefile.arm @@ -108,7 +108,8 @@ ${KERNEL_KO}.tramp: ${KERNEL_KO} $S/$M/$M/inckern.S $S/$M/$M/elf_trampoline.c gzip -9 ${KERNEL_KO}.tmp eval $$(stat -s ${KERNEL_KO}.tmp.gz) && \ echo "#define KERNCOMPSIZE $$st_size" >>opt_kernname.h - ${CC} -O2 -DKZIP -I. -I$S -c $S/kern/inflate.c -o inflate-tramp.o + ${CC} -O2 -ffreestanding -DKZIP -I. -I$S -c $S/kern/inflate.c -o \ + inflate-tramp.o ${CC} -O -nostdlib -I. -I$S -Xlinker -T -Xlinker ldscript.$M.tramp \ -DKZIP tmphack.S $S/$M/$M/elf_trampoline.c inflate-tramp.o \ $S/$M/$M/inckern.S ${FILES_CPU_FUNC} -o ${KERNEL_KO}.gz.tramp diff --git a/sys/conf/Makefile.powerpc b/sys/conf/Makefile.powerpc index e4cd85f..725f3c7 100644 --- a/sys/conf/Makefile.powerpc +++ b/sys/conf/Makefile.powerpc @@ -35,7 +35,7 @@ LDSCRIPT_NAME?= ldscript.${MACHINE_ARCH} INCLUDES+= -I$S/contrib/libfdt -CFLAGS+= -msoft-float +CFLAGS+= -msoft-float -Wa,-many DDB_ENABLED!= grep DDB opt_ddb.h || true .if !empty(DDB_ENABLED) diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 94311c6..b84d0c5 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -432,7 +432,10 @@ options KTRACE_REQUEST_POOL=101 # defined by the KTR_* constants in <sys/ktr.h>.
KTR_MASK defines the # initial value of the ktr_mask variable which determines at runtime # what events to trace. KTR_CPUMASK determines which CPU's log -# events, with bit X corresponding to CPU X. KTR_VERBOSE enables +# events, with bit X corresponding to CPU X. The layout of the string +# passed as KTR_CPUMASK must match a series of bitmasks each of them +# separated by the ", " characters (ie: +# KTR_CPUMASK=("0xAF, 0xFFFFFFFFFFFFFFFF")). KTR_VERBOSE enables # dumping of KTR events to the console by default. This functionality # can be toggled via the debug.ktr_verbose sysctl and defaults to off # if KTR_VERBOSE is not defined. See ktr(4) and ktrdump(8) for details. @@ -441,7 +444,7 @@ options KTR options KTR_ENTRIES=1024 options KTR_COMPILE=(KTR_INTR|KTR_PROC) options KTR_MASK=KTR_INTR -options KTR_CPUMASK=0x3 +options KTR_CPUMASK=("0x3") options KTR_VERBOSE # diff --git a/sys/conf/files b/sys/conf/files index 5488f69..a62e1a6 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -604,6 +604,9 @@ dev/ath/ath_hal/ah_eeprom_v14.c \ dev/ath/ath_hal/ah_eeprom_v4k.c \ optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath" +dev/ath/ath_hal/ah_eeprom_9287.c \ + optional ath_hal | ath_ar9287 \ + compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_regdomain.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # ar5210 @@ -651,111 +654,128 @@ dev/ath/ath_hal/ar5211/ar5211_xmit.c optional ath_hal | ath_ar5211 \ # ar5212 dev/ath/ath_hal/ar5212/ar5212_ani.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_attach.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_beacon.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 |
ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_eeprom.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_gpio.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_interrupts.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_keycache.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_misc.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_phy.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_power.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_recv.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_reset.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ 
compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_rfgain.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_xmit.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ - ath_ar9285 \ + ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5416 (depends on ar5212) dev/ath/ath_hal/ar5416/ar5416_ani.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_attach.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_beacon.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_iq.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcgain.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | 
ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcdc.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_eeprom.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_gpio.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_interrupts.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_keycache.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_misc.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_phy.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" 
dev/ath/ath_hal/ar5416/ar5416_power.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_recv.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_reset.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_xmit.c \ - optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 \ + optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ + ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9130 (depends upon ar5416) - also requires AH_SUPPORT_AR9130 dev/ath/ath_hal/ar9001/ar9130_attach.c optional ath_hal | ath_ar9130 \ @@ -786,6 +806,16 @@ dev/ath/ath_hal/ar9002/ar9285_phy.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_diversity.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" +# ar9287 (depends on ar5416) +dev/ath/ath_hal/ar9002/ar9287_attach.c optional ath_hal | ath_ar9287 \ + compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" +dev/ath/ath_hal/ar9002/ar9287_reset.c optional ath_hal | ath_ar9287 \ + compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" +dev/ath/ath_hal/ar9002/ar9287_cal.c optional ath_hal | ath_ar9287 \ + compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" +dev/ath/ath_hal/ar9002/ar9287_olc.c optional ath_hal | ath_ar9287 \ + compile-with 
"${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" + # rf backends dev/ath/ath_hal/ar5212/ar2316.c optional ath_rf2316 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" @@ -807,6 +837,8 @@ dev/ath/ath_hal/ar9002/ar9280.c optional ath_hal | ath_ar9280 | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" +dev/ath/ath_hal/ar9002/ar9287.c optional ath_hal | ath_ar9287 \ + compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ath rate control algorithms dev/ath/ath_rate/amrr/amrr.c optional ath_rate_amrr \ compile-with "${NORMAL_C} -I$S/dev/ath" @@ -814,7 +846,10 @@ dev/ath/ath_rate/onoe/onoe.c optional ath_rate_onoe \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/sample/sample.c optional ath_rate_sample \ compile-with "${NORMAL_C} -I$S/dev/ath" -# +# ath DFS modules +dev/ath/ath_dfs/null/dfs_null.c optional ath \ + compile-with "${NORMAL_C} -I$S/dev/ath" +# dev/bce/if_bce.c optional bce dev/bfe/if_bfe.c optional bfe dev/bge/if_bge.c optional bge @@ -885,6 +920,8 @@ dev/cxgbe/t4_main.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_sge.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" +dev/cxgbe/t4_l2t.c optional cxgbe pci \ + compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4_hw.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cy/cy.c optional cy @@ -1921,6 +1958,7 @@ dev/usb/serial/uftdi.c optional uftdi dev/usb/serial/ugensa.c optional ugensa dev/usb/serial/uipaq.c optional uipaq dev/usb/serial/ulpt.c optional ulpt +dev/usb/serial/umcs.c optional umcs dev/usb/serial/umct.c optional umct dev/usb/serial/umodem.c optional umodem dev/usb/serial/umoscom.c optional umoscom @@ -1930,7 +1968,7 @@ dev/usb/serial/uvisor.c optional uvisor dev/usb/serial/uvscom.c optional uvscom dev/usb/serial/usb_serial.c optional ucom | u3g | 
uark | ubsa | ubser | \ uchcom | ucycom | ufoma | uftdi | \ - ugensa | uipaq | umct | \ + ugensa | uipaq | umcs | umct | \ umodem | umoscom | uplcom | uslcom | \ uvisor | uvscom # @@ -2710,6 +2748,7 @@ netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet | inet6 +netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup netinet/in_proto.c optional inet | inet6 \ compile-with "${NORMAL_C} -I$S/contrib/pf" netinet/in_rmx.c optional inet @@ -2787,6 +2826,7 @@ netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6 netinet6/in6_ifattach.c optional inet6 netinet6/in6_mcast.c optional inet6 netinet6/in6_pcb.c optional inet6 +netinet6/in6_pcbgroup.c optional inet6 pcbgroup netinet6/in6_proto.c optional inet6 netinet6/in6_rmx.c optional inet6 netinet6/in6_src.c optional inet6 @@ -3112,41 +3152,41 @@ ofed/drivers/net/mlx4/en_tx.c optional mlxen \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_av.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cq.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_eq.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + 
no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mr.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_pd.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_qp.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca 
\ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" # crypto support opencrypto/cast.c optional crypto | ipsec diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 42bf6f2..9f5a357 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -27,6 +27,7 @@ dev/cfi/cfi_bus_fdt.c optional cfi fdt dev/fb/fb.c optional sc dev/fdt/fdt_powerpc.c optional fdt dev/hwpmc/hwpmc_powerpc.c optional hwpmc +dev/iicbus/ad7417.c optional ad7417 powermac dev/iicbus/ds1775.c optional ds1775 powermac dev/iicbus/max6690.c optional max6690 powermac dev/kbd/kbd.c optional sc @@ -86,7 +87,6 @@ powerpc/aim/moea64_if.m optional aim powerpc/aim/moea64_native.c optional aim powerpc/aim/mp_cpudep.c optional aim smp powerpc/aim/nexus.c optional aim -powerpc/aim/ofwmagic.S optional aim powerpc/aim/slb.c optional aim powerpc64 powerpc/aim/swtch32.S optional aim powerpc powerpc/aim/swtch64.S optional aim powerpc64 @@ -136,6 +136,10 @@ powerpc/ofw/ofw_pcibus.c optional pci aim powerpc/ofw/ofw_pcib_pci.c optional pci aim powerpc/ofw/ofw_real.c optional aim powerpc/ofw/ofw_syscons.c optional sc aim +powerpc/ofw/ofwcall32.S optional aim powerpc +powerpc/ofw/ofwcall64.S optional aim powerpc64 +powerpc/ofw/ofwmagic.S optional aim +powerpc/ofw/rtas.c optional aim powerpc/powermac/ata_kauai.c optional powermac ata | powermac atamacio powerpc/powermac/ata_macio.c optional powermac ata | powermac atamacio powerpc/powermac/ata_dbdma.c optional powermac ata | powermac atamacio @@ -150,6 +154,7 @@ powerpc/powermac/macgpio.c optional powermac pci powerpc/powermac/macio.c optional powermac pci powerpc/powermac/openpic_macio.c optional powermac pci powerpc/powermac/platform_powermac.c optional powermac +powerpc/powermac/powermac_thermal.c optional powermac powerpc/powermac/pswitch.c optional powermac pswitch powerpc/powermac/pmu.c optional powermac pmu powerpc/powermac/smu.c optional powermac smu @@ -157,6 +162,7 @@ 
powerpc/powermac/smusat.c optional powermac smu powerpc/powermac/uninorth.c optional powermac powerpc/powermac/uninorthpci.c optional powermac pci powerpc/powermac/vcoregpio.c optional powermac +powerpc/powermac/windtunnel.c optional powermac windtunnel powerpc/powerpc/altivec.c optional aim powerpc/powerpc/atomic.S standard powerpc/powerpc/autoconf.c standard @@ -197,11 +203,13 @@ powerpc/powerpc/syncicache.c standard powerpc/powerpc/sys_machdep.c standard powerpc/powerpc/uio_machdep.c standard powerpc/ps3/ehci_ps3.c optional ps3 ehci +powerpc/ps3/ohci_ps3.c optional ps3 ohci powerpc/ps3/if_glc.c optional ps3 glc powerpc/ps3/mmu_ps3.c optional ps3 powerpc/ps3/platform_ps3.c optional ps3 powerpc/ps3/ps3ata.c optional ps3 ps3ata powerpc/ps3/ps3bus.c optional ps3 +powerpc/ps3/ps3disk.c optional ps3 powerpc/ps3/ps3pic.c optional ps3 powerpc/ps3/ps3_syscons.c optional ps3 sc powerpc/ps3/ps3-hvcall.S optional ps3 sc diff --git a/sys/conf/options b/sys/conf/options index a608d86..ee696a8 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -419,6 +419,7 @@ MROUTING opt_mrouting.h NCP NETATALK opt_atalk.h NFSLOCKD +PCBGROUP opt_pcbgroup.h RADIX_MPATH opt_mpath.h ROUTETABLES opt_route.h SLIP_IFF_OPTS opt_slip.h diff --git a/sys/contrib/dev/acpica/changes.txt b/sys/contrib/dev/acpica/changes.txt index 68dda91..f53fd42 100644 --- a/sys/contrib/dev/acpica/changes.txt +++ b/sys/contrib/dev/acpica/changes.txt @@ -1,31 +1,99 @@ ---------------------------------------- +27 May 2011. Summary of changes for version 20110527: + +This release is available at www.acpica.org/downloads + +1) ACPI CA Core Subsystem: + +ASL Load() operator: Reinstate most restrictions on the incoming ACPI table +signature. Now, only allow SSDT, OEMx, and a null signature. History: + 1) Originally, we checked the table signature for "SSDT" or "PSDT". + (PSDT is now obsolete.) + 2) We added support for OEMx tables, signature "OEM" plus a fourth + "don't care" character. 
+ 3) Valid tables were encountered with a null signature, so we just + gave up on validating the signature, (05/2008). + 4) We encountered non-AML tables such as the MADT, which caused + interpreter errors and kernel faults. So now, we once again allow + only SSDT, OEMx, and now, also a null signature. (05/2011). + +Added the missing _TDL predefined name to the global name list in order to +enable validation. Affects both the core ACPICA code and the iASL compiler. + +Example Code and Data Size: These are the sizes for the OS-independent +acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug +version of the code includes the debug output trace mechanism and has a much +larger code and data size. + + Previous Release (VC 9.0): + Non-Debug Version: 90.0K Code, 23.8K Data, 113.8K Total + Debug Version: 164.5K Code, 68.0K Data, 232.5K Total + Current Release (VC 9.0): + Non-Debug Version: 90.1K Code, 23.9K Data, 114.0K Total + Debug Version: 165.6K Code, 68.4K Data, 234.0K Total + +2) iASL Compiler/Disassembler and Tools: + +Debugger/AcpiExec: Implemented support for "complex" method arguments on the +debugger command line. This adds support beyond simple integers -- including +Strings, Buffers, and Packages. Includes support for nested packages. +Increased the default command line buffer size to accommodate these arguments. +See the ACPICA reference for details and syntax. ACPICA BZ 917. + +Debugger/AcpiExec: Implemented support for "default" method arguments for the +Execute/Debug command. Now, the debugger will always invoke a control method +with the required number of arguments -- even if the command line specifies +none or insufficient arguments. It uses default integer values for any missing +arguments. Also fixes a bug where only six method arguments maximum were +supported instead of the required seven. 
+ +Debugger/AcpiExec: Add a maximum buffer length parameter to AcpiOsGetLine and +also return status in order to prevent buffer overruns. See the ACPICA +reference for details and syntax. ACPICA BZ 921 + +iASL: Cleaned up support for Berkeley yacc. A general cleanup of code and +makefiles to simplify support for the two different but similar parser +generators, bison and yacc. + +Updated the generic unix makefile for gcc 4. The default gcc version is now +expected to be 4 or greater, since options specific to gcc 4 are used. + +---------------------------------------- 13 April 2011. Summary of changes for version 20110413: 1) ACPI CA Core Subsystem: Implemented support to execute a so-called "orphan" _REG method under the EC -device. This change will force the execution of a _REG method underneath the EC +device. This change will force the execution of a _REG method underneath the +EC device even if there is no corresponding operation region of type EmbeddedControl. Fixes a problem seen on some machines and apparently is compatible with Windows behavior. ACPICA BZ 875. Added more predefined methods that are eligible for automatic NULL package -element removal. This change adds another group of predefined names to the list +element removal. This change adds another group of predefined names to the +list of names that can be repaired by having NULL package elements dynamically removed. This group are those methods that return a single variable-length package containing simple data types such as integers, buffers, strings. This -includes: _ALx, _BCL, _CID,_ DOD, _EDL, _FIX, _PCL, _PLD, _PMD, _PRx, _PSL, _Sx, +includes: _ALx, _BCL, _CID,_ DOD, _EDL, _FIX, _PCL, _PLD, _PMD, _PRx, _PSL, +_Sx, and _TZD. ACPICA BZ 914. -Split and segregated all internal global lock functions to a new file, evglock.c. +Split and segregated all internal global lock functions to a new file, +evglock.c. -Updated internal address SpaceID for DataTable regions. 
Moved this internal space -id in preparation for ACPI 5.0 changes that will include some new space IDs. This +Updated internal address SpaceID for DataTable regions. Moved this internal +space +id in preparation for ACPI 5.0 changes that will include some new space IDs. +This change should not affect user/host code. -Example Code and Data Size: These are the sizes for the OS-independent acpica.lib +Example Code and Data Size: These are the sizes for the OS-independent +acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug version of -the code includes the debug output trace mechanism and has a much larger code and +the code includes the debug output trace mechanism and has a much larger code +and data size. Previous Release (VC 9.0): @@ -40,34 +108,44 @@ data size. iASL/DTC: Major update for new grammar features. Allow generic data types in custom ACPI tables. Field names are now optional. Any line can be split to multiple lines using the continuation char (\). Large buffers now use line- -continuation character(s) and no colon on the continuation lines. See the grammar +continuation character(s) and no colon on the continuation lines. See the +grammar update in the iASL compiler reference. ACPI BZ 910,911. Lin Ming, Bob Moore. iASL: Mark ASL "Return()" and the simple "Return" as "Null" return statements. -Since the parser stuffs a "zero" as the return value for these statements (due to +Since the parser stuffs a "zero" as the return value for these statements (due +to the underlying AML grammar), they were seen as "return with value" by the iASL semantic checking. They are now seen correctly as "null" return statements. iASL: Check if a_REG declaration has a corresponding Operation Region. Adds a check for each _REG to ensure that there is in fact a corresponding operation -region declaration in the same scope. If not, the _REG method is not very useful +region declaration in the same scope. 
If not, the _REG method is not very +useful since it probably won't be executed. ACPICA BZ 915. -iASL/DTC: Finish support for expression evaluation. Added a new expression parser -that implements c-style operator precedence and parenthesization. ACPICA bugzilla +iASL/DTC: Finish support for expression evaluation. Added a new expression +parser +that implements c-style operator precedence and parenthesization. ACPICA +bugzilla 908. -Disassembler/DTC: Remove support for () and <> style comments in data tables. Now -that DTC has full expression support, we don't want to have comment strings that -start with a parentheses or a less-than symbol. Now, only the standard /* and // +Disassembler/DTC: Remove support for () and <> style comments in data tables. +Now +that DTC has full expression support, we don't want to have comment strings +that +start with a parentheses or a less-than symbol. Now, only the standard /* and +// comments are supported, as well as the bracket [] comments. -AcpiXtract: Fix for RSDP and dynamic SSDT extraction. These tables have "unusual" +AcpiXtract: Fix for RSDP and dynamic SSDT extraction. These tables have +"unusual" headers in the acpidump file. Update the header validation to support these tables. Problem introduced in previous AcpiXtract version in the change to support "wrong checksum" error messages emitted by acpidump utility. -iASL: Add a * option to generate all template files (as a synonym for ALL) as in +iASL: Add a * option to generate all template files (as a synonym for ALL) as +in "iasl -T *" or "iasl -T ALL". iASL/DTC: Do not abort compiler on fatal errors. We do not want to completely @@ -81,7 +159,8 @@ invocation. 1) ACPI CA Core Subsystem: Fixed a problem caused by a _PRW method appearing at the namespace root scope -during the setup of wake GPEs. A fault could occur if a _PRW directly under the +during the setup of wake GPEs. 
A fault could occur if a _PRW directly under +the root object was passed to the AcpiSetupGpeForWake interface. Lin Ming. Implemented support for "spurious" Global Lock interrupts. On some systems, a @@ -89,9 +168,11 @@ global lock interrupt can occur without the pending flag being set. Upon a GL interrupt, we now ensure that a thread is actually waiting for the lock before signaling GL availability. Rafael Wysocki, Bob Moore. -Example Code and Data Size: These are the sizes for the OS-independent acpica.lib +Example Code and Data Size: These are the sizes for the OS-independent +acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug version of -the code includes the debug output trace mechanism and has a much larger code and +the code includes the debug output trace mechanism and has a much larger code +and data size. Previous Release (VC 9.0): @@ -108,14 +189,16 @@ header files, disassembler, table compiler, and template generator. Bob Moore, Lin Ming. AcpiXtract: Correctly handle embedded comments and messages from AcpiDump. -Apparently some or all versions of acpidump will occasionally emit a comment like +Apparently some or all versions of acpidump will occasionally emit a comment +like "Wrong checksum", etc., into the dump file. This was causing problems for AcpiXtract. ACPICA BZ 905. iASL: Fix the Linux makefile by removing an inadvertent double file inclusion. ACPICA BZ 913. -AcpiExec: Update installation of operation region handlers. Install one handler +AcpiExec: Update installation of operation region handlers. Install one +handler for a user-defined address space. This is used by the ASL test suite (ASLTS). 
---------------------------------------- diff --git a/sys/contrib/dev/acpica/debugger/dbexec.c b/sys/contrib/dev/acpica/debugger/dbexec.c index 3d46335..eb0dde2 100644 --- a/sys/contrib/dev/acpica/debugger/dbexec.c +++ b/sys/contrib/dev/acpica/debugger/dbexec.c @@ -53,7 +53,8 @@ ACPI_MODULE_NAME ("dbexec") -static ACPI_DB_METHOD_INFO AcpiGbl_DbMethodInfo; +static ACPI_DB_METHOD_INFO AcpiGbl_DbMethodInfo; +#define DB_DEFAULT_PKG_ELEMENTS 33 /* Local prototypes */ @@ -81,6 +82,348 @@ AcpiDbExecutionWalk ( void *Context, void **ReturnValue); +static ACPI_STATUS +AcpiDbHexCharToValue ( + int HexChar, + UINT8 *ReturnValue); + +static ACPI_STATUS +AcpiDbConvertToPackage ( + char *String, + ACPI_OBJECT *Object); + +static ACPI_STATUS +AcpiDbConvertToObject ( + ACPI_OBJECT_TYPE Type, + char *String, + ACPI_OBJECT *Object); + +static void +AcpiDbDeleteObjects ( + UINT32 Count, + ACPI_OBJECT *Objects); + + +/******************************************************************************* + * + * FUNCTION: AcpiDbHexCharToValue + * + * PARAMETERS: HexChar - Ascii Hex digit, 0-9|a-f|A-F + * ReturnValue - Where the converted value is returned + * + * RETURN: Status + * + * DESCRIPTION: Convert a single hex character to a 4-bit number (0-16). + * + ******************************************************************************/ + +static ACPI_STATUS +AcpiDbHexCharToValue ( + int HexChar, + UINT8 *ReturnValue) +{ + UINT8 Value; + + + /* Digit must be ascii [0-9a-fA-F] */ + + if (!ACPI_IS_XDIGIT (HexChar)) + { + return (AE_BAD_HEX_CONSTANT); + } + + if (HexChar <= 0x39) + { + Value = (UINT8) (HexChar - 0x30); + } + else + { + Value = (UINT8) (ACPI_TOUPPER (HexChar) - 0x37); + } + + *ReturnValue = Value; + return (AE_OK); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiDbHexByteToBinary + * + * PARAMETERS: HexByte - Double hex digit (0x00 - 0xFF) in format: + * HiByte then LoByte. 
+ * ReturnValue - Where the converted value is returned + * + * RETURN: Status + * + * DESCRIPTION: Convert two hex characters to an 8 bit number (0 - 255). + * + ******************************************************************************/ + +static ACPI_STATUS +AcpiDbHexByteToBinary ( + char *HexByte, + UINT8 *ReturnValue) +{ + UINT8 Local0; + UINT8 Local1; + ACPI_STATUS Status; + + + /* High byte */ + + Status = AcpiDbHexCharToValue (HexByte[0], &Local0); + if (ACPI_FAILURE (Status)) + { + return (Status); + } + + /* Low byte */ + + Status = AcpiDbHexCharToValue (HexByte[1], &Local1); + if (ACPI_FAILURE (Status)) + { + return (Status); + } + + *ReturnValue = (UINT8) ((Local0 << 4) | Local1); + return (AE_OK); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiDbConvertToBuffer + * + * PARAMETERS: String - Input string to be converted + * Object - Where the buffer object is returned + * + * RETURN: Status + * + * DESCRIPTION: Convert a string to a buffer object. String is treated a list + * of buffer elements, each separated by a space or comma. 
+ * + ******************************************************************************/ + +static ACPI_STATUS +AcpiDbConvertToBuffer ( + char *String, + ACPI_OBJECT *Object) +{ + UINT32 i; + UINT32 j; + UINT32 Length; + UINT8 *Buffer; + ACPI_STATUS Status; + + + /* Generate the final buffer length */ + + for (i = 0, Length = 0; String[i];) + { + i+=2; + Length++; + + while (String[i] && + ((String[i] == ',') || (String[i] == ' '))) + { + i++; + } + } + + Buffer = ACPI_ALLOCATE (Length); + if (!Buffer) + { + return (AE_NO_MEMORY); + } + + /* Convert the command line bytes to the buffer */ + + for (i = 0, j = 0; String[i];) + { + Status = AcpiDbHexByteToBinary (&String[i], &Buffer[j]); + if (ACPI_FAILURE (Status)) + { + ACPI_FREE (Buffer); + return (Status); + } + + j++; + i+=2; + while (String[i] && + ((String[i] == ',') || (String[i] == ' '))) + { + i++; + } + } + + Object->Type = ACPI_TYPE_BUFFER; + Object->Buffer.Pointer = Buffer; + Object->Buffer.Length = Length; + return (AE_OK); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiDbConvertToPackage + * + * PARAMETERS: String - Input string to be converted + * Object - Where the package object is returned + * + * RETURN: Status + * + * DESCRIPTION: Convert a string to a package object. Handles nested packages + * via recursion with AcpiDbConvertToObject. 
+ * + ******************************************************************************/ + +static ACPI_STATUS +AcpiDbConvertToPackage ( + char *String, + ACPI_OBJECT *Object) +{ + char *This; + char *Next; + UINT32 i; + ACPI_OBJECT_TYPE Type; + ACPI_OBJECT *Elements; + ACPI_STATUS Status; + + + Elements = ACPI_ALLOCATE_ZEROED ( + DB_DEFAULT_PKG_ELEMENTS * sizeof (ACPI_OBJECT)); + + This = String; + for (i = 0; i < (DB_DEFAULT_PKG_ELEMENTS - 1); i++) + { + This = AcpiDbGetNextToken (This, &Next, &Type); + if (!This) + { + break; + } + + /* Recursive call to convert each package element */ + + Status = AcpiDbConvertToObject (Type, This, &Elements[i]); + if (ACPI_FAILURE (Status)) + { + AcpiDbDeleteObjects (i + 1, Elements); + ACPI_FREE (Elements); + return (Status); + } + + This = Next; + } + + Object->Type = ACPI_TYPE_PACKAGE; + Object->Package.Count = i; + Object->Package.Elements = Elements; + return (AE_OK); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiDbConvertToObject + * + * PARAMETERS: Type - Object type as determined by parser + * String - Input string to be converted + * Object - Where the new object is returned + * + * RETURN: Status + * + * DESCRIPTION: Convert a typed and tokenized string to an ACPI_OBJECT. Typing: + * 1) String objects were surrounded by quotes. + * 2) Buffer objects were surrounded by parentheses. + * 3) Package objects were surrounded by brackets "[]". + * 4) All standalone tokens are treated as integers. 
+ * + ******************************************************************************/ + +static ACPI_STATUS +AcpiDbConvertToObject ( + ACPI_OBJECT_TYPE Type, + char *String, + ACPI_OBJECT *Object) +{ + ACPI_STATUS Status = AE_OK; + + + switch (Type) + { + case ACPI_TYPE_STRING: + Object->Type = ACPI_TYPE_STRING; + Object->String.Pointer = String; + Object->String.Length = (UINT32) ACPI_STRLEN (String); + break; + + case ACPI_TYPE_BUFFER: + Status = AcpiDbConvertToBuffer (String, Object); + break; + + case ACPI_TYPE_PACKAGE: + Status = AcpiDbConvertToPackage (String, Object); + break; + + default: + Object->Type = ACPI_TYPE_INTEGER; + Status = AcpiUtStrtoul64 (String, 16, &Object->Integer.Value); + break; + } + + return (Status); +} + + +/******************************************************************************* + * + * FUNCTION: AcpiDbDeleteObjects + * + * PARAMETERS: Count - Count of objects in the list + * Objects - Array of ACPI_OBJECTs to be deleted + * + * RETURN: None + * + * DESCRIPTION: Delete a list of ACPI_OBJECTS. Handles packages and nested + * packages via recursion. 
+ * + ******************************************************************************/ + +static void +AcpiDbDeleteObjects ( + UINT32 Count, + ACPI_OBJECT *Objects) +{ + UINT32 i; + + + for (i = 0; i < Count; i++) + { + switch (Objects[i].Type) + { + case ACPI_TYPE_BUFFER: + ACPI_FREE (Objects[i].Buffer.Pointer); + break; + + case ACPI_TYPE_PACKAGE: + + /* Recursive call to delete package elements */ + + AcpiDbDeleteObjects (Objects[i].Package.Count, + Objects[i].Package.Elements); + + /* Free the elements array */ + + ACPI_FREE (Objects[i].Package.Elements); + break; + + default: + break; + } + } +} + /******************************************************************************* * @@ -104,8 +447,8 @@ AcpiDbExecuteMethod ( ACPI_OBJECT_LIST ParamObjects; ACPI_OBJECT Params[ACPI_METHOD_NUM_ARGS]; ACPI_HANDLE Handle; - UINT32 i; ACPI_DEVICE_INFO *ObjInfo; + UINT32 i; ACPI_FUNCTION_TRACE (DbExecuteMethod); @@ -139,25 +482,37 @@ AcpiDbExecuteMethod ( { /* Are there arguments to the method? */ + i = 0; if (Info->Args && Info->Args[0]) { - for (i = 0; Info->Args[i] && + /* Get arguments passed on the command line */ + + for (; Info->Args[i] && (i < ACPI_METHOD_NUM_ARGS) && (i < ObjInfo->ParamCount); i++) { - Params[i].Type = ACPI_TYPE_INTEGER; - Params[i].Integer.Value = ACPI_STRTOUL (Info->Args[i], NULL, 16); - } + /* Convert input string (token) to an actual ACPI_OBJECT */ - ParamObjects.Pointer = Params; - ParamObjects.Count = i; + Status = AcpiDbConvertToObject (Info->Types[i], + Info->Args[i], &Params[i]); + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, + "While parsing method arguments")); + goto Cleanup; + } + } } - else + + /* Create additional "default" parameters as needed */ + + if (i < ObjInfo->ParamCount) { - /* Setup default parameters */ + AcpiOsPrintf ("Adding %u arguments containing default values\n", + ObjInfo->ParamCount - i); - for (i = 0; i < ObjInfo->ParamCount; i++) + for (; i < ObjInfo->ParamCount; i++) { switch (i) { @@ 
-181,13 +536,11 @@ AcpiDbExecuteMethod ( break; } } - - ParamObjects.Pointer = Params; - ParamObjects.Count = ObjInfo->ParamCount; } - } - ACPI_FREE (ObjInfo); + ParamObjects.Count = ObjInfo->ParamCount; + ParamObjects.Pointer = Params; + } /* Prepare for a return object of arbitrary size */ @@ -198,7 +551,7 @@ AcpiDbExecuteMethod ( AcpiGbl_MethodExecuting = TRUE; Status = AcpiEvaluateObject (NULL, - Info->Pathname, &ParamObjects, ReturnObj); + Info->Pathname, &ParamObjects, ReturnObj); AcpiGbl_CmSingleStep = FALSE; AcpiGbl_MethodExecuting = FALSE; @@ -206,16 +559,20 @@ AcpiDbExecuteMethod ( if (ACPI_FAILURE (Status)) { ACPI_EXCEPTION ((AE_INFO, Status, - "while executing %s from debugger", Info->Pathname)); + "while executing %s from debugger", Info->Pathname)); if (Status == AE_BUFFER_OVERFLOW) { ACPI_ERROR ((AE_INFO, - "Possible overflow of internal debugger buffer (size 0x%X needed 0x%X)", + "Possible overflow of internal debugger buffer (size 0x%X needed 0x%X)", ACPI_DEBUG_BUFFER_SIZE, (UINT32) ReturnObj->Length)); } } +Cleanup: + AcpiDbDeleteObjects (ObjInfo->ParamCount, Params); + ACPI_FREE (ObjInfo); + return_ACPI_STATUS (Status); } @@ -380,6 +737,7 @@ void AcpiDbExecute ( char *Name, char **Args, + ACPI_OBJECT_TYPE *Types, UINT32 Flags) { ACPI_STATUS Status; @@ -417,6 +775,7 @@ AcpiDbExecute ( AcpiUtStrupr (NameString); AcpiGbl_DbMethodInfo.Name = NameString; AcpiGbl_DbMethodInfo.Args = Args; + AcpiGbl_DbMethodInfo.Types = Types; AcpiGbl_DbMethodInfo.Flags = Flags; ReturnObj.Pointer = NULL; @@ -529,6 +888,8 @@ AcpiDbMethodThread ( LocalInfo.Arguments[2] = LocalInfo.IndexOfThreadStr; LocalInfo.Arguments[3] = NULL; + LocalInfo.Types = LocalInfo.ArgTypes; + (void) AcpiOsSignalSemaphore (Info->InfoGate, 1); for (i = 0; i < Info->NumLoops; i++) @@ -696,6 +1057,12 @@ AcpiDbCreateExecutionThreads ( AcpiGbl_DbMethodInfo.Arguments[1] = AcpiGbl_DbMethodInfo.IdOfThreadStr; AcpiGbl_DbMethodInfo.Arguments[2] = AcpiGbl_DbMethodInfo.IndexOfThreadStr; 
AcpiGbl_DbMethodInfo.Arguments[3] = NULL; + + AcpiGbl_DbMethodInfo.Types = AcpiGbl_DbMethodInfo.ArgTypes; + AcpiGbl_DbMethodInfo.ArgTypes[0] = ACPI_TYPE_INTEGER; + AcpiGbl_DbMethodInfo.ArgTypes[1] = ACPI_TYPE_INTEGER; + AcpiGbl_DbMethodInfo.ArgTypes[2] = ACPI_TYPE_INTEGER; + AcpiDbUInt32ToHexString (NumThreads, AcpiGbl_DbMethodInfo.NumThreadsStr); AcpiDbExecuteSetup (&AcpiGbl_DbMethodInfo); diff --git a/sys/contrib/dev/acpica/debugger/dbinput.c b/sys/contrib/dev/acpica/debugger/dbinput.c index 5b8e70a..7d8bbb7 100644 --- a/sys/contrib/dev/acpica/debugger/dbinput.c +++ b/sys/contrib/dev/acpica/debugger/dbinput.c @@ -54,11 +54,6 @@ /* Local prototypes */ -static char * -AcpiDbGetNextToken ( - char *String, - char **Next); - static UINT32 AcpiDbGetLine ( char *InputBuffer); @@ -285,6 +280,10 @@ AcpiDbDisplayHelp ( AcpiOsPrintf (" Call Run to next control method invocation\n"); AcpiOsPrintf (" Debug <Namepath> [Arguments] Single Step a control method\n"); AcpiOsPrintf (" Execute <Namepath> [Arguments] Execute control method\n"); + AcpiOsPrintf (" Hex Integer Integer method argument\n"); + AcpiOsPrintf (" \"Ascii String\" String method argument\n"); + AcpiOsPrintf (" (Byte List) Buffer method argument\n"); + AcpiOsPrintf (" [Package Element List] Package method argument\n"); AcpiOsPrintf (" Go Allow method to run to completion\n"); AcpiOsPrintf (" Information Display info about the current method\n"); AcpiOsPrintf (" Into Step into (not over) a method call\n"); @@ -318,12 +317,15 @@ AcpiDbDisplayHelp ( * ******************************************************************************/ -static char * +char * AcpiDbGetNextToken ( char *String, - char **Next) + char **Next, + ACPI_OBJECT_TYPE *ReturnType) { char *Start; + UINT32 Depth; + ACPI_OBJECT_TYPE Type = ACPI_TYPE_INTEGER; /* At end of buffer? 
*/ @@ -333,7 +335,7 @@ AcpiDbGetNextToken ( return (NULL); } - /* Get rid of any spaces at the beginning */ + /* Remove any spaces at the beginning */ if (*String == ' ') { @@ -348,22 +350,88 @@ AcpiDbGetNextToken ( } } - if (*String == '"') + switch (*String) { + case '"': + /* This is a quoted string, scan until closing quote */ String++; Start = String; + Type = ACPI_TYPE_STRING; - /* Find end of token */ + /* Find end of string */ while (*String && (*String != '"')) { String++; } - } - else - { + break; + + case '(': + + /* This is the start of a buffer, scan until closing paren */ + + String++; + Start = String; + Type = ACPI_TYPE_BUFFER; + + /* Find end of buffer */ + + while (*String && (*String != ')')) + { + String++; + } + break; + + case '[': + + /* This is the start of a package, scan until closing bracket */ + + String++; + Depth = 1; + Start = String; + Type = ACPI_TYPE_PACKAGE; + + /* Find end of package (closing bracket) */ + + while (*String) + { + /* Handle String package elements */ + + if (*String == '"') + { + /* Find end of string */ + + String++; + while (*String && (*String != '"')) + { + String++; + } + if (!(*String)) + { + break; + } + } + else if (*String == '[') + { + Depth++; /* A nested package declaration */ + } + else if (*String == ']') + { + Depth--; + if (Depth == 0) /* Found final package closing bracket */ + { + break; + } + } + + String++; + } + break; + + default: + Start = String; /* Find end of token */ @@ -372,6 +440,7 @@ AcpiDbGetNextToken ( { String++; } + break; } if (!(*String)) @@ -384,6 +453,7 @@ AcpiDbGetNextToken ( *Next = String + 1; } + *ReturnType = Type; return (Start); } @@ -416,7 +486,8 @@ AcpiDbGetLine ( This = AcpiGbl_DbParsedBuf; for (i = 0; i < ACPI_DEBUGGER_MAX_ARGS; i++) { - AcpiGbl_DbArgs[i] = AcpiDbGetNextToken (This, &Next); + AcpiGbl_DbArgs[i] = AcpiDbGetNextToken (This, &Next, + &AcpiGbl_DbArgTypes[i]); if (!AcpiGbl_DbArgs[i]) { break; @@ -575,7 +646,8 @@ AcpiDbCommandDispatch ( break; case 
CMD_DEBUG: - AcpiDbExecute (AcpiGbl_DbArgs[1], &AcpiGbl_DbArgs[2], EX_SINGLE_STEP); + AcpiDbExecute (AcpiGbl_DbArgs[1], + &AcpiGbl_DbArgs[2], &AcpiGbl_DbArgTypes[2], EX_SINGLE_STEP); break; case CMD_DISASSEMBLE: @@ -601,7 +673,7 @@ AcpiDbCommandDispatch ( case CMD_EXECUTE: AcpiDbExecute (AcpiGbl_DbArgs[1], - &AcpiGbl_DbArgs[2], EX_NO_SINGLE_STEP); + &AcpiGbl_DbArgs[2], &AcpiGbl_DbArgTypes[2], EX_NO_SINGLE_STEP); break; case CMD_FIND: @@ -961,7 +1033,13 @@ AcpiDbUserCommands ( /* Get the user input line */ - (void) AcpiOsGetLine (AcpiGbl_DbLineBuf); + Status = AcpiOsGetLine (AcpiGbl_DbLineBuf, + ACPI_DB_LINE_BUFFER_SIZE, NULL); + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, "While parsing command line")); + return (Status); + } /* Check for single or multithreaded debug */ diff --git a/sys/contrib/dev/acpica/debugger/dbutils.c b/sys/contrib/dev/acpica/debugger/dbutils.c index 8385fc8..bed52e6 100644 --- a/sys/contrib/dev/acpica/debugger/dbutils.c +++ b/sys/contrib/dev/acpica/debugger/dbutils.c @@ -382,7 +382,7 @@ AcpiDbUInt32ToHexString ( UINT32 Value, char *Buffer) { - UINT8 i; + int i; if (Value == 0) @@ -391,10 +391,9 @@ AcpiDbUInt32ToHexString ( return; } - ACPI_STRCPY (Buffer, "0x"); - Buffer[10] = '\0'; + Buffer[8] = '\0'; - for (i = 9; i > 1; i--) + for (i = 7; i >= 0; i--) { Buffer[i] = Converter [Value & 0x0F]; Value = Value >> 4; diff --git a/sys/contrib/dev/acpica/debugger/dbxface.c b/sys/contrib/dev/acpica/debugger/dbxface.c index 2f3708a..e8602bc 100644 --- a/sys/contrib/dev/acpica/debugger/dbxface.c +++ b/sys/contrib/dev/acpica/debugger/dbxface.c @@ -137,7 +137,13 @@ AcpiDbStartCommand ( /* Get the user input line */ - (void) AcpiOsGetLine (AcpiGbl_DbLineBuf); + Status = AcpiOsGetLine (AcpiGbl_DbLineBuf, + ACPI_DB_LINE_BUFFER_SIZE, NULL); + if (ACPI_FAILURE (Status)) + { + ACPI_EXCEPTION ((AE_INFO, Status, "While parsing command line")); + return (Status); + } } Status = AcpiDbCommandDispatch (AcpiGbl_DbLineBuf, WalkState, Op); 
diff --git a/sys/contrib/dev/acpica/include/acconfig.h b/sys/contrib/dev/acpica/include/acconfig.h index 4b05ed3..7a2107a 100644 --- a/sys/contrib/dev/acpica/include/acconfig.h +++ b/sys/contrib/dev/acpica/include/acconfig.h @@ -200,7 +200,8 @@ * *****************************************************************************/ -#define ACPI_DEBUGGER_MAX_ARGS 8 /* Must be max method args + 1 */ +#define ACPI_DEBUGGER_MAX_ARGS ACPI_METHOD_NUM_ARGS + 2 /* Max command line arguments */ +#define ACPI_DB_LINE_BUFFER_SIZE 512 #define ACPI_DEBUGGER_COMMAND_PROMPT '-' #define ACPI_DEBUGGER_EXECUTE_PROMPT '%' diff --git a/sys/contrib/dev/acpica/include/acdebug.h b/sys/contrib/dev/acpica/include/acdebug.h index 9b4822d..2c1fe73 100644 --- a/sys/contrib/dev/acpica/include/acdebug.h +++ b/sys/contrib/dev/acpica/include/acdebug.h @@ -277,6 +277,7 @@ void AcpiDbExecute ( char *Name, char **Args, + ACPI_OBJECT_TYPE *Types, UINT32 Flags); void @@ -357,6 +358,12 @@ AcpiDbUserCommands ( char Prompt, ACPI_PARSE_OBJECT *Op); +char * +AcpiDbGetNextToken ( + char *String, + char **Next, + ACPI_OBJECT_TYPE *ReturnType); + /* * dbstats - Generation and display of ACPI table statistics diff --git a/sys/contrib/dev/acpica/include/acglobal.h b/sys/contrib/dev/acpica/include/acglobal.h index fc20994..ebbae0f 100644 --- a/sys/contrib/dev/acpica/include/acglobal.h +++ b/sys/contrib/dev/acpica/include/acglobal.h @@ -425,10 +425,11 @@ ACPI_EXTERN BOOLEAN AcpiGbl_DbOpt_ini_methods; ACPI_EXTERN BOOLEAN AcpiGbl_DbOpt_NoRegionSupport; ACPI_EXTERN char *AcpiGbl_DbArgs[ACPI_DEBUGGER_MAX_ARGS]; -ACPI_EXTERN char AcpiGbl_DbLineBuf[80]; -ACPI_EXTERN char AcpiGbl_DbParsedBuf[80]; -ACPI_EXTERN char AcpiGbl_DbScopeBuf[40]; -ACPI_EXTERN char AcpiGbl_DbDebugFilename[40]; +ACPI_EXTERN ACPI_OBJECT_TYPE AcpiGbl_DbArgTypes[ACPI_DEBUGGER_MAX_ARGS]; +ACPI_EXTERN char AcpiGbl_DbLineBuf[ACPI_DB_LINE_BUFFER_SIZE]; +ACPI_EXTERN char AcpiGbl_DbParsedBuf[ACPI_DB_LINE_BUFFER_SIZE]; +ACPI_EXTERN char AcpiGbl_DbScopeBuf[80]; 
+ACPI_EXTERN char AcpiGbl_DbDebugFilename[80]; ACPI_EXTERN BOOLEAN AcpiGbl_DbOutputToFile; ACPI_EXTERN char *AcpiGbl_DbBuffer; ACPI_EXTERN char *AcpiGbl_DbFilename; diff --git a/sys/contrib/dev/acpica/include/aclocal.h b/sys/contrib/dev/acpica/include/aclocal.h index bdefb76..2eb6fc2 100644 --- a/sys/contrib/dev/acpica/include/aclocal.h +++ b/sys/contrib/dev/acpica/include/aclocal.h @@ -1198,6 +1198,7 @@ typedef struct acpi_db_method_info UINT32 NumLoops; char Pathname[128]; char **Args; + ACPI_OBJECT_TYPE *Types; /* * Arguments to be passed to method for the command @@ -1206,6 +1207,7 @@ typedef struct acpi_db_method_info * Index of current thread inside all them created. */ char InitArgs; + ACPI_OBJECT_TYPE ArgTypes[4]; char *Arguments[4]; char NumThreadsStr[11]; char IdOfThreadStr[11]; diff --git a/sys/contrib/dev/acpica/include/acpiosxf.h b/sys/contrib/dev/acpica/include/acpiosxf.h index 53605ad..43ca582 100644 --- a/sys/contrib/dev/acpica/include/acpiosxf.h +++ b/sys/contrib/dev/acpica/include/acpiosxf.h @@ -376,9 +376,11 @@ AcpiOsRedirectOutput ( /* * Debug input */ -UINT32 +ACPI_STATUS AcpiOsGetLine ( - char *Buffer); + char *Buffer, + UINT32 BufferLength, + UINT32 *BytesRead); /* diff --git a/sys/contrib/dev/acpica/include/acpixf.h b/sys/contrib/dev/acpica/include/acpixf.h index 891ccf3..1755261 100644 --- a/sys/contrib/dev/acpica/include/acpixf.h +++ b/sys/contrib/dev/acpica/include/acpixf.h @@ -48,7 +48,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110413 +#define ACPI_CA_VERSION 0x20110527 #include <contrib/dev/acpica/include/actypes.h> #include <contrib/dev/acpica/include/actbl.h> diff --git a/sys/contrib/dev/acpica/include/acpredef.h b/sys/contrib/dev/acpica/include/acpredef.h index 884f118..0496767 100644 --- a/sys/contrib/dev/acpica/include/acpredef.h +++ b/sys/contrib/dev/acpica/include/acpredef.h @@ -470,6 +470,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] = {{"_SWS", 0, ACPI_RTYPE_INTEGER}}, 
{{"_TC1", 0, ACPI_RTYPE_INTEGER}}, {{"_TC2", 0, ACPI_RTYPE_INTEGER}}, + {{"_TDL", 0, ACPI_RTYPE_INTEGER}}, {{"_TIP", 1, ACPI_RTYPE_INTEGER}}, {{"_TIV", 1, ACPI_RTYPE_INTEGER}}, {{"_TMP", 0, ACPI_RTYPE_INTEGER}}, diff --git a/sys/contrib/dev/acpica/osunixxf.c b/sys/contrib/dev/acpica/osunixxf.c index 0983bd4..f486c27 100644 --- a/sys/contrib/dev/acpica/osunixxf.c +++ b/sys/contrib/dev/acpica/osunixxf.c @@ -311,18 +311,21 @@ AcpiOsVprintf ( * * FUNCTION: AcpiOsGetLine * - * PARAMETERS: fmt - Standard printf format - * args - Argument list + * PARAMETERS: Buffer - Where to return the command line + * BufferLength - Maximum length of Buffer + * BytesRead - Where the actual byte count is returned * - * RETURN: Actual bytes read + * RETURN: Status and actual bytes read * * DESCRIPTION: Formatted input with argument list pointer * *****************************************************************************/ -UINT32 +ACPI_STATUS AcpiOsGetLine ( - char *Buffer) + char *Buffer, + UINT32 BufferLength, + UINT32 *BytesRead) { UINT8 Temp; UINT32 i; @@ -330,6 +333,11 @@ AcpiOsGetLine ( for (i = 0; ; i++) { + if (i >= BufferLength) + { + return (AE_BUFFER_OVERFLOW); + } + scanf ("%1c", &Temp); if (!Temp || Temp == '\n') { @@ -345,7 +353,11 @@ AcpiOsGetLine ( /* Return the number of bytes in the string */ - return (i); + if (BytesRead) + { + *BytesRead = i; + } + return (AE_OK); } diff --git a/sys/contrib/dev/acpica/tables/tbinstal.c b/sys/contrib/dev/acpica/tables/tbinstal.c index 8e2199f..8697fa1 100644 --- a/sys/contrib/dev/acpica/tables/tbinstal.c +++ b/sys/contrib/dev/acpica/tables/tbinstal.c @@ -144,12 +144,28 @@ AcpiTbAddTable ( } /* - * Originally, we checked the table signature for "SSDT" or "PSDT" here. - * Next, we added support for OEMx tables, signature "OEM". - * Valid tables were encountered with a null signature, so we've just - * given up on validating the signature, since it seems to be a waste - * of code. The original code was removed (05/2008). 
+ * Validate the incoming table signature. + * + * 1) Originally, we checked the table signature for "SSDT" or "PSDT". + * 2) We added support for OEMx tables, signature "OEM". + * 3) Valid tables were encountered with a null signature, so we just + * gave up on validating the signature, (05/2008). + * 4) We encountered non-AML tables such as the MADT, which caused + * interpreter errors and kernel faults. So now, we once again allow + * only "SSDT", "OEMx", and now, also a null signature. (05/2011). */ + if ((TableDesc->Pointer->Signature[0] != 0x00) && + (!ACPI_COMPARE_NAME (TableDesc->Pointer->Signature, ACPI_SIG_SSDT)) && + (ACPI_STRNCMP (TableDesc->Pointer->Signature, "OEM", 3))) + { + ACPI_ERROR ((AE_INFO, + "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx", + AcpiUtValidAcpiName (*(UINT32 *) TableDesc->Pointer->Signature) ? + TableDesc->Pointer->Signature : "????", + *(UINT32 *) TableDesc->Pointer->Signature)); + + return_ACPI_STATUS (AE_BAD_SIGNATURE); + } (void) AcpiUtAcquireMutex (ACPI_MTX_TABLES); diff --git a/sys/contrib/pf/net/pf.c b/sys/contrib/pf/net/pf.c index d65ab8d..135d734 100644 --- a/sys/contrib/pf/net/pf.c +++ b/sys/contrib/pf/net/pf.c @@ -3034,16 +3034,18 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) #ifdef INET case AF_INET: #ifdef __FreeBSD__ - INP_INFO_RLOCK(pi); /* XXX LOR */ - inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, - dport, 0, NULL); + /* + * XXXRW: would be nice if we had an mbuf here so that we + * could use in_pcblookup_mbuf(). 
+ */ + inp = in_pcblookup(pi, saddr->v4, sport, daddr->v4, + dport, INPLOOKUP_RLOCKPCB, NULL); if (inp == NULL) { - inp = in_pcblookup_hash(pi, saddr->v4, sport, - daddr->v4, dport, INPLOOKUP_WILDCARD, NULL); - if(inp == NULL) { - INP_INFO_RUNLOCK(pi); + inp = in_pcblookup(pi, saddr->v4, sport, + daddr->v4, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL); + if (inp == NULL) return (-1); - } } #else inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); @@ -3058,16 +3060,18 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) #ifdef INET6 case AF_INET6: #ifdef __FreeBSD__ - INP_INFO_RLOCK(pi); - inp = in6_pcblookup_hash(pi, &saddr->v6, sport, - &daddr->v6, dport, 0, NULL); + /* + * XXXRW: would be nice if we had an mbuf here so that we + * could use in6_pcblookup_mbuf(). + */ + inp = in6_pcblookup(pi, &saddr->v6, sport, + &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL); if (inp == NULL) { - inp = in6_pcblookup_hash(pi, &saddr->v6, sport, - &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL); - if (inp == NULL) { - INP_INFO_RUNLOCK(pi); + inp = in6_pcblookup(pi, &saddr->v6, sport, + &daddr->v6, dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, NULL); + if (inp == NULL) return (-1); - } } #else inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, @@ -3085,9 +3089,10 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) return (-1); } #ifdef __FreeBSD__ + INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; - INP_INFO_RUNLOCK(pi); + INP_RUNLOCK(inp); #else pd->lookup.uid = inp->inp_socket->so_euid; pd->lookup.gid = inp->inp_socket->so_egid; @@ -6135,9 +6140,11 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) #ifdef __FreeBSD__ /* XXX MRT not always INET */ /* stick with table 0 though */ +#ifdef INET if (af == AF_INET) in_rtalloc_ign((struct route *)&ro, 0, 0); else +#endif rtalloc_ign((struct route *)&ro, 0); #else /* ! 
__FreeBSD__ */ rtalloc_noclone((struct route *)&ro, NO_CLONING); @@ -6217,9 +6224,11 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) # ifdef RTF_PRCLONING rtalloc_ign((struct route *)&ro, (RTF_CLONING|RTF_PRCLONING)); # else /* !RTF_PRCLONING */ +#ifdef INET if (af == AF_INET) in_rtalloc_ign((struct route *)&ro, 0, 0); else +#endif rtalloc_ign((struct route *)&ro, 0); # endif #else /* ! __FreeBSD__ */ @@ -6792,11 +6801,13 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t a KMOD_UDPSTAT_INC(udps_badsum); break; } +#ifdef INET case IPPROTO_ICMP: { KMOD_ICMPSTAT_INC(icps_checksum); break; } +#endif #ifdef INET6 case IPPROTO_ICMPV6: { @@ -6892,9 +6903,11 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, case IPPROTO_UDP: KMOD_UDPSTAT_INC(udps_badsum); break; +#ifdef INET case IPPROTO_ICMP: KMOD_ICMPSTAT_INC(icps_checksum); break; +#endif #ifdef INET6 case IPPROTO_ICMPV6: KMOD_ICMP6STAT_INC(icp6s_checksum); diff --git a/sys/contrib/pf/net/pf_ioctl.c b/sys/contrib/pf/net/pf_ioctl.c index 2a66fd8..c41fcc6 100644 --- a/sys/contrib/pf/net/pf_ioctl.c +++ b/sys/contrib/pf/net/pf_ioctl.c @@ -3735,9 +3735,12 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, */ int chk; - /* We need a proper CSUM befor we start (s. OpenBSD ip_output) */ + /* We need a proper CSUM before we start (s. OpenBSD ip_output) */ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { +#ifdef INET + /* XXX-BZ copy&paste error from r126261? 
*/ in_delayed_cksum(*m); +#endif (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } chk = pf_test6(PF_OUT, ifp, m, NULL, inp); diff --git a/sys/contrib/pf/net/pf_norm.c b/sys/contrib/pf/net/pf_norm.c index 32ba9f4..cd6d65a 100644 --- a/sys/contrib/pf/net/pf_norm.c +++ b/sys/contrib/pf/net/pf_norm.c @@ -949,6 +949,7 @@ pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff, return (NULL); } +#ifdef INET int pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, struct pf_pdesc *pd) @@ -1198,6 +1199,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, return (PF_DROP); } +#endif #ifdef INET6 int diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c index 21cb7c5..f2e2c42 100644 --- a/sys/ddb/db_command.c +++ b/sys/ddb/db_command.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <sys/signalvar.h> #include <sys/systm.h> #include <sys/cons.h> +#include <sys/conf.h> #include <sys/watchdog.h> #include <sys/kernel.h> @@ -64,6 +65,7 @@ db_addr_t db_last_addr; db_addr_t db_prev; db_addr_t db_next; +static db_cmdfcn_t db_dump; static db_cmdfcn_t db_fncall; static db_cmdfcn_t db_gdb; static db_cmdfcn_t db_halt; @@ -102,6 +104,7 @@ static struct command db_cmds[] = { { "w", db_write_cmd, CS_MORE|CS_SET_DOT, 0 }, { "delete", db_delete_cmd, 0, 0 }, { "d", db_delete_cmd, 0, 0 }, + { "dump", db_dump, 0, 0 }, { "break", db_breakpoint_cmd, 0, 0 }, { "b", db_breakpoint_cmd, 0, 0 }, { "dwatch", db_deletewatch_cmd, 0, 0 }, @@ -526,6 +529,27 @@ db_error(s) kdb_reenter(); } +static void +db_dump(db_expr_t dummy, boolean_t dummy2, db_expr_t dummy3, char *dummy4) +{ + int error; + + error = doadump(FALSE); + if (error) { + db_printf("Cannot dump: "); + switch (error) { + case EBUSY: + db_printf("debugger got invoked while dumping.\n"); + break; + case ENXIO: + db_printf("no dump device specified.\n"); + break; + default: + db_printf("unknown error (error=%d).\n", error); + break; + } + } +} /* * Call 
random function: diff --git a/sys/dev/aac/aac.c b/sys/dev/aac/aac.c index 53528fd..45cfa02 100644 --- a/sys/dev/aac/aac.c +++ b/sys/dev/aac/aac.c @@ -661,6 +661,16 @@ aac_detach(device_t dev) callout_drain(&sc->aac_daemontime); + mtx_lock(&sc->aac_io_lock); + while (sc->aifflags & AAC_AIFFLAGS_RUNNING) { + sc->aifflags |= AAC_AIFFLAGS_EXIT; + wakeup(sc->aifthread); + msleep(sc->aac_dev, &sc->aac_io_lock, PUSER, "aacdch", 0); + } + mtx_unlock(&sc->aac_io_lock); + KASSERT((sc->aifflags & AAC_AIFFLAGS_RUNNING) == 0, + ("%s: invalid detach state", __func__)); + /* Remove the child containers */ while ((co = TAILQ_FIRST(&sc->aac_container_tqh)) != NULL) { error = device_delete_child(dev, co->co_disk); @@ -679,15 +689,6 @@ aac_detach(device_t dev) free(sim, M_AACBUF); } - if (sc->aifflags & AAC_AIFFLAGS_RUNNING) { - sc->aifflags |= AAC_AIFFLAGS_EXIT; - wakeup(sc->aifthread); - tsleep(sc->aac_dev, PUSER | PCATCH, "aacdch", 30 * hz); - } - - if (sc->aifflags & AAC_AIFFLAGS_RUNNING) - panic("Cannot shutdown AIF thread"); - if ((error = aac_shutdown(dev))) return(error); @@ -1020,7 +1021,7 @@ aac_command_thread(struct aac_softc *sc) /* * First see if any FIBs need to be allocated. This needs * to be called without the driver lock because contigmalloc - * will grab Giant, and would result in an LOR. + * can sleep. 
*/ if ((sc->aifflags & AAC_AIFFLAGS_ALLOCFIBS) != 0) { mtx_unlock(&sc->aac_io_lock); @@ -1372,7 +1373,9 @@ aac_alloc_command(struct aac_softc *sc, struct aac_command **cmp) if ((cm = aac_dequeue_free(sc)) == NULL) { if (sc->total_fibs < sc->aac_max_fibs) { + mtx_lock(&sc->aac_io_lock); sc->aifflags |= AAC_AIFFLAGS_ALLOCFIBS; + mtx_unlock(&sc->aac_io_lock); wakeup(sc->aifthread); } return (EBUSY); diff --git a/sys/dev/aac/aacvar.h b/sys/dev/aac/aacvar.h index 61f3c5b..d994acf 100644 --- a/sys/dev/aac/aacvar.h +++ b/sys/dev/aac/aacvar.h @@ -386,13 +386,12 @@ struct aac_softc struct proc *aifthread; int aifflags; #define AAC_AIFFLAGS_RUNNING (1 << 0) -#define AAC_AIFFLAGS_AIF (1 << 1) +#define AAC_AIFFLAGS_UNUSED0 (1 << 1) #define AAC_AIFFLAGS_EXIT (1 << 2) #define AAC_AIFFLAGS_EXITED (1 << 3) -#define AAC_AIFFLAGS_PRINTF (1 << 4) +#define AAC_AIFFLAGS_UNUSED1 (1 << 4) #define AAC_AIFFLAGS_ALLOCFIBS (1 << 5) -#define AAC_AIFFLAGS_PENDING (AAC_AIFFLAGS_AIF | AAC_AIFFLAGS_PRINTF | \ - AAC_AIFFLAGS_ALLOCFIBS) +#define AAC_AIFFLAGS_PENDING AAC_AIFFLAGS_ALLOCFIBS u_int32_t flags; #define AAC_FLAGS_PERC2QC (1 << 0) #define AAC_FLAGS_ENABLE_CAM (1 << 1) /* No SCSI passthrough */ diff --git a/sys/dev/acpica/Osd/OsdDebug.c b/sys/dev/acpica/Osd/OsdDebug.c index 8425357..0547f75 100644 --- a/sys/dev/acpica/Osd/OsdDebug.c +++ b/sys/dev/acpica/Osd/OsdDebug.c @@ -47,20 +47,23 @@ __FBSDID("$FreeBSD$"); #include <dev/acpica/acpivar.h> -UINT32 -AcpiOsGetLine(char *Buffer) +ACPI_STATUS +AcpiOsGetLine(char *Buffer, UINT32 BufferLength, UINT32 *BytesRead) { #ifdef DDB - char *cp; + char *cp; - db_readline(Buffer, 80); - for (cp = Buffer; *cp != 0; cp++) - if (*cp == '\n') - *cp = 0; - return (AE_OK); + cp = Buffer; + if (db_readline(Buffer, BufferLength) > 0) + while (*cp != '\0' && *cp != '\n' && *cp != '\r') + cp++; + *cp = '\0'; + if (BytesRead != NULL) + *BytesRead = cp - Buffer; + return (AE_OK); #else - printf("AcpiOsGetLine called but no input support"); - return (AE_NOT_EXIST); + 
printf("AcpiOsGetLine called but no input support"); + return (AE_NOT_EXIST); #endif /* DDB */ } diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index f6d6094..84f65bd 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -123,6 +123,8 @@ static int acpi_set_resource(device_t dev, device_t child, int type, static struct resource *acpi_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags); +static int acpi_adjust_resource(device_t bus, device_t child, int type, + struct resource *r, u_long start, u_long end); static int acpi_release_resource(device_t bus, device_t child, int type, int rid, struct resource *r); static void acpi_delete_resource(device_t bus, device_t child, int type, @@ -149,6 +151,7 @@ static ACPI_STATUS acpi_sleep_disable(struct acpi_softc *sc); static ACPI_STATUS acpi_EnterSleepState(struct acpi_softc *sc, int state); static void acpi_shutdown_final(void *arg, int howto); static void acpi_enable_fixed_events(struct acpi_softc *sc); +static BOOLEAN acpi_has_hid(ACPI_HANDLE handle); static int acpi_wake_sleep_prep(ACPI_HANDLE handle, int sstate); static int acpi_wake_run_prep(ACPI_HANDLE handle, int sstate); static int acpi_wake_prep_walk(int sstate); @@ -193,6 +196,7 @@ static device_method_t acpi_methods[] = { DEVMETHOD(bus_set_resource, acpi_set_resource), DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource), DEVMETHOD(bus_alloc_resource, acpi_alloc_resource), + DEVMETHOD(bus_adjust_resource, acpi_adjust_resource), DEVMETHOD(bus_release_resource, acpi_release_resource), DEVMETHOD(bus_delete_resource, acpi_delete_resource), DEVMETHOD(bus_child_pnpinfo_str, acpi_child_pnpinfo_str_method), @@ -1325,29 +1329,40 @@ acpi_alloc_resource(device_t bus, device_t child, int type, int *rid, } static int -acpi_release_resource(device_t bus, device_t child, int type, int rid, - struct resource *r) +acpi_is_resource_managed(int type, struct resource *r) { - struct rman 
*rm; - int ret; /* We only handle memory and IO resources through rman. */ switch (type) { case SYS_RES_IOPORT: - rm = &acpi_rman_io; - break; + return (rman_is_region_manager(r, &acpi_rman_io)); case SYS_RES_MEMORY: - rm = &acpi_rman_mem; - break; - default: - rm = NULL; + return (rman_is_region_manager(r, &acpi_rman_mem)); } + return (0); +} + +static int +acpi_adjust_resource(device_t bus, device_t child, int type, struct resource *r, + u_long start, u_long end) +{ + + if (acpi_is_resource_managed(type, r)) + return (rman_adjust_resource(r, start, end)); + return (bus_generic_adjust_resource(bus, child, type, r, start, end)); +} + +static int +acpi_release_resource(device_t bus, device_t child, int type, int rid, + struct resource *r) +{ + int ret; /* * If this resource belongs to one of our internal managers, * deactivate it and release it to the local pool. */ - if (rm != NULL && rman_is_region_manager(r, rm)) { + if (acpi_is_resource_managed(type, r)) { if (rman_get_flags(r) & RF_ACTIVE) { ret = bus_deactivate_resource(child, type, rid, r); if (ret != 0) @@ -1841,6 +1856,13 @@ acpi_probe_child(ACPI_HANDLE handle, UINT32 level, void *context, void **status) break; if (acpi_parse_prw(handle, &prw) == 0) AcpiSetupGpeForWake(handle, prw.gpe_handle, prw.gpe_bit); + + /* + * Ignore devices that do not have a _HID or _CID. They should + * be discovered by other buses (e.g. the PCI bus driver). + */ + if (!acpi_has_hid(handle)) + break; /* FALLTHROUGH */ case ACPI_TYPE_PROCESSOR: case ACPI_TYPE_THERMAL: @@ -2029,6 +2051,30 @@ acpi_BatteryIsPresent(device_t dev) } /* + * Returns true if a device has at least one valid device ID. 
+ */ +static BOOLEAN +acpi_has_hid(ACPI_HANDLE h) +{ + ACPI_DEVICE_INFO *devinfo; + BOOLEAN ret; + + if (h == NULL || + ACPI_FAILURE(AcpiGetObjectInfo(h, &devinfo))) + return (FALSE); + + ret = FALSE; + if ((devinfo->Valid & ACPI_VALID_HID) != 0) + ret = TRUE; + else if ((devinfo->Valid & ACPI_VALID_CID) != 0) + if (devinfo->CompatibleIdList.Count > 0) + ret = TRUE; + + AcpiOsFree(devinfo); + return (ret); +} + +/* * Match a HID string against a handle */ BOOLEAN diff --git a/sys/dev/acpica/acpi_pci.c b/sys/dev/acpica/acpi_pci.c index 76cbacb..44db74a 100644 --- a/sys/dev/acpica/acpi_pci.c +++ b/sys/dev/acpica/acpi_pci.c @@ -209,38 +209,24 @@ acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child) device_t child; /* - * Lookup and remove the unused device that acpi0 creates when it walks - * the namespace creating devices. + * Occasionally a PCI device may show up as an ACPI device + * with a _HID. (For example, the TabletPC TC1000 has a + * second PCI-ISA bridge that has a _HID for an + * acpi_sysresource device.) In that case, leave ACPI-CA's + * device data pointing at the ACPI-enumerated device. */ child = acpi_get_device(handle); if (child != NULL) { - if (device_is_alive(child)) { - /* - * The TabletPC TC1000 has a second PCI-ISA bridge - * that has a _HID for an acpi_sysresource device. - * In that case, leave ACPI-CA's device data pointing - * at the ACPI-enumerated device. - */ - device_printf(child, - "Conflicts with PCI device %d:%d:%d\n", - pci_get_bus(pci_child), pci_get_slot(pci_child), - pci_get_function(pci_child)); - return; - } KASSERT(device_get_parent(child) == devclass_get_device(devclass_find("acpi"), 0), ("%s: child (%s)'s parent is not acpi0", __func__, acpi_name(handle))); - device_delete_child(device_get_parent(child), child); + return; } /* * Update ACPI-CA to use the PCI enumerated device_t for this handle. 
*/ - status = AcpiDetachData(handle, acpi_fake_objhandler); - if (ACPI_FAILURE(status)) - printf("WARNING: Unable to detach object data from %s - %s\n", - acpi_name(handle), AcpiFormatException(status)); status = AcpiAttachData(handle, acpi_fake_objhandler, pci_child); if (ACPI_FAILURE(status)) printf("WARNING: Unable to attach object data to %s - %s\n", diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index 2a06492..136011c 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -119,6 +119,7 @@ static struct { #define AHCI_Q_NOBSYRES 256 #define AHCI_Q_NOAA 512 #define AHCI_Q_NOCOUNT 1024 +#define AHCI_Q_ALTSIG 2048 } ahci_ids[] = { {0x43801002, 0x00, "ATI IXP600", 0}, {0x43901002, 0x00, "ATI IXP700", 0}, @@ -192,8 +193,9 @@ static struct { {0x614511ab, 0x00, "Marvell 88SX6145", AHCI_Q_NOFORCE | AHCI_Q_4CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x91201b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_NOBSYRES}, - {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_NOBSYRES}, + {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_NOBSYRES|AHCI_Q_ALTSIG}, {0x91231b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_SATA2|AHCI_Q_NOBSYRES}, + {0x91721b4b, 0x00, "Marvell 88SE9172", AHCI_Q_NOBSYRES}, {0x91821b4b, 0x00, "Marvell 88SE9182", AHCI_Q_NOBSYRES}, {0x06201103, 0x00, "HighPoint RocketRAID 620", AHCI_Q_NOBSYRES}, {0x06201b4b, 0x00, "HighPoint RocketRAID 620", AHCI_Q_NOBSYRES}, @@ -398,6 +400,13 @@ ahci_attach(device_t dev) if (ctlr->caps & AHCI_CAP_EMS) ctlr->capsem = ATA_INL(ctlr->r_mem, AHCI_EM_CTL); ctlr->ichannels = ATA_INL(ctlr->r_mem, AHCI_PI); + + /* Identify and set separate quirks for HBA and RAID f/w Marvells. 
*/ + if ((ctlr->quirks & AHCI_Q_NOBSYRES) && + (ctlr->quirks & AHCI_Q_ALTSIG) && + (ctlr->caps & AHCI_CAP_SPM) == 0) + ctlr->quirks &= ~AHCI_Q_NOBSYRES; + if (ctlr->quirks & AHCI_Q_1CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->ichannels &= 0x01; @@ -1764,7 +1773,7 @@ ahci_execute_transaction(struct ahci_slot *slot) struct ahci_cmd_list *clp; union ccb *ccb = slot->ccb; int port = ccb->ccb_h.target_id & 0x0f; - int fis_size, i; + int fis_size, i, softreset; uint8_t *fis = ch->dma.rfis + 0x40; uint8_t val; @@ -1791,17 +1800,20 @@ ahci_execute_transaction(struct ahci_slot *slot) if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL)) { if (ccb->ataio.cmd.control & ATA_A_RESET) { + softreset = 1; /* Kick controller into sane state */ ahci_stop(dev); ahci_clo(dev); ahci_start(dev, 0); clp->cmd_flags |= AHCI_CMD_RESET | AHCI_CMD_CLR_BUSY; } else { + softreset = 2; /* Prepare FIS receive area for check. */ for (i = 0; i < 20; i++) fis[i] = 0xff; } - } + } else + softreset = 0; clp->bytecount = 0; clp->cmd_table_phys = htole64(ch->dma.work_bus + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); @@ -1825,8 +1837,7 @@ ahci_execute_transaction(struct ahci_slot *slot) ATA_OUTL(ch->r_mem, AHCI_P_CI, (1 << slot->slot)); /* Device reset commands doesn't interrupt. Poll them. 
*/ if (ccb->ccb_h.func_code == XPT_ATA_IO && - (ccb->ataio.cmd.command == ATA_DEVICE_RESET || - (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL))) { + (ccb->ataio.cmd.command == ATA_DEVICE_RESET || softreset)) { int count, timeout = ccb->ccb_h.timeout * 100; enum ahci_err_type et = AHCI_ERR_NONE; @@ -1834,10 +1845,13 @@ ahci_execute_transaction(struct ahci_slot *slot) DELAY(10); if (!(ATA_INL(ch->r_mem, AHCI_P_CI) & (1 << slot->slot))) break; - if (ATA_INL(ch->r_mem, AHCI_P_TFD) & ATA_S_ERROR) { + if ((ATA_INL(ch->r_mem, AHCI_P_TFD) & ATA_S_ERROR) && + softreset != 1) { +#if 0 device_printf(ch->dev, "Poll error on slot %d, TFD: %04x\n", slot->slot, ATA_INL(ch->r_mem, AHCI_P_TFD)); +#endif et = AHCI_ERR_TFE; break; } @@ -1849,9 +1863,20 @@ ahci_execute_transaction(struct ahci_slot *slot) break; } } + + /* Marvell controllers do not wait for readiness. */ + if ((ch->quirks & AHCI_Q_NOBSYRES) && softreset == 2 && + et == AHCI_ERR_NONE) { + while ((val = fis[2]) & ATA_S_BUSY) { + DELAY(10); + if (count++ >= timeout) + break; + } + } + if (timeout && (count >= timeout)) { - device_printf(ch->dev, - "Poll timeout on slot %d\n", slot->slot); + device_printf(dev, "Poll timeout on slot %d port %d\n", + slot->slot, port); device_printf(dev, "is %08x cs %08x ss %08x " "rs %08x tfd %02x serr %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), @@ -1861,30 +1886,11 @@ ahci_execute_transaction(struct ahci_slot *slot) ATA_INL(ch->r_mem, AHCI_P_SERR)); et = AHCI_ERR_TIMEOUT; } - /* Marvell controllers do not wait for readyness. 
*/ - if ((ch->quirks & AHCI_Q_NOBSYRES) && - (ccb->ccb_h.func_code == XPT_ATA_IO) && - (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && - (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { - while ((val = fis[2]) & (ATA_S_BUSY | ATA_S_DRQ)) { - DELAY(10); - if (count++ >= timeout) { - device_printf(dev, "device is not " - "ready after soft-reset: " - "tfd = %08x\n", val); - et = AHCI_ERR_TIMEOUT; - break; - } - } - } - ahci_end_transaction(slot, et); + /* Kick controller into sane state and enable FBS. */ - if ((ccb->ccb_h.func_code == XPT_ATA_IO) && - (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && - (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { - ahci_stop(ch->dev); - ahci_start(ch->dev, 1); - } + if (softreset == 2) + ch->eslots |= (1 << slot->slot); + ahci_end_transaction(slot, et); return; } /* Start command execution timeout */ @@ -1962,7 +1968,8 @@ ahci_timeout(struct ahci_slot *slot) return; } - device_printf(dev, "Timeout on slot %d\n", slot->slot); + device_printf(dev, "Timeout on slot %d port %d\n", + slot->slot, slot->ccb->ccb_h.target_id & 0x0f); device_printf(dev, "is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, @@ -2013,6 +2020,7 @@ ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) union ccb *ccb = slot->ccb; struct ahci_cmd_list *clp; int lastto; + uint32_t sig; bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); @@ -2050,6 +2058,20 @@ ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) res->lba_high_exp = fis[10]; res->sector_count = fis[12]; res->sector_count_exp = fis[13]; + + /* + * Some weird controllers do not return signature in + * FIS receive area. Read it from PxSIG register. 
+ */ + if ((ch->quirks & AHCI_Q_ALTSIG) && + (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && + (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { + sig = ATA_INL(ch->r_mem, AHCI_P_SIG); + res->lba_high = sig >> 24; + res->lba_mid = sig >> 16; + res->lba_low = sig >> 8; + res->sector_count = sig; + } } else bzero(res, sizeof(*res)); if ((ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) == 0 && @@ -2169,13 +2191,6 @@ ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) ch->numhslots++; } else xpt_done(ccb); - /* Unfreeze frozen command. */ - if (ch->frozen && !ahci_check_collision(dev, ch->frozen)) { - union ccb *fccb = ch->frozen; - ch->frozen = NULL; - ahci_begin_transaction(dev, fccb); - xpt_release_simq(ch->sim, TRUE); - } /* If we have no other active commands, ... */ if (ch->rslots == 0) { /* if there was fatal error - reset port. */ @@ -2185,6 +2200,7 @@ ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) /* if we have slots in error, we can reinit port. */ if (ch->eslots != 0) { ahci_stop(dev); + ahci_clo(dev); ahci_start(dev, 1); } /* if there commands on hold, we can do READ LOG. */ @@ -2195,6 +2211,13 @@ ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) } else if ((ch->rslots & ~ch->toslots) == 0 && et != AHCI_ERR_TIMEOUT) ahci_rearm_timeout(dev); + /* Unfreeze frozen command. */ + if (ch->frozen && !ahci_check_collision(dev, ch->frozen)) { + union ccb *fccb = ch->frozen; + ch->frozen = NULL; + ahci_begin_transaction(dev, fccb); + xpt_release_simq(ch->sim, TRUE); + } /* Start PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3 && (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { diff --git a/sys/dev/amdsbwd/amdsbwd.c b/sys/dev/amdsbwd/amdsbwd.c index f5f0f87..4256381 100644 --- a/sys/dev/amdsbwd/amdsbwd.c +++ b/sys/dev/amdsbwd/amdsbwd.c @@ -25,8 +25,8 @@ */ /* - * This is a driver for watchdog timer present in AMD SB600/SB7xx - * south bridges and other watchdog timers advertised via WDRT ACPI table. 
+ * This is a driver for watchdog timer present in AMD SB600/SB7xx/SB8xx + * southbridges. * Please see the following specifications for the descriptions of the * registers and flags: * - AMD SB600 Register Reference Guide, Public Version, Rev. 3.03 (SB600 RRG) @@ -35,11 +35,13 @@ * http://developer.amd.com/assets/43009_sb7xx_rrg_pub_1.00.pdf * - AMD SB700/710/750 Register Programming Requirements (RPR) * http://developer.amd.com/assets/42413_sb7xx_rpr_pub_1.00.pdf + * - AMD SB800-Series Southbridges Register Reference Guide (RRG) + * http://support.amd.com/us/Embedded_TechDocs/45482.pdf * Please see the following for Watchdog Resource Table specification: * - Watchdog Timer Hardware Requirements for Windows Server 2003 (WDRT) * http://www.microsoft.com/whdc/system/sysinternals/watchdog.mspx - * AMD SB600/SB7xx watchdog hardware seems to conform to the above, - * but my system doesn't provide the table. + * AMD SB600/SB7xx/SB8xx watchdog hardware seems to conform to the above + * specifications, but the table hasn't been spotted in the wild yet. */ #include <sys/cdefs.h> @@ -59,15 +61,15 @@ __FBSDID("$FreeBSD$"); #include <dev/pci/pcivar.h> #include <isa/isavar.h> -/* RRG 2.3.3.1.1, page 161. */ +/* SB7xx RRG 2.3.3.1.1. */ #define AMDSB_PMIO_INDEX 0xcd6 #define AMDSB_PMIO_DATA (PMIO_INDEX + 1) #define AMDSB_PMIO_WIDTH 2 -/* RRG 2.3.3.2, page 181. */ +/* SB7xx RRG 2.3.3.2. */ #define AMDSB_PM_RESET_STATUS0 0x44 #define AMDSB_PM_RESET_STATUS1 0x45 #define AMDSB_WD_RST_STS 0x02 -/* RRG 2.3.3.2, page 188; RPR 2.36, page 30. */ +/* SB7xx RRG 2.3.3.2, RPR 2.36. */ #define AMDSB_PM_WDT_CTRL 0x69 #define AMDSB_WDT_DISABLE 0x01 #define AMDSB_WDT_RES_MASK (0x02 | 0x04) @@ -77,7 +79,18 @@ __FBSDID("$FreeBSD$"); #define AMDSB_WDT_RES_1S 0x06 #define AMDSB_PM_WDT_BASE_LSB 0x6c #define AMDSB_PM_WDT_BASE_MSB 0x6f -/* RRG 2.3.4, page 223, WDRT. */ +/* SB8xx RRG 2.3.3. 
*/ +#define AMDSB8_PM_WDT_EN 0x48 +#define AMDSB8_WDT_DEC_EN 0x01 +#define AMDSB8_WDT_DISABLE 0x02 +#define AMDSB8_PM_WDT_CTRL 0x4c +#define AMDSB8_WDT_32KHZ 0x00 +#define AMDSB8_WDT_1HZ 0x03 +#define AMDSB8_WDT_RES_MASK 0x03 +#define AMDSB8_PM_RESET_STATUS0 0xC0 +#define AMDSB8_PM_RESET_STATUS1 0xC1 +#define AMDSB8_WD_RST_STS 0x20 +/* SB7xx RRG 2.3.4, WDRT. */ #define AMDSB_WD_CTRL 0x00 #define AMDSB_WD_RUN 0x01 #define AMDSB_WD_FIRED 0x02 @@ -90,8 +103,9 @@ __FBSDID("$FreeBSD$"); #define AMDSB_WDIO_REG_WIDTH 4 /* WDRT */ #define MAXCOUNT_MIN_VALUE 511 -/* RRG 2.3.1.1, page 122; SB600 RRG 2.3.1.1, page 97. */ -#define AMDSB7xx_SMBUS_DEVID 0x43851002 +/* SB7xx RRG 2.3.1.1, SB600 RRG 2.3.1.1, SB8xx RRG 2.3.1. */ +#define AMDSB_SMBUS_DEVID 0x43851002 +#define AMDSB8_SMBUS_REVID 0x40 #define amdsbwd_verbose_printf(dev, ...) \ do { \ @@ -265,7 +279,7 @@ amdsbwd_identify(driver_t *driver, device_t parent) smb_dev = pci_find_bsf(0, 20, 0); if (smb_dev == NULL) return; - if (pci_get_devid(smb_dev) != AMDSB7xx_SMBUS_DEVID) + if (pci_get_devid(smb_dev) != AMDSB_SMBUS_DEVID) return; child = BUS_ADD_CHILD(parent, ISA_ORDER_SPECULATIVE, "amdsbwd", -1); @@ -273,15 +287,102 @@ amdsbwd_identify(driver_t *driver, device_t parent) device_printf(parent, "add amdsbwd child failed\n"); } + +static void +amdsbwd_probe_sb7xx(device_t dev, struct resource *pmres, uint32_t *addr) +{ + uint32_t val; + int i; + + /* Report cause of previous reset for user's convenience. */ + val = pmio_read(pmres, AMDSB_PM_RESET_STATUS0); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val); + val = pmio_read(pmres, AMDSB_PM_RESET_STATUS1); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val); + if ((val & AMDSB_WD_RST_STS) != 0) + device_printf(dev, "Previous Reset was caused by Watchdog\n"); + + /* Find base address of memory mapped WDT registers. 
*/ + for (*addr = 0, i = 0; i < 4; i++) { + *addr <<= 8; + *addr |= pmio_read(pmres, AMDSB_PM_WDT_BASE_MSB - i); + } + /* Set watchdog timer tick to 10ms. */ + val = pmio_read(pmres, AMDSB_PM_WDT_CTRL); + val &= ~AMDSB_WDT_RES_MASK; + val |= AMDSB_WDT_RES_10MS; + pmio_write(pmres, AMDSB_PM_WDT_CTRL, val); + + /* Enable watchdog device (in stopped state). */ + val = pmio_read(pmres, AMDSB_PM_WDT_CTRL); + val &= ~AMDSB_WDT_DISABLE; + pmio_write(pmres, AMDSB_PM_WDT_CTRL, val); + + /* + * XXX TODO: Ensure that watchdog decode is enabled + * (register 0x41, bit 3). + */ + device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer"); +} + +static void +amdsbwd_probe_sb8xx(device_t dev, struct resource *pmres, uint32_t *addr) +{ + uint32_t val; + int i; + + /* Report cause of previous reset for user's convenience. */ + val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS0); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val); + val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS1); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val); + if ((val & AMDSB8_WD_RST_STS) != 0) + device_printf(dev, "Previous Reset was caused by Watchdog\n"); + + /* Find base address of memory mapped WDT registers. */ + for (*addr = 0, i = 0; i < 4; i++) { + *addr <<= 8; + *addr |= pmio_read(pmres, AMDSB8_PM_WDT_EN + 3 - i); + } + *addr &= ~0x07u; + + /* Set watchdog timer tick to 1s. */ + val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL); + val &= ~AMDSB8_WDT_RES_MASK; + val |= AMDSB8_WDT_1HZ; + pmio_write(pmres, AMDSB8_PM_WDT_CTRL, val); +#ifdef AMDSBWD_DEBUG + val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL); + amdsbwd_verbose_printf(dev, "AMDSB8_PM_WDT_CTRL value = %#02x\n", val); +#endif + + /* + * Enable watchdog device (in stopped state) + * and decoding of its address. 
+ */ + val = pmio_read(pmres, AMDSB8_PM_WDT_EN); + val &= ~AMDSB8_WDT_DISABLE; + val |= AMDSB8_WDT_DEC_EN; + pmio_write(pmres, AMDSB8_PM_WDT_EN, val); +#ifdef AMDSBWD_DEBUG + val = pmio_read(pmres, AMDSB8_PM_WDT_EN); + device_printf(dev, "AMDSB8_PM_WDT_EN value = %#02x\n", val); +#endif + device_set_desc(dev, "AMD SB8xx Watchdog Timer"); +} + static int amdsbwd_probe(device_t dev) { struct resource *res; + device_t smb_dev; uint32_t addr; - uint32_t val; int rid; int rc; - int i; /* Do not claim some ISA PnP device by accident. */ if (isa_get_logicalid(dev) != 0) @@ -301,21 +402,16 @@ amdsbwd_probe(device_t dev) return (ENXIO); } - /* Report cause of previous reset for user's convenience. */ - val = pmio_read(res, AMDSB_PM_RESET_STATUS0); - if (val != 0) - amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val); - val = pmio_read(res, AMDSB_PM_RESET_STATUS1); - if (val != 0) - amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val); - if ((val & AMDSB_WD_RST_STS) != 0) - device_printf(dev, "Previous Reset was caused by Watchdog\n"); + smb_dev = pci_find_bsf(0, 20, 0); + KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n")); + if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID) + amdsbwd_probe_sb7xx(dev, res, &addr); + else + amdsbwd_probe_sb8xx(dev, res, &addr); + + bus_release_resource(dev, SYS_RES_IOPORT, rid, res); + bus_delete_resource(dev, SYS_RES_IOPORT, rid); - /* Find base address of memory mapped WDT registers. */ - for (addr = 0, i = 0; i < 4; i++) { - addr <<= 8; - addr |= pmio_read(res, AMDSB_PM_WDT_BASE_MSB - i); - } amdsbwd_verbose_printf(dev, "memory base address = %#010x\n", addr); rc = bus_set_resource(dev, SYS_RES_MEMORY, 0, addr + AMDSB_WD_CTRL, AMDSB_WDIO_REG_WIDTH); @@ -330,36 +426,25 @@ amdsbwd_probe(device_t dev) return (ENXIO); } - /* Set watchdog timer tick to 10ms. 
*/ - val = pmio_read(res, AMDSB_PM_WDT_CTRL); - val &= ~AMDSB_WDT_RES_MASK; - val |= AMDSB_WDT_RES_10MS; - pmio_write(res, AMDSB_PM_WDT_CTRL, val); - - /* Enable watchdog device (in stopped state). */ - val = pmio_read(res, AMDSB_PM_WDT_CTRL); - val &= ~AMDSB_WDT_DISABLE; - pmio_write(res, AMDSB_PM_WDT_CTRL, val); - - /* - * XXX TODO: Ensure that watchdog decode is enabled - * (register 0x41, bit 3). - */ - bus_release_resource(dev, SYS_RES_IOPORT, rid, res); - bus_delete_resource(dev, SYS_RES_IOPORT, rid); - - device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer"); return (0); } static int amdsbwd_attach_sb(device_t dev, struct amdsbwd_softc *sc) { + device_t smb_dev; + sc->max_ticks = UINT16_MAX; - sc->ms_per_tick = 10; sc->rid_ctrl = 0; sc->rid_count = 1; + smb_dev = pci_find_bsf(0, 20, 0); + KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n")); + if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID) + sc->ms_per_tick = 10; + else + sc->ms_per_tick = 1000; + sc->res_ctrl = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->rid_ctrl, RF_ACTIVE); if (sc->res_ctrl == NULL) { @@ -388,6 +473,11 @@ amdsbwd_attach(device_t dev) if (rc != 0) goto fail; +#ifdef AMDSBWD_DEBUG + device_printf(dev, "wd ctrl = %#04x\n", wdctrl_read(sc)); + device_printf(dev, "wd count = %#04x\n", wdcount_read(sc)); +#endif + /* Setup initial state of Watchdog Control. */ wdctrl_write(sc, AMDSB_WD_FIRED); diff --git a/sys/dev/ata/ata-sata.c b/sys/dev/ata/ata-sata.c index e95fc8f..1ddf238 100644 --- a/sys/dev/ata/ata-sata.c +++ b/sys/dev/ata/ata-sata.c @@ -54,6 +54,11 @@ ata_sata_phy_check_events(device_t dev, int port) u_int32_t error, status; ata_sata_scr_read(ch, port, ATA_SERROR, &error); + + /* Check that SError value is sane. */ + if (error == 0xffffffff) + return; + /* Clear set error bits/interrupt. 
*/ if (error) ata_sata_scr_write(ch, port, ATA_SERROR, error); @@ -163,18 +168,18 @@ ata_sata_phy_reset(device_t dev, int port, int quick) if (bootverbose) { if (port < 0) { - device_printf(dev, "hardware reset ...\n"); + device_printf(dev, "hard reset ...\n"); } else { - device_printf(dev, "p%d: hardware reset ...\n", port); + device_printf(dev, "p%d: hard reset ...\n", port); } } for (retry = 0; retry < 10; retry++) { for (loop = 0; loop < 10; loop++) { if (ata_sata_scr_write(ch, port, ATA_SCONTROL, ATA_SC_DET_RESET)) - return (0); + goto fail; ata_udelay(100); if (ata_sata_scr_read(ch, port, ATA_SCONTROL, &val)) - return (0); + goto fail; if ((val & ATA_SC_DET_MASK) == ATA_SC_DET_RESET) break; } @@ -183,15 +188,26 @@ ata_sata_phy_reset(device_t dev, int port, int quick) if (ata_sata_scr_write(ch, port, ATA_SCONTROL, ATA_SC_DET_IDLE | ((ch->pm_level > 0) ? 0 : ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER))) - return (0); + goto fail; ata_udelay(100); if (ata_sata_scr_read(ch, port, ATA_SCONTROL, &val)) - return (0); + goto fail; if ((val & ATA_SC_DET_MASK) == 0) return ata_sata_connect(ch, port, 0); } } - return 0; +fail: + /* Clear SATA error register. */ + ata_sata_scr_write(ch, port, ATA_SERROR, 0xffffffff); + + if (bootverbose) { + if (port < 0) { + device_printf(dev, "hard reset failed\n"); + } else { + device_printf(dev, "p%d: hard reset failed\n", port); + } + } + return (0); } int diff --git a/sys/dev/ata/chipsets/ata-intel.c b/sys/dev/ata/chipsets/ata-intel.c index 3b514db..e128051 100644 --- a/sys/dev/ata/chipsets/ata-intel.c +++ b/sys/dev/ata/chipsets/ata-intel.c @@ -288,7 +288,9 @@ ata_intel_chipinit(device_t dev) ATA_OUTL(ctlr->r_res2, 0x0C, ATA_INL(ctlr->r_res2, 0x0C) | 0xf); } - } else { + /* Skip BAR(5) on ICH8M Apples, system locks up on access. 
*/ + } else if (ctlr->chip->chipid != ATA_I82801HBM_S1 || + pci_get_subvendor(dev) != 0x106b) { ctlr->r_type2 = SYS_RES_IOPORT; ctlr->r_rid2 = PCIR_BAR(5); ctlr->r_res2 = bus_alloc_resource_any(dev, ctlr->r_type2, diff --git a/sys/dev/ath/ath_dfs/null/dfs_null.c b/sys/dev/ath/ath_dfs/null/dfs_null.c new file mode 100644 index 0000000..2f050a4 --- /dev/null +++ b/sys/dev/ath/ath_dfs/null/dfs_null.c @@ -0,0 +1,160 @@ +/*- + * Copyright (c) 2011 Adrian Chadd, Xenion Pty Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. 
+ * + * $FreeBSD$ + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * This implements an empty DFS module. + */ +#include "opt_inet.h" +#include "opt_wlan.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/errno.h> + +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus.h> + +#include <sys/socket.h> + +#include <net/if.h> +#include <net/if_media.h> +#include <net/if_arp.h> +#include <net/ethernet.h> /* XXX for ether_sprintf */ + +#include <net80211/ieee80211_var.h> + +#include <net/bpf.h> + +#ifdef INET +#include <netinet/in.h> +#include <netinet/if_ether.h> +#endif + +#include <dev/ath/if_athvar.h> +#include <dev/ath/if_athdfs.h> + +#include <dev/ath/ath_hal/ah_desc.h> + +/* + * Methods which are required + */ + +/* + * Attach DFS to the given interface + */ +int +ath_dfs_attach(struct ath_softc *sc) +{ + return 1; +} + +/* + * Detach DFS from the given interface + */ +int +ath_dfs_detach(struct ath_softc *sc) +{ + return 1; +} + +/* + * Enable radar check + */ +void +ath_dfs_radar_enable(struct ath_softc *sc, struct ieee80211_channel *chan) +{ + /* Check if the current channel is radar-enabled */ + if (! IEEE80211_IS_CHAN_DFS(chan)) + return; +} + +/* + * Process DFS related PHY errors + */ +void +ath_dfs_process_phy_err(struct ath_softc *sc, const char *buf, + uint64_t tsf, struct ath_rx_status *rxstat) +{ + +} + +/* + * Process the radar events and determine whether a DFS event has occured. + * + * This is designed to run outside of the RX processing path. + * The RX path will call ath_dfs_tasklet_needed() to see whether + * the task/callback running this routine needs to be called. + */ +int +ath_dfs_process_radar_event(struct ath_softc *sc, + struct ieee80211_channel *chan) +{ + return 0; +} + +/* + * Determine whether the the DFS check task needs to be queued. 
+ * + * This is called in the RX task when the current batch of packets + * have been received. It will return whether there are any radar + * events for ath_dfs_process_radar_event() to handle. + */ +int +ath_dfs_tasklet_needed(struct ath_softc *sc, struct ieee80211_channel *chan) +{ + return 0; +} + +/* + * Handle ioctl requests from the diagnostic interface + */ +int +ath_ioctl_phyerr(struct ath_softc *sc, struct ath_diag *ad) +{ + return 1; +} + +/* + * Get the current DFS thresholds from the HAL + */ +int +ath_dfs_get_thresholds(struct ath_softc *sc, HAL_PHYERR_PARAM *param) +{ + ath_hal_getdfsthresh(sc->sc_ah, param); + return 1; +} diff --git a/sys/dev/ath/ath_hal/ah.c b/sys/dev/ath/ath_hal/ah.c index 13a59f1..647f322 100644 --- a/sys/dev/ath/ath_hal/ah.c +++ b/sys/dev/ath/ath_hal/ah.c @@ -117,6 +117,8 @@ ath_hal_mac_name(struct ath_hal *ah) return "9280"; case AR_XSREV_VERSION_KITE: return "9285"; + case AR_XSREV_VERSION_KIWI: + return "9287"; } return "????"; } @@ -608,6 +610,10 @@ ath_hal_getcapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, return HAL_OK; case HAL_CAP_4ADDR_AGGR: return pCap->hal4AddrAggrSupport ? HAL_OK : HAL_ENOTSUPP; + case HAL_CAP_EXT_CHAN_DFS: + return pCap->halExtChanDfsSupport ? HAL_OK : HAL_ENOTSUPP; + case HAL_CAP_COMBINED_RADAR_RSSI: + return pCap->halUseCombinedRadarRssi ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_AUTO_SLEEP: return pCap->halAutoSleepSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_MBSSID_AGGR_SUPPORT: @@ -624,6 +630,8 @@ ath_hal_getcapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, case HAL_CAP_RXTSTAMP_PREC: /* rx desc tstamp precision (bits) */ *result = pCap->halTstampPrecision; return HAL_OK; + case HAL_CAP_ENHANCED_DFS_SUPPORT: + return pCap->halEnhancedDfsSupport ? 
HAL_OK : HAL_ENOTSUPP; /* FreeBSD-specific entries for now */ case HAL_CAP_RXORN_FATAL: /* HAL_INT_RXORN treated as fatal */ diff --git a/sys/dev/ath/ath_hal/ah.h b/sys/dev/ath/ath_hal/ah.h index 85790e1..7a01be3 100644 --- a/sys/dev/ath/ath_hal/ah.h +++ b/sys/dev/ath/ath_hal/ah.h @@ -121,6 +121,9 @@ typedef enum { HAL_CAP_RTS_AGGR_LIMIT = 42, /* aggregation limit with RTS */ HAL_CAP_4ADDR_AGGR = 43, /* hardware is capable of 4addr aggregation */ + HAL_CAP_DFS_DMN = 44, /* current DFS domain */ + HAL_CAP_EXT_CHAN_DFS = 45, /* DFS support for extension channel */ + HAL_CAP_COMBINED_RADAR_RSSI = 46, /* Is combined RSSI for radar accurate */ HAL_CAP_AUTO_SLEEP = 48, /* hardware can go to network sleep automatically after waking up to receive TIM */ @@ -133,6 +136,7 @@ typedef enum { HAL_CAP_HT20_SGI = 96, /* hardware supports HT20 short GI */ HAL_CAP_RXTSTAMP_PREC = 100, /* rx desc tstamp precision (bits) */ + HAL_CAP_ENHANCED_DFS_SUPPORT = 117, /* hardware supports enhanced DFS */ /* The following are private to the FreeBSD HAL (224 onward) */ @@ -669,6 +673,90 @@ typedef struct { } HAL_CHANNEL_SURVEY; /* + * ANI commands. + * + * These are used both internally and externally via the diagnostic + * API. + * + * Note that this is NOT the ANI commands being used via the INTMIT + * capability - that has a different mapping for some reason. + */ +typedef enum { + HAL_ANI_PRESENT = 0, /* is ANI support present */ + HAL_ANI_NOISE_IMMUNITY_LEVEL = 1, /* set level */ + HAL_ANI_OFDM_WEAK_SIGNAL_DETECTION = 2, /* enable/disable */ + HAL_ANI_CCK_WEAK_SIGNAL_THR = 3, /* enable/disable */ + HAL_ANI_FIRSTEP_LEVEL = 4, /* set level */ + HAL_ANI_SPUR_IMMUNITY_LEVEL = 5, /* set level */ + HAL_ANI_MODE = 6, /* 0 => manual, 1 => auto (XXX do not change) */ + HAL_ANI_PHYERR_RESET = 7, /* reset phy error stats */ +} HAL_ANI_CMD; + +/* + * This is the layout of the ANI INTMIT capability. + * + * Notice that the command values differ to HAL_ANI_CMD. 
+ */ +typedef enum { + HAL_CAP_INTMIT_PRESENT = 0, + HAL_CAP_INTMIT_ENABLE = 1, + HAL_CAP_INTMIT_NOISE_IMMUNITY_LEVEL = 2, + HAL_CAP_INTMIT_OFDM_WEAK_SIGNAL_LEVEL = 3, + HAL_CAP_INTMIT_CCK_WEAK_SIGNAL_THR = 4, + HAL_CAP_INTMIT_FIRSTEP_LEVEL = 5, + HAL_CAP_INTMIT_SPUR_IMMUNITY_LEVEL = 6 +} HAL_CAP_INTMIT_CMD; + +typedef struct { + int32_t pe_firpwr; /* FIR pwr out threshold */ + int32_t pe_rrssi; /* Radar rssi thresh */ + int32_t pe_height; /* Pulse height thresh */ + int32_t pe_prssi; /* Pulse rssi thresh */ + int32_t pe_inband; /* Inband thresh */ + + /* The following params are only for AR5413 and later */ + u_int32_t pe_relpwr; /* Relative power threshold in 0.5dB steps */ + u_int32_t pe_relstep; /* Pulse Relative step threshold in 0.5dB steps */ + u_int32_t pe_maxlen; /* Max length of radar sign in 0.8us units */ + HAL_BOOL pe_usefir128; /* Use the average in-band power measured over 128 cycles */ + HAL_BOOL pe_blockradar; /* + * Enable to block radar check if pkt detect is done via OFDM + * weak signal detect or pkt is detected immediately after tx + * to rx transition + */ + HAL_BOOL pe_enmaxrssi; /* + * Enable to use the max rssi instead of the last rssi during + * fine gain changes for radar detection + */ + HAL_BOOL pe_extchannel; /* Enable DFS on ext channel */ +} HAL_PHYERR_PARAM; + +#define HAL_PHYERR_PARAM_NOVAL 65535 +#define HAL_PHYERR_PARAM_ENABLE 0x8000 /* Enable/Disable if applicable */ + + +/* + * Flag for setting QUIET period + */ +typedef enum { + HAL_QUIET_DISABLE = 0x0, + HAL_QUIET_ENABLE = 0x1, + HAL_QUIET_ADD_CURRENT_TSF = 0x2, /* add current TSF to next_start offset */ + HAL_QUIET_ADD_SWBA_RESP_TIME = 0x4, /* add beacon response time to next_start offset */ +} HAL_QUIET_FLAG; + +#define HAL_DFS_EVENT_PRICH 0x0000001 + +struct dfs_event { + uint64_t re_full_ts; /* 64-bit full timestamp from interrupt time */ + uint32_t re_ts; /* Original 15 bit recv timestamp */ + uint8_t re_rssi; /* rssi of radar event */ + uint8_t re_dur; /* duration of 
radar pulse */ + uint32_t re_flags; /* Flags (see above) */ +}; +typedef struct dfs_event HAL_DFS_EVENT; + +/* * Hardware Access Layer (HAL) API. * * Clients of the HAL call ath_hal_attach to obtain a reference to an @@ -842,6 +930,18 @@ struct ath_hal { u_int __ahdecl(*ah_getCTSTimeout)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_setDecompMask)(struct ath_hal*, uint16_t, int); void __ahdecl(*ah_setCoverageClass)(struct ath_hal*, uint8_t, int); + HAL_STATUS __ahdecl(*ah_setQuiet)(struct ath_hal *ah, uint32_t period, + uint32_t duration, uint32_t nextStart, + HAL_QUIET_FLAG flag); + + /* DFS functions */ + void __ahdecl(*ah_enableDfs)(struct ath_hal *ah, + HAL_PHYERR_PARAM *pe); + void __ahdecl(*ah_getDfsThresh)(struct ath_hal *ah, + HAL_PHYERR_PARAM *pe); + HAL_BOOL __ahdecl(*ah_procRadarEvent)(struct ath_hal *ah, + struct ath_rx_status *rxs, uint64_t fulltsf, + const char *buf, HAL_DFS_EVENT *event); /* Key Cache Functions */ uint32_t __ahdecl(*ah_getKeyCacheSize)(struct ath_hal*); diff --git a/sys/dev/ath/ath_hal/ah_desc.h b/sys/dev/ath/ath_hal/ah_desc.h index ff6f40c..bd3e6a8 100644 --- a/sys/dev/ath/ath_hal/ah_desc.h +++ b/sys/dev/ath/ath_hal/ah_desc.h @@ -144,7 +144,7 @@ enum { HAL_PHYERR_RADAR = 5, /* Radar detect */ HAL_PHYERR_SERVICE = 6, /* Illegal service */ HAL_PHYERR_TOR = 7, /* Transmit override receive */ - /* NB: these are specific to the 5212 */ + /* NB: these are specific to the 5212 and later */ HAL_PHYERR_OFDM_TIMING = 17, /* */ HAL_PHYERR_OFDM_SIGNAL_PARITY = 18, /* */ HAL_PHYERR_OFDM_RATE_ILLEGAL = 19, /* */ @@ -152,6 +152,7 @@ enum { HAL_PHYERR_OFDM_POWER_DROP = 21, /* */ HAL_PHYERR_OFDM_SERVICE = 22, /* */ HAL_PHYERR_OFDM_RESTART = 23, /* */ + HAL_PHYERR_FALSE_RADAR_EXT = 24, /* */ HAL_PHYERR_CCK_TIMING = 25, /* */ HAL_PHYERR_CCK_HEADER_CRC = 26, /* */ HAL_PHYERR_CCK_RATE_ILLEGAL = 27, /* */ diff --git a/sys/dev/ath/ath_hal/ah_devid.h b/sys/dev/ath/ath_hal/ah_devid.h index 64033f3..c7a98dd 100644 --- a/sys/dev/ath/ath_hal/ah_devid.h +++ 
b/sys/dev/ath/ath_hal/ah_devid.h @@ -80,6 +80,8 @@ #define AR9280_DEVID_PCIE 0x002a /* AR9280 PCI-E Merlin */ #define AR9285_DEVID_PCIE 0x002b /* AR9285 PCI-E Kite */ #define AR2427_DEVID_PCIE 0x002c /* AR2427 PCI-E w/ 802.11n bonded out */ +#define AR9287_DEVID_PCI 0x002d /* AR9227 PCI Kiwi */ +#define AR9287_DEVID_PCIE 0x002e /* AR9287 PCI-E Kiwi */ #define AR_SUBVENDOR_ID_NOG 0x0e11 /* No 11G subvendor ID */ #define AR_SUBVENDOR_ID_NEW_A 0x7065 /* Update device to new RD */ diff --git a/sys/dev/ath/ath_hal/ah_eeprom.h b/sys/dev/ath/ath_hal/ah_eeprom.h index c7fe385..2ca0589 100644 --- a/sys/dev/ath/ath_hal/ah_eeprom.h +++ b/sys/dev/ath/ath_hal/ah_eeprom.h @@ -101,7 +101,9 @@ enum { AR_EEP_ANTGAINMAX_2, /* int8_t* */ AR_EEP_WRITEPROTECT, /* use ath_hal_eepromGetFlag */ AR_EEP_PWR_TABLE_OFFSET,/* int8_t* */ - AR_EEP_PWDCLKIND /* uint8_t* */ + AR_EEP_PWDCLKIND, /* uint8_t* */ + AR_EEP_TEMPSENSE_SLOPE, /* int8_t* */ + AR_EEP_TEMPSENSE_SLOPE_PAL_ON, /* int8_t* */ }; typedef struct { diff --git a/sys/dev/ath/ath_hal/ah_eeprom_9287.c b/sys/dev/ath/ath_hal/ah_eeprom_9287.c index e8c5e54..4055093 100644 --- a/sys/dev/ath/ath_hal/ah_eeprom_9287.c +++ b/sys/dev/ath/ath_hal/ah_eeprom_9287.c @@ -63,28 +63,10 @@ v9287EepromGet(struct ath_hal *ah, int param, void *val) return pBase->opCapFlags; case AR_EEP_RFSILENT: return pBase->rfSilent; -#if 0 - case AR_EEP_OB_5: - return pModal[CHAN_A_IDX].ob; - case AR_EEP_DB_5: - return pModal[CHAN_A_IDX].db; - case AR_EEP_OB_2: - return pModal[CHAN_B_IDX].ob; - case AR_EEP_DB_2: - return pModal[CHAN_B_IDX].db; -#endif case AR_EEP_TXMASK: return pBase->txMask; case AR_EEP_RXMASK: return pBase->rxMask; -#if 0 - case AR_EEP_RXGAIN_TYPE: - return IS_VERS(>=, AR5416_EEP_MINOR_VER_17) ? - pBase->rxGainType : AR5416_EEP_RXGAIN_ORIG; - case AR_EEP_TXGAIN_TYPE: - return IS_VERS(>=, AR5416_EEP_MINOR_VER_19) ? - pBase->txGainType : AR5416_EEP_TXGAIN_ORIG; -#endif case AR_EEP_OL_PWRCTRL: HALASSERT(val == AH_NULL); return pBase->openLoopPwrCntl ? 
HAL_OK : HAL_EIO; @@ -117,6 +99,18 @@ v9287EepromGet(struct ath_hal *ah, int param, void *val) case AR_EEP_PWR_TABLE_OFFSET: *(int8_t *) val = pBase->pwrTableOffset; return HAL_OK; + case AR_EEP_TEMPSENSE_SLOPE: + if (IS_VERS(>=, AR9287_EEP_MINOR_VER_2)) + *(int8_t *)val = pBase->tempSensSlope; + else + *(int8_t *)val = 0; + return HAL_OK; + case AR_EEP_TEMPSENSE_SLOPE_PAL_ON: + if (IS_VERS(>=, AR9287_EEP_MINOR_VER_3)) + *(int8_t *)val = pBase->tempSensSlopePalOn; + else + *(int8_t *)val = 0; + return HAL_OK; default: HALASSERT(0); return HAL_EINVAL; @@ -132,14 +126,12 @@ v9287EepromSet(struct ath_hal *ah, int param, int v) HAL_EEPROM_9287 *ee = AH_PRIVATE(ah)->ah_eeprom; switch (param) { - case AR_EEP_ANTGAINMAX_2: - ee->ee_antennaGainMax[1] = (int8_t) v; - return HAL_OK; - case AR_EEP_ANTGAINMAX_5: - ee->ee_antennaGainMax[0] = (int8_t) v; - return HAL_OK; + case AR_EEP_ANTGAINMAX_2: + ee->ee_antennaGainMax[1] = (int8_t) v; + return HAL_OK; + default: + return HAL_EINVAL; } - return HAL_EINVAL; } static HAL_BOOL diff --git a/sys/dev/ath/ath_hal/ah_internal.h b/sys/dev/ath/ath_hal/ah_internal.h index b4cc817..d66c9d8 100644 --- a/sys/dev/ath/ath_hal/ah_internal.h +++ b/sys/dev/ath/ath_hal/ah_internal.h @@ -200,8 +200,10 @@ typedef struct { halRifsTxSupport : 1, hal4AddrAggrSupport : 1, halExtChanDfsSupport : 1, + halUseCombinedRadarRssi : 1, halForcePpmSupport : 1, halEnhancedPmSupport : 1, + halEnhancedDfsSupport : 1, halMbssidAggrSupport : 1, halBssidMatchSupport : 1, hal4kbSplitTransSupport : 1, @@ -418,18 +420,6 @@ extern HAL_BOOL ath_hal_setTxQProps(struct ath_hal *ah, extern HAL_BOOL ath_hal_getTxQProps(struct ath_hal *ah, HAL_TXQ_INFO *qInfo, const HAL_TX_QUEUE_INFO *qi); -typedef enum { - HAL_ANI_PRESENT = 0x1, /* is ANI support present */ - HAL_ANI_NOISE_IMMUNITY_LEVEL = 0x2, /* set level */ - HAL_ANI_OFDM_WEAK_SIGNAL_DETECTION = 0x4, /* enable/disable */ - HAL_ANI_CCK_WEAK_SIGNAL_THR = 0x8, /* enable/disable */ - HAL_ANI_FIRSTEP_LEVEL = 0x10, /* set level 
*/ - HAL_ANI_SPUR_IMMUNITY_LEVEL = 0x20, /* set level */ - HAL_ANI_MODE = 0x40, /* 0 => manual, 1 => auto (XXX do not change) */ - HAL_ANI_PHYERR_RESET =0x80, /* reset phy error stats */ - HAL_ANI_ALL = 0xff -} HAL_ANI_CMD; - #define HAL_SPUR_VAL_MASK 0x3FFF #define HAL_SPUR_CHAN_WIDTH 87 #define HAL_BIN_WIDTH_BASE_100HZ 3125 diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212.h b/sys/dev/ath/ath_hal/ar5212/ar5212.h index e226816..8503a62 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212.h +++ b/sys/dev/ath/ath_hal/ar5212/ar5212.h @@ -320,6 +320,9 @@ struct ath_hal_5212 { struct ar5212AniState *ah_curani; /* cached last reference */ struct ar5212AniState ah_ani[AH_MAXCHAN]; /* per-channel state */ + /* AR5416 uses some of the AR5212 ANI code; these are the ANI methods */ + HAL_BOOL (*ah_aniControl) (struct ath_hal *, HAL_ANI_CMD cmd, int param); + /* * Transmit power state. Note these are maintained * here so they can be retrieved by diagnostic tools. @@ -503,6 +506,8 @@ extern HAL_BOOL ar5212SetCapability(struct ath_hal *, HAL_CAPABILITY_TYPE, extern HAL_BOOL ar5212GetDiagState(struct ath_hal *ah, int request, const void *args, uint32_t argsize, void **result, uint32_t *resultsize); +extern HAL_STATUS ar5212SetQuiet(struct ath_hal *ah, uint32_t period, + uint32_t duration, uint32_t nextStart, HAL_QUIET_FLAG flag); extern HAL_BOOL ar5212SetPowerMode(struct ath_hal *ah, HAL_POWER_MODE mode, int setChip); @@ -615,5 +620,10 @@ extern void ar5212AniReset(struct ath_hal *, const struct ieee80211_channel *, extern HAL_BOOL ar5212IsNFCalInProgress(struct ath_hal *ah); extern HAL_BOOL ar5212WaitNFCalComplete(struct ath_hal *ah, int i); +extern void ar5212EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); +extern void ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); +extern HAL_BOOL ar5212ProcessRadarEvent(struct ath_hal *ah, + struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf, + HAL_DFS_EVENT *event); #endif /* _ATH_AR5212_H_ */ diff --git 
a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c index 4b0fcbe..8e7f3cb 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c +++ b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c @@ -127,6 +127,12 @@ static const struct ath_hal_private ar5212hal = {{ .ah_getCTSTimeout = ar5212GetCTSTimeout, .ah_setDecompMask = ar5212SetDecompMask, .ah_setCoverageClass = ar5212SetCoverageClass, + .ah_setQuiet = ar5212SetQuiet, + + /* DFS Functions */ + .ah_enableDfs = ar5212EnableDfs, + .ah_getDfsThresh = ar5212GetDfsThresh, + .ah_procRadarEvent = ar5212ProcessRadarEvent, /* Key Cache Functions */ .ah_getKeyCacheSize = ar5212GetKeyCacheSize, @@ -203,6 +209,9 @@ ar5212AniSetup(struct ath_hal *ah) ar5212AniAttach(ah, &tmp, &tmp, AH_TRUE); } else ar5212AniAttach(ah, &aniparams, &aniparams, AH_TRUE); + + /* Set overridable ANI methods */ + AH5212(ah)->ah_aniControl = ar5212AniControl; } /* diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c index 0d6adc1..3a6019d 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c +++ b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c @@ -21,9 +21,7 @@ #include "ah.h" #include "ah_internal.h" #include "ah_devid.h" -#ifdef AH_DEBUG #include "ah_desc.h" /* NB: for HAL_PHYERR* */ -#endif #include "ar5212/ar5212.h" #include "ar5212/ar5212reg.h" @@ -634,6 +632,20 @@ ar5212SetCoverageClass(struct ath_hal *ah, uint8_t coverageclass, int now) } } +HAL_STATUS +ar5212SetQuiet(struct ath_hal *ah, uint32_t period, uint32_t duration, + uint32_t nextStart, HAL_QUIET_FLAG flag) +{ + OS_REG_WRITE(ah, AR_QUIET2, period | (duration << AR_QUIET2_QUIET_DUR_S)); + if (flag & HAL_QUIET_ENABLE) { + OS_REG_WRITE(ah, AR_QUIET1, nextStart | (1 << 16)); + } + else { + OS_REG_WRITE(ah, AR_QUIET1, nextStart); + } + return HAL_OK; +} + void ar5212SetPCUConfig(struct ath_hal *ah) { @@ -880,16 +892,16 @@ ar5212GetCapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, return HAL_OK; case HAL_CAP_INTMIT: /* 
interference mitigation */ switch (capability) { - case 0: /* hardware capability */ + case HAL_CAP_INTMIT_PRESENT: /* hardware capability */ return HAL_OK; - case 1: + case HAL_CAP_INTMIT_ENABLE: return (ahp->ah_procPhyErr & HAL_ANI_ENA) ? HAL_OK : HAL_ENXIO; - case 2: /* HAL_ANI_NOISE_IMMUNITY_LEVEL */ - case 3: /* HAL_ANI_OFDM_WEAK_SIGNAL_DETECTION */ - case 4: /* HAL_ANI_CCK_WEAK_SIGNAL_THR */ - case 5: /* HAL_ANI_FIRSTEP_LEVEL */ - case 6: /* HAL_ANI_SPUR_IMMUNITY_LEVEL */ + case HAL_CAP_INTMIT_NOISE_IMMUNITY_LEVEL: + case HAL_CAP_INTMIT_OFDM_WEAK_SIGNAL_LEVEL: + case HAL_CAP_INTMIT_CCK_WEAK_SIGNAL_THR: + case HAL_CAP_INTMIT_FIRSTEP_LEVEL: + case HAL_CAP_INTMIT_SPUR_IMMUNITY_LEVEL: ani = ar5212AniGetCurrentState(ah); if (ani == AH_NULL) return HAL_ENXIO; @@ -980,6 +992,8 @@ ar5212SetCapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, OS_REG_WRITE(ah, AR_TPC, ahp->ah_macTPC); return AH_TRUE; case HAL_CAP_INTMIT: { /* interference mitigation */ + /* This maps the public ANI commands to the internal ANI commands */ + /* Private: HAL_ANI_CMD; Public: HAL_CAP_INTMIT_CMD */ static const HAL_ANI_CMD cmds[] = { HAL_ANI_PRESENT, HAL_ANI_MODE, @@ -990,7 +1004,7 @@ ar5212SetCapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, HAL_ANI_SPUR_IMMUNITY_LEVEL, }; return capability < N(cmds) ? 
- ar5212AniControl(ah, cmds[capability], setting) : + AH5212(ah)->ah_aniControl(ah, cmds[capability], setting) : AH_FALSE; } case HAL_CAP_TSF_ADJUST: /* hardware has beacon tsf adjust */ @@ -1053,7 +1067,7 @@ ar5212GetDiagState(struct ath_hal *ah, int request, case HAL_DIAG_ANI_CMD: if (argsize != 2*sizeof(uint32_t)) return AH_FALSE; - ar5212AniControl(ah, ((const uint32_t *)args)[0], + AH5212(ah)->ah_aniControl(ah, ((const uint32_t *)args)[0], ((const uint32_t *)args)[1]); return AH_TRUE; case HAL_DIAG_ANI_PARAMS: @@ -1113,3 +1127,98 @@ ar5212WaitNFCalComplete(struct ath_hal *ah, int i) } return AH_FALSE; } + +void +ar5212EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe) +{ + uint32_t val; + val = OS_REG_READ(ah, AR_PHY_RADAR_0); + + if (pe->pe_firpwr != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_FIRPWR; + val |= SM(pe->pe_firpwr, AR_PHY_RADAR_0_FIRPWR); + } + if (pe->pe_rrssi != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_RRSSI; + val |= SM(pe->pe_rrssi, AR_PHY_RADAR_0_RRSSI); + } + if (pe->pe_height != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_HEIGHT; + val |= SM(pe->pe_height, AR_PHY_RADAR_0_HEIGHT); + } + if (pe->pe_prssi != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_PRSSI; + val |= SM(pe->pe_prssi, AR_PHY_RADAR_0_PRSSI); + } + if (pe->pe_inband != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_INBAND; + val |= SM(pe->pe_inband, AR_PHY_RADAR_0_INBAND); + } + OS_REG_WRITE(ah, AR_PHY_RADAR_0, val | AR_PHY_RADAR_0_ENA); +} + +void +ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe) +{ + uint32_t val,temp; + + val = OS_REG_READ(ah, AR_PHY_RADAR_0); + + temp = MS(val,AR_PHY_RADAR_0_FIRPWR); + temp |= 0xFFFFFF80; + pe->pe_firpwr = temp; + pe->pe_rrssi = MS(val, AR_PHY_RADAR_0_RRSSI); + pe->pe_height = MS(val, AR_PHY_RADAR_0_HEIGHT); + pe->pe_prssi = MS(val, AR_PHY_RADAR_0_PRSSI); + pe->pe_inband = MS(val, AR_PHY_RADAR_0_INBAND); + + pe->pe_relpwr = 0; + pe->pe_relstep = 0; + pe->pe_maxlen = 0; + pe->pe_extchannel = 
AH_FALSE; +} + +/* + * Process the radar phy error and extract the pulse duration. + */ +HAL_BOOL +ar5212ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs, + uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event) +{ + uint8_t dur; + uint8_t rssi; + + /* Check whether the given phy error is a radar event */ + if ((rxs->rs_phyerr != HAL_PHYERR_RADAR) && + (rxs->rs_phyerr != HAL_PHYERR_FALSE_RADAR_EXT)) + return AH_FALSE; + + /* + * The first byte is the pulse width - if there's + * no data, simply set the duration to 0 + */ + if (rxs->rs_datalen >= 1) + /* The pulse width is byte 0 of the data */ + dur = ((uint8_t) buf[0]) & 0xff; + else + dur = 0; + + /* Pulse RSSI is the normal reported RSSI */ + rssi = (uint8_t) rxs->rs_rssi; + + /* 0 duration/rssi is not a valid radar event */ + if (dur == 0 && rssi == 0) + return AH_FALSE; + + HALDEBUG(ah, HAL_DEBUG_DFS, "%s: rssi=%d, dur=%d\n", + __func__, rssi, dur); + + /* Record the event */ + event->re_full_ts = fulltsf; + event->re_ts = rxs->rs_tstamp; + event->re_rssi = rssi; + event->re_dur = dur; + event->re_flags = HAL_DFS_EVENT_PRICH; + + return AH_TRUE; +} diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212reg.h b/sys/dev/ath/ath_hal/ar5212/ar5212reg.h index f99b203..15c1a58 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212reg.h +++ b/sys/dev/ath/ath_hal/ar5212/ar5212reg.h @@ -300,6 +300,7 @@ #define AR_QUIET1_NEXT_QUIET 0xffff #define AR_QUIET1_QUIET_ENABLE 0x10000 /* Enable Quiet time operation */ #define AR_QUIET1_QUIET_ACK_CTS_ENABLE 0x20000 /* Do we ack/cts during quiet period */ +#define AR_QUIET1_QUIET_ACK_CTS_ENABLE_S 17 #define AR_QUIET2 0x8100 /* More Quiet time programming */ #define AR_QUIET2_QUIET_PER_S 0 /* Periodicity of quiet period (TU) */ diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416.h b/sys/dev/ath/ath_hal/ar5416/ar5416.h index 5327296..e5294b0 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416.h +++ b/sys/dev/ath/ath_hal/ar5416/ar5416.h @@ -194,6 +194,8 @@ extern uint32_t 
ar5416Get11nExtBusy(struct ath_hal *ah); extern void ar5416Set11nMac2040(struct ath_hal *ah, HAL_HT_MACMODE mode); extern HAL_HT_RXCLEAR ar5416Get11nRxClear(struct ath_hal *ah); extern void ar5416Set11nRxClear(struct ath_hal *ah, HAL_HT_RXCLEAR rxclear); +extern HAL_STATUS ar5416SetQuiet(struct ath_hal *ah, uint32_t period, + uint32_t duration, uint32_t nextStart, HAL_QUIET_FLAG flag); extern HAL_STATUS ar5416GetCapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, uint32_t capability, uint32_t *result); extern HAL_BOOL ar5416GetDiagState(struct ath_hal *ah, int request, @@ -201,6 +203,11 @@ extern HAL_BOOL ar5416GetDiagState(struct ath_hal *ah, int request, void **result, uint32_t *resultsize); extern HAL_BOOL ar5416SetRifsDelay(struct ath_hal *ah, const struct ieee80211_channel *chan, HAL_BOOL enable); +extern void ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); +extern void ar5416GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); +extern HAL_BOOL ar5416ProcessRadarEvent(struct ath_hal *ah, + struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf, + HAL_DFS_EVENT *event); extern HAL_BOOL ar5416SetPowerMode(struct ath_hal *ah, HAL_POWER_MODE mode, int setChip); diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_ani.c b/sys/dev/ath/ath_hal/ar5416/ar5416_ani.c index 3a8f785..e2c8592 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416_ani.c +++ b/sys/dev/ath/ath_hal/ar5416/ar5416_ani.c @@ -175,9 +175,17 @@ ar5416AniControl(struct ath_hal *ah, HAL_ANI_CMD cmd, int param) struct ar5212AniState *aniState = ahp->ah_curani; const struct ar5212AniParams *params = aniState->params; + /* Check whether the particular function is enabled */ + if (((1 << cmd) & AH5416(ah)->ah_ani_function) == 0) { + HALDEBUG(ah, HAL_DEBUG_ANI, "%s: command %d disabled\n", + __func__, cmd); + HALDEBUG(ah, HAL_DEBUG_ANI, "%s: cmd %d; mask %x\n", __func__, cmd, AH5416(ah)->ah_ani_function); + return AH_FALSE; + } + OS_MARK(ah, AH_MARK_ANI_CONTROL, cmd); - switch (cmd & 
AH5416(ah)->ah_ani_function) { + switch (cmd) { case HAL_ANI_NOISE_IMMUNITY_LEVEL: { u_int level = param; @@ -356,14 +364,14 @@ ar5416AniOfdmErrTrigger(struct ath_hal *ah) aniState = ahp->ah_curani; params = aniState->params; /* First, raise noise immunity level, up to max */ - if ((AH5416(ah)->ah_ani_function & HAL_ANI_NOISE_IMMUNITY_LEVEL) && + if ((AH5416(ah)->ah_ani_function & (1 << HAL_ANI_NOISE_IMMUNITY_LEVEL)) && (aniState->noiseImmunityLevel+1 < params->maxNoiseImmunityLevel)) { ar5416AniControl(ah, HAL_ANI_NOISE_IMMUNITY_LEVEL, aniState->noiseImmunityLevel + 1); return; } /* then, raise spur immunity level, up to max */ - if ((AH5416(ah)->ah_ani_function & HAL_ANI_SPUR_IMMUNITY_LEVEL) && + if ((AH5416(ah)->ah_ani_function & (1 << HAL_ANI_SPUR_IMMUNITY_LEVEL)) && (aniState->spurImmunityLevel+1 < params->maxSpurImmunityLevel)) { ar5416AniControl(ah, HAL_ANI_SPUR_IMMUNITY_LEVEL, aniState->spurImmunityLevel + 1); @@ -443,7 +451,8 @@ ar5416AniCckErrTrigger(struct ath_hal *ah) /* first, raise noise immunity level, up to max */ aniState = ahp->ah_curani; params = aniState->params; - if (aniState->noiseImmunityLevel+1 < params->maxNoiseImmunityLevel) { + if ((AH5416(ah)->ah_ani_function & (1 << HAL_ANI_NOISE_IMMUNITY_LEVEL) && + aniState->noiseImmunityLevel+1 < params->maxNoiseImmunityLevel)) { ar5416AniControl(ah, HAL_ANI_NOISE_IMMUNITY_LEVEL, aniState->noiseImmunityLevel + 1); return; diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c index 6779bf9..e636325 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c +++ b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c @@ -58,7 +58,7 @@ ar5416AniSetup(struct ath_hal *ah) .period = 100, }; /* NB: disable ANI noise immmunity for reliable RIFS rx */ - AH5416(ah)->ah_ani_function &= ~ HAL_ANI_NOISE_IMMUNITY_LEVEL; + AH5416(ah)->ah_ani_function &= ~(1 << HAL_ANI_NOISE_IMMUNITY_LEVEL); ar5416AniAttach(ah, &aniparams, &aniparams, AH_TRUE); } @@ -139,10 +139,16 @@ 
ar5416InitState(struct ath_hal_5416 *ahp5416, uint16_t devid, HAL_SOFTC sc, ah->ah_setAntennaSwitch = ar5416SetAntennaSwitch; ah->ah_setDecompMask = ar5416SetDecompMask; ah->ah_setCoverageClass = ar5416SetCoverageClass; + ah->ah_setQuiet = ar5416SetQuiet; ah->ah_resetKeyCacheEntry = ar5416ResetKeyCacheEntry; ah->ah_setKeyCacheEntry = ar5416SetKeyCacheEntry; + /* DFS Functions */ + ah->ah_enableDfs = ar5416EnableDfs; + ah->ah_getDfsThresh = ar5416GetDfsThresh; + ah->ah_procRadarEvent = ar5416ProcessRadarEvent; + /* Power Management Functions */ ah->ah_setPowerMode = ar5416SetPowerMode; @@ -199,7 +205,10 @@ ar5416InitState(struct ath_hal_5416 *ahp5416, uint16_t devid, HAL_SOFTC sc, AH5416(ah)->ah_tx_chainmask = AR5416_DEFAULT_TXCHAINMASK; /* Enable all ANI functions to begin with */ - AH5416(ah)->ah_ani_function = HAL_ANI_ALL; + AH5416(ah)->ah_ani_function = 0xffffffff; + + /* Set overridable ANI methods */ + AH5212(ah)->ah_aniControl = ar5416AniControl; } uint32_t @@ -875,6 +884,7 @@ ar5416FillCapabilityInfo(struct ath_hal *ah) pCap->halBssidMatchSupport = AH_TRUE; pCap->halGTTSupport = AH_TRUE; pCap->halCSTSupport = AH_TRUE; + pCap->halEnhancedDfsSupport = AH_FALSE; if (ath_hal_eepromGetFlag(ah, AR_EEP_RFKILL) && ath_hal_eepromGet(ah, AR_EEP_RFSILENT, &ahpriv->ah_rfsilent) == HAL_OK) { diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_cal.c b/sys/dev/ath/ath_hal/ar5416/ar5416_cal.c index ee61c30..1356c7d 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416_cal.c +++ b/sys/dev/ath/ath_hal/ar5416/ar5416_cal.c @@ -594,8 +594,8 @@ ar5416LoadNF(struct ath_hal *ah, const struct ieee80211_channel *chan) if (AR_SREV_KITE(ah)) { /* Kite has only one chain */ chainmask = 0x9; - } else if (AR_SREV_MERLIN(ah)) { - /* Merlin has only two chains */ + } else if (AR_SREV_MERLIN(ah) || AR_SREV_KIWI(ah)) { + /* Merlin/Kiwi has only two chains */ chainmask = 0x1B; } else { chainmask = 0x3F; diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c index 
8f18c46..2332656 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c +++ b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c @@ -273,6 +273,35 @@ ar5416Set11nRxClear(struct ath_hal *ah, HAL_HT_RXCLEAR rxclear) } } +/* XXX shouldn't be here! */ +#define TU_TO_USEC(_tu) ((_tu) << 10) + +HAL_STATUS +ar5416SetQuiet(struct ath_hal *ah, uint32_t period, uint32_t duration, + uint32_t nextStart, HAL_QUIET_FLAG flag) +{ + uint32_t period_us = TU_TO_USEC(period); /* convert to us unit */ + uint32_t nextStart_us = TU_TO_USEC(nextStart); /* convert to us unit */ + if (flag & HAL_QUIET_ENABLE) { + if ((!nextStart) || (flag & HAL_QUIET_ADD_CURRENT_TSF)) { + /* Add the nextStart offset to the current TSF */ + nextStart_us += OS_REG_READ(ah, AR_TSF_L32); + } + if (flag & HAL_QUIET_ADD_SWBA_RESP_TIME) { + nextStart_us += ath_hal_sw_beacon_response_time; + } + OS_REG_RMW_FIELD(ah, AR_QUIET1, AR_QUIET1_QUIET_ACK_CTS_ENABLE, 1); + OS_REG_WRITE(ah, AR_QUIET2, SM(duration, AR_QUIET2_QUIET_DUR)); + OS_REG_WRITE(ah, AR_QUIET_PERIOD, period_us); + OS_REG_WRITE(ah, AR_NEXT_QUIET, nextStart_us); + OS_REG_SET_BIT(ah, AR_TIMER_MODE, AR_TIMER_MODE_QUIET); + } else { + OS_REG_CLR_BIT(ah, AR_TIMER_MODE, AR_TIMER_MODE_QUIET); + } + return HAL_OK; +} +#undef TU_TO_USEC + HAL_STATUS ar5416GetCapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, uint32_t capability, uint32_t *result) @@ -560,3 +589,122 @@ ar5416DetectBBHang(struct ath_hal *ah) #undef N } #undef NUM_STATUS_READS + +/* + * Get the radar parameter values and return them in the pe + * structure + */ +void +ar5416GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe) +{ + uint32_t val, temp; + + val = OS_REG_READ(ah, AR_PHY_RADAR_0); + + temp = MS(val,AR_PHY_RADAR_0_FIRPWR); + temp |= 0xFFFFFF80; + pe->pe_firpwr = temp; + pe->pe_rrssi = MS(val, AR_PHY_RADAR_0_RRSSI); + pe->pe_height = MS(val, AR_PHY_RADAR_0_HEIGHT); + pe->pe_prssi = MS(val, AR_PHY_RADAR_0_PRSSI); + pe->pe_inband = MS(val, AR_PHY_RADAR_0_INBAND); + + val = OS_REG_READ(ah, 
AR_PHY_RADAR_1); + temp = val & AR_PHY_RADAR_1_RELPWR_ENA; + pe->pe_relpwr = MS(val, AR_PHY_RADAR_1_RELPWR_THRESH); + if (temp) + pe->pe_relpwr |= HAL_PHYERR_PARAM_ENABLE; + temp = val & AR_PHY_RADAR_1_RELSTEP_CHECK; + pe->pe_relstep = MS(val, AR_PHY_RADAR_1_RELSTEP_THRESH); + if (temp) + pe->pe_relstep |= HAL_PHYERR_PARAM_ENABLE; + pe->pe_maxlen = MS(val, AR_PHY_RADAR_1_MAXLEN); + pe->pe_extchannel = !! (OS_REG_READ(ah, AR_PHY_RADAR_EXT) & + AR_PHY_RADAR_EXT_ENA); +} + +/* + * Enable radar detection and set the radar parameters per the + * values in pe + */ +void +ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe) +{ + uint32_t val; + + val = OS_REG_READ(ah, AR_PHY_RADAR_0); + + if (pe->pe_firpwr != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_FIRPWR; + val |= SM(pe->pe_firpwr, AR_PHY_RADAR_0_FIRPWR); + } + if (pe->pe_rrssi != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_RRSSI; + val |= SM(pe->pe_rrssi, AR_PHY_RADAR_0_RRSSI); + } + if (pe->pe_height != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_HEIGHT; + val |= SM(pe->pe_height, AR_PHY_RADAR_0_HEIGHT); + } + if (pe->pe_prssi != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_PRSSI; + val |= SM(pe->pe_prssi, AR_PHY_RADAR_0_PRSSI); + } + if (pe->pe_inband != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_0_INBAND; + val |= SM(pe->pe_inband, AR_PHY_RADAR_0_INBAND); + } + + /*Enable FFT data*/ + val |= AR_PHY_RADAR_0_FFT_ENA; + + OS_REG_WRITE(ah, AR_PHY_RADAR_0, val | AR_PHY_RADAR_0_ENA); + + val = OS_REG_READ(ah, AR_PHY_RADAR_1); + val |= (AR_PHY_RADAR_1_MAX_RRSSI | AR_PHY_RADAR_1_BLOCK_CHECK); + + if (pe->pe_maxlen != HAL_PHYERR_PARAM_NOVAL) { + val &= ~AR_PHY_RADAR_1_MAXLEN; + val |= SM(pe->pe_maxlen, AR_PHY_RADAR_1_MAXLEN); + } + OS_REG_WRITE(ah, AR_PHY_RADAR_1, val); + + /* + * Enable HT/40 if the upper layer asks; + * it should check the channel is HT/40 and HAL_CAP_EXT_CHAN_DFS + * is available. 
+ */ + if (pe->pe_extchannel) + OS_REG_SET_BIT(ah, AR_PHY_RADAR_EXT, AR_PHY_RADAR_EXT_ENA); + else + OS_REG_CLR_BIT(ah, AR_PHY_RADAR_EXT, AR_PHY_RADAR_EXT_ENA); + + if (pe->pe_relstep != HAL_PHYERR_PARAM_NOVAL) { + val = OS_REG_READ(ah, AR_PHY_RADAR_1); + val &= ~AR_PHY_RADAR_1_RELSTEP_THRESH; + val |= SM(pe->pe_relstep, AR_PHY_RADAR_1_RELSTEP_THRESH); + OS_REG_WRITE(ah, AR_PHY_RADAR_1, val); + } + if (pe->pe_relpwr != HAL_PHYERR_PARAM_NOVAL) { + val = OS_REG_READ(ah, AR_PHY_RADAR_1); + val &= ~AR_PHY_RADAR_1_RELPWR_THRESH; + val |= SM(pe->pe_relpwr, AR_PHY_RADAR_1_RELPWR_THRESH); + OS_REG_WRITE(ah, AR_PHY_RADAR_1, val); + } +} + +/* + * Extract the radar event information from the given phy error. + * + * Returns AH_TRUE if the phy error was actually a phy error, + * AH_FALSE if the phy error wasn't a phy error. + */ +HAL_BOOL +ar5416ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs, + uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event) +{ + /* + * For now, this isn't implemented. 
+ */ + return AH_FALSE; +} diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_reset.c b/sys/dev/ath/ath_hal/ar5416/ar5416_reset.c index d2ae351..1da686a 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416_reset.c +++ b/sys/dev/ath/ath_hal/ar5416/ar5416_reset.c @@ -167,6 +167,17 @@ ar5416Reset(struct ath_hal *ah, HAL_OPMODE opmode, AH5416(ah)->ah_writeIni(ah, chan); + if(AR_SREV_KIWI_13_OR_LATER(ah) ) { + /* Enable ASYNC FIFO */ + OS_REG_SET_BIT(ah, AR_MAC_PCU_ASYNC_FIFO_REG3, + AR_MAC_PCU_ASYNC_FIFO_REG3_DATAPATH_SEL); + OS_REG_SET_BIT(ah, AR_PHY_MODE, AR_PHY_MODE_ASYNCFIFO); + OS_REG_CLR_BIT(ah, AR_MAC_PCU_ASYNC_FIFO_REG3, + AR_MAC_PCU_ASYNC_FIFO_REG3_SOFT_RESET); + OS_REG_SET_BIT(ah, AR_MAC_PCU_ASYNC_FIFO_REG3, + AR_MAC_PCU_ASYNC_FIFO_REG3_SOFT_RESET); + } + /* Override ini values (that can be overriden in this fashion) */ ar5416OverrideIni(ah, chan); @@ -258,6 +269,12 @@ ar5416Reset(struct ath_hal *ah, HAL_OPMODE opmode, OS_REG_WRITE(ah, AR_MAC_LED, OS_REG_READ(ah, AR_MAC_LED) | saveLedState); + /* Start TSF2 for generic timer 8-15 */ +#ifdef NOTYET + if (AR_SREV_KIWI(ah)) + ar5416StartTsf2(ah); +#endif + /* Restore previous antenna */ OS_REG_WRITE(ah, AR_DEF_ANTENNA, saveDefAntenna); @@ -292,6 +309,41 @@ ar5416Reset(struct ath_hal *ah, HAL_OPMODE opmode, /* This may override the AR_DIAG_SW register */ ar5416InitUserSettings(ah); + if (AR_SREV_KIWI_13_OR_LATER(ah)) { + /* + * Enable ASYNC FIFO + * + * If Async FIFO is enabled, the following counters change + * as MAC now runs at 117 Mhz instead of 88/44MHz when + * async FIFO is disabled. + * + * Overwrite the delay/timeouts initialized in ProcessIni() + * above. 
+ */ + OS_REG_WRITE(ah, AR_D_GBL_IFS_SIFS, + AR_D_GBL_IFS_SIFS_ASYNC_FIFO_DUR); + OS_REG_WRITE(ah, AR_D_GBL_IFS_SLOT, + AR_D_GBL_IFS_SLOT_ASYNC_FIFO_DUR); + OS_REG_WRITE(ah, AR_D_GBL_IFS_EIFS, + AR_D_GBL_IFS_EIFS_ASYNC_FIFO_DUR); + + OS_REG_WRITE(ah, AR_TIME_OUT, + AR_TIME_OUT_ACK_CTS_ASYNC_FIFO_DUR); + OS_REG_WRITE(ah, AR_USEC, AR_USEC_ASYNC_FIFO_DUR); + + OS_REG_SET_BIT(ah, AR_MAC_PCU_LOGIC_ANALYZER, + AR_MAC_PCU_LOGIC_ANALYZER_DISBUG20768); + OS_REG_RMW_FIELD(ah, AR_AHB_MODE, AR_AHB_CUSTOM_BURST_EN, + AR_AHB_CUSTOM_BURST_ASYNC_FIFO_VAL); + } + + if (AR_SREV_KIWI_13_OR_LATER(ah)) { + /* Enable AGGWEP to accelerate encryption engine */ + OS_REG_SET_BIT(ah, AR_PCU_MISC_MODE2, + AR_PCU_MISC_MODE2_ENABLE_AGGWEP); + } + + /* * disable seq number generation in hw */ @@ -2576,7 +2628,7 @@ ar5416OverrideIni(struct ath_hal *ah, const struct ieee80211_channel *chan) if (!AR_SREV_9271(ah)) val &= ~AR_PCU_MISC_MODE2_HWWAR1; - if (AR_SREV_9287_11_OR_LATER(ah)) + if (AR_SREV_KIWI_11_OR_LATER(ah)) val = val & (~AR_PCU_MISC_MODE2_HWWAR2); OS_REG_WRITE(ah, AR_PCU_MISC_MODE2, val); diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416phy.h b/sys/dev/ath/ath_hal/ar5416/ar5416phy.h index 86643f0..d7a5e0b 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416phy.h +++ b/sys/dev/ath/ath_hal/ar5416/ar5416phy.h @@ -21,6 +21,25 @@ #include "ar5212/ar5212phy.h" +/* For AR_PHY_RADAR0 */ +#define AR_PHY_RADAR_0_FFT_ENA 0x80000000 + +#define AR_PHY_RADAR_EXT 0x9940 +#define AR_PHY_RADAR_EXT_ENA 0x00004000 + +#define AR_PHY_RADAR_1 0x9958 +#define AR_PHY_RADAR_1_RELPWR_ENA 0x00800000 +#define AR_PHY_RADAR_1_USE_FIR128 0x00400000 +#define AR_PHY_RADAR_1_RELPWR_THRESH 0x003F0000 +#define AR_PHY_RADAR_1_RELPWR_THRESH_S 16 +#define AR_PHY_RADAR_1_BLOCK_CHECK 0x00008000 +#define AR_PHY_RADAR_1_MAX_RRSSI 0x00004000 +#define AR_PHY_RADAR_1_RELSTEP_CHECK 0x00002000 +#define AR_PHY_RADAR_1_RELSTEP_THRESH 0x00001F00 +#define AR_PHY_RADAR_1_RELSTEP_THRESH_S 8 +#define AR_PHY_RADAR_1_MAXLEN 0x000000FF +#define 
AR_PHY_RADAR_1_MAXLEN_S 0 + #define AR_PHY_CHIP_ID_REV_0 0x80 /* 5416 Rev 0 (owl 1.0) BB */ #define AR_PHY_CHIP_ID_REV_1 0x81 /* 5416 Rev 1 (owl 2.0) BB */ @@ -301,4 +320,6 @@ #define AR_PHY_TX_PWRCTRL9_RES_DC_REMOVAL 0x80000000 #define AR_PHY_TX_PWRCTRL9_RES_DC_REMOVAL_S 31 +#define AR_PHY_MODE_ASYNCFIFO 0x80 /* Enable async fifo */ + #endif /* _DEV_ATH_AR5416PHY_H_ */ diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416reg.h b/sys/dev/ath/ath_hal/ar5416/ar5416reg.h index 9921366..561c5b4 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416reg.h +++ b/sys/dev/ath/ath_hal/ar5416/ar5416reg.h @@ -219,6 +219,10 @@ #define AR_AHB_PAGE_SIZE_1K 0x00000000 /* set page-size as 1k */ #define AR_AHB_PAGE_SIZE_2K 0x00000008 /* set page-size as 2k */ #define AR_AHB_PAGE_SIZE_4K 0x00000010 /* set page-size as 4k */ +/* Kiwi */ +#define AR_AHB_CUSTOM_BURST_EN 0x000000C0 /* set Custom Burst Mode */ +#define AR_AHB_CUSTOM_BURST_EN_S 6 /* set Custom Burst Mode */ +#define AR_AHB_CUSTOM_BURST_ASYNC_FIFO_VAL 3 /* set both bits in Async FIFO mode */ /* MAC PCU Registers */ #define AR_STA_ID1_PRESERVE_SEQNUM 0x20000000 /* Don't replace seq num */ @@ -451,9 +455,23 @@ * For Merlin and above only. */ #define AR_PCU_MISC_MODE2_ADHOC_MCAST_KEYID_ENABLE 0x00000040 +#define AR_PCU_MISC_MODE2_ENABLE_AGGWEP 0x00020000 /* Kiwi or later? */ #define AR_PCU_MISC_MODE2_HWWAR1 0x00100000 #define AR_PCU_MISC_MODE2_HWWAR2 0x02000000 +/* For Kiwi */ +#define AR_MAC_PCU_ASYNC_FIFO_REG3 0x8358 +#define AR_MAC_PCU_ASYNC_FIFO_REG3_DATAPATH_SEL 0x00000400 +#define AR_MAC_PCU_ASYNC_FIFO_REG3_SOFT_RESET 0x80000000 + +/* TSF2. For Kiwi only */ +#define AR_TSF2_L32 0x8390 +#define AR_TSF2_U32 0x8394 + +/* MAC Direct Connect Control. 
For Kiwi only */ +#define AR_DIRECT_CONNECT 0x83A0 +#define AR_DC_AP_STA_EN 0x00000001 + /* GPIO Interrupt */ #define AR_INTR_GPIO 0x3FF00000 /* gpio interrupted */ #define AR_INTR_GPIO_S 20 @@ -488,6 +506,17 @@ #define AR_PCU_TXBUF_CTRL_USABLE_SIZE 0x700 #define AR_9285_PCU_TXBUF_CTRL_USABLE_SIZE 0x380 +/* IFS, SIFS, slot, etc for Async FIFO mode (Kiwi) */ +#define AR_D_GBL_IFS_SIFS_ASYNC_FIFO_DUR 0x000003AB +#define AR_TIME_OUT_ACK_CTS_ASYNC_FIFO_DUR 0x16001D56 +#define AR_USEC_ASYNC_FIFO_DUR 0x12e00074 +#define AR_D_GBL_IFS_SLOT_ASYNC_FIFO_DUR 0x00000420 +#define AR_D_GBL_IFS_EIFS_ASYNC_FIFO_DUR 0x0000A5EB + +/* Used by Kiwi Async FIFO */ +#define AR_MAC_PCU_LOGIC_ANALYZER 0x8264 +#define AR_MAC_PCU_LOGIC_ANALYZER_DISBUG20768 0x20000000 + /* Eeprom defines */ #define AR_EEPROM_STATUS_DATA_VAL 0x0000ffff #define AR_EEPROM_STATUS_DATA_VAL_S 0 @@ -566,6 +595,11 @@ #define AR_XSREV_REVISION_KITE_10 0 /* Kite 1.0 */ #define AR_XSREV_REVISION_KITE_11 1 /* Kite 1.1 */ #define AR_XSREV_REVISION_KITE_12 2 /* Kite 1.2 */ +#define AR_XSREV_VERSION_KIWI 0x180 /* Kiwi (AR9287) */ +#define AR_XSREV_REVISION_KIWI_10 0 +#define AR_XSREV_REVISION_KIWI_11 1 +#define AR_XSREV_REVISION_KIWI_12 2 +#define AR_XSREV_REVISION_KIWI_13 3 /* Owl (AR5416) */ #define AR_SREV_OWL(_ah) \ @@ -648,9 +682,31 @@ (AR_SREV_KITE_12_OR_LATER(_ah) && \ ((OS_REG_READ(_ah, AR_AN_SYNTH9) & 0x7) == 0x1)) +#define AR_SREV_KIWI(_ah) \ + (AH_PRIVATE((_ah))->ah_macVersion == AR_XSREV_VERSION_KIWI) + +#define AR_SREV_KIWI_11_OR_LATER(_ah) \ + (AR_SREV_KIWI(_ah) && \ + AH_PRIVATE((_ah))->ah_macRev >= AR_XSREV_REVISION_KIWI_11) + +#define AR_SREV_KIWI_11(_ah) \ + (AR_SREV_KIWI(_ah) && \ + AH_PRIVATE((_ah))->ah_macRev == AR_XSREV_REVISION_KIWI_11) + +#define AR_SREV_KIWI_12(_ah) \ + (AR_SREV_KIWI(_ah) && \ + AH_PRIVATE((_ah))->ah_macRev == AR_XSREV_REVISION_KIWI_12) + +#define AR_SREV_KIWI_12_OR_LATER(_ah) \ + (AR_SREV_KIWI(_ah) && \ + AH_PRIVATE((_ah))->ah_macRev >= AR_XSREV_REVISION_KIWI_12) + +#define 
AR_SREV_KIWI_13_OR_LATER(_ah) \ + (AR_SREV_KIWI(_ah) && \ + AH_PRIVATE((_ah))->ah_macRev >= AR_XSREV_REVISION_KIWI_13) + + /* Not yet implemented chips */ #define AR_SREV_9271(_ah) 0 -#define AR_SREV_9287_11_OR_LATER(_ah) 0 -#define AR_SREV_KIWI_10_OR_LATER(_ah) 0 #endif /* _DEV_ATH_AR5416REG_H */ diff --git a/sys/dev/ath/ath_hal/ar9001/ar9130_attach.c b/sys/dev/ath/ath_hal/ar9001/ar9130_attach.c index 49a5f5e..2a3f3f0 100644 --- a/sys/dev/ath/ath_hal/ar9001/ar9130_attach.c +++ b/sys/dev/ath/ath_hal/ar9001/ar9130_attach.c @@ -289,6 +289,7 @@ ar9130FillCapabilityInfo(struct ath_hal *ah) pCap->halRifsTxSupport = AH_TRUE; pCap->halRtsAggrLimit = 64*1024; /* 802.11n max */ pCap->halExtChanDfsSupport = AH_TRUE; + pCap->halUseCombinedRadarRssi = AH_TRUE; pCap->halAutoSleepSupport = AH_FALSE; /* XXX? */ /* * MBSSID aggregation is broken in Howl v1.1, v1.2, v1.3 diff --git a/sys/dev/ath/ath_hal/ar9001/ar9160_attach.c b/sys/dev/ath/ath_hal/ar9001/ar9160_attach.c index 0b6472b..44a549d 100644 --- a/sys/dev/ath/ath_hal/ar9001/ar9160_attach.c +++ b/sys/dev/ath/ath_hal/ar9001/ar9160_attach.c @@ -82,7 +82,7 @@ ar9160AniSetup(struct ath_hal *ah) }; /* NB: disable ANI noise immmunity for reliable RIFS rx */ - AH5416(ah)->ah_ani_function &= ~ HAL_ANI_NOISE_IMMUNITY_LEVEL; + AH5416(ah)->ah_ani_function &= ~(1 << HAL_ANI_NOISE_IMMUNITY_LEVEL); ar5416AniAttach(ah, &aniparams, &aniparams, AH_TRUE); } @@ -293,6 +293,7 @@ ar9160FillCapabilityInfo(struct ath_hal *ah) pCap->halRifsTxSupport = AH_TRUE; pCap->halRtsAggrLimit = 64*1024; /* 802.11n max */ pCap->halExtChanDfsSupport = AH_TRUE; + pCap->halUseCombinedRadarRssi = AH_TRUE; pCap->halAutoSleepSupport = AH_FALSE; /* XXX? 
*/ pCap->halMbssidAggrSupport = AH_TRUE; pCap->hal4AddrAggrSupport = AH_TRUE; diff --git a/sys/dev/ath/ath_hal/ar9002/ar9280_attach.c b/sys/dev/ath/ath_hal/ar9002/ar9280_attach.c index 3351edb..ebe3be1 100644 --- a/sys/dev/ath/ath_hal/ar9002/ar9280_attach.c +++ b/sys/dev/ath/ath_hal/ar9002/ar9280_attach.c @@ -93,7 +93,7 @@ ar9280AniSetup(struct ath_hal *ah) .period = 100, }; /* NB: disable ANI noise immmunity for reliable RIFS rx */ - AH5416(ah)->ah_ani_function &= ~ HAL_ANI_NOISE_IMMUNITY_LEVEL; + AH5416(ah)->ah_ani_function &= ~(1 << HAL_ANI_NOISE_IMMUNITY_LEVEL); /* NB: ANI is not enabled yet */ ar5416AniAttach(ah, &aniparams, &aniparams, AH_TRUE); @@ -783,6 +783,7 @@ ar9280FillCapabilityInfo(struct ath_hal *ah) pCap->halRifsTxSupport = AH_TRUE; pCap->halRtsAggrLimit = 64*1024; /* 802.11n max */ pCap->halExtChanDfsSupport = AH_TRUE; + pCap->halUseCombinedRadarRssi = AH_TRUE; #if 0 /* XXX bluetooth */ pCap->halBtCoexSupport = AH_TRUE; @@ -804,6 +805,7 @@ ar9280FillCapabilityInfo(struct ath_hal *ah) } pCap->halRxStbcSupport = 1; pCap->halTxStbcSupport = 1; + pCap->halEnhancedDfsSupport = AH_TRUE; return AH_TRUE; } diff --git a/sys/dev/ath/ath_hal/ar9002/ar9285_attach.c b/sys/dev/ath/ath_hal/ar9002/ar9285_attach.c index b7ed27d..9120313 100644 --- a/sys/dev/ath/ath_hal/ar9002/ar9285_attach.c +++ b/sys/dev/ath/ath_hal/ar9002/ar9285_attach.c @@ -98,7 +98,7 @@ ar9285AniSetup(struct ath_hal *ah) .period = 100, }; /* NB: disable ANI noise immmunity for reliable RIFS rx */ - AH5416(ah)->ah_ani_function &= ~ HAL_ANI_NOISE_IMMUNITY_LEVEL; + AH5416(ah)->ah_ani_function &= ~(1 << HAL_ANI_NOISE_IMMUNITY_LEVEL); ar5416AniAttach(ah, &aniparams, &aniparams, AH_TRUE); } @@ -414,6 +414,7 @@ ar9285FillCapabilityInfo(struct ath_hal *ah) pCap->halRifsTxSupport = AH_TRUE; pCap->halRtsAggrLimit = 64*1024; /* 802.11n max */ pCap->halExtChanDfsSupport = AH_TRUE; + pCap->halUseCombinedRadarRssi = AH_TRUE; #if 0 /* XXX bluetooth */ pCap->halBtCoexSupport = AH_TRUE; diff --git 
a/sys/dev/ath/ath_hal/ar9002/ar9287.c b/sys/dev/ath/ath_hal/ar9002/ar9287.c new file mode 100644 index 0000000..9b874b3 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287.c @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2008-2009 Sam Leffler, Errno Consulting + * Copyright (c) 2008 Atheros Communications, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + */ +#include "opt_ah.h" + +/* + * NB: Merlin and later have a simpler RF backend. 
+ */ +#include "ah.h" +#include "ah_internal.h" + +#include "ah_eeprom_v14.h" + +#include "ar9002/ar9287.h" +#include "ar5416/ar5416reg.h" +#include "ar5416/ar5416phy.h" + +#define N(a) (sizeof(a)/sizeof(a[0])) + +struct ar9287State { + RF_HAL_FUNCS base; /* public state, must be first */ + uint16_t pcdacTable[1]; /* XXX */ +}; +#define AR9287(ah) ((struct ar9287State *) AH5212(ah)->ah_rfHal) + +static HAL_BOOL ar9287GetChannelMaxMinPower(struct ath_hal *, + const struct ieee80211_channel *, int16_t *maxPow,int16_t *minPow); +int16_t ar9287GetNfAdjust(struct ath_hal *ah, const HAL_CHANNEL_INTERNAL *c); + +static void +ar9287WriteRegs(struct ath_hal *ah, u_int modesIndex, u_int freqIndex, + int writes) +{ + (void) ath_hal_ini_write(ah, &AH5416(ah)->ah_ini_bb_rfgain, + freqIndex, writes); +} + +/* + * Take the MHz channel value and set the Channel value + * + * ASSUMES: Writes enabled to analog bus + * + * Actual Expression, + * + * For 2GHz channel, + * Channel Frequency = (3/4) * freq_ref * (chansel[8:0] + chanfrac[16:0]/2^17) + * (freq_ref = 40MHz) + * + * For 5GHz channel, + * Channel Frequency = (3/2) * freq_ref * (chansel[8:0] + chanfrac[16:0]/2^10) + * (freq_ref = 40MHz/(24>>amodeRefSel)) + * + * For 5GHz channels which are 5MHz spaced, + * Channel Frequency = (3/2) * freq_ref * (chansel[8:0] + chanfrac[16:0]/2^17) + * (freq_ref = 40MHz) + */ +static HAL_BOOL +ar9287SetChannel(struct ath_hal *ah, const struct ieee80211_channel *chan) +{ + uint16_t bMode, fracMode, aModeRefSel = 0; + uint32_t freq, ndiv, channelSel = 0, channelFrac = 0, reg32 = 0; + CHAN_CENTERS centers; + uint32_t refDivA = 24; + + OS_MARK(ah, AH_MARK_SETCHANNEL, chan->ic_freq); + + ar5416GetChannelCenters(ah, chan, ¢ers); + freq = centers.synth_center; + + reg32 = OS_REG_READ(ah, AR_PHY_SYNTH_CONTROL); + reg32 &= 0xc0000000; + + if (freq < 4800) { /* 2 GHz, fractional mode */ + uint32_t txctl; + int regWrites = 0; + + bMode = 1; + fracMode = 1; + aModeRefSel = 0; + channelSel = (freq * 
0x10000)/15; + + if (AR_SREV_KIWI_11_OR_LATER(ah)) { + if (freq == 2484) { + ath_hal_ini_write(ah, + &AH9287(ah)->ah_ini_cckFirJapan2484, 1, + regWrites); + } else { + ath_hal_ini_write(ah, + &AH9287(ah)->ah_ini_cckFirNormal, 1, + regWrites); + } + } + + txctl = OS_REG_READ(ah, AR_PHY_CCK_TX_CTRL); + if (freq == 2484) { + /* Enable channel spreading for channel 14 */ + OS_REG_WRITE(ah, AR_PHY_CCK_TX_CTRL, + txctl | AR_PHY_CCK_TX_CTRL_JAPAN); + } else { + OS_REG_WRITE(ah, AR_PHY_CCK_TX_CTRL, + txctl &~ AR_PHY_CCK_TX_CTRL_JAPAN); + } + } else { + bMode = 0; + fracMode = 0; + + if ((freq % 20) == 0) { + aModeRefSel = 3; + } else if ((freq % 10) == 0) { + aModeRefSel = 2; + } else { + aModeRefSel = 0; + /* + * Enable 2G (fractional) mode for channels which + * are 5MHz spaced + */ + fracMode = 1; + refDivA = 1; + channelSel = (freq * 0x8000)/15; + + /* RefDivA setting */ + OS_A_REG_RMW_FIELD(ah, AR_AN_SYNTH9, + AR_AN_SYNTH9_REFDIVA, refDivA); + } + if (!fracMode) { + ndiv = (freq * (refDivA >> aModeRefSel))/60; + channelSel = ndiv & 0x1ff; + channelFrac = (ndiv & 0xfffffe00) * 2; + channelSel = (channelSel << 17) | channelFrac; + } + } + + reg32 = reg32 | (bMode << 29) | (fracMode << 28) | + (aModeRefSel << 26) | (channelSel); + + OS_REG_WRITE(ah, AR_PHY_SYNTH_CONTROL, reg32); + + AH_PRIVATE(ah)->ah_curchan = chan; + + return AH_TRUE; +} + +/* + * Return a reference to the requested RF Bank. 
+ */ +static uint32_t * +ar9287GetRfBank(struct ath_hal *ah, int bank) +{ + HALDEBUG(ah, HAL_DEBUG_ANY, "%s: unknown RF Bank %d requested\n", + __func__, bank); + return AH_NULL; +} + +/* + * Reads EEPROM header info from device structure and programs + * all rf registers + */ +static HAL_BOOL +ar9287SetRfRegs(struct ath_hal *ah, const struct ieee80211_channel *chan, + uint16_t modesIndex, uint16_t *rfXpdGain) +{ + return AH_TRUE; /* nothing to do */ +} + +/* + * Read the transmit power levels from the structures taken from EEPROM + * Interpolate read transmit power values for this channel + * Organize the transmit power values into a table for writing into the hardware + */ + +static HAL_BOOL +ar9287SetPowerTable(struct ath_hal *ah, int16_t *pPowerMin, int16_t *pPowerMax, + const struct ieee80211_channel *chan, uint16_t *rfXpdGain) +{ + return AH_TRUE; +} + +#if 0 +static int16_t +ar9287GetMinPower(struct ath_hal *ah, EXPN_DATA_PER_CHANNEL_5112 *data) +{ + int i, minIndex; + int16_t minGain,minPwr,minPcdac,retVal; + + /* Assume NUM_POINTS_XPD0 > 0 */ + minGain = data->pDataPerXPD[0].xpd_gain; + for (minIndex=0,i=1; i<NUM_XPD_PER_CHANNEL; i++) { + if (data->pDataPerXPD[i].xpd_gain < minGain) { + minIndex = i; + minGain = data->pDataPerXPD[i].xpd_gain; + } + } + minPwr = data->pDataPerXPD[minIndex].pwr_t4[0]; + minPcdac = data->pDataPerXPD[minIndex].pcdac[0]; + for (i=1; i<NUM_POINTS_XPD0; i++) { + if (data->pDataPerXPD[minIndex].pwr_t4[i] < minPwr) { + minPwr = data->pDataPerXPD[minIndex].pwr_t4[i]; + minPcdac = data->pDataPerXPD[minIndex].pcdac[i]; + } + } + retVal = minPwr - (minPcdac*2); + return(retVal); +} +#endif + +static HAL_BOOL +ar9287GetChannelMaxMinPower(struct ath_hal *ah, + const struct ieee80211_channel *chan, + int16_t *maxPow, int16_t *minPow) +{ +#if 0 + struct ath_hal_5212 *ahp = AH5212(ah); + int numChannels=0,i,last; + int totalD, totalF,totalMin; + EXPN_DATA_PER_CHANNEL_5112 *data=AH_NULL; + EEPROM_POWER_EXPN_5112 *powerArray=AH_NULL; + + 
*maxPow = 0; + if (IS_CHAN_A(chan)) { + powerArray = ahp->ah_modePowerArray5112; + data = powerArray[headerInfo11A].pDataPerChannel; + numChannels = powerArray[headerInfo11A].numChannels; + } else if (IS_CHAN_G(chan) || IS_CHAN_108G(chan)) { + /* XXX - is this correct? Should we also use the same power for turbo G? */ + powerArray = ahp->ah_modePowerArray5112; + data = powerArray[headerInfo11G].pDataPerChannel; + numChannels = powerArray[headerInfo11G].numChannels; + } else if (IS_CHAN_B(chan)) { + powerArray = ahp->ah_modePowerArray5112; + data = powerArray[headerInfo11B].pDataPerChannel; + numChannels = powerArray[headerInfo11B].numChannels; + } else { + return (AH_TRUE); + } + /* Make sure the channel is in the range of the TP values + * (freq piers) + */ + if ((numChannels < 1) || + (chan->channel < data[0].channelValue) || + (chan->channel > data[numChannels-1].channelValue)) + return(AH_FALSE); + + /* Linearly interpolate the power value now */ + for (last=0,i=0; + (i<numChannels) && (chan->channel > data[i].channelValue); + last=i++); + totalD = data[i].channelValue - data[last].channelValue; + if (totalD > 0) { + totalF = data[i].maxPower_t4 - data[last].maxPower_t4; + *maxPow = (int8_t) ((totalF*(chan->channel-data[last].channelValue) + data[last].maxPower_t4*totalD)/totalD); + + totalMin = ar9287GetMinPower(ah,&data[i]) - ar9287GetMinPower(ah, &data[last]); + *minPow = (int8_t) ((totalMin*(chan->channel-data[last].channelValue) + ar9287GetMinPower(ah, &data[last])*totalD)/totalD); + return (AH_TRUE); + } else { + if (chan->channel == data[i].channelValue) { + *maxPow = data[i].maxPower_t4; + *minPow = ar9287GetMinPower(ah, &data[i]); + return(AH_TRUE); + } else + return(AH_FALSE); + } +#else + *maxPow = *minPow = 0; + return AH_FALSE; +#endif +} + +/* + * The ordering of nfarray is thus: + * + * nfarray[0]: Chain 0 ctl + * nfarray[1]: Chain 1 ctl + * nfarray[2]: Chain 2 ctl + * nfarray[3]: Chain 0 ext + * nfarray[4]: Chain 1 ext + * nfarray[5]: Chain 2 
ext + */ +static void +ar9287GetNoiseFloor(struct ath_hal *ah, int16_t nfarray[]) +{ + int16_t nf; + + nf = MS(OS_REG_READ(ah, AR_PHY_CCA), AR9280_PHY_MINCCA_PWR); + if (nf & 0x100) + nf = 0 - ((nf ^ 0x1ff) + 1); + HALDEBUG(ah, HAL_DEBUG_NFCAL, + "NF calibrated [ctl] [chain 0] is %d\n", nf); + nfarray[0] = nf; + + nf = MS(OS_REG_READ(ah, AR_PHY_CH1_CCA), AR9280_PHY_CH1_MINCCA_PWR); + if (nf & 0x100) + nf = 0 - ((nf ^ 0x1ff) + 1); + HALDEBUG(ah, HAL_DEBUG_NFCAL, + "NF calibrated [ctl] [chain 1] is %d\n", nf); + nfarray[1] = nf; + + nf = MS(OS_REG_READ(ah, AR_PHY_EXT_CCA), AR9280_PHY_EXT_MINCCA_PWR); + if (nf & 0x100) + nf = 0 - ((nf ^ 0x1ff) + 1); + HALDEBUG(ah, HAL_DEBUG_NFCAL, + "NF calibrated [ext] [chain 0] is %d\n", nf); + nfarray[3] = nf; + + nf = MS(OS_REG_READ(ah, AR_PHY_CH1_EXT_CCA), AR9280_PHY_CH1_EXT_MINCCA_PWR); + if (nf & 0x100) + nf = 0 - ((nf ^ 0x1ff) + 1); + HALDEBUG(ah, HAL_DEBUG_NFCAL, + "NF calibrated [ext] [chain 1] is %d\n", nf); + nfarray[4] = nf; + + /* Chain 2 - invalid */ + nfarray[2] = 0; + nfarray[5] = 0; + +} + +/* + * Adjust NF based on statistical values for 5GHz frequencies. 
+ * Stubbed:Not used by Fowl + */ +int16_t +ar9287GetNfAdjust(struct ath_hal *ah, const HAL_CHANNEL_INTERNAL *c) +{ + return 0; +} + +/* + * Free memory for analog bank scratch buffers + */ +static void +ar9287RfDetach(struct ath_hal *ah) +{ + struct ath_hal_5212 *ahp = AH5212(ah); + + HALASSERT(ahp->ah_rfHal != AH_NULL); + ath_hal_free(ahp->ah_rfHal); + ahp->ah_rfHal = AH_NULL; +} + +HAL_BOOL +ar9287RfAttach(struct ath_hal *ah, HAL_STATUS *status) +{ + struct ath_hal_5212 *ahp = AH5212(ah); + struct ar9287State *priv; + + HALDEBUG(ah, HAL_DEBUG_ATTACH, "%s: attach AR9280 radio\n", __func__); + + HALASSERT(ahp->ah_rfHal == AH_NULL); + priv = ath_hal_malloc(sizeof(struct ar9287State)); + if (priv == AH_NULL) { + HALDEBUG(ah, HAL_DEBUG_ANY, + "%s: cannot allocate private state\n", __func__); + *status = HAL_ENOMEM; /* XXX */ + return AH_FALSE; + } + priv->base.rfDetach = ar9287RfDetach; + priv->base.writeRegs = ar9287WriteRegs; + priv->base.getRfBank = ar9287GetRfBank; + priv->base.setChannel = ar9287SetChannel; + priv->base.setRfRegs = ar9287SetRfRegs; + priv->base.setPowerTable = ar9287SetPowerTable; + priv->base.getChannelMaxMinPower = ar9287GetChannelMaxMinPower; + priv->base.getNfAdjust = ar9287GetNfAdjust; + + ahp->ah_pcdacTable = priv->pcdacTable; + ahp->ah_pcdacTableSize = sizeof(priv->pcdacTable); + ahp->ah_rfHal = &priv->base; + /* + * Set noise floor adjust method; we arrange a + * direct call instead of thunking. + */ + AH_PRIVATE(ah)->ah_getNfAdjust = priv->base.getNfAdjust; + AH_PRIVATE(ah)->ah_getNoiseFloor = ar9287GetNoiseFloor; + + return AH_TRUE; +} diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287.h b/sys/dev/ath/ath_hal/ar9002/ar9287.h new file mode 100644 index 0000000..90d25ed --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010 Atheros Communications, Inc. 
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _ATH_AR9287_H_ +#define _ATH_AR9287_H_ + +#include "ar5416/ar5416.h" + +/* + * This is a chip thing, but it's used here as part of the + * ath_hal_9287 struct; so it's convienent to locate the + * define here. + */ +#define AR9287_TX_GAIN_TABLE_SIZE 22 + +struct ath_hal_9287 { + struct ath_hal_5416 ah_5416; + + HAL_INI_ARRAY ah_ini_xmodes; + HAL_INI_ARRAY ah_ini_rxgain; + HAL_INI_ARRAY ah_ini_txgain; + + HAL_INI_ARRAY ah_ini_cckFirNormal; + HAL_INI_ARRAY ah_ini_cckFirJapan2484; + + int PDADCdelta; + + uint32_t originalGain[AR9287_TX_GAIN_TABLE_SIZE]; +}; +#define AH9287(_ah) ((struct ath_hal_9287 *)(_ah)) + +#define AR9287_DEFAULT_RXCHAINMASK 3 +#define AR9285_DEFAULT_RXCHAINMASK 1 +#define AR9287_DEFAULT_TXCHAINMASK 3 +#define AR9285_DEFAULT_TXCHAINMASK 1 + +#define AR_PHY_CCA_NOM_VAL_9287_2GHZ -112 +#define AR_PHY_CCA_NOM_VAL_9287_5GHZ -112 +#define AR_PHY_CCA_MIN_GOOD_VAL_9287_2GHZ -127 +#define AR_PHY_CCA_MIN_GOOD_VAL_9287_5GHZ -122 +#define AR_PHY_CCA_MAX_GOOD_VAL_9287_2GHZ -97 +#define AR_PHY_CCA_MAX_GOOD_VAL_9287_5GHZ -102 + +extern HAL_BOOL ar9287RfAttach(struct ath_hal *, HAL_STATUS *); +extern HAL_BOOL ar9287SetAntennaSwitch(struct ath_hal *, HAL_ANT_SETTING); + +#endif /* _ATH_AR9287_H_ */ 
diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287.ini b/sys/dev/ath/ath_hal/ar9002/ar9287.ini new file mode 100644 index 0000000..7f4ca05 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287.ini @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2010 Atheros Communications Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * $FreeBSD$ + */ + +static const uint32_t ar9287Modes_9287_1_1[][6] = { + {0x00001030, 0x00000000, 0x00000000, 0x000002c0, 0x00000160, 0x000001e0}, + {0x00001070, 0x00000000, 0x00000000, 0x00000318, 0x0000018c, 0x000001e0}, + {0x000010b0, 0x00000000, 0x00000000, 0x00007c70, 0x00003e38, 0x00001180}, + {0x000010f0, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, + {0x00008014, 0x00000000, 0x00000000, 0x10801600, 0x08400b00, 0x06e006e0}, + {0x0000801c, 0x00000000, 0x00000000, 0x12e00057, 0x12e0002b, 0x0988004f}, + {0x00008120, 0x08f04800, 0x08f04800, 0x08f04810, 0x08f04810, 0x08f04810}, + {0x000081d0, 0x00003200, 0x00003200, 0x0000320a, 0x0000320a, 0x0000320a}, + {0x00008318, 0x00000000, 0x00000000, 0x00006880, 0x00003440, 0x00006880}, + {0x00009804, 0x00000000, 0x00000000, 0x000003c4, 0x00000300, 0x00000303}, + {0x00009820, 0x00000000, 0x00000000, 0x02020200, 0x02020200, 0x02020200}, + {0x00009824, 0x00000000, 0x00000000, 0x01000e0e, 0x01000e0e, 0x01000e0e}, + {0x00009828, 0x00000000, 0x00000000, 0x3a020001, 0x3a020001, 0x3a020001}, + {0x00009834, 0x00000000, 0x00000000, 0x00000e0e, 0x00000e0e, 0x00000e0e}, + {0x00009838, 0x00000003, 0x00000003, 0x00000007, 0x00000007, 0x00000007}, + {0x00009840, 0x206a002e, 0x206a002e, 0x206a012e, 0x206a012e, 0x206a012e}, + {0x00009844, 0x03720000, 0x03720000, 0x037216a0, 0x037216a0, 0x037216a0}, + {0x00009850, 0x60000000, 0x60000000, 0x6d4000e2, 0x6c4000e2, 0x6c4000e2}, + {0x00009858, 0x7c000d00, 0x7c000d00, 0x7ec84d2e, 0x7ec84d2e, 0x7ec84d2e}, + {0x0000985c, 0x3100005e, 0x3100005e, 0x3139605e, 0x31395d5e, 0x31395d5e}, + {0x00009860, 0x00058d00, 0x00058d00, 0x00058d20, 0x00058d20, 0x00058d18}, + {0x00009864, 0x00000e00, 0x00000e00, 0x0001ce00, 0x0001ce00, 0x0001ce00}, + {0x00009868, 0x000040c0, 0x000040c0, 0x5ac640d0, 0x5ac640d0, 0x5ac640d0}, + {0x0000986c, 0x00000080, 0x00000080, 0x06903881, 0x06903881, 0x06903881}, + {0x00009914, 0x00000000, 0x00000000, 0x00001130, 0x00000898, 0x000007d0}, + {0x00009918, 
0x00000000, 0x00000000, 0x00000016, 0x0000000b, 0x00000016}, + {0x00009924, 0xd00a8a01, 0xd00a8a01, 0xd00a8a0d, 0xd00a8a0d, 0xd00a8a0d}, + {0x00009944, 0xefbc0000, 0xefbc0000, 0xefbc1010, 0xefbc1010, 0xefbc1010}, + {0x00009960, 0x00000000, 0x00000000, 0x00000010, 0x00000010, 0x00000010}, + {0x0000a960, 0x00000000, 0x00000000, 0x00000010, 0x00000010, 0x00000010}, + {0x00009964, 0x00000000, 0x00000000, 0x00000210, 0x00000210, 0x00000210}, + {0x0000c968, 0x00000200, 0x00000200, 0x000003ce, 0x000003ce, 0x000003ce}, + {0x000099b8, 0x00000000, 0x00000000, 0x0000001c, 0x0000001c, 0x0000001c}, + {0x000099bc, 0x00000000, 0x00000000, 0x00000c00, 0x00000c00, 0x00000c00}, + {0x000099c0, 0x00000000, 0x00000000, 0x05eea6d4, 0x05eea6d4, 0x05eea6d4}, + {0x0000a204, 0x00000440, 0x00000440, 0x00000444, 0x00000444, 0x00000444}, + {0x0000a20c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000b20c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000a21c, 0x1803800a, 0x1803800a, 0x1883800a, 0x1883800a, 0x1883800a}, + {0x0000a230, 0x00000000, 0x00000000, 0x00000210, 0x00000108, 0x00000000}, + {0x0000a250, 0x00000000, 0x00000000, 0x0004a000, 0x0004a000, 0x0004a000}, + {0x0000a358, 0x7999aa02, 0x7999aa02, 0x7999aa0e, 0x7999aa0e, 0x7999aa0e}, + {0x0000a3d8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, +}; + +static const uint32_t ar9287Common_9287_1_1[][2] = { + /* Addr allmodes */ + {0x0000000c, 0x00000000}, + {0x00000030, 0x00020015}, + {0x00000034, 0x00000005}, + {0x00000040, 0x00000000}, + {0x00000044, 0x00000008}, + {0x00000048, 0x00000008}, + {0x0000004c, 0x00000010}, + {0x00000050, 0x00000000}, + {0x00000054, 0x0000001f}, + {0x00000800, 0x00000000}, + {0x00000804, 0x00000000}, + {0x00000808, 0x00000000}, + {0x0000080c, 0x00000000}, + {0x00000810, 0x00000000}, + {0x00000814, 0x00000000}, + {0x00000818, 0x00000000}, + {0x0000081c, 0x00000000}, + {0x00000820, 0x00000000}, + {0x00000824, 0x00000000}, + {0x00001040, 0x002ffc0f}, + 
{0x00001044, 0x002ffc0f}, + {0x00001048, 0x002ffc0f}, + {0x0000104c, 0x002ffc0f}, + {0x00001050, 0x002ffc0f}, + {0x00001054, 0x002ffc0f}, + {0x00001058, 0x002ffc0f}, + {0x0000105c, 0x002ffc0f}, + {0x00001060, 0x002ffc0f}, + {0x00001064, 0x002ffc0f}, + {0x00001230, 0x00000000}, + {0x00001270, 0x00000000}, + {0x00001038, 0x00000000}, + {0x00001078, 0x00000000}, + {0x000010b8, 0x00000000}, + {0x000010f8, 0x00000000}, + {0x00001138, 0x00000000}, + {0x00001178, 0x00000000}, + {0x000011b8, 0x00000000}, + {0x000011f8, 0x00000000}, + {0x00001238, 0x00000000}, + {0x00001278, 0x00000000}, + {0x000012b8, 0x00000000}, + {0x000012f8, 0x00000000}, + {0x00001338, 0x00000000}, + {0x00001378, 0x00000000}, + {0x000013b8, 0x00000000}, + {0x000013f8, 0x00000000}, + {0x00001438, 0x00000000}, + {0x00001478, 0x00000000}, + {0x000014b8, 0x00000000}, + {0x000014f8, 0x00000000}, + {0x00001538, 0x00000000}, + {0x00001578, 0x00000000}, + {0x000015b8, 0x00000000}, + {0x000015f8, 0x00000000}, + {0x00001638, 0x00000000}, + {0x00001678, 0x00000000}, + {0x000016b8, 0x00000000}, + {0x000016f8, 0x00000000}, + {0x00001738, 0x00000000}, + {0x00001778, 0x00000000}, + {0x000017b8, 0x00000000}, + {0x000017f8, 0x00000000}, + {0x0000103c, 0x00000000}, + {0x0000107c, 0x00000000}, + {0x000010bc, 0x00000000}, + {0x000010fc, 0x00000000}, + {0x0000113c, 0x00000000}, + {0x0000117c, 0x00000000}, + {0x000011bc, 0x00000000}, + {0x000011fc, 0x00000000}, + {0x0000123c, 0x00000000}, + {0x0000127c, 0x00000000}, + {0x000012bc, 0x00000000}, + {0x000012fc, 0x00000000}, + {0x0000133c, 0x00000000}, + {0x0000137c, 0x00000000}, + {0x000013bc, 0x00000000}, + {0x000013fc, 0x00000000}, + {0x0000143c, 0x00000000}, + {0x0000147c, 0x00000000}, + {0x00004030, 0x00000002}, + {0x0000403c, 0x00000002}, + {0x00004024, 0x0000001f}, + {0x00004060, 0x00000000}, + {0x00004064, 0x00000000}, + {0x00007010, 0x00000033}, + {0x00007020, 0x00000000}, + {0x00007034, 0x00000002}, + {0x00007038, 0x000004c2}, + {0x00008004, 0x00000000}, + 
{0x00008008, 0x00000000}, + {0x0000800c, 0x00000000}, + {0x00008018, 0x00000700}, + {0x00008020, 0x00000000}, + {0x00008038, 0x00000000}, + {0x0000803c, 0x00000000}, + {0x00008048, 0x40000000}, + {0x00008054, 0x00000000}, + {0x00008058, 0x00000000}, + {0x0000805c, 0x000fc78f}, + {0x00008060, 0x0000000f}, + {0x00008064, 0x00000000}, + {0x00008070, 0x00000000}, + {0x000080c0, 0x2a80001a}, + {0x000080c4, 0x05dc01e0}, + {0x000080c8, 0x1f402710}, + {0x000080cc, 0x01f40000}, + {0x000080d0, 0x00001e00}, + {0x000080d4, 0x00000000}, + {0x000080d8, 0x00400000}, + {0x000080e0, 0xffffffff}, + {0x000080e4, 0x0000ffff}, + {0x000080e8, 0x003f3f3f}, + {0x000080ec, 0x00000000}, + {0x000080f0, 0x00000000}, + {0x000080f4, 0x00000000}, + {0x000080f8, 0x00000000}, + {0x000080fc, 0x00020000}, + {0x00008100, 0x00020000}, + {0x00008104, 0x00000001}, + {0x00008108, 0x00000052}, + {0x0000810c, 0x00000000}, + {0x00008110, 0x00000168}, + {0x00008118, 0x000100aa}, + {0x0000811c, 0x00003210}, + {0x00008124, 0x00000000}, + {0x00008128, 0x00000000}, + {0x0000812c, 0x00000000}, + {0x00008130, 0x00000000}, + {0x00008134, 0x00000000}, + {0x00008138, 0x00000000}, + {0x0000813c, 0x00000000}, + {0x00008144, 0xffffffff}, + {0x00008168, 0x00000000}, + {0x0000816c, 0x00000000}, + {0x00008170, 0x18487320}, + {0x00008174, 0xfaa4fa50}, + {0x00008178, 0x00000100}, + {0x0000817c, 0x00000000}, + {0x000081c0, 0x00000000}, + {0x000081c4, 0x00000000}, + {0x000081d4, 0x00000000}, + {0x000081ec, 0x00000000}, + {0x000081f0, 0x00000000}, + {0x000081f4, 0x00000000}, + {0x000081f8, 0x00000000}, + {0x000081fc, 0x00000000}, + {0x00008200, 0x00000000}, + {0x00008204, 0x00000000}, + {0x00008208, 0x00000000}, + {0x0000820c, 0x00000000}, + {0x00008210, 0x00000000}, + {0x00008214, 0x00000000}, + {0x00008218, 0x00000000}, + {0x0000821c, 0x00000000}, + {0x00008220, 0x00000000}, + {0x00008224, 0x00000000}, + {0x00008228, 0x00000000}, + {0x0000822c, 0x00000000}, + {0x00008230, 0x00000000}, + {0x00008234, 0x00000000}, + 
{0x00008238, 0x00000000}, + {0x0000823c, 0x00000000}, + {0x00008240, 0x00100000}, + {0x00008244, 0x0010f400}, + {0x00008248, 0x00000100}, + {0x0000824c, 0x0001e800}, + {0x00008250, 0x00000000}, + {0x00008254, 0x00000000}, + {0x00008258, 0x00000000}, + {0x0000825c, 0x400000ff}, + {0x00008260, 0x00080922}, + {0x00008264, 0x88a00010}, + {0x00008270, 0x00000000}, + {0x00008274, 0x40000000}, + {0x00008278, 0x003e4180}, + {0x0000827c, 0x00000000}, + {0x00008284, 0x0000002c}, + {0x00008288, 0x0000002c}, + {0x0000828c, 0x000000ff}, + {0x00008294, 0x00000000}, + {0x00008298, 0x00000000}, + {0x0000829c, 0x00000000}, + {0x00008300, 0x00000040}, + {0x00008314, 0x00000000}, + {0x00008328, 0x00000000}, + {0x0000832c, 0x00000007}, + {0x00008330, 0x00000302}, + {0x00008334, 0x00000e00}, + {0x00008338, 0x00ff0000}, + {0x0000833c, 0x00000000}, + {0x00008340, 0x000107ff}, + {0x00008344, 0x01c81043}, + {0x00008360, 0xffffffff}, + {0x00008364, 0xffffffff}, + {0x00008368, 0x00000000}, + {0x00008370, 0x00000000}, + {0x00008374, 0x000000ff}, + {0x00008378, 0x00000000}, + {0x0000837c, 0x00000000}, + {0x00008380, 0xffffffff}, + {0x00008384, 0xffffffff}, + {0x00008390, 0x0fffffff}, + {0x00008394, 0x0fffffff}, + {0x00008398, 0x00000000}, + {0x0000839c, 0x00000000}, + {0x000083a0, 0x00000000}, + {0x00009808, 0x00000000}, + {0x0000980c, 0xafe68e30}, + {0x00009810, 0xfd14e000}, + {0x00009814, 0x9c0a9f6b}, + {0x0000981c, 0x00000000}, + {0x0000982c, 0x0000a000}, + {0x00009830, 0x00000000}, + {0x0000983c, 0x00200400}, + {0x0000984c, 0x0040233c}, + {0x0000a84c, 0x0040233c}, + {0x00009854, 0x00000044}, + {0x00009900, 0x00000000}, + {0x00009904, 0x00000000}, + {0x00009908, 0x00000000}, + {0x0000990c, 0x00000000}, + {0x00009910, 0x10002310}, + {0x0000991c, 0x10000fff}, + {0x00009920, 0x04900000}, + {0x0000a920, 0x04900000}, + {0x00009928, 0x00000001}, + {0x0000992c, 0x00000004}, + {0x00009930, 0x00000000}, + {0x0000a930, 0x00000000}, + {0x00009934, 0x1e1f2022}, + {0x00009938, 0x0a0b0c0d}, + 
{0x0000993c, 0x00000000}, + {0x00009948, 0x9280c00a}, + {0x0000994c, 0x00020028}, + {0x00009954, 0x5f3ca3de}, + {0x00009958, 0x0108ecff}, + {0x00009940, 0x14750604}, + {0x0000c95c, 0x004b6a8e}, + {0x00009970, 0x990bb514}, + {0x00009974, 0x00000000}, + {0x00009978, 0x00000001}, + {0x0000997c, 0x00000000}, + {0x000099a0, 0x00000000}, + {0x000099a4, 0x00000001}, + {0x000099a8, 0x201fff00}, + {0x000099ac, 0x0c6f0000}, + {0x000099b0, 0x03051000}, + {0x000099b4, 0x00000820}, + {0x000099c4, 0x06336f77}, + {0x000099c8, 0x6af6532f}, + {0x000099cc, 0x08f186c8}, + {0x000099d0, 0x00046384}, + {0x000099dc, 0x00000000}, + {0x000099e0, 0x00000000}, + {0x000099e4, 0xaaaaaaaa}, + {0x000099e8, 0x3c466478}, + {0x000099ec, 0x0cc80caa}, + {0x000099f0, 0x00000000}, + {0x000099fc, 0x00001042}, + {0x0000a208, 0x803e4788}, + {0x0000a210, 0x4080a333}, + {0x0000a214, 0x40206c10}, + {0x0000a218, 0x009c4060}, + {0x0000a220, 0x01834061}, + {0x0000a224, 0x00000400}, + {0x0000a228, 0x000003b5}, + {0x0000a22c, 0x233f7180}, + {0x0000a234, 0x20202020}, + {0x0000a238, 0x20202020}, + {0x0000a23c, 0x13c889af}, + {0x0000a240, 0x38490a20}, + {0x0000a244, 0x00000000}, + {0x0000a248, 0xfffffffc}, + {0x0000a24c, 0x00000000}, + {0x0000a254, 0x00000000}, + {0x0000a258, 0x0cdbd380}, + {0x0000a25c, 0x0f0f0f01}, + {0x0000a260, 0xdfa91f01}, + {0x0000a264, 0x00418a11}, + {0x0000b264, 0x00418a11}, + {0x0000a268, 0x00000000}, + {0x0000a26c, 0x0e79e5c6}, + {0x0000b26c, 0x0e79e5c6}, + {0x0000d270, 0x00820820}, + {0x0000a278, 0x1ce739ce}, + {0x0000a27c, 0x050701ce}, + {0x0000d35c, 0x07ffffef}, + {0x0000d360, 0x0fffffe7}, + {0x0000d364, 0x17ffffe5}, + {0x0000d368, 0x1fffffe4}, + {0x0000d36c, 0x37ffffe3}, + {0x0000d370, 0x3fffffe3}, + {0x0000d374, 0x57ffffe3}, + {0x0000d378, 0x5fffffe2}, + {0x0000d37c, 0x7fffffe2}, + {0x0000d380, 0x7f3c7bba}, + {0x0000d384, 0xf3307ff0}, + {0x0000a388, 0x0c000000}, + {0x0000a38c, 0x20202020}, + {0x0000a390, 0x20202020}, + {0x0000a394, 0x1ce739ce}, + {0x0000a398, 0x000001ce}, + 
{0x0000b398, 0x000001ce}, + {0x0000a39c, 0x00000001}, + {0x0000a3c8, 0x00000246}, + {0x0000a3cc, 0x20202020}, + {0x0000a3d0, 0x20202020}, + {0x0000a3d4, 0x20202020}, + {0x0000a3dc, 0x1ce739ce}, + {0x0000a3e0, 0x000001ce}, + {0x0000a3e4, 0x00000000}, + {0x0000a3e8, 0x18c43433}, + {0x0000a3ec, 0x00f70081}, + {0x0000a3f0, 0x01036a1e}, + {0x0000a3f4, 0x00000000}, + {0x0000b3f4, 0x00000000}, + {0x0000a7d8, 0x000003f1}, + {0x00007800, 0x00000800}, + {0x00007804, 0x6c35ffd2}, + {0x00007808, 0x6db6c000}, + {0x0000780c, 0x6db6cb30}, + {0x00007810, 0x6db6cb6c}, + {0x00007814, 0x0501e200}, + {0x00007818, 0x0094128d}, + {0x0000781c, 0x976ee392}, + {0x00007820, 0xf75ff6fc}, + {0x00007824, 0x00040000}, + {0x00007828, 0xdb003012}, + {0x0000782c, 0x04924914}, + {0x00007830, 0x21084210}, + {0x00007834, 0x00140000}, + {0x00007838, 0x0e4548d8}, + {0x0000783c, 0x54214514}, + {0x00007840, 0x02025830}, + {0x00007844, 0x71c0d388}, + {0x00007848, 0x934934a8}, + {0x00007850, 0x00000000}, + {0x00007854, 0x00000800}, + {0x00007858, 0x6c35ffd2}, + {0x0000785c, 0x6db6c000}, + {0x00007860, 0x6db6cb30}, + {0x00007864, 0x6db6cb6c}, + {0x00007868, 0x0501e200}, + {0x0000786c, 0x0094128d}, + {0x00007870, 0x976ee392}, + {0x00007874, 0xf75ff6fc}, + {0x00007878, 0x00040000}, + {0x0000787c, 0xdb003012}, + {0x00007880, 0x04924914}, + {0x00007884, 0x21084210}, + {0x00007888, 0x001b6db0}, + {0x0000788c, 0x00376b63}, + {0x00007890, 0x06db6db6}, + {0x00007894, 0x006d8000}, + {0x00007898, 0x48100000}, + {0x0000789c, 0x00000000}, + {0x000078a0, 0x08000000}, + {0x000078a4, 0x0007ffd8}, + {0x000078a8, 0x0007ffd8}, + {0x000078ac, 0x001c0020}, + {0x000078b0, 0x00060aeb}, + {0x000078b4, 0x40008080}, + {0x000078b8, 0x2a850160}, +}; + +static const uint32_t ar9287Common_normal_cck_fir_coeff_9287_1_1[][2] = { + /* Addr allmodes */ + {0x0000a1f4, 0x00fffeff}, + {0x0000a1f8, 0x00f5f9ff}, + {0x0000a1fc, 0xb79f6427}, +}; + +static const uint32_t ar9287Common_japan_2484_cck_fir_coeff_9287_1_1[][2] = { + /* Addr allmodes */ 
+ {0x0000a1f4, 0x00000000}, + {0x0000a1f8, 0xefff0301}, + {0x0000a1fc, 0xca9228ee}, +}; + +static const uint32_t ar9287Modes_tx_gain_9287_1_1[][6] = { + {0x0000a300, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000a304, 0x00000000, 0x00000000, 0x00004002, 0x00004002, 0x00004002}, + {0x0000a308, 0x00000000, 0x00000000, 0x00008004, 0x00008004, 0x00008004}, + {0x0000a30c, 0x00000000, 0x00000000, 0x0000c00a, 0x0000c00a, 0x0000c00a}, + {0x0000a310, 0x00000000, 0x00000000, 0x0001000c, 0x0001000c, 0x0001000c}, + {0x0000a314, 0x00000000, 0x00000000, 0x0001420b, 0x0001420b, 0x0001420b}, + {0x0000a318, 0x00000000, 0x00000000, 0x0001824a, 0x0001824a, 0x0001824a}, + {0x0000a31c, 0x00000000, 0x00000000, 0x0001c44a, 0x0001c44a, 0x0001c44a}, + {0x0000a320, 0x00000000, 0x00000000, 0x0002064a, 0x0002064a, 0x0002064a}, + {0x0000a324, 0x00000000, 0x00000000, 0x0002484a, 0x0002484a, 0x0002484a}, + {0x0000a328, 0x00000000, 0x00000000, 0x00028a4a, 0x00028a4a, 0x00028a4a}, + {0x0000a32c, 0x00000000, 0x00000000, 0x0002cc4a, 0x0002cc4a, 0x0002cc4a}, + {0x0000a330, 0x00000000, 0x00000000, 0x00030e4a, 0x00030e4a, 0x00030e4a}, + {0x0000a334, 0x00000000, 0x00000000, 0x00034e8a, 0x00034e8a, 0x00034e8a}, + {0x0000a338, 0x00000000, 0x00000000, 0x00038e8c, 0x00038e8c, 0x00038e8c}, + {0x0000a33c, 0x00000000, 0x00000000, 0x0003cecc, 0x0003cecc, 0x0003cecc}, + {0x0000a340, 0x00000000, 0x00000000, 0x00040ed4, 0x00040ed4, 0x00040ed4}, + {0x0000a344, 0x00000000, 0x00000000, 0x00044edc, 0x00044edc, 0x00044edc}, + {0x0000a348, 0x00000000, 0x00000000, 0x00048ede, 0x00048ede, 0x00048ede}, + {0x0000a34c, 0x00000000, 0x00000000, 0x0004cf1e, 0x0004cf1e, 0x0004cf1e}, + {0x0000a350, 0x00000000, 0x00000000, 0x00050f5e, 0x00050f5e, 0x00050f5e}, + {0x0000a354, 0x00000000, 0x00000000, 0x00054f9e, 0x00054f9e, 0x00054f9e}, + {0x0000a780, 0x00000000, 0x00000000, 0x00000062, 0x00000062, 0x00000062}, + {0x0000a784, 0x00000000, 0x00000000, 0x00004064, 0x00004064, 0x00004064}, + {0x0000a788, 
0x00000000, 0x00000000, 0x000080a4, 0x000080a4, 0x000080a4}, + {0x0000a78c, 0x00000000, 0x00000000, 0x0000c0aa, 0x0000c0aa, 0x0000c0aa}, + {0x0000a790, 0x00000000, 0x00000000, 0x000100ac, 0x000100ac, 0x000100ac}, + {0x0000a794, 0x00000000, 0x00000000, 0x000140b4, 0x000140b4, 0x000140b4}, + {0x0000a798, 0x00000000, 0x00000000, 0x000180f4, 0x000180f4, 0x000180f4}, + {0x0000a79c, 0x00000000, 0x00000000, 0x0001c134, 0x0001c134, 0x0001c134}, + {0x0000a7a0, 0x00000000, 0x00000000, 0x00020174, 0x00020174, 0x00020174}, + {0x0000a7a4, 0x00000000, 0x00000000, 0x0002417c, 0x0002417c, 0x0002417c}, + {0x0000a7a8, 0x00000000, 0x00000000, 0x0002817e, 0x0002817e, 0x0002817e}, + {0x0000a7ac, 0x00000000, 0x00000000, 0x0002c1be, 0x0002c1be, 0x0002c1be}, + {0x0000a7b0, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7b4, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7b8, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7bc, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7c0, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7c4, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7c8, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7cc, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7d0, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a7d4, 0x00000000, 0x00000000, 0x000301fe, 0x000301fe, 0x000301fe}, + {0x0000a274, 0x0a180000, 0x0a180000, 0x0a1aa000, 0x0a1aa000, 0x0a1aa000}, +}; + +static const uint32_t ar9287Modes_rx_gain_9287_1_1[][6] = { + {0x00009a00, 0x00000000, 0x00000000, 0x0000a120, 0x0000a120, 0x0000a120}, + {0x00009a04, 0x00000000, 0x00000000, 0x0000a124, 0x0000a124, 0x0000a124}, + {0x00009a08, 0x00000000, 0x00000000, 0x0000a128, 0x0000a128, 0x0000a128}, + {0x00009a0c, 0x00000000, 0x00000000, 0x0000a12c, 0x0000a12c, 0x0000a12c}, + {0x00009a10, 0x00000000, 0x00000000, 
0x0000a130, 0x0000a130, 0x0000a130}, + {0x00009a14, 0x00000000, 0x00000000, 0x0000a194, 0x0000a194, 0x0000a194}, + {0x00009a18, 0x00000000, 0x00000000, 0x0000a198, 0x0000a198, 0x0000a198}, + {0x00009a1c, 0x00000000, 0x00000000, 0x0000a20c, 0x0000a20c, 0x0000a20c}, + {0x00009a20, 0x00000000, 0x00000000, 0x0000a210, 0x0000a210, 0x0000a210}, + {0x00009a24, 0x00000000, 0x00000000, 0x0000a284, 0x0000a284, 0x0000a284}, + {0x00009a28, 0x00000000, 0x00000000, 0x0000a288, 0x0000a288, 0x0000a288}, + {0x00009a2c, 0x00000000, 0x00000000, 0x0000a28c, 0x0000a28c, 0x0000a28c}, + {0x00009a30, 0x00000000, 0x00000000, 0x0000a290, 0x0000a290, 0x0000a290}, + {0x00009a34, 0x00000000, 0x00000000, 0x0000a294, 0x0000a294, 0x0000a294}, + {0x00009a38, 0x00000000, 0x00000000, 0x0000a2a0, 0x0000a2a0, 0x0000a2a0}, + {0x00009a3c, 0x00000000, 0x00000000, 0x0000a2a4, 0x0000a2a4, 0x0000a2a4}, + {0x00009a40, 0x00000000, 0x00000000, 0x0000a2a8, 0x0000a2a8, 0x0000a2a8}, + {0x00009a44, 0x00000000, 0x00000000, 0x0000a2ac, 0x0000a2ac, 0x0000a2ac}, + {0x00009a48, 0x00000000, 0x00000000, 0x0000a2b0, 0x0000a2b0, 0x0000a2b0}, + {0x00009a4c, 0x00000000, 0x00000000, 0x0000a2b4, 0x0000a2b4, 0x0000a2b4}, + {0x00009a50, 0x00000000, 0x00000000, 0x0000a2b8, 0x0000a2b8, 0x0000a2b8}, + {0x00009a54, 0x00000000, 0x00000000, 0x0000a2c4, 0x0000a2c4, 0x0000a2c4}, + {0x00009a58, 0x00000000, 0x00000000, 0x0000a708, 0x0000a708, 0x0000a708}, + {0x00009a5c, 0x00000000, 0x00000000, 0x0000a70c, 0x0000a70c, 0x0000a70c}, + {0x00009a60, 0x00000000, 0x00000000, 0x0000a710, 0x0000a710, 0x0000a710}, + {0x00009a64, 0x00000000, 0x00000000, 0x0000ab04, 0x0000ab04, 0x0000ab04}, + {0x00009a68, 0x00000000, 0x00000000, 0x0000ab08, 0x0000ab08, 0x0000ab08}, + {0x00009a6c, 0x00000000, 0x00000000, 0x0000ab0c, 0x0000ab0c, 0x0000ab0c}, + {0x00009a70, 0x00000000, 0x00000000, 0x0000ab10, 0x0000ab10, 0x0000ab10}, + {0x00009a74, 0x00000000, 0x00000000, 0x0000ab14, 0x0000ab14, 0x0000ab14}, + {0x00009a78, 0x00000000, 0x00000000, 0x0000ab18, 0x0000ab18, 
0x0000ab18}, + {0x00009a7c, 0x00000000, 0x00000000, 0x0000ab8c, 0x0000ab8c, 0x0000ab8c}, + {0x00009a80, 0x00000000, 0x00000000, 0x0000ab90, 0x0000ab90, 0x0000ab90}, + {0x00009a84, 0x00000000, 0x00000000, 0x0000ab94, 0x0000ab94, 0x0000ab94}, + {0x00009a88, 0x00000000, 0x00000000, 0x0000ab98, 0x0000ab98, 0x0000ab98}, + {0x00009a8c, 0x00000000, 0x00000000, 0x0000aba4, 0x0000aba4, 0x0000aba4}, + {0x00009a90, 0x00000000, 0x00000000, 0x0000aba8, 0x0000aba8, 0x0000aba8}, + {0x00009a94, 0x00000000, 0x00000000, 0x0000cb04, 0x0000cb04, 0x0000cb04}, + {0x00009a98, 0x00000000, 0x00000000, 0x0000cb08, 0x0000cb08, 0x0000cb08}, + {0x00009a9c, 0x00000000, 0x00000000, 0x0000cb0c, 0x0000cb0c, 0x0000cb0c}, + {0x00009aa0, 0x00000000, 0x00000000, 0x0000cb10, 0x0000cb10, 0x0000cb10}, + {0x00009aa4, 0x00000000, 0x00000000, 0x0000cb14, 0x0000cb14, 0x0000cb14}, + {0x00009aa8, 0x00000000, 0x00000000, 0x0000cb18, 0x0000cb18, 0x0000cb18}, + {0x00009aac, 0x00000000, 0x00000000, 0x0000cb8c, 0x0000cb8c, 0x0000cb8c}, + {0x00009ab0, 0x00000000, 0x00000000, 0x0000cb90, 0x0000cb90, 0x0000cb90}, + {0x00009ab4, 0x00000000, 0x00000000, 0x0000cf18, 0x0000cf18, 0x0000cf18}, + {0x00009ab8, 0x00000000, 0x00000000, 0x0000cf24, 0x0000cf24, 0x0000cf24}, + {0x00009abc, 0x00000000, 0x00000000, 0x0000cf28, 0x0000cf28, 0x0000cf28}, + {0x00009ac0, 0x00000000, 0x00000000, 0x0000d314, 0x0000d314, 0x0000d314}, + {0x00009ac4, 0x00000000, 0x00000000, 0x0000d318, 0x0000d318, 0x0000d318}, + {0x00009ac8, 0x00000000, 0x00000000, 0x0000d38c, 0x0000d38c, 0x0000d38c}, + {0x00009acc, 0x00000000, 0x00000000, 0x0000d390, 0x0000d390, 0x0000d390}, + {0x00009ad0, 0x00000000, 0x00000000, 0x0000d394, 0x0000d394, 0x0000d394}, + {0x00009ad4, 0x00000000, 0x00000000, 0x0000d398, 0x0000d398, 0x0000d398}, + {0x00009ad8, 0x00000000, 0x00000000, 0x0000d3a4, 0x0000d3a4, 0x0000d3a4}, + {0x00009adc, 0x00000000, 0x00000000, 0x0000d3a8, 0x0000d3a8, 0x0000d3a8}, + {0x00009ae0, 0x00000000, 0x00000000, 0x0000d3ac, 0x0000d3ac, 0x0000d3ac}, + 
{0x00009ae4, 0x00000000, 0x00000000, 0x0000d3b0, 0x0000d3b0, 0x0000d3b0}, + {0x00009ae8, 0x00000000, 0x00000000, 0x0000f380, 0x0000f380, 0x0000f380}, + {0x00009aec, 0x00000000, 0x00000000, 0x0000f384, 0x0000f384, 0x0000f384}, + {0x00009af0, 0x00000000, 0x00000000, 0x0000f388, 0x0000f388, 0x0000f388}, + {0x00009af4, 0x00000000, 0x00000000, 0x0000f710, 0x0000f710, 0x0000f710}, + {0x00009af8, 0x00000000, 0x00000000, 0x0000f714, 0x0000f714, 0x0000f714}, + {0x00009afc, 0x00000000, 0x00000000, 0x0000f718, 0x0000f718, 0x0000f718}, + {0x00009b00, 0x00000000, 0x00000000, 0x0000fb10, 0x0000fb10, 0x0000fb10}, + {0x00009b04, 0x00000000, 0x00000000, 0x0000fb14, 0x0000fb14, 0x0000fb14}, + {0x00009b08, 0x00000000, 0x00000000, 0x0000fb18, 0x0000fb18, 0x0000fb18}, + {0x00009b0c, 0x00000000, 0x00000000, 0x0000fb8c, 0x0000fb8c, 0x0000fb8c}, + {0x00009b10, 0x00000000, 0x00000000, 0x0000fb90, 0x0000fb90, 0x0000fb90}, + {0x00009b14, 0x00000000, 0x00000000, 0x0000fb94, 0x0000fb94, 0x0000fb94}, + {0x00009b18, 0x00000000, 0x00000000, 0x0000ff8c, 0x0000ff8c, 0x0000ff8c}, + {0x00009b1c, 0x00000000, 0x00000000, 0x0000ff90, 0x0000ff90, 0x0000ff90}, + {0x00009b20, 0x00000000, 0x00000000, 0x0000ff94, 0x0000ff94, 0x0000ff94}, + {0x00009b24, 0x00000000, 0x00000000, 0x0000ffa0, 0x0000ffa0, 0x0000ffa0}, + {0x00009b28, 0x00000000, 0x00000000, 0x0000ffa4, 0x0000ffa4, 0x0000ffa4}, + {0x00009b2c, 0x00000000, 0x00000000, 0x0000ffa8, 0x0000ffa8, 0x0000ffa8}, + {0x00009b30, 0x00000000, 0x00000000, 0x0000ffac, 0x0000ffac, 0x0000ffac}, + {0x00009b34, 0x00000000, 0x00000000, 0x0000ffb0, 0x0000ffb0, 0x0000ffb0}, + {0x00009b38, 0x00000000, 0x00000000, 0x0000ffb4, 0x0000ffb4, 0x0000ffb4}, + {0x00009b3c, 0x00000000, 0x00000000, 0x0000ffa1, 0x0000ffa1, 0x0000ffa1}, + {0x00009b40, 0x00000000, 0x00000000, 0x0000ffa5, 0x0000ffa5, 0x0000ffa5}, + {0x00009b44, 0x00000000, 0x00000000, 0x0000ffa9, 0x0000ffa9, 0x0000ffa9}, + {0x00009b48, 0x00000000, 0x00000000, 0x0000ffad, 0x0000ffad, 0x0000ffad}, + {0x00009b4c, 
0x00000000, 0x00000000, 0x0000ffb1, 0x0000ffb1, 0x0000ffb1}, + {0x00009b50, 0x00000000, 0x00000000, 0x0000ffb5, 0x0000ffb5, 0x0000ffb5}, + {0x00009b54, 0x00000000, 0x00000000, 0x0000ffb9, 0x0000ffb9, 0x0000ffb9}, + {0x00009b58, 0x00000000, 0x00000000, 0x0000ffc5, 0x0000ffc5, 0x0000ffc5}, + {0x00009b5c, 0x00000000, 0x00000000, 0x0000ffc9, 0x0000ffc9, 0x0000ffc9}, + {0x00009b60, 0x00000000, 0x00000000, 0x0000ffcd, 0x0000ffcd, 0x0000ffcd}, + {0x00009b64, 0x00000000, 0x00000000, 0x0000ffd1, 0x0000ffd1, 0x0000ffd1}, + {0x00009b68, 0x00000000, 0x00000000, 0x0000ffd5, 0x0000ffd5, 0x0000ffd5}, + {0x00009b6c, 0x00000000, 0x00000000, 0x0000ffc2, 0x0000ffc2, 0x0000ffc2}, + {0x00009b70, 0x00000000, 0x00000000, 0x0000ffc6, 0x0000ffc6, 0x0000ffc6}, + {0x00009b74, 0x00000000, 0x00000000, 0x0000ffca, 0x0000ffca, 0x0000ffca}, + {0x00009b78, 0x00000000, 0x00000000, 0x0000ffce, 0x0000ffce, 0x0000ffce}, + {0x00009b7c, 0x00000000, 0x00000000, 0x0000ffd2, 0x0000ffd2, 0x0000ffd2}, + {0x00009b80, 0x00000000, 0x00000000, 0x0000ffd6, 0x0000ffd6, 0x0000ffd6}, + {0x00009b84, 0x00000000, 0x00000000, 0x0000ffda, 0x0000ffda, 0x0000ffda}, + {0x00009b88, 0x00000000, 0x00000000, 0x0000ffc7, 0x0000ffc7, 0x0000ffc7}, + {0x00009b8c, 0x00000000, 0x00000000, 0x0000ffcb, 0x0000ffcb, 0x0000ffcb}, + {0x00009b90, 0x00000000, 0x00000000, 0x0000ffcf, 0x0000ffcf, 0x0000ffcf}, + {0x00009b94, 0x00000000, 0x00000000, 0x0000ffd3, 0x0000ffd3, 0x0000ffd3}, + {0x00009b98, 0x00000000, 0x00000000, 0x0000ffd7, 0x0000ffd7, 0x0000ffd7}, + {0x00009b9c, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009ba0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009ba4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009ba8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bac, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bb0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bb4, 0x00000000, 0x00000000, 
0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bb8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bbc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bc0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bc4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bc8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bcc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bd0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bd4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bd8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bdc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009be0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009be4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009be8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bec, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bf0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bf4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bf8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009bfc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000aa00, 0x00000000, 0x00000000, 0x0000a120, 0x0000a120, 0x0000a120}, + {0x0000aa04, 0x00000000, 0x00000000, 0x0000a124, 0x0000a124, 0x0000a124}, + {0x0000aa08, 0x00000000, 0x00000000, 0x0000a128, 0x0000a128, 0x0000a128}, + {0x0000aa0c, 0x00000000, 0x00000000, 0x0000a12c, 0x0000a12c, 0x0000a12c}, + {0x0000aa10, 0x00000000, 0x00000000, 0x0000a130, 0x0000a130, 0x0000a130}, + {0x0000aa14, 0x00000000, 0x00000000, 0x0000a194, 0x0000a194, 0x0000a194}, + {0x0000aa18, 0x00000000, 0x00000000, 0x0000a198, 0x0000a198, 0x0000a198}, + {0x0000aa1c, 0x00000000, 0x00000000, 0x0000a20c, 0x0000a20c, 
0x0000a20c}, + {0x0000aa20, 0x00000000, 0x00000000, 0x0000a210, 0x0000a210, 0x0000a210}, + {0x0000aa24, 0x00000000, 0x00000000, 0x0000a284, 0x0000a284, 0x0000a284}, + {0x0000aa28, 0x00000000, 0x00000000, 0x0000a288, 0x0000a288, 0x0000a288}, + {0x0000aa2c, 0x00000000, 0x00000000, 0x0000a28c, 0x0000a28c, 0x0000a28c}, + {0x0000aa30, 0x00000000, 0x00000000, 0x0000a290, 0x0000a290, 0x0000a290}, + {0x0000aa34, 0x00000000, 0x00000000, 0x0000a294, 0x0000a294, 0x0000a294}, + {0x0000aa38, 0x00000000, 0x00000000, 0x0000a2a0, 0x0000a2a0, 0x0000a2a0}, + {0x0000aa3c, 0x00000000, 0x00000000, 0x0000a2a4, 0x0000a2a4, 0x0000a2a4}, + {0x0000aa40, 0x00000000, 0x00000000, 0x0000a2a8, 0x0000a2a8, 0x0000a2a8}, + {0x0000aa44, 0x00000000, 0x00000000, 0x0000a2ac, 0x0000a2ac, 0x0000a2ac}, + {0x0000aa48, 0x00000000, 0x00000000, 0x0000a2b0, 0x0000a2b0, 0x0000a2b0}, + {0x0000aa4c, 0x00000000, 0x00000000, 0x0000a2b4, 0x0000a2b4, 0x0000a2b4}, + {0x0000aa50, 0x00000000, 0x00000000, 0x0000a2b8, 0x0000a2b8, 0x0000a2b8}, + {0x0000aa54, 0x00000000, 0x00000000, 0x0000a2c4, 0x0000a2c4, 0x0000a2c4}, + {0x0000aa58, 0x00000000, 0x00000000, 0x0000a708, 0x0000a708, 0x0000a708}, + {0x0000aa5c, 0x00000000, 0x00000000, 0x0000a70c, 0x0000a70c, 0x0000a70c}, + {0x0000aa60, 0x00000000, 0x00000000, 0x0000a710, 0x0000a710, 0x0000a710}, + {0x0000aa64, 0x00000000, 0x00000000, 0x0000ab04, 0x0000ab04, 0x0000ab04}, + {0x0000aa68, 0x00000000, 0x00000000, 0x0000ab08, 0x0000ab08, 0x0000ab08}, + {0x0000aa6c, 0x00000000, 0x00000000, 0x0000ab0c, 0x0000ab0c, 0x0000ab0c}, + {0x0000aa70, 0x00000000, 0x00000000, 0x0000ab10, 0x0000ab10, 0x0000ab10}, + {0x0000aa74, 0x00000000, 0x00000000, 0x0000ab14, 0x0000ab14, 0x0000ab14}, + {0x0000aa78, 0x00000000, 0x00000000, 0x0000ab18, 0x0000ab18, 0x0000ab18}, + {0x0000aa7c, 0x00000000, 0x00000000, 0x0000ab8c, 0x0000ab8c, 0x0000ab8c}, + {0x0000aa80, 0x00000000, 0x00000000, 0x0000ab90, 0x0000ab90, 0x0000ab90}, + {0x0000aa84, 0x00000000, 0x00000000, 0x0000ab94, 0x0000ab94, 0x0000ab94}, + 
{0x0000aa88, 0x00000000, 0x00000000, 0x0000ab98, 0x0000ab98, 0x0000ab98}, + {0x0000aa8c, 0x00000000, 0x00000000, 0x0000aba4, 0x0000aba4, 0x0000aba4}, + {0x0000aa90, 0x00000000, 0x00000000, 0x0000aba8, 0x0000aba8, 0x0000aba8}, + {0x0000aa94, 0x00000000, 0x00000000, 0x0000cb04, 0x0000cb04, 0x0000cb04}, + {0x0000aa98, 0x00000000, 0x00000000, 0x0000cb08, 0x0000cb08, 0x0000cb08}, + {0x0000aa9c, 0x00000000, 0x00000000, 0x0000cb0c, 0x0000cb0c, 0x0000cb0c}, + {0x0000aaa0, 0x00000000, 0x00000000, 0x0000cb10, 0x0000cb10, 0x0000cb10}, + {0x0000aaa4, 0x00000000, 0x00000000, 0x0000cb14, 0x0000cb14, 0x0000cb14}, + {0x0000aaa8, 0x00000000, 0x00000000, 0x0000cb18, 0x0000cb18, 0x0000cb18}, + {0x0000aaac, 0x00000000, 0x00000000, 0x0000cb8c, 0x0000cb8c, 0x0000cb8c}, + {0x0000aab0, 0x00000000, 0x00000000, 0x0000cb90, 0x0000cb90, 0x0000cb90}, + {0x0000aab4, 0x00000000, 0x00000000, 0x0000cf18, 0x0000cf18, 0x0000cf18}, + {0x0000aab8, 0x00000000, 0x00000000, 0x0000cf24, 0x0000cf24, 0x0000cf24}, + {0x0000aabc, 0x00000000, 0x00000000, 0x0000cf28, 0x0000cf28, 0x0000cf28}, + {0x0000aac0, 0x00000000, 0x00000000, 0x0000d314, 0x0000d314, 0x0000d314}, + {0x0000aac4, 0x00000000, 0x00000000, 0x0000d318, 0x0000d318, 0x0000d318}, + {0x0000aac8, 0x00000000, 0x00000000, 0x0000d38c, 0x0000d38c, 0x0000d38c}, + {0x0000aacc, 0x00000000, 0x00000000, 0x0000d390, 0x0000d390, 0x0000d390}, + {0x0000aad0, 0x00000000, 0x00000000, 0x0000d394, 0x0000d394, 0x0000d394}, + {0x0000aad4, 0x00000000, 0x00000000, 0x0000d398, 0x0000d398, 0x0000d398}, + {0x0000aad8, 0x00000000, 0x00000000, 0x0000d3a4, 0x0000d3a4, 0x0000d3a4}, + {0x0000aadc, 0x00000000, 0x00000000, 0x0000d3a8, 0x0000d3a8, 0x0000d3a8}, + {0x0000aae0, 0x00000000, 0x00000000, 0x0000d3ac, 0x0000d3ac, 0x0000d3ac}, + {0x0000aae4, 0x00000000, 0x00000000, 0x0000d3b0, 0x0000d3b0, 0x0000d3b0}, + {0x0000aae8, 0x00000000, 0x00000000, 0x0000f380, 0x0000f380, 0x0000f380}, + {0x0000aaec, 0x00000000, 0x00000000, 0x0000f384, 0x0000f384, 0x0000f384}, + {0x0000aaf0, 
0x00000000, 0x00000000, 0x0000f388, 0x0000f388, 0x0000f388}, + {0x0000aaf4, 0x00000000, 0x00000000, 0x0000f710, 0x0000f710, 0x0000f710}, + {0x0000aaf8, 0x00000000, 0x00000000, 0x0000f714, 0x0000f714, 0x0000f714}, + {0x0000aafc, 0x00000000, 0x00000000, 0x0000f718, 0x0000f718, 0x0000f718}, + {0x0000ab00, 0x00000000, 0x00000000, 0x0000fb10, 0x0000fb10, 0x0000fb10}, + {0x0000ab04, 0x00000000, 0x00000000, 0x0000fb14, 0x0000fb14, 0x0000fb14}, + {0x0000ab08, 0x00000000, 0x00000000, 0x0000fb18, 0x0000fb18, 0x0000fb18}, + {0x0000ab0c, 0x00000000, 0x00000000, 0x0000fb8c, 0x0000fb8c, 0x0000fb8c}, + {0x0000ab10, 0x00000000, 0x00000000, 0x0000fb90, 0x0000fb90, 0x0000fb90}, + {0x0000ab14, 0x00000000, 0x00000000, 0x0000fb94, 0x0000fb94, 0x0000fb94}, + {0x0000ab18, 0x00000000, 0x00000000, 0x0000ff8c, 0x0000ff8c, 0x0000ff8c}, + {0x0000ab1c, 0x00000000, 0x00000000, 0x0000ff90, 0x0000ff90, 0x0000ff90}, + {0x0000ab20, 0x00000000, 0x00000000, 0x0000ff94, 0x0000ff94, 0x0000ff94}, + {0x0000ab24, 0x00000000, 0x00000000, 0x0000ffa0, 0x0000ffa0, 0x0000ffa0}, + {0x0000ab28, 0x00000000, 0x00000000, 0x0000ffa4, 0x0000ffa4, 0x0000ffa4}, + {0x0000ab2c, 0x00000000, 0x00000000, 0x0000ffa8, 0x0000ffa8, 0x0000ffa8}, + {0x0000ab30, 0x00000000, 0x00000000, 0x0000ffac, 0x0000ffac, 0x0000ffac}, + {0x0000ab34, 0x00000000, 0x00000000, 0x0000ffb0, 0x0000ffb0, 0x0000ffb0}, + {0x0000ab38, 0x00000000, 0x00000000, 0x0000ffb4, 0x0000ffb4, 0x0000ffb4}, + {0x0000ab3c, 0x00000000, 0x00000000, 0x0000ffa1, 0x0000ffa1, 0x0000ffa1}, + {0x0000ab40, 0x00000000, 0x00000000, 0x0000ffa5, 0x0000ffa5, 0x0000ffa5}, + {0x0000ab44, 0x00000000, 0x00000000, 0x0000ffa9, 0x0000ffa9, 0x0000ffa9}, + {0x0000ab48, 0x00000000, 0x00000000, 0x0000ffad, 0x0000ffad, 0x0000ffad}, + {0x0000ab4c, 0x00000000, 0x00000000, 0x0000ffb1, 0x0000ffb1, 0x0000ffb1}, + {0x0000ab50, 0x00000000, 0x00000000, 0x0000ffb5, 0x0000ffb5, 0x0000ffb5}, + {0x0000ab54, 0x00000000, 0x00000000, 0x0000ffb9, 0x0000ffb9, 0x0000ffb9}, + {0x0000ab58, 0x00000000, 0x00000000, 
0x0000ffc5, 0x0000ffc5, 0x0000ffc5}, + {0x0000ab5c, 0x00000000, 0x00000000, 0x0000ffc9, 0x0000ffc9, 0x0000ffc9}, + {0x0000ab60, 0x00000000, 0x00000000, 0x0000ffcd, 0x0000ffcd, 0x0000ffcd}, + {0x0000ab64, 0x00000000, 0x00000000, 0x0000ffd1, 0x0000ffd1, 0x0000ffd1}, + {0x0000ab68, 0x00000000, 0x00000000, 0x0000ffd5, 0x0000ffd5, 0x0000ffd5}, + {0x0000ab6c, 0x00000000, 0x00000000, 0x0000ffc2, 0x0000ffc2, 0x0000ffc2}, + {0x0000ab70, 0x00000000, 0x00000000, 0x0000ffc6, 0x0000ffc6, 0x0000ffc6}, + {0x0000ab74, 0x00000000, 0x00000000, 0x0000ffca, 0x0000ffca, 0x0000ffca}, + {0x0000ab78, 0x00000000, 0x00000000, 0x0000ffce, 0x0000ffce, 0x0000ffce}, + {0x0000ab7c, 0x00000000, 0x00000000, 0x0000ffd2, 0x0000ffd2, 0x0000ffd2}, + {0x0000ab80, 0x00000000, 0x00000000, 0x0000ffd6, 0x0000ffd6, 0x0000ffd6}, + {0x0000ab84, 0x00000000, 0x00000000, 0x0000ffda, 0x0000ffda, 0x0000ffda}, + {0x0000ab88, 0x00000000, 0x00000000, 0x0000ffc7, 0x0000ffc7, 0x0000ffc7}, + {0x0000ab8c, 0x00000000, 0x00000000, 0x0000ffcb, 0x0000ffcb, 0x0000ffcb}, + {0x0000ab90, 0x00000000, 0x00000000, 0x0000ffcf, 0x0000ffcf, 0x0000ffcf}, + {0x0000ab94, 0x00000000, 0x00000000, 0x0000ffd3, 0x0000ffd3, 0x0000ffd3}, + {0x0000ab98, 0x00000000, 0x00000000, 0x0000ffd7, 0x0000ffd7, 0x0000ffd7}, + {0x0000ab9c, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000aba0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000aba4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000aba8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abac, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abb0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abb4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abb8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abbc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abc0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 
0x0000ffdb}, + {0x0000abc4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abc8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abcc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abd0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abd4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abd8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abdc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abe0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abe4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abe8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abec, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abf0, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abf4, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abf8, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x0000abfc, 0x00000000, 0x00000000, 0x0000ffdb, 0x0000ffdb, 0x0000ffdb}, + {0x00009848, 0x00000000, 0x00000000, 0x00001067, 0x00001067, 0x00001067}, + {0x0000a848, 0x00000000, 0x00000000, 0x00001067, 0x00001067, 0x00001067}, +}; + +static const uint32_t ar9287PciePhy_clkreq_always_on_L1_9287_1_1[][2] = { + /* Addr allmodes */ + {0x00004040, 0x9248fd00}, + {0x00004040, 0x24924924}, + {0x00004040, 0xa8000019}, + {0x00004040, 0x13160820}, + {0x00004040, 0xe5980560}, + {0x00004040, 0xc01dcffd}, + {0x00004040, 0x1aaabe41}, + {0x00004040, 0xbe105554}, + {0x00004040, 0x00043007}, + {0x00004044, 0x00000000}, +}; + +static const uint32_t ar9287PciePhy_clkreq_off_L1_9287_1_1[][2] = { + /* Addr allmodes */ + {0x00004040, 0x9248fd00}, + {0x00004040, 0x24924924}, + {0x00004040, 0xa8000019}, + {0x00004040, 0x13160820}, + {0x00004040, 0xe5980560}, + {0x00004040, 0xc01dcffc}, + {0x00004040, 0x1aaabe41}, + {0x00004040, 
0xbe105554}, + {0x00004040, 0x00043007}, + {0x00004044, 0x00000000}, +}; diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287_attach.c b/sys/dev/ath/ath_hal/ar9002/ar9287_attach.c new file mode 100644 index 0000000..9cbe0a5 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287_attach.c @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2008-2009 Sam Leffler, Errno Consulting + * Copyright (c) 2008 Atheros Communications, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * $FreeBSD$ + */ +#include "opt_ah.h" + +#include "ah.h" +#include "ah_internal.h" +#include "ah_devid.h" + +#include "ah_eeprom_v14.h" /* XXX for tx/rx gain */ +#include "ah_eeprom_9287.h" + +#include "ar9002/ar9280.h" +#include "ar9002/ar9287.h" +#include "ar5416/ar5416reg.h" +#include "ar5416/ar5416phy.h" + +#include "ar9002/ar9287_cal.h" +#include "ar9002/ar9287_reset.h" +#include "ar9002/ar9287_olc.h" + +#include "ar9002/ar9287.ini" + +static const HAL_PERCAL_DATA ar9287_iq_cal = { /* single sample */ + .calName = "IQ", .calType = IQ_MISMATCH_CAL, + .calNumSamples = MIN_CAL_SAMPLES, + .calCountMax = PER_MAX_LOG_COUNT, + .calCollect = ar5416IQCalCollect, + .calPostProc = ar5416IQCalibration +}; +static const HAL_PERCAL_DATA ar9287_adc_gain_cal = { /* single sample */ + .calName = "ADC Gain", .calType = ADC_GAIN_CAL, + .calNumSamples = MIN_CAL_SAMPLES, + .calCountMax = PER_MIN_LOG_COUNT, + .calCollect = ar5416AdcGainCalCollect, + .calPostProc = ar5416AdcGainCalibration +}; +static const HAL_PERCAL_DATA ar9287_adc_dc_cal = { /* single sample */ + .calName = "ADC DC", .calType = ADC_DC_CAL, + .calNumSamples = MIN_CAL_SAMPLES, + .calCountMax = PER_MIN_LOG_COUNT, + .calCollect = ar5416AdcDcCalCollect, + .calPostProc = ar5416AdcDcCalibration +}; +static const HAL_PERCAL_DATA ar9287_adc_init_dc_cal = { + .calName = "ADC Init DC", .calType = ADC_DC_INIT_CAL, + .calNumSamples = MIN_CAL_SAMPLES, + .calCountMax = INIT_LOG_COUNT, + .calCollect = ar5416AdcDcCalCollect, + .calPostProc = ar5416AdcDcCalibration +}; + +static void ar9287ConfigPCIE(struct ath_hal *ah, HAL_BOOL restore); +static HAL_BOOL ar9287FillCapabilityInfo(struct ath_hal *ah); +static void ar9287WriteIni(struct ath_hal *ah, + const struct ieee80211_channel *chan); + +static void +ar9287AniSetup(struct ath_hal *ah) +{ + /* + * These are the parameters from the AR5416 ANI code; + * they likely need quite a bit of adjustment for the + * AR9280. 
+ */ + static const struct ar5212AniParams aniparams = { + .maxNoiseImmunityLevel = 4, /* levels 0..4 */ + .totalSizeDesired = { -55, -55, -55, -55, -62 }, + .coarseHigh = { -14, -14, -14, -14, -12 }, + .coarseLow = { -64, -64, -64, -64, -70 }, + .firpwr = { -78, -78, -78, -78, -80 }, + .maxSpurImmunityLevel = 2, + .cycPwrThr1 = { 2, 4, 6 }, + .maxFirstepLevel = 2, /* levels 0..2 */ + .firstep = { 0, 4, 8 }, + .ofdmTrigHigh = 500, + .ofdmTrigLow = 200, + .cckTrigHigh = 200, + .cckTrigLow = 100, + .rssiThrHigh = 40, + .rssiThrLow = 7, + .period = 100, + }; + /* NB: disable ANI noise immunity for reliable RIFS rx */ + AH5416(ah)->ah_ani_function &= ~ HAL_ANI_NOISE_IMMUNITY_LEVEL; + + /* NB: ANI is not enabled yet */ + ar5416AniAttach(ah, &aniparams, &aniparams, AH_TRUE); +} + +/* + * Attach for an AR9287 part. + */ +static struct ath_hal * +ar9287Attach(uint16_t devid, HAL_SOFTC sc, + HAL_BUS_TAG st, HAL_BUS_HANDLE sh, uint16_t *eepromdata, + HAL_STATUS *status) +{ + struct ath_hal_9287 *ahp9287; + struct ath_hal_5212 *ahp; + struct ath_hal *ah; + uint32_t val; + HAL_STATUS ecode; + HAL_BOOL rfStatus; + int8_t pwr_table_offset; + + HALDEBUG(AH_NULL, HAL_DEBUG_ATTACH, "%s: sc %p st %p sh %p\n", + __func__, sc, (void*) st, (void*) sh); + + /* NB: memory is returned zero'd */ + ahp9287 = ath_hal_malloc(sizeof (struct ath_hal_9287)); + if (ahp9287 == AH_NULL) { + HALDEBUG(AH_NULL, HAL_DEBUG_ANY, + "%s: cannot allocate memory for state block\n", __func__); + *status = HAL_ENOMEM; + return AH_NULL; + } + ahp = AH5212(ahp9287); + ah = &ahp->ah_priv.h; + + ar5416InitState(AH5416(ah), devid, sc, st, sh, status); + + /* XXX override with 9280 specific state */ + /* override 5416 methods for our needs */ + ah->ah_setAntennaSwitch = ar9287SetAntennaSwitch; + ah->ah_configPCIE = ar9287ConfigPCIE; + + AH5416(ah)->ah_cal.iqCalData.calData = &ar9287_iq_cal; + AH5416(ah)->ah_cal.adcGainCalData.calData = &ar9287_adc_gain_cal; + AH5416(ah)->ah_cal.adcDcCalData.calData = 
&ar9287_adc_dc_cal; + AH5416(ah)->ah_cal.adcDcCalInitData.calData = &ar9287_adc_init_dc_cal; + /* Better performance without ADC Gain Calibration */ + AH5416(ah)->ah_cal.suppCals = ADC_DC_CAL | IQ_MISMATCH_CAL; + + AH5416(ah)->ah_spurMitigate = ar9280SpurMitigate; + AH5416(ah)->ah_writeIni = ar9287WriteIni; + + ah->ah_setTxPower = ar9287SetTransmitPower; + ah->ah_setBoardValues = ar9287SetBoardValues; + + AH5416(ah)->ah_olcInit = ar9287olcInit; + AH5416(ah)->ah_olcTempCompensation = ar9287olcTemperatureCompensation; + //AH5416(ah)->ah_setPowerCalTable = ar9287SetPowerCalTable; + AH5416(ah)->ah_cal_initcal = ar9287InitCalHardware; + AH5416(ah)->ah_cal_pacal = ar9287PACal; + + /* XXX NF calibration */ + /* XXX Ini override? (IFS vars - since the kiwi mac clock is faster?) */ + /* XXX what else is kiwi-specific in the radio/calibration pathway? */ + + AH5416(ah)->ah_rx_chainmask = AR9287_DEFAULT_RXCHAINMASK; + AH5416(ah)->ah_tx_chainmask = AR9287_DEFAULT_TXCHAINMASK; + + if (!ar5416SetResetReg(ah, HAL_RESET_POWER_ON)) { + /* reset chip */ + HALDEBUG(ah, HAL_DEBUG_ANY, "%s: couldn't reset chip\n", + __func__); + ecode = HAL_EIO; + goto bad; + } + + if (!ar5416SetPowerMode(ah, HAL_PM_AWAKE, AH_TRUE)) { + HALDEBUG(ah, HAL_DEBUG_ANY, "%s: couldn't wakeup chip\n", + __func__); + ecode = HAL_EIO; + goto bad; + } + /* Read Revisions from Chips before taking out of reset */ + val = OS_REG_READ(ah, AR_SREV); + HALDEBUG(ah, HAL_DEBUG_ATTACH, + "%s: ID 0x%x VERSION 0x%x TYPE 0x%x REVISION 0x%x\n", + __func__, MS(val, AR_XSREV_ID), MS(val, AR_XSREV_VERSION), + MS(val, AR_XSREV_TYPE), MS(val, AR_XSREV_REVISION)); + /* NB: include chip type to differentiate from pre-Sowl versions */ + AH_PRIVATE(ah)->ah_macVersion = + (val & AR_XSREV_VERSION) >> AR_XSREV_TYPE_S; + AH_PRIVATE(ah)->ah_macRev = MS(val, AR_XSREV_REVISION); + AH_PRIVATE(ah)->ah_ispcie = (val & AR_XSREV_TYPE_HOST_MODE) == 0; + + /* Don't support Kiwi < 1.2; those are pre-release chips */ + if (! 
AR_SREV_KIWI_12_OR_LATER(ah)) { + ath_hal_printf(ah, "[ath]: Kiwi < 1.2 is not supported\n"); + ecode = HAL_EIO; + goto bad; + } + + /* setup common ini data; rf backends handle remainder */ + HAL_INI_INIT(&ahp->ah_ini_modes, ar9287Modes_9287_1_1, 6); + HAL_INI_INIT(&ahp->ah_ini_common, ar9287Common_9287_1_1, 2); + + /* If pcie_clock_req */ + HAL_INI_INIT(&AH5416(ah)->ah_ini_pcieserdes, + ar9287PciePhy_clkreq_always_on_L1_9287_1_1, 2); + + /* XXX WoW ini values */ + + /* Else */ +#if 0 + HAL_INI_INIT(&AH5416(ah)->ah_ini_pcieserdes, + ar9287PciePhy_clkreq_off_L1_9287_1_1, 2); +#endif + + /* Initialise Japan arrays */ + HAL_INI_INIT(&ahp9287->ah_ini_cckFirNormal, + ar9287Common_normal_cck_fir_coeff_9287_1_1, 2); + HAL_INI_INIT(&ahp9287->ah_ini_cckFirJapan2484, + ar9287Common_japan_2484_cck_fir_coeff_9287_1_1, 2); + + ar5416AttachPCIE(ah); + + ecode = ath_hal_9287EepromAttach(ah); + if (ecode != HAL_OK) + goto bad; + + if (!ar5416ChipReset(ah, AH_NULL)) { /* reset chip */ + HALDEBUG(ah, HAL_DEBUG_ANY, "%s: chip reset failed\n", __func__); + ecode = HAL_EIO; + goto bad; + } + + AH_PRIVATE(ah)->ah_phyRev = OS_REG_READ(ah, AR_PHY_CHIP_ID); + + if (!ar5212ChipTest(ah)) { + HALDEBUG(ah, HAL_DEBUG_ANY, "%s: hardware self-test failed\n", + __func__); + ecode = HAL_ESELFTEST; + goto bad; + } + + /* + * Set correct Baseband to analog shift + * setting to access analog chips. 
+ */ + OS_REG_WRITE(ah, AR_PHY(0), 0x00000007); + + /* Read Radio Chip Rev Extract */ + AH_PRIVATE(ah)->ah_analog5GhzRev = ar5416GetRadioRev(ah); + switch (AH_PRIVATE(ah)->ah_analog5GhzRev & AR_RADIO_SREV_MAJOR) { + case AR_RAD2133_SREV_MAJOR: /* Sowl: 2G/3x3 */ + case AR_RAD5133_SREV_MAJOR: /* Sowl: 2+5G/3x3 */ + break; + default: + if (AH_PRIVATE(ah)->ah_analog5GhzRev == 0) { + AH_PRIVATE(ah)->ah_analog5GhzRev = + AR_RAD5133_SREV_MAJOR; + break; + } +#ifdef AH_DEBUG + HALDEBUG(ah, HAL_DEBUG_ANY, + "%s: 5G Radio Chip Rev 0x%02X is not supported by " + "this driver\n", __func__, + AH_PRIVATE(ah)->ah_analog5GhzRev); + ecode = HAL_ENOTSUPP; + goto bad; +#endif + } + rfStatus = ar9287RfAttach(ah, &ecode); + if (!rfStatus) { + HALDEBUG(ah, HAL_DEBUG_ANY, "%s: RF setup failed, status %u\n", + __func__, ecode); + goto bad; + } + + /* + * We only implement open-loop TX power control + * for the AR9287 in this codebase. + */ + if (! ath_hal_eepromGetFlag(ah, AR_EEP_OL_PWRCTRL)) { + ath_hal_printf(ah, "[ath] AR9287 w/ closed-loop TX power control" + " isn't supported.\n"); + ecode = HAL_ENOTSUPP; + goto bad; + } + + /* + * Check whether the power table offset isn't the default. + * This can occur with eeprom minor V21 or greater on Merlin. + */ + (void) ath_hal_eepromGet(ah, AR_EEP_PWR_TABLE_OFFSET, &pwr_table_offset); + if (pwr_table_offset != AR5416_PWR_TABLE_OFFSET_DB) + ath_hal_printf(ah, "[ath]: default pwr offset: %d dBm != EEPROM pwr offset: %d dBm; curves will be adjusted.\n", + AR5416_PWR_TABLE_OFFSET_DB, (int) pwr_table_offset); + + /* setup rxgain table */ + HAL_INI_INIT(&ahp9287->ah_ini_rxgain, ar9287Modes_rx_gain_9287_1_1, 6); + + /* setup txgain table */ + HAL_INI_INIT(&ahp9287->ah_ini_txgain, ar9287Modes_tx_gain_9287_1_1, 6); + + /* + * Got everything we need now to setup the capabilities. 
+ */ + if (!ar9287FillCapabilityInfo(ah)) { + ecode = HAL_EEREAD; + goto bad; + } + + ecode = ath_hal_eepromGet(ah, AR_EEP_MACADDR, ahp->ah_macaddr); + if (ecode != HAL_OK) { + HALDEBUG(ah, HAL_DEBUG_ANY, + "%s: error getting mac address from EEPROM\n", __func__); + goto bad; + } + /* XXX How about the serial number ? */ + /* Read Reg Domain */ + AH_PRIVATE(ah)->ah_currentRD = + ath_hal_eepromGet(ah, AR_EEP_REGDMN_0, AH_NULL); + + /* + * ah_miscMode is populated by ar5416FillCapabilityInfo() + * starting from griffin. Set here to make sure that + * AR_MISC_MODE_MIC_NEW_LOC_ENABLE is set before a GTK is + * placed into hardware. + */ + if (ahp->ah_miscMode != 0) + OS_REG_WRITE(ah, AR_MISC_MODE, OS_REG_READ(ah, AR_MISC_MODE) | ahp->ah_miscMode); + + ar9287AniSetup(ah); /* Anti Noise Immunity */ + + /* Setup noise floor min/max/nominal values */ + AH5416(ah)->nf_2g.max = AR_PHY_CCA_MAX_GOOD_VAL_9287_2GHZ; + AH5416(ah)->nf_2g.min = AR_PHY_CCA_MIN_GOOD_VAL_9287_2GHZ; + AH5416(ah)->nf_2g.nominal = AR_PHY_CCA_NOM_VAL_9287_2GHZ; + AH5416(ah)->nf_5g.max = AR_PHY_CCA_MAX_GOOD_VAL_9287_5GHZ; + AH5416(ah)->nf_5g.min = AR_PHY_CCA_MIN_GOOD_VAL_9287_5GHZ; + AH5416(ah)->nf_5g.nominal = AR_PHY_CCA_NOM_VAL_9287_5GHZ; + + ar5416InitNfHistBuff(AH5416(ah)->ah_cal.nfCalHist); + + HALDEBUG(ah, HAL_DEBUG_ATTACH, "%s: return\n", __func__); + + return ah; +bad: + if (ah != AH_NULL) + ah->ah_detach(ah); + if (status) + *status = ecode; + return AH_NULL; +} + +static void +ar9287ConfigPCIE(struct ath_hal *ah, HAL_BOOL restore) +{ + if (AH_PRIVATE(ah)->ah_ispcie && !restore) { + ath_hal_ini_write(ah, &AH5416(ah)->ah_ini_pcieserdes, 1, 0); + OS_DELAY(1000); + OS_REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA); + OS_REG_WRITE(ah, AR_WA, AR9285_WA_DEFAULT); /* Yes, Kiwi uses the Kite PCIe PHY WA */ + } +} + +static void +ar9287WriteIni(struct ath_hal *ah, const struct ieee80211_channel *chan) +{ + u_int modesIndex, freqIndex; + int regWrites = 0; + + /* Setup the indices for the next set of 
register array writes */ + /* XXX Ignore 11n dynamic mode on the AR5416 for the moment */ + if (IEEE80211_IS_CHAN_2GHZ(chan)) { + freqIndex = 2; + if (IEEE80211_IS_CHAN_HT40(chan)) + modesIndex = 3; + else if (IEEE80211_IS_CHAN_108G(chan)) + modesIndex = 5; + else + modesIndex = 4; + } else { + freqIndex = 1; + if (IEEE80211_IS_CHAN_HT40(chan) || + IEEE80211_IS_CHAN_TURBO(chan)) + modesIndex = 2; + else + modesIndex = 1; + } + + /* Set correct Baseband to analog shift setting to access analog chips. */ + OS_REG_WRITE(ah, AR_PHY(0), 0x00000007); + OS_REG_WRITE(ah, AR_PHY_ADC_SERIAL_CTL, AR_PHY_SEL_INTERNAL_ADDAC); + + regWrites = ath_hal_ini_write(ah, &AH5212(ah)->ah_ini_modes, modesIndex, regWrites); + regWrites = ath_hal_ini_write(ah, &AH9287(ah)->ah_ini_rxgain, modesIndex, regWrites); + regWrites = ath_hal_ini_write(ah, &AH9287(ah)->ah_ini_txgain, modesIndex, regWrites); + regWrites = ath_hal_ini_write(ah, &AH5212(ah)->ah_ini_common, 1, regWrites); +} + +#define AR_BASE_FREQ_2GHZ 2300 +#define AR_BASE_FREQ_5GHZ 4900 +#define AR_SPUR_FEEQ_BOUND_HT40 19 +#define AR_SPUR_FEEQ_BOUND_HT20 10 + + + +/* + * Fill all software cached or static hardware state information. + * Return failure if capabilities are to come from EEPROM and + * cannot be read. + */ +static HAL_BOOL +ar9287FillCapabilityInfo(struct ath_hal *ah) +{ + HAL_CAPABILITIES *pCap = &AH_PRIVATE(ah)->ah_caps; + + if (!ar5416FillCapabilityInfo(ah)) + return AH_FALSE; + pCap->halNumGpioPins = 10; + pCap->halWowSupport = AH_TRUE; + pCap->halWowMatchPatternExact = AH_TRUE; +#if 0 + pCap->halWowMatchPatternDword = AH_TRUE; +#endif + + pCap->halCSTSupport = AH_TRUE; + pCap->halRifsRxSupport = AH_TRUE; + pCap->halRifsTxSupport = AH_TRUE; + pCap->halRtsAggrLimit = 64*1024; /* 802.11n max */ + pCap->halExtChanDfsSupport = AH_TRUE; + pCap->halUseCombinedRadarRssi = AH_TRUE; +#if 0 + /* XXX bluetooth */ + pCap->halBtCoexSupport = AH_TRUE; +#endif + pCap->halAutoSleepSupport = AH_FALSE; /* XXX? 
*/ + pCap->hal4kbSplitTransSupport = AH_FALSE; + /* Disable this so Block-ACK works correctly */ + pCap->halHasRxSelfLinkedTail = AH_FALSE; + pCap->halPSPollBroken = AH_FALSE; + + /* Hardware supports (at least) single-stream STBC TX/RX */ + pCap->halRxStbcSupport = 1; + pCap->halTxStbcSupport = 1; + + /* Hardware supports short-GI w/ 20MHz */ + pCap->halHTSGI20Support = 1; + + pCap->halEnhancedDfsSupport = AH_TRUE; + + return AH_TRUE; +} + +/* + * This has been disabled - having the HAL flip chainmasks on/off + * when attempting to implement 11n disrupts things. For now, just + * leave this flipped off and worry about implementing TX diversity + * for legacy and MCS0-7 when 11n is fully functioning. + */ +HAL_BOOL +ar9287SetAntennaSwitch(struct ath_hal *ah, HAL_ANT_SETTING settings) +{ + return AH_TRUE; +} + +static const char* +ar9287Probe(uint16_t vendorid, uint16_t devid) +{ + if (vendorid == ATHEROS_VENDOR_ID && + (devid == AR9287_DEVID_PCI || devid == AR9287_DEVID_PCIE)) + return "Atheros 9287"; + return AH_NULL; +} +AH_CHIP(AR9287, ar9287Probe, ar9287Attach); diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287_cal.c b/sys/dev/ath/ath_hal/ar9002/ar9287_cal.c new file mode 100644 index 0000000..d5024b0 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287_cal.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2008-2010 Atheros Communications Inc. + * Copyright (c) 2011 Adrian Chadd, Xenion Pty Ltd. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#include "opt_ah.h" +#include "ah.h" +#include "ah_internal.h" + +#include "ah_eeprom_v4k.h" + +#include "ar9002/ar9285.h" +#include "ar5416/ar5416reg.h" +#include "ar5416/ar5416phy.h" +#include "ar9002/ar9002phy.h" +//#include "ar9002/ar9287phy.h" + +#include "ar9002/ar9287_cal.h" + + +void +ar9287PACal(struct ath_hal *ah, HAL_BOOL is_reset) +{ + /* XXX not required */ +} + +/* + * This is like Merlin but without ADC disable + */ +HAL_BOOL +ar9287InitCalHardware(struct ath_hal *ah, const struct ieee80211_channel *chan) +{ + OS_REG_SET_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_FLTR_CAL); + + /* Calibrate the AGC */ + OS_REG_WRITE(ah, AR_PHY_AGC_CONTROL, + OS_REG_READ(ah, AR_PHY_AGC_CONTROL) | AR_PHY_AGC_CONTROL_CAL); + + /* Poll for offset calibration complete */ + if (!ath_hal_wait(ah, AR_PHY_AGC_CONTROL, + AR_PHY_AGC_CONTROL_CAL, 0)) { + HALDEBUG(ah, HAL_DEBUG_RESET, + "%s: offset calibration failed to complete in 1ms; " + "noisy environment?\n", __func__); + return AH_FALSE; + } + + OS_REG_CLR_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_FLTR_CAL); + + return AH_TRUE; +} diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287_cal.h 
b/sys/dev/ath/ath_hal/ar9002/ar9287_cal.h new file mode 100644 index 0000000..1a7cda2 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287_cal.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2008-2010 Atheros Communications Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ +#ifndef __AR9287_CAL_H__ +#define __AR9287_CAL_H__ + +extern void ar9287PACal(struct ath_hal *ah, HAL_BOOL is_reset); +extern HAL_BOOL ar9287InitCalHardware(struct ath_hal *ah, const struct ieee80211_channel *chan); + +#endif /* __AR9287_CAL_H__ */ diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287_olc.c b/sys/dev/ath/ath_hal/ar9002/ar9287_olc.c new file mode 100644 index 0000000..cbbe017 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287_olc.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2011 Adrian Chadd, Xenion Pty Ltd. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ +#include "opt_ah.h" + +#include "ah.h" +#include "ah_internal.h" + +#include "ah_eeprom_v14.h" +#include "ah_eeprom_9287.h" + +#include "ar9002/ar9280.h" +#include "ar5416/ar5416reg.h" +#include "ar5416/ar5416phy.h" +#include "ar9002/ar9002phy.h" + +#include "ar9002/ar9287phy.h" +#include "ar9002/ar9287an.h" +#include "ar9002/ar9287_olc.h" + +void +ar9287olcInit(struct ath_hal *ah) +{ + OS_REG_SET_BIT(ah, AR_PHY_TX_PWRCTRL9, + AR_PHY_TX_PWRCTRL9_RES_DC_REMOVAL); + OS_A_REG_RMW_FIELD(ah, AR9287_AN_TXPC0, + AR9287_AN_TXPC0_TXPCMODE, + AR9287_AN_TXPC0_TXPCMODE_TEMPSENSE); + OS_DELAY(100); +} + +/* + * Run temperature compensation calibration. + * + * The TX gain table is adjusted depending upon the difference + * between the initial PDADC value and the currently read + * average TX power sample value. This value is only valid if + * frames have been transmitted, so currPDADC will be 0 if + * no frames have yet been transmitted. + */ +void +ar9287olcTemperatureCompensation(struct ath_hal *ah) +{ + uint32_t rddata; + int32_t delta, currPDADC, slope; + + rddata = OS_REG_READ(ah, AR_PHY_TX_PWRCTRL4); + currPDADC = MS(rddata, AR_PHY_TX_PWRCTRL_PD_AVG_OUT); + + HALDEBUG(ah, HAL_DEBUG_PERCAL, "%s: initPDADC=%d, currPDADC=%d\n", + __func__, AH5416(ah)->initPDADC, currPDADC); + + if (AH5416(ah)->initPDADC == 0 || currPDADC == 0) { + /* + * Zero value indicates that no frames have been transmitted + * yet, can't do temperature compensation until frames are + * transmitted. 
+ */ + return; + } else { + int8_t val; + (void) (ath_hal_eepromGet(ah, AR_EEP_TEMPSENSE_SLOPE, &val)); + slope = val; + + if (slope == 0) { /* to avoid divide by zero case */ + delta = 0; + } else { + delta = ((currPDADC - AH5416(ah)->initPDADC)*4) / slope; + } + OS_REG_RMW_FIELD(ah, AR_PHY_CH0_TX_PWRCTRL11, + AR_PHY_TX_PWRCTRL_OLPC_TEMP_COMP, delta); + OS_REG_RMW_FIELD(ah, AR_PHY_CH1_TX_PWRCTRL11, + AR_PHY_TX_PWRCTRL_OLPC_TEMP_COMP, delta); + + HALDEBUG(ah, HAL_DEBUG_PERCAL, "%s: delta=%d\n", __func__, delta); + } +} + +void +ar9287olcGetTxGainIndex(struct ath_hal *ah, + const struct ieee80211_channel *chan, + struct cal_data_op_loop_ar9287 *pRawDatasetOpLoop, + uint8_t *pCalChans, uint16_t availPiers, int8_t *pPwr) +{ + uint16_t idxL = 0, idxR = 0, numPiers; + HAL_BOOL match; + CHAN_CENTERS centers; + + ar5416GetChannelCenters(ah, chan, &centers); + + for (numPiers = 0; numPiers < availPiers; numPiers++) { + if (pCalChans[numPiers] == AR5416_BCHAN_UNUSED) + break; + } + + match = ath_ee_getLowerUpperIndex( + (uint8_t)FREQ2FBIN(centers.synth_center, IEEE80211_IS_CHAN_2GHZ(chan)), + pCalChans, numPiers, &idxL, &idxR); + + if (match) { + *pPwr = (int8_t) pRawDatasetOpLoop[idxL].pwrPdg[0][0]; + } else { + *pPwr = ((int8_t) pRawDatasetOpLoop[idxL].pwrPdg[0][0] + + (int8_t) pRawDatasetOpLoop[idxR].pwrPdg[0][0])/2; + } +} + +void +ar9287olcSetPDADCs(struct ath_hal *ah, int32_t txPower, + uint16_t chain) +{ + uint32_t tmpVal; + uint32_t a; + + /* Enable OLPC for chain 0 */ + + tmpVal = OS_REG_READ(ah, 0xa270); + tmpVal = tmpVal & 0xFCFFFFFF; + tmpVal = tmpVal | (0x3 << 24); + OS_REG_WRITE(ah, 0xa270, tmpVal); + + /* Enable OLPC for chain 1 */ + + tmpVal = OS_REG_READ(ah, 0xb270); + tmpVal = tmpVal & 0xFCFFFFFF; + tmpVal = tmpVal | (0x3 << 24); + OS_REG_WRITE(ah, 0xb270, tmpVal); + + /* Write the OLPC ref power for chain 0 */ + + if (chain == 0) { + tmpVal = OS_REG_READ(ah, 0xa398); + tmpVal = tmpVal & 0xff00ffff; + a = (txPower)&0xff; + tmpVal = tmpVal | (a << 16); + 
OS_REG_WRITE(ah, 0xa398, tmpVal); + } + + /* Write the OLPC ref power for chain 1 */ + + if (chain == 1) { + tmpVal = OS_REG_READ(ah, 0xb398); + tmpVal = tmpVal & 0xff00ffff; + a = (txPower)&0xff; + tmpVal = tmpVal | (a << 16); + OS_REG_WRITE(ah, 0xb398, tmpVal); + } +} diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287_olc.h b/sys/dev/ath/ath_hal/ar9002/ar9287_olc.h new file mode 100644 index 0000000..ff21ce6 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287_olc.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2010 Atheros Communications, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * $FreeBSD$ + */ + +#ifndef __AR9287_OLC_H__ +#define __AR9287_OLC_H__ + +extern void ar9287olcInit(struct ath_hal *ah); +extern void ar9287olcTemperatureCompensation(struct ath_hal *ah); +extern void ar9287olcGetTxGainIndex(struct ath_hal *ah, + const struct ieee80211_channel *chan, + struct cal_data_op_loop_ar9287 *pRawDatasetOpLoop, + uint8_t *pCalChans, uint16_t availPiers, int8_t *pPwr); +extern void ar9287olcSetPDADCs(struct ath_hal *ah, + int32_t txPower, uint16_t chain); + +#endif /* __AR9287_OLC_H__ */ diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287_reset.c b/sys/dev/ath/ath_hal/ar9002/ar9287_reset.c new file mode 100644 index 0000000..a799ba0 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287_reset.c @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting + * Copyright (c) 2002-2008 Atheros Communications, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * $FreeBSD$ + */ + +#include "opt_ah.h" + +#include "ah.h" +#include "ah_internal.h" +#include "ah_devid.h" + +#include "ah_eeprom_v14.h" +#include "ah_eeprom_9287.h" + +#include "ar5416/ar5416.h" +#include "ar5416/ar5416reg.h" +#include "ar5416/ar5416phy.h" + +#include "ar9002/ar9287phy.h" +#include "ar9002/ar9287an.h" + +#include "ar9002/ar9287_olc.h" +#include "ar9002/ar9287_reset.h" + +/* + * Set the TX power calibration table per-chain. + * + * This only supports open-loop TX power control for the AR9287. + */ +static void +ar9287SetPowerCalTable(struct ath_hal *ah, + const struct ieee80211_channel *chan, int16_t *pTxPowerIndexOffset) +{ + struct cal_data_op_loop_ar9287 *pRawDatasetOpenLoop; + uint8_t *pCalBChans = NULL; + uint16_t pdGainOverlap_t2; + uint16_t numPiers = 0, i; + uint16_t numXpdGain, xpdMask; + uint16_t xpdGainValues[AR5416_NUM_PD_GAINS] = {0, 0, 0, 0}; + uint32_t regChainOffset; + HAL_EEPROM_9287 *ee = AH_PRIVATE(ah)->ah_eeprom; + struct ar9287_eeprom *pEepData = &ee->ee_base; + + xpdMask = pEepData->modalHeader.xpdGain; + + if ((pEepData->baseEepHeader.version & AR9287_EEP_VER_MINOR_MASK) >= + AR9287_EEP_MINOR_VER_2) + pdGainOverlap_t2 = pEepData->modalHeader.pdGainOverlap; + else + pdGainOverlap_t2 = (uint16_t)(MS(OS_REG_READ(ah, AR_PHY_TPCRG5), + AR_PHY_TPCRG5_PD_GAIN_OVERLAP)); + + /* Note: Kiwi should only be 2ghz.. 
*/ + if (IEEE80211_IS_CHAN_2GHZ(chan)) { + pCalBChans = pEepData->calFreqPier2G; + numPiers = AR9287_NUM_2G_CAL_PIERS; + pRawDatasetOpenLoop = (struct cal_data_op_loop_ar9287 *)pEepData->calPierData2G[0]; + AH5416(ah)->initPDADC = pRawDatasetOpenLoop->vpdPdg[0][0]; + } + numXpdGain = 0; + + /* Calculate the value of xpdgains from the xpdGain Mask */ + for (i = 1; i <= AR5416_PD_GAINS_IN_MASK; i++) { + if ((xpdMask >> (AR5416_PD_GAINS_IN_MASK - i)) & 1) { + if (numXpdGain >= AR5416_NUM_PD_GAINS) + break; + xpdGainValues[numXpdGain] = + (uint16_t)(AR5416_PD_GAINS_IN_MASK-i); + numXpdGain++; + } + } + + OS_REG_RMW_FIELD(ah, AR_PHY_TPCRG1, AR_PHY_TPCRG1_NUM_PD_GAIN, + (numXpdGain - 1) & 0x3); + OS_REG_RMW_FIELD(ah, AR_PHY_TPCRG1, AR_PHY_TPCRG1_PD_GAIN_1, + xpdGainValues[0]); + OS_REG_RMW_FIELD(ah, AR_PHY_TPCRG1, AR_PHY_TPCRG1_PD_GAIN_2, + xpdGainValues[1]); + OS_REG_RMW_FIELD(ah, AR_PHY_TPCRG1, AR_PHY_TPCRG1_PD_GAIN_3, + xpdGainValues[2]); + + for (i = 0; i < AR9287_MAX_CHAINS; i++) { + regChainOffset = i * 0x1000; + + if (pEepData->baseEepHeader.txMask & (1 << i)) { + int8_t txPower; + pRawDatasetOpenLoop = + (struct cal_data_op_loop_ar9287 *)pEepData->calPierData2G[i]; + ar9287olcGetTxGainIndex(ah, chan, + pRawDatasetOpenLoop, + pCalBChans, numPiers, + &txPower); + ar9287olcSetPDADCs(ah, txPower, i); + } + } + + *pTxPowerIndexOffset = 0; +} + + +/* XXX hard-coded values? */ +#define REDUCE_SCALED_POWER_BY_TWO_CHAIN 6 + +/* + * ar9287SetPowerPerRateTable + * + * Sets the transmit power in the baseband for the given + * operating channel and mode. + * + * This is like the v14 EEPROM table except the 5GHz code. 
+ */ +static HAL_BOOL +ar9287SetPowerPerRateTable(struct ath_hal *ah, + struct ar9287_eeprom *pEepData, + const struct ieee80211_channel *chan, + int16_t *ratesArray, uint16_t cfgCtl, + uint16_t AntennaReduction, + uint16_t twiceMaxRegulatoryPower, + uint16_t powerLimit) +{ +#define N(a) (sizeof(a)/sizeof(a[0])) +/* Local defines to distinguish between extension and control CTL's */ +#define EXT_ADDITIVE (0x8000) +#define CTL_11A_EXT (CTL_11A | EXT_ADDITIVE) +#define CTL_11G_EXT (CTL_11G | EXT_ADDITIVE) +#define CTL_11B_EXT (CTL_11B | EXT_ADDITIVE) + + uint16_t twiceMaxEdgePower = AR5416_MAX_RATE_POWER; + int i; + int16_t twiceLargestAntenna; + struct cal_ctl_data_ar9287 *rep; + CAL_TARGET_POWER_LEG targetPowerOfdm; + CAL_TARGET_POWER_LEG targetPowerCck = {0, {0, 0, 0, 0}}; + CAL_TARGET_POWER_LEG targetPowerOfdmExt = {0, {0, 0, 0, 0}}; + CAL_TARGET_POWER_LEG targetPowerCckExt = {0, {0, 0, 0, 0}}; + CAL_TARGET_POWER_HT targetPowerHt20; + CAL_TARGET_POWER_HT targetPowerHt40 = {0, {0, 0, 0, 0}}; + int16_t scaledPower, minCtlPower; + +#define SUB_NUM_CTL_MODES_AT_2G_40 3 /* excluding HT40, EXT-OFDM, EXT-CCK */ + static const uint16_t ctlModesFor11g[] = { + CTL_11B, CTL_11G, CTL_2GHT20, CTL_11B_EXT, CTL_11G_EXT, CTL_2GHT40 + }; + const uint16_t *pCtlMode; + uint16_t numCtlModes, ctlMode, freq; + CHAN_CENTERS centers; + + ar5416GetChannelCenters(ah, chan, &centers); + + /* Compute TxPower reduction due to Antenna Gain */ + + twiceLargestAntenna = AH_MAX( + pEepData->modalHeader.antennaGainCh[0], + pEepData->modalHeader.antennaGainCh[1]); + + twiceLargestAntenna = (int16_t)AH_MIN((AntennaReduction) - twiceLargestAntenna, 0); + + /* XXX setup for 5212 use (really used?) 
*/ + ath_hal_eepromSet(ah, AR_EEP_ANTGAINMAX_2, twiceLargestAntenna); + + /* + * scaledPower is the minimum of the user input power level and + * the regulatory allowed power level + */ + scaledPower = AH_MIN(powerLimit, twiceMaxRegulatoryPower + twiceLargestAntenna); + + /* Reduce scaled Power by number of chains active to get to per chain tx power level */ + /* TODO: better value than these? */ + switch (owl_get_ntxchains(AH5416(ah)->ah_tx_chainmask)) { + case 1: + break; + case 2: + scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + break; + default: + return AH_FALSE; /* Unsupported number of chains */ + } + + scaledPower = AH_MAX(0, scaledPower); + + /* Get target powers from EEPROM - our baseline for TX Power */ + /* XXX assume channel is 2ghz */ + if (1) { + /* Setup for CTL modes */ + numCtlModes = N(ctlModesFor11g) - SUB_NUM_CTL_MODES_AT_2G_40; /* CTL_11B, CTL_11G, CTL_2GHT20 */ + pCtlMode = ctlModesFor11g; + + ar5416GetTargetPowersLeg(ah, chan, pEepData->calTargetPowerCck, + AR9287_NUM_2G_CCK_TARGET_POWERS, &targetPowerCck, 4, AH_FALSE); + ar5416GetTargetPowersLeg(ah, chan, pEepData->calTargetPower2G, + AR9287_NUM_2G_20_TARGET_POWERS, &targetPowerOfdm, 4, AH_FALSE); + ar5416GetTargetPowers(ah, chan, pEepData->calTargetPower2GHT20, + AR9287_NUM_2G_20_TARGET_POWERS, &targetPowerHt20, 8, AH_FALSE); + + if (IEEE80211_IS_CHAN_HT40(chan)) { + numCtlModes = N(ctlModesFor11g); /* All 2G CTL's */ + + ar5416GetTargetPowers(ah, chan, pEepData->calTargetPower2GHT40, + AR9287_NUM_2G_40_TARGET_POWERS, &targetPowerHt40, 8, AH_TRUE); + /* Get target powers for extension channels */ + ar5416GetTargetPowersLeg(ah, chan, pEepData->calTargetPowerCck, + AR9287_NUM_2G_CCK_TARGET_POWERS, &targetPowerCckExt, 4, AH_TRUE); + ar5416GetTargetPowersLeg(ah, chan, pEepData->calTargetPower2G, + AR9287_NUM_2G_20_TARGET_POWERS, &targetPowerOfdmExt, 4, AH_TRUE); + } + } + + /* + * For MIMO, need to apply regulatory caps individually across dynamically + * running modes: CCK, OFDM, HT20, 
HT40 + * + * The outer loop walks through each possible applicable runtime mode. + * The inner loop walks through each ctlIndex entry in EEPROM. + * The ctl value is encoded as [7:4] == test group, [3:0] == test mode. + * + */ + for (ctlMode = 0; ctlMode < numCtlModes; ctlMode++) { + HAL_BOOL isHt40CtlMode = (pCtlMode[ctlMode] == CTL_5GHT40) || + (pCtlMode[ctlMode] == CTL_2GHT40); + if (isHt40CtlMode) { + freq = centers.ctl_center; + } else if (pCtlMode[ctlMode] & EXT_ADDITIVE) { + freq = centers.ext_center; + } else { + freq = centers.ctl_center; + } + + /* walk through each CTL index stored in EEPROM */ + for (i = 0; (i < AR9287_NUM_CTLS) && pEepData->ctlIndex[i]; i++) { + uint16_t twiceMinEdgePower; + + /* compare test group from regulatory channel list with test mode from pCtlMode list */ + if ((((cfgCtl & ~CTL_MODE_M) | (pCtlMode[ctlMode] & CTL_MODE_M)) == pEepData->ctlIndex[i]) || + (((cfgCtl & ~CTL_MODE_M) | (pCtlMode[ctlMode] & CTL_MODE_M)) == + ((pEepData->ctlIndex[i] & CTL_MODE_M) | SD_NO_CTL))) { + rep = &(pEepData->ctlData[i]); + twiceMinEdgePower = ar5416GetMaxEdgePower(freq, + rep->ctlEdges[owl_get_ntxchains(AH5416(ah)->ah_tx_chainmask) - 1], + IEEE80211_IS_CHAN_2GHZ(chan)); + if ((cfgCtl & ~CTL_MODE_M) == SD_NO_CTL) { + /* Find the minimum of all CTL edge powers that apply to this channel */ + twiceMaxEdgePower = AH_MIN(twiceMaxEdgePower, twiceMinEdgePower); + } else { + /* specific */ + twiceMaxEdgePower = twiceMinEdgePower; + break; + } + } + } + minCtlPower = (uint8_t)AH_MIN(twiceMaxEdgePower, scaledPower); + /* Apply ctl mode to correct target power set */ + switch(pCtlMode[ctlMode]) { + case CTL_11B: + for (i = 0; i < N(targetPowerCck.tPow2x); i++) { + targetPowerCck.tPow2x[i] = (uint8_t)AH_MIN(targetPowerCck.tPow2x[i], minCtlPower); + } + break; + case CTL_11A: + case CTL_11G: + for (i = 0; i < N(targetPowerOfdm.tPow2x); i++) { + targetPowerOfdm.tPow2x[i] = (uint8_t)AH_MIN(targetPowerOfdm.tPow2x[i], minCtlPower); + } + break; + case CTL_5GHT20: 
+ case CTL_2GHT20: + for (i = 0; i < N(targetPowerHt20.tPow2x); i++) { + targetPowerHt20.tPow2x[i] = (uint8_t)AH_MIN(targetPowerHt20.tPow2x[i], minCtlPower); + } + break; + case CTL_11B_EXT: + targetPowerCckExt.tPow2x[0] = (uint8_t)AH_MIN(targetPowerCckExt.tPow2x[0], minCtlPower); + break; + case CTL_11A_EXT: + case CTL_11G_EXT: + targetPowerOfdmExt.tPow2x[0] = (uint8_t)AH_MIN(targetPowerOfdmExt.tPow2x[0], minCtlPower); + break; + case CTL_5GHT40: + case CTL_2GHT40: + for (i = 0; i < N(targetPowerHt40.tPow2x); i++) { + targetPowerHt40.tPow2x[i] = (uint8_t)AH_MIN(targetPowerHt40.tPow2x[i], minCtlPower); + } + break; + default: + return AH_FALSE; + break; + } + } /* end ctl mode checking */ + + /* Set rates Array from collected data */ + ar5416SetRatesArrayFromTargetPower(ah, chan, ratesArray, + &targetPowerCck, + &targetPowerCckExt, + &targetPowerOfdm, + &targetPowerOfdmExt, + &targetPowerHt20, + &targetPowerHt40); + return AH_TRUE; +#undef EXT_ADDITIVE +#undef CTL_11A_EXT +#undef CTL_11G_EXT +#undef CTL_11B_EXT +#undef SUB_NUM_CTL_MODES_AT_5G_40 +#undef SUB_NUM_CTL_MODES_AT_2G_40 +#undef N +} + +#undef REDUCE_SCALED_POWER_BY_TWO_CHAIN + +/* + * This is based off of the AR5416/AR9285 code and likely could + * be unified in the future. 
+ */ +HAL_BOOL +ar9287SetTransmitPower(struct ath_hal *ah, + const struct ieee80211_channel *chan, uint16_t *rfXpdGain) +{ +#define POW_SM(_r, _s) (((_r) & 0x3f) << (_s)) +#define N(a) (sizeof (a) / sizeof (a[0])) + + const struct modal_eep_ar9287_header *pModal; + struct ath_hal_5212 *ahp = AH5212(ah); + int16_t ratesArray[Ar5416RateSize]; + int16_t txPowerIndexOffset = 0; + uint8_t ht40PowerIncForPdadc = 2; + int i; + + uint16_t cfgCtl; + uint16_t powerLimit; + uint16_t twiceAntennaReduction; + uint16_t twiceMaxRegulatoryPower; + int16_t maxPower; + HAL_EEPROM_9287 *ee = AH_PRIVATE(ah)->ah_eeprom; + struct ar9287_eeprom *pEepData = &ee->ee_base; + + /* Setup info for the actual eeprom */ + OS_MEMZERO(ratesArray, sizeof(ratesArray)); + cfgCtl = ath_hal_getctl(ah, chan); + powerLimit = chan->ic_maxregpower * 2; + twiceAntennaReduction = chan->ic_maxantgain; + twiceMaxRegulatoryPower = AH_MIN(MAX_RATE_POWER, AH_PRIVATE(ah)->ah_powerLimit); + pModal = &pEepData->modalHeader; + HALDEBUG(ah, HAL_DEBUG_RESET, "%s Channel=%u CfgCtl=%u\n", + __func__,chan->ic_freq, cfgCtl ); + + /* XXX Assume Minor is v2 or later */ + ht40PowerIncForPdadc = pModal->ht40PowerIncForPdadc; + + /* Fetch per-rate power table for the given channel */ + if (! 
ar9287SetPowerPerRateTable(ah, pEepData, chan, + &ratesArray[0],cfgCtl, + twiceAntennaReduction, + twiceMaxRegulatoryPower, powerLimit)) { + HALDEBUG(ah, HAL_DEBUG_ANY, + "%s: unable to set tx power per rate table\n", __func__); + return AH_FALSE; + } + + /* Set TX power control calibration curves for each TX chain */ + ar9287SetPowerCalTable(ah, chan, &txPowerIndexOffset); + + /* Calculate maximum power level */ + maxPower = AH_MAX(ratesArray[rate6mb], ratesArray[rateHt20_0]); + maxPower = AH_MAX(maxPower, ratesArray[rate1l]); + + if (IEEE80211_IS_CHAN_HT40(chan)) + maxPower = AH_MAX(maxPower, ratesArray[rateHt40_0]); + + ahp->ah_tx6PowerInHalfDbm = maxPower; + AH_PRIVATE(ah)->ah_maxPowerLevel = maxPower; + ahp->ah_txPowerIndexOffset = txPowerIndexOffset; + + /* + * txPowerIndexOffset is set by the SetPowerTable() call - + * adjust the rate table (0 offset if rates EEPROM not loaded) + */ + /* XXX what about the pwrTableOffset? */ + for (i = 0; i < N(ratesArray); i++) { + ratesArray[i] = (int16_t)(txPowerIndexOffset + ratesArray[i]); + /* -5 dBm offset for Merlin and later; this includes Kiwi */ + ratesArray[i] -= AR5416_PWR_TABLE_OFFSET_DB * 2; + if (ratesArray[i] > AR5416_MAX_RATE_POWER) + ratesArray[i] = AR5416_MAX_RATE_POWER; + if (ratesArray[i] < 0) + ratesArray[i] = 0; + } + +#ifdef AH_EEPROM_DUMP + ar5416PrintPowerPerRate(ah, ratesArray); +#endif + + /* + * Adjust the HT40 power to meet the correct target TX power + * for 40MHz mode, based on TX power curves that are established + * for 20MHz mode. + * + * XXX handle overflow/too high power level? 
+ */ + if (IEEE80211_IS_CHAN_HT40(chan)) { + ratesArray[rateHt40_0] += ht40PowerIncForPdadc; + ratesArray[rateHt40_1] += ht40PowerIncForPdadc; + ratesArray[rateHt40_2] += ht40PowerIncForPdadc; + ratesArray[rateHt40_3] += ht40PowerIncForPdadc; + ratesArray[rateHt40_4] += ht40PowerIncForPdadc; + ratesArray[rateHt40_5] += ht40PowerIncForPdadc; + ratesArray[rateHt40_6] += ht40PowerIncForPdadc; + ratesArray[rateHt40_7] += ht40PowerIncForPdadc; + } + + /* Write the TX power rate registers */ + ar5416WriteTxPowerRateRegisters(ah, chan, ratesArray); + + return AH_TRUE; +#undef POW_SM +#undef N +} + +/* + * Read EEPROM header info and program the device for correct operation + * given the channel value. + */ +HAL_BOOL +ar9287SetBoardValues(struct ath_hal *ah, const struct ieee80211_channel *chan) +{ + const HAL_EEPROM_9287 *ee = AH_PRIVATE(ah)->ah_eeprom; + const struct ar9287_eeprom *eep = &ee->ee_base; + const struct modal_eep_ar9287_header *pModal = &eep->modalHeader; + uint16_t antWrites[AR9287_ANT_16S]; + uint32_t regChainOffset, regval; + uint8_t txRxAttenLocal; + int i, j, offset_num; + + pModal = &eep->modalHeader; + + antWrites[0] = (uint16_t)((pModal->antCtrlCommon >> 28) & 0xF); + antWrites[1] = (uint16_t)((pModal->antCtrlCommon >> 24) & 0xF); + antWrites[2] = (uint16_t)((pModal->antCtrlCommon >> 20) & 0xF); + antWrites[3] = (uint16_t)((pModal->antCtrlCommon >> 16) & 0xF); + antWrites[4] = (uint16_t)((pModal->antCtrlCommon >> 12) & 0xF); + antWrites[5] = (uint16_t)((pModal->antCtrlCommon >> 8) & 0xF); + antWrites[6] = (uint16_t)((pModal->antCtrlCommon >> 4) & 0xF); + antWrites[7] = (uint16_t)(pModal->antCtrlCommon & 0xF); + + offset_num = 8; + + for (i = 0, j = offset_num; i < AR9287_MAX_CHAINS; i++) { + antWrites[j++] = (uint16_t)((pModal->antCtrlChain[i] >> 28) & 0xf); + antWrites[j++] = (uint16_t)((pModal->antCtrlChain[i] >> 10) & 0x3); + antWrites[j++] = (uint16_t)((pModal->antCtrlChain[i] >> 8) & 0x3); + antWrites[j++] = 0; + antWrites[j++] = 
(uint16_t)((pModal->antCtrlChain[i] >> 6) & 0x3); + antWrites[j++] = (uint16_t)((pModal->antCtrlChain[i] >> 4) & 0x3); + antWrites[j++] = (uint16_t)((pModal->antCtrlChain[i] >> 2) & 0x3); + antWrites[j++] = (uint16_t)(pModal->antCtrlChain[i] & 0x3); + } + + OS_REG_WRITE(ah, AR_PHY_SWITCH_COM, pModal->antCtrlCommon); + + for (i = 0; i < AR9287_MAX_CHAINS; i++) { + regChainOffset = i * 0x1000; + + OS_REG_WRITE(ah, AR_PHY_SWITCH_CHAIN_0 + regChainOffset, + pModal->antCtrlChain[i]); + + OS_REG_WRITE(ah, AR_PHY_TIMING_CTRL4_CHAIN(0) + regChainOffset, + (OS_REG_READ(ah, AR_PHY_TIMING_CTRL4_CHAIN(0) + regChainOffset) + & ~(AR_PHY_TIMING_CTRL4_IQCORR_Q_Q_COFF | + AR_PHY_TIMING_CTRL4_IQCORR_Q_I_COFF)) | + SM(pModal->iqCalICh[i], + AR_PHY_TIMING_CTRL4_IQCORR_Q_I_COFF) | + SM(pModal->iqCalQCh[i], + AR_PHY_TIMING_CTRL4_IQCORR_Q_Q_COFF)); + + txRxAttenLocal = pModal->txRxAttenCh[i]; + + OS_REG_RMW_FIELD(ah, AR_PHY_GAIN_2GHZ + regChainOffset, + AR_PHY_GAIN_2GHZ_XATTEN1_MARGIN, + pModal->bswMargin[i]); + OS_REG_RMW_FIELD(ah, AR_PHY_GAIN_2GHZ + regChainOffset, + AR_PHY_GAIN_2GHZ_XATTEN1_DB, + pModal->bswAtten[i]); + OS_REG_RMW_FIELD(ah, AR_PHY_RXGAIN + regChainOffset, + AR9280_PHY_RXGAIN_TXRX_ATTEN, + txRxAttenLocal); + OS_REG_RMW_FIELD(ah, AR_PHY_RXGAIN + regChainOffset, + AR9280_PHY_RXGAIN_TXRX_MARGIN, + pModal->rxTxMarginCh[i]); + } + + + if (IEEE80211_IS_CHAN_HT40(chan)) + OS_REG_RMW_FIELD(ah, AR_PHY_SETTLING, + AR_PHY_SETTLING_SWITCH, pModal->swSettleHt40); + else + OS_REG_RMW_FIELD(ah, AR_PHY_SETTLING, + AR_PHY_SETTLING_SWITCH, pModal->switchSettling); + + OS_REG_RMW_FIELD(ah, AR_PHY_DESIRED_SZ, + AR_PHY_DESIRED_SZ_ADC, pModal->adcDesiredSize); + + OS_REG_WRITE(ah, AR_PHY_RF_CTL4, + SM(pModal->txEndToXpaOff, AR_PHY_RF_CTL4_TX_END_XPAA_OFF) + | SM(pModal->txEndToXpaOff, AR_PHY_RF_CTL4_TX_END_XPAB_OFF) + | SM(pModal->txFrameToXpaOn, AR_PHY_RF_CTL4_FRAME_XPAA_ON) + | SM(pModal->txFrameToXpaOn, AR_PHY_RF_CTL4_FRAME_XPAB_ON)); + + OS_REG_RMW_FIELD(ah, AR_PHY_RF_CTL3, + 
AR_PHY_TX_END_TO_A2_RX_ON, pModal->txEndToRxOn); + + OS_REG_RMW_FIELD(ah, AR_PHY_CCA, + AR9280_PHY_CCA_THRESH62, pModal->thresh62); + OS_REG_RMW_FIELD(ah, AR_PHY_EXT_CCA0, + AR_PHY_EXT_CCA0_THRESH62, pModal->thresh62); + + regval = OS_REG_READ(ah, AR9287_AN_RF2G3_CH0); + regval &= ~(AR9287_AN_RF2G3_DB1 | + AR9287_AN_RF2G3_DB2 | + AR9287_AN_RF2G3_OB_CCK | + AR9287_AN_RF2G3_OB_PSK | + AR9287_AN_RF2G3_OB_QAM | + AR9287_AN_RF2G3_OB_PAL_OFF); + regval |= (SM(pModal->db1, AR9287_AN_RF2G3_DB1) | + SM(pModal->db2, AR9287_AN_RF2G3_DB2) | + SM(pModal->ob_cck, AR9287_AN_RF2G3_OB_CCK) | + SM(pModal->ob_psk, AR9287_AN_RF2G3_OB_PSK) | + SM(pModal->ob_qam, AR9287_AN_RF2G3_OB_QAM) | + SM(pModal->ob_pal_off, AR9287_AN_RF2G3_OB_PAL_OFF)); + + OS_REG_WRITE(ah, AR9287_AN_RF2G3_CH0, regval); + OS_DELAY(100); /* analog write */ + + regval = OS_REG_READ(ah, AR9287_AN_RF2G3_CH1); + regval &= ~(AR9287_AN_RF2G3_DB1 | + AR9287_AN_RF2G3_DB2 | + AR9287_AN_RF2G3_OB_CCK | + AR9287_AN_RF2G3_OB_PSK | + AR9287_AN_RF2G3_OB_QAM | + AR9287_AN_RF2G3_OB_PAL_OFF); + regval |= (SM(pModal->db1, AR9287_AN_RF2G3_DB1) | + SM(pModal->db2, AR9287_AN_RF2G3_DB2) | + SM(pModal->ob_cck, AR9287_AN_RF2G3_OB_CCK) | + SM(pModal->ob_psk, AR9287_AN_RF2G3_OB_PSK) | + SM(pModal->ob_qam, AR9287_AN_RF2G3_OB_QAM) | + SM(pModal->ob_pal_off, AR9287_AN_RF2G3_OB_PAL_OFF)); + + OS_REG_WRITE(ah, AR9287_AN_RF2G3_CH1, regval); + OS_DELAY(100); /* analog write */ + + OS_REG_RMW_FIELD(ah, AR_PHY_RF_CTL2, + AR_PHY_TX_FRAME_TO_DATA_START, pModal->txFrameToDataStart); + OS_REG_RMW_FIELD(ah, AR_PHY_RF_CTL2, + AR_PHY_TX_FRAME_TO_PA_ON, pModal->txFrameToPaOn); + + OS_A_REG_RMW_FIELD(ah, AR9287_AN_TOP2, + AR9287_AN_TOP2_XPABIAS_LVL, pModal->xpaBiasLvl); + + return AH_TRUE; +} diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287_reset.h b/sys/dev/ath/ath_hal/ar9002/ar9287_reset.h new file mode 100644 index 0000000..679fb8c --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287_reset.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 Atheros 
Communications, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef __AR9287_RESET_H__ +#define __AR9287_RESET_H__ + +extern HAL_BOOL ar9287SetTransmitPower(struct ath_hal *ah, + const struct ieee80211_channel *chan, uint16_t *rfXpdGain); +extern HAL_BOOL ar9287SetBoardValues(struct ath_hal *ah, + const struct ieee80211_channel *chan); + +#endif /* __AR9287_RESET_H__ */ diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287an.h b/sys/dev/ath/ath_hal/ar9002/ar9287an.h new file mode 100644 index 0000000..ba7a92c --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287an.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2010 Atheros Communications, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef __AR9287AN_H__ +#define __AR9287AN_H__ + +#define AR9287_AN_RF2G3_CH0 0x7808 +#define AR9287_AN_RF2G3_CH1 0x785c +#define AR9287_AN_RF2G3_DB1 0xE0000000 +#define AR9287_AN_RF2G3_DB1_S 29 +#define AR9287_AN_RF2G3_DB2 0x1C000000 +#define AR9287_AN_RF2G3_DB2_S 26 +#define AR9287_AN_RF2G3_OB_CCK 0x03800000 +#define AR9287_AN_RF2G3_OB_CCK_S 23 +#define AR9287_AN_RF2G3_OB_PSK 0x00700000 +#define AR9287_AN_RF2G3_OB_PSK_S 20 +#define AR9287_AN_RF2G3_OB_QAM 0x000E0000 +#define AR9287_AN_RF2G3_OB_QAM_S 17 +#define AR9287_AN_RF2G3_OB_PAL_OFF 0x0001C000 +#define AR9287_AN_RF2G3_OB_PAL_OFF_S 14 + +#define AR9287_AN_TXPC0 0x7898 +#define AR9287_AN_TXPC0_TXPCMODE 0x0000C000 +#define AR9287_AN_TXPC0_TXPCMODE_S 14 +#define AR9287_AN_TXPC0_TXPCMODE_NORMAL 0 +#define AR9287_AN_TXPC0_TXPCMODE_TEST 1 +#define AR9287_AN_TXPC0_TXPCMODE_TEMPSENSE 2 +#define AR9287_AN_TXPC0_TXPCMODE_ATBTEST 3 + +#define AR9287_AN_TOP2 0x78b4 +#define AR9287_AN_TOP2_XPABIAS_LVL 0xC0000000 +#define AR9287_AN_TOP2_XPABIAS_LVL_S 30 + +#endif diff --git a/sys/dev/ath/ath_hal/ar9002/ar9287phy.h b/sys/dev/ath/ath_hal/ar9002/ar9287phy.h new file mode 100644 index 0000000..8f28194 --- /dev/null +++ b/sys/dev/ath/ath_hal/ar9002/ar9287phy.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2010 Atheros Communications, Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef __AR9287PHY_H__ +#define __AR9287PHY_H__ + +/* AR_PHY_CH0_TX_PWRCTRL11, AR_PHY_CH1_TX_PWRCTRL11 */ +#define AR_PHY_TX_PWRCTRL_OLPC_TEMP_COMP 0x0000FC00 +#define AR_PHY_TX_PWRCTRL_OLPC_TEMP_COMP_S 10 + +#endif diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c index 745a0d5..6c7b0e7 100644 --- a/sys/dev/ath/if_ath.c +++ b/sys/dev/ath/if_ath.c @@ -95,11 +95,13 @@ __FBSDID("$FreeBSD$"); #include <dev/ath/if_ath_tx.h> #include <dev/ath/if_ath_sysctl.h> #include <dev/ath/if_ath_keycache.h> +#include <dev/ath/if_athdfs.h> #ifdef ATH_TX99_DIAG #include <dev/ath/ath_tx99/ath_tx99.h> #endif + /* * ATH_BCBUF determines the number of vap's that can transmit * beacons and also (currently) the number of vap's that can @@ -199,6 +201,8 @@ static void ath_setcurmode(struct ath_softc *, enum ieee80211_phymode); static void ath_announce(struct ath_softc *); +static void ath_dfs_tasklet(void *, int); + #ifdef IEEE80211_SUPPORT_TDMA static void ath_tdma_settimers(struct ath_softc *sc, u_int32_t nexttbtt, u_int32_t bintval); @@ -471,6 +475,16 @@ ath_attach(u_int16_t devid, struct ath_softc *sc) goto bad2; } + /* Attach DFS module */ + if (! 
ath_dfs_attach(sc)) { + device_printf(sc->sc_dev, "%s: unable to attach DFS\n", __func__); + error = EIO; + goto bad2; + } + + /* Start DFS processing tasklet */ + TASK_INIT(&sc->sc_dfstask, 0, ath_dfs_tasklet, sc); + sc->sc_blinking = 0; sc->sc_ledstate = 1; sc->sc_ledon = 0; /* low true */ @@ -627,13 +641,22 @@ ath_attach(u_int16_t devid, struct ath_softc *sc) | IEEE80211_HTC_AMPDU /* A-MPDU tx/rx */ | IEEE80211_HTC_AMSDU /* A-MSDU tx/rx */ | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ - /* At the present time, the hardware doesn't support short-GI in 20mhz mode */ -#if 0 - | IEEE80211_HTCAP_SHORTGI20 /* short GI in 20MHz */ -#endif | IEEE80211_HTCAP_SMPS_OFF; /* SM power save off */ ; + /* + * Enable short-GI for HT20 only if the hardware + * advertises support. + * Notably, anything earlier than the AR9287 doesn't. + */ + if ((ath_hal_getcapability(ah, + HAL_CAP_HT20_SGI, 0, NULL) == HAL_OK) && + (wmodes & HAL_MODE_HT20)) { + device_printf(sc->sc_dev, + "[HT] enabling short-GI in 20MHz mode\n"); + ic->ic_htcaps |= IEEE80211_HTCAP_SHORTGI20; + } + if (wmodes & HAL_MODE_HT40) ic->ic_htcaps |= IEEE80211_HTCAP_CHWIDTH40 | IEEE80211_HTCAP_SHORTGI40; @@ -762,6 +785,8 @@ ath_detach(struct ath_softc *sc) sc->sc_tx99->detach(sc->sc_tx99); #endif ath_rate_detach(sc->sc_rc); + + ath_dfs_detach(sc); ath_desc_free(sc); ath_tx_cleanup(sc); ath_hal_detach(sc->sc_ah); /* NB: sets chip in full sleep */ @@ -975,6 +1000,21 @@ ath_vap_create(struct ieee80211com *ic, avp->av_bmiss = vap->iv_bmiss; vap->iv_bmiss = ath_bmiss_vap; + /* Set default parameters */ + + /* + * Anything earlier than some AR9300 series MACs don't + * support a smaller MPDU density. + */ + vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_8; + /* + * All NICs can handle the maximum size, however + * AR5416 based MACs can only TX aggregates w/ RTS + * protection when the total aggregate size is <= 8k. + * However, for now that's enforced by the TX path. 
+ */ + vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K; + avp->av_bslot = -1; if (needbeacon) { /* @@ -1221,6 +1261,10 @@ ath_resume(struct ath_softc *sc) sc->sc_curchan != NULL ? sc->sc_curchan : ic->ic_curchan, AH_FALSE, &status); ath_reset_keycache(sc); + + /* Let DFS at it in case it's a DFS channel */ + ath_dfs_radar_enable(sc, ic->ic_curchan); + if (sc->sc_resume_up) { if (ic->ic_opmode == IEEE80211_M_STA) { ath_init(sc); @@ -1530,6 +1574,9 @@ ath_init(void *arg) } ath_chan_change(sc, ic->ic_curchan); + /* Let DFS at it in case it's a DFS channel */ + ath_dfs_radar_enable(sc, ic->ic_curchan); + /* * Likewise this is set during reset so update * state cached in the driver. @@ -1675,6 +1722,10 @@ ath_reset(struct ifnet *ifp) if_printf(ifp, "%s: unable to reset hardware; hal status %u\n", __func__, status); sc->sc_diversity = ath_hal_getdiversity(ah); + + /* Let DFS at it in case it's a DFS channel */ + ath_dfs_radar_enable(sc, ic->ic_curchan); + if (ath_startrecv(sc) != 0) /* restart recv */ if_printf(ifp, "%s: unable to start recv logic\n", __func__); /* @@ -1966,6 +2017,10 @@ ath_calcrxfilter(struct ath_softc *sc) if (ic->ic_opmode == IEEE80211_M_MONITOR) rfilt |= HAL_RX_FILTER_CONTROL; + if (sc->sc_dodfs) { + rfilt |= HAL_RX_FILTER_PHYRADAR; + } + /* * Enable RX of compressed BAR frames only when doing * 802.11n. Required for A-MPDU. 
@@ -3417,6 +3472,17 @@ ath_rx_proc(void *arg, int npending) sc->sc_stats.ast_rx_fifoerr++; if (rs->rs_status & HAL_RXERR_PHY) { sc->sc_stats.ast_rx_phyerr++; + /* Process DFS radar events */ + if ((rs->rs_phyerr == HAL_PHYERR_RADAR) || + (rs->rs_phyerr == HAL_PHYERR_FALSE_RADAR_EXT)) { + /* Since we're touching the frame data, sync it */ + bus_dmamap_sync(sc->sc_dmat, + bf->bf_dmamap, + BUS_DMASYNC_POSTREAD); + /* Now pass it to the radar processing code */ + ath_dfs_process_phy_err(sc, mtod(m, char *), tsf, rs); + } + /* Be suitably paranoid about receiving phy errors out of the stats array bounds */ if (rs->rs_phyerr < 64) sc->sc_stats.ast_rx_phy[rs->rs_phyerr]++; @@ -3658,6 +3724,10 @@ rx_next: if (ngood) sc->sc_lastrx = tsf; + /* Queue DFS tasklet if needed */ + if (ath_dfs_tasklet_needed(sc, sc->sc_curchan)) + taskqueue_enqueue(sc->sc_tq, &sc->sc_dfstask); + if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { #ifdef IEEE80211_SUPPORT_SUPERG ieee80211_ff_age_all(ic, 100); @@ -4375,6 +4445,9 @@ ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan) } sc->sc_diversity = ath_hal_getdiversity(ah); + /* Let DFS at it in case it's a DFS channel */ + ath_dfs_radar_enable(sc, ic->ic_curchan); + /* * Re-enable rx framework. */ @@ -5641,5 +5714,23 @@ ath_tdma_beacon_send(struct ath_softc *sc, struct ieee80211vap *vap) } #endif /* IEEE80211_SUPPORT_TDMA */ +static void +ath_dfs_tasklet(void *p, int npending) +{ + struct ath_softc *sc = (struct ath_softc *) p; + struct ifnet *ifp = sc->sc_ifp; + struct ieee80211com *ic = ifp->if_l2com; + + /* + * If previous processing has found a radar event, + * signal this to the net80211 layer to begin DFS + * processing. 
+ */ + if (ath_dfs_process_radar_event(sc, sc->sc_curchan)) { + /* DFS event found, initiate channel change */ + ieee80211_dfs_notify_radar(ic, sc->sc_curchan); + } +} + MODULE_VERSION(if_ath, 1); MODULE_DEPEND(if_ath, wlan, 1, 1, 1); /* 802.11 media layer */ diff --git a/sys/dev/ath/if_ath_ahb.c b/sys/dev/ath/if_ath_ahb.c index 33b8b92..a2bca05 100644 --- a/sys/dev/ath/if_ath_ahb.c +++ b/sys/dev/ath/if_ath_ahb.c @@ -123,7 +123,7 @@ ath_ahb_attach(device_t dev) device_printf(sc->sc_dev, "eeprom @ %p\n", (void *) eepromaddr); psc->sc_eeprom = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, (uintptr_t) eepromaddr, (uintptr_t) eepromaddr + (uintptr_t) ((ATH_EEPROM_DATA_SIZE * 2) - 1), 0, RF_ACTIVE); - if (psc->sc_sr == NULL) { + if (psc->sc_eeprom == NULL) { device_printf(dev, "cannot map eeprom space\n"); goto bad0; } @@ -139,6 +139,10 @@ ath_ahb_attach(device_t dev) /* Copy the EEPROM data out */ sc->sc_eepromdata = malloc(ATH_EEPROM_DATA_SIZE * 2, M_TEMP, M_NOWAIT | M_ZERO); + if (sc->sc_eepromdata == NULL) { + device_printf(dev, "cannot allocate memory for eeprom data\n"); + goto bad1; + } device_printf(sc->sc_dev, "eeprom data @ %p\n", (void *) rman_get_bushandle(psc->sc_eeprom)); /* XXX why doesn't this work? -adrian */ #if 0 diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c index f4ade31..348a1499 100644 --- a/sys/dev/ath/if_ath_tx_ht.c +++ b/sys/dev/ath/if_ath_tx_ht.c @@ -136,15 +136,23 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, */ if (ni->ni_chw == 40) series[i].RateFlags |= HAL_RATESERIES_2040; -#if 0 + /* - * The hardware only supports short-gi in 40mhz mode - - * if later hardware supports it in 20mhz mode, be sure - * to add the relevant check here. + * Set short-GI only if the node has advertised it + * the channel width is suitable, and we support it. + * We don't currently have a "negotiated" set of bits - + * ni_htcap is what the remote end sends, not what this + * node is capable of. 
*/ - if (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) + if (ni->ni_chw == 40 && + ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40 && + ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) + series[i].RateFlags |= HAL_RATESERIES_HALFGI; + + if (ni->ni_chw == 20 && + ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20 && + ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) series[i].RateFlags |= HAL_RATESERIES_HALFGI; -#endif series[i].Rate = rt->info[rix[i]].rateCode; diff --git a/sys/dev/ath/if_athdfs.h b/sys/dev/ath/if_athdfs.h new file mode 100644 index 0000000..88ee7fc --- /dev/null +++ b/sys/dev/ath/if_athdfs.h @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 2011 Adrian Chadd, Xenion Pty Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + * + * $FreeBSD$ + */ +#ifndef __IF_ATHDFS_H__ +#define __IF_ATHDFS_H__ + +extern int ath_dfs_attach(struct ath_softc *sc); +extern int ath_dfs_detach(struct ath_softc *sc); +extern void ath_dfs_radar_enable(struct ath_softc *, + struct ieee80211_channel *chan); +extern void ath_dfs_process_phy_err(struct ath_softc *sc, const char *buf, + uint64_t tsf, struct ath_rx_status *rxstat); +extern int ath_dfs_process_radar_event(struct ath_softc *sc, + struct ieee80211_channel *chan); +extern int ath_dfs_tasklet_needed(struct ath_softc *sc, + struct ieee80211_channel *chan); +extern int ath_ioctl_phyerr(struct ath_softc *sc, struct ath_diag *ad); +extern int ath_dfs_get_thresholds(struct ath_softc *sc, HAL_PHYERR_PARAM *param); + +#endif /* __IF_ATHDFS_H__ */ diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h index 26a50bc..3bc8522 100644 --- a/sys/dev/ath/if_athvar.h +++ b/sys/dev/ath/if_athvar.h @@ -357,6 +357,11 @@ struct ath_softc { uint16_t *sc_eepromdata; /* Local eeprom data, if AR9100 */ int sc_txchainmask; /* currently configured TX chainmask */ int sc_rxchainmask; /* currently configured RX chainmask */ + + /* DFS related state */ + void *sc_dfs; /* Used by an optional DFS module */ + int sc_dodfs; /* Whether to enable DFS rx filter bits */ + struct task sc_dfstask; /* DFS processing task */ }; #define ATH_LOCK_INIT(_sc) \ @@ -634,11 +639,11 @@ void ath_intr(void *); #define ath_hal_settpcts(_ah, _tpcts) \ ath_hal_setcapability(_ah, 
HAL_CAP_TPC_CTS, 0, _tpcts, NULL) #define ath_hal_hasintmit(_ah) \ - (ath_hal_getcapability(_ah, HAL_CAP_INTMIT, 0, NULL) == HAL_OK) + (ath_hal_getcapability(_ah, HAL_CAP_INTMIT, HAL_CAP_INTMIT_PRESENT, NULL) == HAL_OK) #define ath_hal_getintmit(_ah) \ - (ath_hal_getcapability(_ah, HAL_CAP_INTMIT, 1, NULL) == HAL_OK) + (ath_hal_getcapability(_ah, HAL_CAP_INTMIT, HAL_CAP_INTMIT_ENABLE, NULL) == HAL_OK) #define ath_hal_setintmit(_ah, _v) \ - ath_hal_setcapability(_ah, HAL_CAP_INTMIT, 1, _v, NULL) + ath_hal_setcapability(_ah, HAL_CAP_INTMIT, HAL_CAP_INTMIT_ENABLE, _v, NULL) #define ath_hal_getchannoise(_ah, _c) \ ((*(_ah)->ah_getChanNoise)((_ah), (_c))) #define ath_hal_getrxchainmask(_ah, _prxchainmask) \ @@ -694,6 +699,19 @@ void ath_intr(void *); #define ath_hal_set11nburstduration(_ah, _ds, _dur) \ ((*(_ah)->ah_set11nBurstDuration)((_ah), (_ds), (_dur))) +/* + * This is badly-named; you need to set the correct parameters + * to begin to receive useful radar events; and even then + * it doesn't "enable" DFS. See the ath_dfs/null/ module for + * more information. 
+ */ +#define ath_hal_enabledfs(_ah, _param) \ + ((*(_ah)->ah_enableDfs)((_ah), (_param))) +#define ath_hal_getdfsthresh(_ah, _param) \ + ((*(_ah)->ah_getDfsThresh)((_ah), (_param))) +#define ath_hal_procradarevent(_ah, _rxs, _fulltsf, _buf, _event) \ + ((*(_ah)->ah_procRadarEvent)((_ah), (_rxs), (_fulltsf), (_buf), (_event))) + #define ath_hal_gpioCfgOutput(_ah, _gpio, _type) \ ((*(_ah)->ah_gpioCfgOutput)((_ah), (_gpio), (_type))) #define ath_hal_gpioset(_ah, _gpio, _b) \ diff --git a/sys/dev/atkbdc/atkbd.c b/sys/dev/atkbdc/atkbd.c index 643554d..f2f5d74 100644 --- a/sys/dev/atkbdc/atkbd.c +++ b/sys/dev/atkbdc/atkbd.c @@ -1097,10 +1097,17 @@ get_typematic(keyboard_t *kbd) x86regs_t regs; uint8_t *p; - if (!(kbd->kb_config & KB_CONF_PROBE_TYPEMATIC)) - return (ENODEV); - - if (x86bios_get_intr(0x15) == 0 || x86bios_get_intr(0x16) == 0) + /* + * Traditional entry points of int 0x15 and 0x16 are fixed + * and later BIOSes follow them. (U)EFI CSM specification + * also mandates these fixed entry points. + * + * Validate the entry points here before we proceed further. + * It's known that some recent laptops does not have the + * same entry point and hang on boot if we call it. + */ + if (x86bios_get_intr(0x15) != 0xf000f859 || + x86bios_get_intr(0x16) != 0xf000e82e) return (ENODEV); /* Is BIOS system configuration table supported? 
*/ diff --git a/sys/dev/atkbdc/atkbdreg.h b/sys/dev/atkbdc/atkbdreg.h index 3d54b4d..cf7ee6b 100644 --- a/sys/dev/atkbdc/atkbdreg.h +++ b/sys/dev/atkbdc/atkbdreg.h @@ -36,7 +36,6 @@ #define KB_CONF_NO_RESET (1 << 1) /* don't reset the keyboard */ #define KB_CONF_ALT_SCANCODESET (1 << 2) /* assume the XT type keyboard */ #define KB_CONF_NO_PROBE_TEST (1 << 3) /* don't test keyboard during probe */ -#define KB_CONF_PROBE_TYPEMATIC (1 << 4) /* probe keyboard typematic */ #ifdef _KERNEL diff --git a/sys/dev/bxe/bxe_debug.h b/sys/dev/bxe/bxe_debug.h index 99cbe5d..baf0e32 100644 --- a/sys/dev/bxe/bxe_debug.h +++ b/sys/dev/bxe/bxe_debug.h @@ -41,21 +41,22 @@ extern uint32_t bxe_debug; * Debugging macros and definitions. */ -#define BXE_CP_LOAD 0x00000001 -#define BXE_CP_SEND 0x00000002 -#define BXE_CP_RECV 0x00000004 -#define BXE_CP_INTR 0x00000008 -#define BXE_CP_UNLOAD 0x00000010 -#define BXE_CP_RESET 0x00000020 -#define BXE_CP_IOCTL 0x00000040 -#define BXE_CP_STATS 0x00000080 -#define BXE_CP_MISC 0x00000100 -#define BXE_CP_PHY 0x00000200 -#define BXE_CP_RAMROD 0x00000400 -#define BXE_CP_NVRAM 0x00000800 -#define BXE_CP_REGS 0x00001000 -#define BXE_CP_ALL 0x00FFFFFF -#define BXE_CP_MASK 0x00FFFFFF +#define BXE_CP_LOAD 0x00000001 +#define BXE_CP_SEND 0x00000002 +#define BXE_CP_RECV 0x00000004 +#define BXE_CP_INTR 0x00000008 +#define BXE_CP_UNLOAD 0x00000010 +#define BXE_CP_RESET 0x00000020 +#define BXE_CP_IOCTL 0x00000040 +#define BXE_CP_STATS 0x00000080 +#define BXE_CP_MISC 0x00000100 +#define BXE_CP_PHY 0x00000200 +#define BXE_CP_RAMROD 0x00000400 +#define BXE_CP_NVRAM 0x00000800 +#define BXE_CP_REGS 0x00001000 +#define BXE_CP_TPA 0x00002000 +#define BXE_CP_ALL 0x00FFFFFF +#define BXE_CP_MASK 0x00FFFFFF #define BXE_LEVEL_FATAL 0x00000000 #define BXE_LEVEL_WARN 0x01000000 @@ -144,12 +145,18 @@ extern uint32_t bxe_debug; #define BXE_EXTREME_REGS (BXE_CP_REGS | BXE_LEVEL_EXTREME) #define BXE_INSANE_REGS (BXE_CP_REGS | BXE_LEVEL_INSANE) -#define BXE_FATAL (BXE_CP_ALL | 
BXE_LEVEL_FATAL) -#define BXE_WARN (BXE_CP_ALL | BXE_LEVEL_WARN) -#define BXE_INFO (BXE_CP_ALL | BXE_LEVEL_INFO) -#define BXE_VERBOSE (BXE_CP_ALL | BXE_LEVEL_VERBOSE) -#define BXE_EXTREME (BXE_CP_ALL | BXE_LEVEL_EXTREME) -#define BXE_INSANE (BXE_CP_ALL | BXE_LEVEL_INSANE) +#define BXE_WARN_TPA (BXE_CP_TPA | BXE_LEVEL_WARN) +#define BXE_INFO_TPA (BXE_CP_TPA | BXE_LEVEL_INFO) +#define BXE_VERBOSE_TPA (BXE_CP_TPA | BXE_LEVEL_VERBOSE) +#define BXE_EXTREME_TPA (BXE_CP_TPA | BXE_LEVEL_EXTREME) +#define BXE_INSANE_TPA (BXE_CP_TPA | BXE_LEVEL_INSANE) + +#define BXE_FATAL (BXE_CP_ALL | BXE_LEVEL_FATAL) +#define BXE_WARN (BXE_CP_ALL | BXE_LEVEL_WARN) +#define BXE_INFO (BXE_CP_ALL | BXE_LEVEL_INFO) +#define BXE_VERBOSE (BXE_CP_ALL | BXE_LEVEL_VERBOSE) +#define BXE_EXTREME (BXE_CP_ALL | BXE_LEVEL_EXTREME) +#define BXE_INSANE (BXE_CP_ALL | BXE_LEVEL_INSANE) #define BXE_CODE_PATH(cp) ((cp & BXE_CP_MASK) & bxe_debug) #define BXE_MSG_LEVEL(lv) ((lv & BXE_LEVEL_MASK) <= (bxe_debug & BXE_LEVEL_MASK)) diff --git a/sys/dev/bxe/bxe_link.c b/sys/dev/bxe/bxe_link.c index 6ee29a8..8adc87e 100644 --- a/sys/dev/bxe/bxe_link.c +++ b/sys/dev/bxe/bxe_link.c @@ -1168,15 +1168,17 @@ bxe_set_parallel_detection(struct link_params *params, uint8_t phy_flags) control2 |= MDIO_SERDES_DIGITAL_A_1000X_CONTROL2_PRL_DT_EN; else control2 &= ~MDIO_SERDES_DIGITAL_A_1000X_CONTROL2_PRL_DT_EN; - DBPRINT(sc, 1, "params->speed_cap_mask = 0x%x, control2 = 0x%x\n", - params->speed_cap_mask, control2); + + DBPRINT(sc, BXE_VERBOSE_PHY, "%s(): params->speed_cap_mask = 0x%x, " + "control2 = 0x%x\n", __FUNCTION__, params->speed_cap_mask, control2); + CL45_WR_OVER_CL22(sc, params->port, params->phy_addr, MDIO_REG_BANK_SERDES_DIGITAL, MDIO_SERDES_DIGITAL_A_1000X_CONTROL2, control2); if ((phy_flags & PHY_XGXS_FLAG) && (params->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) { - DBPRINT(sc, BXE_INFO, "XGXS\n"); + DBPRINT(sc, BXE_VERBOSE_PHY, "%s(): XGXS\n", __FUNCTION__); CL45_WR_OVER_CL22(sc, params->port, 
params->phy_addr, MDIO_REG_BANK_10G_PARALLEL_DETECT, @@ -1688,7 +1690,9 @@ bxe_flow_ctrl_resolve(struct link_params *params, struct link_vars *vars, } bxe_pause_resolve(vars, pause_result); } - DBPRINT(sc, BXE_INFO, "flow_ctrl 0x%x\n", vars->flow_ctrl); + + DBPRINT(sc, BXE_VERBOSE_PHY, "%s(): flow_ctrl 0x%x\n", + __FUNCTION__, vars->flow_ctrl); } static void @@ -1698,13 +1702,16 @@ bxe_check_fallback_to_cl37(struct link_params *params) uint16_t rx_status, ustat_val, cl37_fsm_recieved; sc = params->sc; - DBPRINT(sc, BXE_INFO, "bxe_check_fallback_to_cl37\n"); + + DBPRINT(sc, BXE_VERBOSE_PHY, "%s(): IEEE 802.3 Clause 37 Fallback\n", + __FUNCTION__); + CL45_RD_OVER_CL22(sc, params->port, params->phy_addr, MDIO_REG_BANK_RX0, MDIO_RX0_RX_STATUS, &rx_status); if ((rx_status & MDIO_RX0_RX_STATUS_SIGDET) != (MDIO_RX0_RX_STATUS_SIGDET)) { DBPRINT(sc, BXE_VERBOSE_PHY, - "Signal is not detected. Restoring CL73." + "No signal detected. Restoring CL73." "rx_status(0x80b0) = 0x%x\n", rx_status); CL45_WR_OVER_CL22(sc, params->port, params->phy_addr, MDIO_REG_BANK_CL73_IEEEB0, MDIO_CL73_IEEEB0_CL73_AN_CONTROL, @@ -1738,7 +1745,9 @@ bxe_check_fallback_to_cl37(struct link_params *params) CL45_WR_OVER_CL22(sc, params->port, params->phy_addr, MDIO_REG_BANK_CL73_IEEEB0, MDIO_CL73_IEEEB0_CL73_AN_CONTROL, 0); bxe_restart_autoneg(params, 0); - DBPRINT(sc, BXE_INFO, "Disabling CL73, and restarting CL37 autoneg\n"); + + DBPRINT(sc, BXE_INFO, "%s(): Disabling CL73 and restarting CL37 " + "autoneg\n", __FUNCTION__); } static void @@ -3391,7 +3400,8 @@ bxe_init_internal_phy(struct link_params *params, struct link_vars *vars, ((XGXS_EXT_PHY_TYPE(params->ext_phy_config) == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) && params->loopback_mode == LOOPBACK_EXT)) { - DBPRINT(sc, BXE_INFO, "not SGMII, no AN\n"); + DBPRINT(sc, BXE_VERBOSE_PHY, "%s(): Not SGMII, no AN\n", + __FUNCTION__); /* Disable autoneg. 
*/ bxe_set_autoneg(params, vars, 0); @@ -5338,9 +5348,6 @@ bxe_set_led(struct link_params *params, uint8_t mode, uint32_t speed) emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0; ext_phy_type = XGXS_EXT_PHY_TYPE(params->ext_phy_config); - DBPRINT(sc, BXE_INFO, "bxe_set_led: port %x, mode %d\n", port, mode); - DBPRINT(sc, BXE_VERBOSE_PHY, "speed 0x%x, hw_led_mode 0x%x\n", speed, - hw_led_mode); switch (mode) { case LED_MODE_OFF: REG_WR(sc, NIG_REG_LED_10G_P0 + port * 4, 0); @@ -5382,7 +5389,7 @@ bxe_set_led(struct link_params *params, uint8_t mode, uint32_t speed) default: rc = -EINVAL; DBPRINT(sc, BXE_VERBOSE_PHY, - "bxe_set_led: Invalid led mode %d\n", mode); + "%s(): Invalid led mode (%d)!\n", __FUNCTION__, mode); break; } return (rc); @@ -5635,7 +5642,10 @@ bxe_link_reset(struct link_params *params, struct link_vars *vars, ext_phy_type = XGXS_EXT_PHY_TYPE(ext_phy_config); val = REG_RD(sc, params->shmem_base + offsetof(struct shmem_region, dev_info.port_feature_config[params->port].config)); - DBPRINT(sc, BXE_INFO, "Resetting the link of port %d\n", port); + + DBPRINT(sc, BXE_INFO, "%s(): Resetting port %d link.\n", + __FUNCTION__, port); + /* Disable attentions. */ vars->link_status = 0; bxe_update_mng(params, vars->link_status); diff --git a/sys/dev/bxe/if_bxe.c b/sys/dev/bxe/if_bxe.c index cf0a40e..e7534f4 100644 --- a/sys/dev/bxe/if_bxe.c +++ b/sys/dev/bxe/if_bxe.c @@ -70,7 +70,6 @@ __FBSDID("$FreeBSD$"); #ifdef BXE_DEBUG uint32_t bxe_debug = BXE_WARN; - /* 0 = Never */ /* 1 = 1 in 2,147,483,648 */ /* 256 = 1 in 8,388,608 */ @@ -84,12 +83,9 @@ uint32_t bxe_debug = BXE_WARN; /* Controls how often to simulate an mbuf allocation failure. */ int bxe_debug_mbuf_allocation_failure = 0; -/* Controls how often to simulate a DMA mapping failure. */ +/* Controls how often to simulate a DMA mapping failure. */ int bxe_debug_dma_map_addr_failure = 0; -/* Controls how often to received frame error. 
*/ -int bxe_debug_received_frame_error = 0; - /* Controls how often to simulate a bootcode failure. */ int bxe_debug_bootcode_running_failure = 0; #endif @@ -103,7 +99,7 @@ int bxe_debug_bootcode_running_failure = 0; /* BXE Build Time Options */ /* #define BXE_NVRAM_WRITE 1 */ -#define USE_DMAE 1 +#define BXE_USE_DMAE 1 /* * PCI Device ID Table @@ -132,14 +128,17 @@ static int bxe_attach(device_t); static int bxe_detach(device_t); static int bxe_shutdown(device_t); -static void bxe_set_tunables(struct bxe_softc *); +/* + * Driver local functions. + */ +static void bxe_tunables_set(struct bxe_softc *); static void bxe_print_adapter_info(struct bxe_softc *); static void bxe_probe_pci_caps(struct bxe_softc *); static void bxe_link_settings_supported(struct bxe_softc *, uint32_t); static void bxe_link_settings_requested(struct bxe_softc *); -static int bxe_get_function_hwinfo(struct bxe_softc *); -static void bxe_get_port_hwinfo(struct bxe_softc *); -static void bxe_get_common_hwinfo(struct bxe_softc *); +static int bxe_hwinfo_function_get(struct bxe_softc *); +static int bxe_hwinfo_port_get(struct bxe_softc *); +static int bxe_hwinfo_common_get(struct bxe_softc *); static void bxe_undi_unload(struct bxe_softc *); static int bxe_setup_leading(struct bxe_softc *); static int bxe_stop_leading(struct bxe_softc *); @@ -241,8 +240,8 @@ static int bxe_tx_encap(struct bxe_fastpath *, struct mbuf **); static void bxe_tx_start(struct ifnet *); static void bxe_tx_start_locked(struct ifnet *, struct bxe_fastpath *); static int bxe_tx_mq_start(struct ifnet *, struct mbuf *); -static int bxe_tx_mq_start_locked(struct ifnet *, struct bxe_fastpath *, - struct mbuf *); +static int bxe_tx_mq_start_locked(struct ifnet *, + struct bxe_fastpath *, struct mbuf *); static void bxe_mq_flush(struct ifnet *ifp); static int bxe_ioctl(struct ifnet *, u_long, caddr_t); static __inline int bxe_has_rx_work(struct bxe_fastpath *); @@ -254,33 +253,34 @@ static void bxe_intr_sp(void *); static void 
bxe_task_fp(void *, int); static void bxe_intr_fp(void *); static void bxe_zero_sb(struct bxe_softc *, int); -static void bxe_init_sb(struct bxe_softc *, struct host_status_block *, - bus_addr_t, int); +static void bxe_init_sb(struct bxe_softc *, + struct host_status_block *, bus_addr_t, int); static void bxe_zero_def_sb(struct bxe_softc *); -static void bxe_init_def_sb(struct bxe_softc *, struct host_def_status_block *, - bus_addr_t, int); +static void bxe_init_def_sb(struct bxe_softc *, + struct host_def_status_block *, bus_addr_t, int); static void bxe_update_coalesce(struct bxe_softc *); static __inline void bxe_update_rx_prod(struct bxe_softc *, - struct bxe_fastpath *, uint16_t, uint16_t, uint16_t); + struct bxe_fastpath *, uint16_t, uint16_t, uint16_t); static void bxe_clear_sge_mask_next_elems(struct bxe_fastpath *); static __inline void bxe_init_sge_ring_bit_mask(struct bxe_fastpath *); -static __inline void bxe_free_tpa_pool(struct bxe_fastpath *, int); -static __inline void bxe_free_rx_sge(struct bxe_softc *, struct bxe_fastpath *, - uint16_t); -static __inline void bxe_free_rx_sge_range(struct bxe_softc *, - struct bxe_fastpath *, int); -static struct mbuf *bxe_alloc_mbuf(struct bxe_fastpath *, int); -static int bxe_map_mbuf(struct bxe_fastpath *, struct mbuf *, bus_dma_tag_t, - bus_dmamap_t, bus_dma_segment_t *); -static struct mbuf *bxe_alloc_tpa_mbuf(struct bxe_fastpath *, int, int); -static void bxe_alloc_mutexes(struct bxe_softc *); -static void bxe_free_mutexes(struct bxe_softc *); -static int bxe_alloc_rx_sge(struct bxe_softc *, struct bxe_fastpath *, - uint16_t); -static void bxe_init_rx_chains(struct bxe_softc *); +static int bxe_alloc_tpa_mbuf(struct bxe_fastpath *, int); +static int bxe_fill_tpa_pool(struct bxe_fastpath *); +static void bxe_free_tpa_pool(struct bxe_fastpath *); + +static int bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *, uint16_t); +static int bxe_fill_sg_chain(struct bxe_fastpath *); +static void bxe_free_sg_chain(struct 
bxe_fastpath *); + +static int bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *, uint16_t); +static int bxe_fill_rx_bd_chain(struct bxe_fastpath *); +static void bxe_free_rx_bd_chain(struct bxe_fastpath *); + +static void bxe_mutexes_alloc(struct bxe_softc *); +static void bxe_mutexes_free(struct bxe_softc *); +static void bxe_clear_rx_chains(struct bxe_softc *); +static int bxe_init_rx_chains(struct bxe_softc *); +static void bxe_clear_tx_chains(struct bxe_softc *); static void bxe_init_tx_chains(struct bxe_softc *); -static void bxe_free_rx_chains(struct bxe_softc *); -static void bxe_free_tx_chains(struct bxe_softc *); static void bxe_init_sp_ring(struct bxe_softc *); static void bxe_init_context(struct bxe_softc *); static void bxe_init_ind_table(struct bxe_softc *); @@ -291,8 +291,7 @@ static void bxe_init_internal_port(struct bxe_softc *); static void bxe_init_internal_func(struct bxe_softc *); static void bxe_init_internal(struct bxe_softc *, uint32_t); -static void bxe_init_nic(struct bxe_softc *, uint32_t); -static int bxe_gunzip_init(struct bxe_softc *); +static int bxe_init_nic(struct bxe_softc *, uint32_t); static void bxe_lb_pckt(struct bxe_softc *); static int bxe_int_mem_test(struct bxe_softc *); static void bxe_enable_blocks_attention (struct bxe_softc *); @@ -304,13 +303,9 @@ static void bxe_ilt_wr(struct bxe_softc *, uint32_t, bus_addr_t); static int bxe_init_func(struct bxe_softc *); static int bxe_init_hw(struct bxe_softc *, uint32_t); static int bxe_fw_command(struct bxe_softc *, uint32_t); -static void bxe_dma_free(struct bxe_softc *); -static void bxe_dmamem_free(struct bxe_softc *, bus_dma_tag_t, caddr_t, - bus_dmamap_t); +static void bxe_host_structures_free(struct bxe_softc *); static void bxe_dma_map_addr(void *, bus_dma_segment_t *, int, int); -static int bxe_dma_alloc(device_t); -static int bxe_dmamem_alloc(struct bxe_softc *, bus_dma_tag_t, bus_dmamap_t, - void *, uint32_t, bus_addr_t *); +static int bxe_host_structures_alloc(device_t); 
static void bxe_set_mac_addr_e1(struct bxe_softc *, int); static void bxe_set_mac_addr_e1h(struct bxe_softc *, int); static void bxe_set_rx_mode(struct bxe_softc *); @@ -330,15 +325,12 @@ static void bxe_tpa_stop(struct bxe_softc *, struct bxe_fastpath *, uint16_t, int, int, union eth_rx_cqe *, uint16_t); static void bxe_rxeof(struct bxe_fastpath *); static void bxe_txeof(struct bxe_fastpath *); -static int bxe_get_buf(struct bxe_fastpath *, struct mbuf *, uint16_t); static int bxe_watchdog(struct bxe_fastpath *fp); -static int bxe_change_mtu(struct bxe_softc *, int); static void bxe_tick(void *); static void bxe_add_sysctls(struct bxe_softc *); -static void bxe_gunzip_end(struct bxe_softc *); -static void bxe_write_dmae_phys_len(struct bxe_softc *, bus_addr_t, uint32_t, - uint32_t); +static void bxe_write_dmae_phys_len(struct bxe_softc *, + bus_addr_t, uint32_t, uint32_t); void bxe_write_dmae(struct bxe_softc *, bus_addr_t, uint32_t, uint32_t); void bxe_read_dmae(struct bxe_softc *, uint32_t, uint32_t); @@ -360,32 +352,33 @@ static int bxe_sysctl_dump_rx_bd_chain(SYSCTL_HANDLER_ARGS); static int bxe_sysctl_dump_tx_chain(SYSCTL_HANDLER_ARGS); static int bxe_sysctl_reg_read(SYSCTL_HANDLER_ARGS); static int bxe_sysctl_breakpoint(SYSCTL_HANDLER_ARGS); -static void bxe_validate_rx_packet(struct bxe_fastpath *, uint16_t, - union eth_rx_cqe *, struct mbuf *); +static __noinline void bxe_validate_rx_packet(struct bxe_fastpath *, + uint16_t, union eth_rx_cqe *, struct mbuf *); static void bxe_grcdump(struct bxe_softc *, int); -static void bxe_dump_enet(struct bxe_softc *,struct mbuf *); -static void bxe_dump_mbuf (struct bxe_softc *, struct mbuf *); -static void bxe_dump_tx_mbuf_chain(struct bxe_softc *, int, int); -static void bxe_dump_rx_mbuf_chain(struct bxe_softc *, int, int); -static void bxe_dump_tx_parsing_bd(struct bxe_fastpath *,int, - struct eth_tx_parse_bd *); -static void bxe_dump_txbd(struct bxe_fastpath *, int, - union eth_tx_bd_types *); -static void 
bxe_dump_rxbd(struct bxe_fastpath *, int, - struct eth_rx_bd *); -static void bxe_dump_cqe(struct bxe_fastpath *, int, union eth_rx_cqe *); -static void bxe_dump_tx_chain(struct bxe_fastpath *, int, int); -static void bxe_dump_rx_cq_chain(struct bxe_fastpath *, int, int); -static void bxe_dump_rx_bd_chain(struct bxe_fastpath *, int, int); -static void bxe_dump_status_block(struct bxe_softc *); -static void bxe_dump_stats_block(struct bxe_softc *); -static void bxe_dump_fp_state(struct bxe_fastpath *); -static void bxe_dump_port_state_locked(struct bxe_softc *); -static void bxe_dump_link_vars_state_locked(struct bxe_softc *); -static void bxe_dump_link_params_state_locked(struct bxe_softc *); -static void bxe_dump_driver_state(struct bxe_softc *); -static void bxe_dump_hw_state(struct bxe_softc *); -static void bxe_dump_fw(struct bxe_softc *); +static __noinline void bxe_dump_enet(struct bxe_softc *,struct mbuf *); +static __noinline void bxe_dump_mbuf (struct bxe_softc *, struct mbuf *); +static __noinline void bxe_dump_tx_mbuf_chain(struct bxe_softc *, int, int); +static __noinline void bxe_dump_rx_mbuf_chain(struct bxe_softc *, int, int); +static __noinline void bxe_dump_tx_parsing_bd(struct bxe_fastpath *,int, + struct eth_tx_parse_bd *); +static __noinline void bxe_dump_txbd(struct bxe_fastpath *, int, + union eth_tx_bd_types *); +static __noinline void bxe_dump_rxbd(struct bxe_fastpath *, int, + struct eth_rx_bd *); +static __noinline void bxe_dump_cqe(struct bxe_fastpath *, + int, union eth_rx_cqe *); +static __noinline void bxe_dump_tx_chain(struct bxe_fastpath *, int, int); +static __noinline void bxe_dump_rx_cq_chain(struct bxe_fastpath *, int, int); +static __noinline void bxe_dump_rx_bd_chain(struct bxe_fastpath *, int, int); +static __noinline void bxe_dump_status_block(struct bxe_softc *); +static __noinline void bxe_dump_stats_block(struct bxe_softc *); +static __noinline void bxe_dump_fp_state(struct bxe_fastpath *); +static __noinline void 
bxe_dump_port_state_locked(struct bxe_softc *); +static __noinline void bxe_dump_link_vars_state_locked(struct bxe_softc *); +static __noinline void bxe_dump_link_params_state_locked(struct bxe_softc *); +static __noinline void bxe_dump_driver_state(struct bxe_softc *); +static __noinline void bxe_dump_hw_state(struct bxe_softc *); +static __noinline void bxe_dump_fw(struct bxe_softc *); static void bxe_decode_mb_msgs(struct bxe_softc *, uint32_t, uint32_t); static void bxe_decode_ramrod_cmd(struct bxe_softc *, int); static void bxe_breakpoint(struct bxe_softc *); @@ -433,11 +426,6 @@ DRIVER_MODULE(bxe, pci, bxe_driver, bxe_devclass, 0, 0); SYSCTL_NODE(_hw, OID_AUTO, bxe, CTLFLAG_RD, 0, "bxe driver parameters"); /* Allowable values are TRUE (1) or FALSE (0). */ -static int bxe_stats_enable = FALSE; -TUNABLE_INT("hw.bxe.stats_enable", &bxe_stats_enable); -SYSCTL_UINT(_hw_bxe, OID_AUTO, stats_enable, CTLFLAG_RDTUN, &bxe_stats_enable, - 0, "stats Enable/Disable"); - static int bxe_dcc_enable = FALSE; TUNABLE_INT("hw.bxe.dcc_enable", &bxe_dcc_enable); SYSCTL_UINT(_hw_bxe, OID_AUTO, dcc_enable, CTLFLAG_RDTUN, &bxe_dcc_enable, @@ -456,18 +444,6 @@ SYSCTL_UINT(_hw_bxe, OID_AUTO, int_mode, CTLFLAG_RDTUN, &bxe_int_mode, 0, "Interrupt (MSI-X|MSI|INTx) mode"); /* - * Specifies whether the driver should disable Transparent Packet - * Aggregation (TPA, also known as LRO). By default TPA is enabled. - * - * Allowable values are TRUE (1) or FALSE (0). - */ -static int bxe_tpa_enable = FALSE; -TUNABLE_INT("hw.bxe.tpa_enable", &bxe_tpa_enable); -SYSCTL_UINT(_hw_bxe, OID_AUTO, tpa_enable, CTLFLAG_RDTUN, &bxe_tpa_enable, - 0, "TPA Enable/Disable"); - - -/* * Specifies the number of queues that will be used when a multi-queue * RSS mode is selected using bxe_multi_mode below. * @@ -480,8 +456,8 @@ SYSCTL_UINT(_hw_bxe, OID_AUTO, queue_count, CTLFLAG_RDTUN, &bxe_queue_count, /* * ETH_RSS_MODE_DISABLED (0) - * Disables all multi-queue/packet sorting algorithms. 
Each - * received frame is routed to the same receive queue. + * Disables all multi-queue/packet sorting algorithms. All + * received frames are routed to a single receive queue. * * ETH_RSS_MODE_REGULAR (1) * The default mode which assigns incoming frames to receive @@ -579,7 +555,7 @@ bxe_reg_write32(struct bxe_softc *sc, bus_size_t offset, uint32_t val) (uintmax_t)offset); } - DBPRINT(sc, BXE_INSANE, "%s(): offset = 0x%jX, val = 0x%08X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(): offset = 0x%jX, val = 0x%08X\n", __FUNCTION__, (uintmax_t)offset, val); bus_space_write_4(sc->bxe_btag, sc->bxe_bhandle, offset, val); @@ -602,7 +578,7 @@ bxe_reg_write16(struct bxe_softc *sc, bus_size_t offset, uint16_t val) (uintmax_t)offset); } - DBPRINT(sc, BXE_INSANE, "%s(): offset = 0x%jX, val = 0x%04X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(): offset = 0x%jX, val = 0x%04X\n", __FUNCTION__, (uintmax_t)offset, val); bus_space_write_2(sc->bxe_btag, sc->bxe_bhandle, offset, val); @@ -619,7 +595,7 @@ static void bxe_reg_write8(struct bxe_softc *sc, bus_size_t offset, uint8_t val) { - DBPRINT(sc, BXE_INSANE, "%s(): offset = 0x%jX, val = 0x%02X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(): offset = 0x%jX, val = 0x%02X\n", __FUNCTION__, (uintmax_t)offset, val); bus_space_write_1(sc->bxe_btag, sc->bxe_bhandle, offset, val); @@ -645,7 +621,7 @@ bxe_reg_read32(struct bxe_softc *sc, bus_size_t offset) val = bus_space_read_4(sc->bxe_btag, sc->bxe_bhandle, offset); - DBPRINT(sc, BXE_INSANE, "%s(): offset = 0x%jX, val = 0x%08X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(): offset = 0x%jX, val = 0x%08X\n", __FUNCTION__, (uintmax_t)offset, val); return (val); @@ -671,7 +647,7 @@ bxe_reg_read16(struct bxe_softc *sc, bus_size_t offset) val = bus_space_read_2(sc->bxe_btag, sc->bxe_bhandle, offset); - DBPRINT(sc, BXE_INSANE, "%s(): offset = 0x%jX, val = 0x%08X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(): offset = 0x%jX, val = 0x%08X\n", __FUNCTION__, (uintmax_t)offset, val); return (val); @@ -690,10 +666,10 @@ 
bxe_reg_read8(struct bxe_softc *sc, bus_size_t offset) { uint8_t val = bus_space_read_1(sc->bxe_btag, sc->bxe_bhandle, offset); - DBPRINT(sc, BXE_INSANE, "%s(): offset = 0x%jX, val = 0x%02X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(): offset = 0x%jX, val = 0x%02X\n", __FUNCTION__, (uintmax_t)offset, val); - return(val); + return (val); } #endif @@ -996,6 +972,7 @@ bxe_probe(device_t dev) * Returns: * None. */ +/* ToDo: Create a sysctl for this info. */ static void bxe_print_adapter_info(struct bxe_softc *sc) { @@ -1025,19 +1002,14 @@ bxe_print_adapter_info(struct bxe_softc *sc) printf("); Flags ("); /* Miscellaneous flags. */ - if (sc->bxe_flags & BXE_USING_MSI_FLAG) + if (sc->msi_count > 0) printf("MSI"); - if (sc->bxe_flags & BXE_USING_MSIX_FLAG) { + if (sc->msix_count > 0) { if (i > 0) printf("|"); printf("MSI-X"); i++; } - if (sc->bxe_flags & BXE_SAFC_TX_FLAG) { - if (i > 0) printf("|"); - printf("SAFC"); i++; - } - if (TPA_ENABLED(sc)) { if (i > 0) printf("|"); printf("TPA"); i++; @@ -1056,6 +1028,9 @@ bxe_print_adapter_info(struct bxe_softc *sc) break; } + printf("); BD's (RX:%d,TX:%d", + (int) USABLE_RX_BD, (int) USABLE_TX_BD); + /* Firmware versions and device features. */ printf("); Firmware (%d.%d.%d); Bootcode (%d.%d.%d)\n", BCM_5710_FW_MAJOR_VERSION, @@ -1069,6 +1044,64 @@ bxe_print_adapter_info(struct bxe_softc *sc) } /* + * Release any interrupts allocated by the driver. + * + * Returns: + * None + */ +static void +bxe_interrupt_free(struct bxe_softc *sc) +{ + device_t dev; + int i; + + DBENTER(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); + + dev = sc->dev; + + if (sc->msix_count > 0) { + /* Free MSI-X resources. 
*/ + + for (i = 0; i < sc->msix_count; i++) { + DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | + BXE_VERBOSE_INTR), "%s(): Releasing MSI-X[%d] " + "vector.\n", __FUNCTION__, i); + if (sc->bxe_msix_res[i] && sc->bxe_msix_rid[i]) + bus_release_resource(dev, SYS_RES_IRQ, + sc->bxe_msix_rid[i], sc->bxe_msix_res[i]); + } + + pci_release_msi(dev); + + } else if (sc->msi_count > 0) { + /* Free MSI resources. */ + + for (i = 0; i < sc->msi_count; i++) { + DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | + BXE_VERBOSE_INTR), "%s(): Releasing MSI[%d] " + "vector.\n", __FUNCTION__, i); + if (sc->bxe_msi_res[i] && sc->bxe_msi_rid[i]) + bus_release_resource(dev, SYS_RES_IRQ, + sc->bxe_msi_rid[i], sc->bxe_msi_res[i]); + } + + pci_release_msi(dev); + + } else { + /* Free legacy interrupt resources. */ + + DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | + BXE_VERBOSE_INTR), "%s(): Releasing legacy interrupt.\n", + __FUNCTION__); + if (sc->bxe_irq_res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, + sc->bxe_irq_rid, sc->bxe_irq_res); + } + + DBEXIT(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); +} + +/* * This function determines and allocates the appropriate * interrupt based on system capabilites and user request. * @@ -1086,30 +1119,19 @@ bxe_print_adapter_info(struct bxe_softc *sc) * 0 = Success, !0 = Failure. */ static int -bxe_interrupt_allocate(struct bxe_softc *sc) +bxe_interrupt_alloc(struct bxe_softc *sc) { device_t dev; - int i, rid, rc; + int error, i, rid, rc; int msi_count, msi_required, msi_allocated; int msix_count, msix_required, msix_allocated; - rc = 0; - dev = sc->dev; - msi_count = 0; - msi_required = 0; - msi_allocated = 0; - msix_count = 0; - msix_required = 0; - msix_allocated = 0; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_INTR); - /* Assume SAFC not enabled for TX. */ - sc->bxe_flags &= ~BXE_SAFC_TX_FLAG; - - /* Clear any previous priority queue mappings. 
*/ - for (i = 0; i < BXE_MAX_PRIORITY; i++) - sc->pri_map[i] = 0; + rc = 0; + dev = sc->dev; + msi_count = msi_required = msi_allocated = 0; + msix_count = msix_required = msix_allocated = 0; /* Get the number of available MSI/MSI-X interrupts from the OS. */ if (sc->int_mode > 0) { @@ -1140,7 +1162,8 @@ bxe_interrupt_allocate(struct bxe_softc *sc) /* BSD resource identifier */ rid = 1; - if (pci_alloc_msix(dev, &msix_allocated) == 0) { + error = pci_alloc_msix(dev, &msix_allocated); + if (error == 0) { DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_INTR), "%s(): Required/Allocated (%d/%d) MSI-X vector(s).\n", __FUNCTION__, msix_required, msix_allocated); @@ -1148,7 +1171,6 @@ bxe_interrupt_allocate(struct bxe_softc *sc) /* Make sure we got all the interrupts we asked for. */ if (msix_allocated >= msix_required) { sc->msix_count = msix_required; - sc->bxe_flags |= BXE_USING_MSIX_FLAG; msi_count = 0; /* Allocate the MSI-X vectors. */ @@ -1165,7 +1187,7 @@ bxe_interrupt_allocate(struct bxe_softc *sc) "%s(%d): Failed to map MSI-X[%d] vector!\n", __FILE__, __LINE__, (3)); rc = ENXIO; - goto bxe_interrupt_allocate_exit; + goto bxe_interrupt_alloc_exit; } } } else { @@ -1176,7 +1198,6 @@ bxe_interrupt_allocate(struct bxe_softc *sc) /* Release any resources acquired. */ pci_release_msi(dev); - sc->bxe_flags &= ~BXE_USING_MSIX_FLAG; sc->msix_count = msix_count = 0; /* We'll try MSI next. */ @@ -1200,7 +1221,8 @@ bxe_interrupt_allocate(struct bxe_softc *sc) msi_required); rid = 1; - if (pci_alloc_msi(dev, &msi_allocated) == 0) { + error = pci_alloc_msi(dev, &msi_allocated); + if (error == 0) { DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_INTR), "%s(): Required/Allocated (%d/%d) MSI vector(s).\n", __FUNCTION__, msi_required, msi_allocated); @@ -1212,7 +1234,6 @@ bxe_interrupt_allocate(struct bxe_softc *sc) */ if (msi_required >= msi_allocated) { sc->msi_count = msi_required; - sc->bxe_flags |= BXE_USING_MSI_FLAG; /* Allocate the MSI vectors. 
*/ for (i = 0; i < msi_required; i++) { sc->bxe_msi_rid[i] = i + rid; @@ -1226,7 +1247,7 @@ bxe_interrupt_allocate(struct bxe_softc *sc) "%s(%d): Failed to map MSI vector (%d)!\n", __FILE__, __LINE__, (i)); rc = ENXIO; - goto bxe_interrupt_allocate_exit; + goto bxe_interrupt_alloc_exit; } } } @@ -1237,7 +1258,6 @@ bxe_interrupt_allocate(struct bxe_softc *sc) /* Release any resources acquired. */ pci_release_msi(dev); - sc->bxe_flags &= ~BXE_USING_MSI_FLAG; sc->msi_count = msi_count = 0; /* We'll try INTx next. */ @@ -1262,7 +1282,7 @@ bxe_interrupt_allocate(struct bxe_softc *sc) BXE_PRINTF("%s(%d): PCI map interrupt failed!\n", __FILE__, __LINE__); rc = ENXIO; - goto bxe_interrupt_allocate_exit; + goto bxe_interrupt_alloc_exit; } sc->bxe_irq_rid = rid; } @@ -1271,27 +1291,55 @@ bxe_interrupt_allocate(struct bxe_softc *sc) "%s(): Actual: int_mode = %d, multi_mode = %d, num_queues = %d\n", __FUNCTION__, sc->int_mode, sc->multi_mode, sc->num_queues); -bxe_interrupt_allocate_exit: +bxe_interrupt_alloc_exit: DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_INTR); return (rc); } +/* + * This function releases taskqueues. + * + * Returns: + * None + */ static void bxe_interrupt_detach(struct bxe_softc *sc) { +#ifdef BXE_TASK + struct bxe_fastpath *fp; +#endif device_t dev; int i; + DBENTER(BXE_VERBOSE_UNLOAD); + dev = sc->dev; - DBENTER(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); + +#ifdef BXE_TASK + /* Free the OS taskqueue resources. */ + for (i = 0; i < sc->num_queues; i++) { + fp = &sc->fp[i]; + + if (fp->tq != NULL) { + taskqueue_drain(fp->tq, &fp->task); + taskqueue_free(fp->tq); + } + } + + if (sc->tq != NULL) { + taskqueue_drain(sc->tq, &sc->task); + taskqueue_free(sc->tq); + } +#endif + /* Release interrupt resources. 
*/ - if ((sc->bxe_flags & BXE_USING_MSIX_FLAG) && sc->msix_count) { + if (sc->msix_count > 0) { for (i = 0; i < sc->msix_count; i++) { if (sc->bxe_msix_tag[i] && sc->bxe_msix_res[i]) bus_teardown_intr(dev, sc->bxe_msix_res[i], sc->bxe_msix_tag[i]); } - } else if ((sc->bxe_flags & BXE_USING_MSI_FLAG) && sc->msi_count) { + } else if (sc->msi_count > 0) { for (i = 0; i < sc->msi_count; i++) { if (sc->bxe_msi_tag[i] && sc->bxe_msi_res[i]) bus_teardown_intr(dev, sc->bxe_msi_res[i], @@ -1302,6 +1350,8 @@ bxe_interrupt_detach(struct bxe_softc *sc) bus_teardown_intr(dev, sc->bxe_irq_res, sc->bxe_irq_tag); } + + DBEXIT(BXE_VERBOSE_UNLOAD); } /* @@ -1336,7 +1386,7 @@ bxe_interrupt_attach(struct bxe_softc *sc) #endif /* Setup interrupt handlers. */ - if (sc->bxe_flags & BXE_USING_MSIX_FLAG) { + if (sc->msix_count > 0) { DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_INTR), "%s(): Enabling slowpath MSI-X[0] vector.\n",__FUNCTION__); /* @@ -1344,13 +1394,9 @@ bxe_interrupt_attach(struct bxe_softc *sc) * driver instance to the interrupt handler for the * slowpath. */ - rc = bus_setup_intr(sc->dev, - sc->bxe_msix_res[0], - INTR_TYPE_NET | INTR_MPSAFE, - NULL, - bxe_intr_sp, - sc, - &sc->bxe_msix_tag[0]); + rc = bus_setup_intr(sc->dev, sc->bxe_msix_res[0], + INTR_TYPE_NET | INTR_MPSAFE, NULL, bxe_intr_sp, + sc, &sc->bxe_msix_tag[0]); if (rc) { BXE_PRINTF( @@ -1360,10 +1406,8 @@ bxe_interrupt_attach(struct bxe_softc *sc) } #if __FreeBSD_version >= 800504 - bus_describe_intr(sc->dev, - sc->bxe_msix_res[0], - sc->bxe_msix_tag[0], - "sp"); + bus_describe_intr(sc->dev, sc->bxe_msix_res[0], + sc->bxe_msix_tag[0], "sp"); #endif /* Now initialize the fastpath vectors. */ @@ -1377,13 +1421,9 @@ bxe_interrupt_attach(struct bxe_softc *sc) * fastpath context to the interrupt handler in this * case. Also the first msix_res was used by the sp. 
*/ - rc = bus_setup_intr(sc->dev, - sc->bxe_msix_res[i + 1], - INTR_TYPE_NET | INTR_MPSAFE, - NULL, - bxe_intr_fp, - fp, - &sc->bxe_msix_tag[i + 1]); + rc = bus_setup_intr(sc->dev, sc->bxe_msix_res[i + 1], + INTR_TYPE_NET | INTR_MPSAFE, NULL, bxe_intr_fp, + fp, &sc->bxe_msix_tag[i + 1]); if (rc) { BXE_PRINTF( @@ -1393,11 +1433,8 @@ bxe_interrupt_attach(struct bxe_softc *sc) } #if __FreeBSD_version >= 800504 - bus_describe_intr(sc->dev, - sc->bxe_msix_res[i + 1], - sc->bxe_msix_tag[i + 1], - "fp[%02d]", - i); + bus_describe_intr(sc->dev, sc->bxe_msix_res[i + 1], + sc->bxe_msix_tag[i + 1], "fp[%02d]", i); #endif /* Bind the fastpath instance to a CPU. */ @@ -1409,13 +1446,13 @@ bxe_interrupt_attach(struct bxe_softc *sc) #ifdef BXE_TASK TASK_INIT(&fp->task, 0, bxe_task_fp, fp); fp->tq = taskqueue_create_fast("bxe_fpq", M_NOWAIT, - taskqueue_thread_enqueue, &fp->tq); + taskqueue_thread_enqueue, &fp->tq); taskqueue_start_threads(&fp->tq, 1, PI_NET, "%s fpq", - device_get_nameunit(sc->dev)); + device_get_nameunit(sc->dev)); #endif fp->state = BXE_FP_STATE_IRQ; } - } else if (sc->bxe_flags & BXE_USING_MSI_FLAG) { + } else if (sc->msi_count > 0) { DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_INTR), "%s(): Enabling slowpath MSI[0] vector.\n", __FUNCTION__); @@ -1424,12 +1461,8 @@ bxe_interrupt_attach(struct bxe_softc *sc) * instance to the interrupt handler for the slowpath. */ rc = bus_setup_intr(sc->dev,sc->bxe_msi_res[0], - INTR_TYPE_NET | INTR_MPSAFE, - NULL, - bxe_intr_sp, - sc, - &sc->bxe_msi_tag[0] - ); + INTR_TYPE_NET | INTR_MPSAFE, NULL, bxe_intr_sp, + sc, &sc->bxe_msi_tag[0]); if (rc) { BXE_PRINTF( @@ -1439,10 +1472,8 @@ bxe_interrupt_attach(struct bxe_softc *sc) } #if __FreeBSD_version >= 800504 - bus_describe_intr(sc->dev, - sc->bxe_msi_res[0], - sc->bxe_msi_tag[0], - "sp"); + bus_describe_intr(sc->dev, sc->bxe_msi_res[0], + sc->bxe_msi_tag[0], "sp"); #endif /* Now initialize the fastpath vectors. 
*/ @@ -1457,14 +1488,9 @@ bxe_interrupt_attach(struct bxe_softc *sc) * fastpath context to the interrupt handler in this * case. */ - rc = bus_setup_intr(sc->dev, - sc->bxe_msi_res[i + 1], - INTR_TYPE_NET | INTR_MPSAFE, - NULL, - bxe_intr_fp, - fp, - &sc->bxe_msi_tag[i + 1] - ); + rc = bus_setup_intr(sc->dev, sc->bxe_msi_res[i + 1], + INTR_TYPE_NET | INTR_MPSAFE, NULL, bxe_intr_fp, + fp, &sc->bxe_msi_tag[i + 1]); if (rc) { BXE_PRINTF( @@ -1474,19 +1500,16 @@ bxe_interrupt_attach(struct bxe_softc *sc) } #if __FreeBSD_version >= 800504 - bus_describe_intr(sc->dev, - sc->bxe_msi_res[i + 1], - sc->bxe_msi_tag[i + 1], - "fp[%02d]", - i); + bus_describe_intr(sc->dev, sc->bxe_msi_res[i + 1], + sc->bxe_msi_tag[i + 1], "fp[%02d]", i); #endif #ifdef BXE_TASK TASK_INIT(&fp->task, 0, bxe_task_fp, fp); fp->tq = taskqueue_create_fast("bxe_fpq", M_NOWAIT, - taskqueue_thread_enqueue, &fp->tq); + taskqueue_thread_enqueue, &fp->tq); taskqueue_start_threads(&fp->tq, 1, PI_NET, "%s fpq", - device_get_nameunit(sc->dev)); + device_get_nameunit(sc->dev)); #endif } @@ -1495,23 +1518,19 @@ bxe_interrupt_attach(struct bxe_softc *sc) fp = &sc->fp[0]; #endif DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_INTR), - "%s(): Enabling INTx interrupts.\n", __FUNCTION__); + "%s(): Enabling INTx interrupts.\n", __FUNCTION__); /* * Setup the interrupt handler. Note that we pass the * driver instance to the interrupt handler which * will handle both the slowpath and fastpath. 
*/ - rc = bus_setup_intr(sc->dev,sc->bxe_irq_res, - INTR_TYPE_NET | INTR_MPSAFE, - NULL, - bxe_intr_legacy, - sc, - &sc->bxe_irq_tag); + rc = bus_setup_intr(sc->dev,sc->bxe_irq_res, INTR_TYPE_NET | + INTR_MPSAFE, NULL, bxe_intr_legacy, sc, &sc->bxe_irq_tag); if (rc) { BXE_PRINTF("%s(%d): Failed to allocate interrupt!\n", - __FILE__, __LINE__); + __FILE__, __LINE__); goto bxe_interrupt_attach_exit; } #ifdef BXE_TASK @@ -1616,56 +1635,78 @@ bxe_probe_pci_caps(struct bxe_softc *sc) DBEXIT(BXE_EXTREME_LOAD); } +/* + * Setup firmware pointers for BCM57710. + * + * Returns: + * None + */ static void bxe_init_e1_firmware(struct bxe_softc *sc) { - INIT_OPS(sc) = (struct raw_op *)init_ops_e1; - INIT_DATA(sc) = (const uint32_t *)init_data_e1; - INIT_OPS_OFFSETS(sc) = (const uint16_t *)init_ops_offsets_e1; - INIT_TSEM_INT_TABLE_DATA(sc) = tsem_int_table_data_e1; - INIT_TSEM_PRAM_DATA(sc) = tsem_pram_data_e1; - INIT_USEM_INT_TABLE_DATA(sc) = usem_int_table_data_e1; - INIT_USEM_PRAM_DATA(sc) = usem_pram_data_e1; - INIT_XSEM_INT_TABLE_DATA(sc) = xsem_int_table_data_e1; - INIT_XSEM_PRAM_DATA(sc) = xsem_pram_data_e1; - INIT_CSEM_INT_TABLE_DATA(sc) = csem_int_table_data_e1; - INIT_CSEM_PRAM_DATA(sc) = csem_pram_data_e1; + INIT_OPS(sc) = (struct raw_op *)init_ops_e1; + INIT_DATA(sc) = (const uint32_t *)init_data_e1; + INIT_OPS_OFFSETS(sc) = (const uint16_t *)init_ops_offsets_e1; + INIT_TSEM_INT_TABLE_DATA(sc) = tsem_int_table_data_e1; + INIT_TSEM_PRAM_DATA(sc) = tsem_pram_data_e1; + INIT_USEM_INT_TABLE_DATA(sc) = usem_int_table_data_e1; + INIT_USEM_PRAM_DATA(sc) = usem_pram_data_e1; + INIT_XSEM_INT_TABLE_DATA(sc) = xsem_int_table_data_e1; + INIT_XSEM_PRAM_DATA(sc) = xsem_pram_data_e1; + INIT_CSEM_INT_TABLE_DATA(sc) = csem_int_table_data_e1; + INIT_CSEM_PRAM_DATA(sc) = csem_pram_data_e1; } +/* + * Setup firmware pointers for BCM57711. 
+ * + * Returns: + * None + */ static void bxe_init_e1h_firmware(struct bxe_softc *sc) { - INIT_OPS(sc) = (struct raw_op *)init_ops_e1h; - INIT_DATA(sc) = (const uint32_t *)init_data_e1h; - INIT_OPS_OFFSETS(sc) = (const uint16_t *)init_ops_offsets_e1h; - INIT_TSEM_INT_TABLE_DATA(sc) = tsem_int_table_data_e1h; - INIT_TSEM_PRAM_DATA(sc) = tsem_pram_data_e1h; - INIT_USEM_INT_TABLE_DATA(sc) = usem_int_table_data_e1h; - INIT_USEM_PRAM_DATA(sc) = usem_pram_data_e1h; - INIT_XSEM_INT_TABLE_DATA(sc) = xsem_int_table_data_e1h; - INIT_XSEM_PRAM_DATA(sc) = xsem_pram_data_e1h; - INIT_CSEM_INT_TABLE_DATA(sc) = csem_int_table_data_e1h; - INIT_CSEM_PRAM_DATA(sc) = csem_pram_data_e1h; + INIT_OPS(sc) = (struct raw_op *)init_ops_e1h; + INIT_DATA(sc) = (const uint32_t *)init_data_e1h; + INIT_OPS_OFFSETS(sc) = (const uint16_t *)init_ops_offsets_e1h; + INIT_TSEM_INT_TABLE_DATA(sc) = tsem_int_table_data_e1h; + INIT_TSEM_PRAM_DATA(sc) = tsem_pram_data_e1h; + INIT_USEM_INT_TABLE_DATA(sc) = usem_int_table_data_e1h; + INIT_USEM_PRAM_DATA(sc) = usem_pram_data_e1h; + INIT_XSEM_INT_TABLE_DATA(sc) = xsem_int_table_data_e1h; + INIT_XSEM_PRAM_DATA(sc) = xsem_pram_data_e1h; + INIT_CSEM_INT_TABLE_DATA(sc) = csem_int_table_data_e1h; + INIT_CSEM_PRAM_DATA(sc) = csem_pram_data_e1h; } +/* + * Sets up pointers for loading controller firmware. + * + * Returns: + * 0 = Success, !0 = Failure + */ static int bxe_init_firmware(struct bxe_softc *sc) { + int rc; + + rc = 0; + if (CHIP_IS_E1(sc)) bxe_init_e1_firmware(sc); else if (CHIP_IS_E1H(sc)) bxe_init_e1h_firmware(sc); else { - BXE_PRINTF("%s(%d): Unsupported chip revision\n", + BXE_PRINTF("%s(%d): No firmware to support chip revision!\n", __FILE__, __LINE__); - return (ENXIO); + rc = ENXIO; } - return (0); -} + return (rc); +} static void -bxe_set_tunables(struct bxe_softc *sc) +bxe_tunables_set(struct bxe_softc *sc) { /* * Get our starting point for interrupt mode/number of queues. 
@@ -1724,15 +1765,7 @@ bxe_set_tunables(struct bxe_softc *sc) "%s(): Requested: int_mode = %d, multi_mode = %d num_queues = %d\n", __FUNCTION__, sc->int_mode, sc->multi_mode, sc->num_queues); - /* Set transparent packet aggregation (TPA), aka LRO, flag. */ - if (bxe_tpa_enable!= FALSE) - sc->bxe_flags |= BXE_TPA_ENABLE_FLAG; - - /* Capture the stats enable/disable setting. */ - if (bxe_stats_enable == FALSE) - sc->stats_enable = FALSE; - else - sc->stats_enable = TRUE; + sc->stats_enable = TRUE; /* Select the host coalescing tick count values (limit values). */ if (bxe_tx_ticks > 100) { @@ -1766,11 +1799,13 @@ bxe_set_tunables(struct bxe_softc *sc) /* + * Allocates PCI resources from OS. + * * Returns: * 0 = Success, !0 = Failure */ static int -bxe_alloc_pci_resources(struct bxe_softc *sc) +bxe_pci_resources_alloc(struct bxe_softc *sc) { int rid, rc = 0; @@ -1782,32 +1817,32 @@ bxe_alloc_pci_resources(struct bxe_softc *sc) * processor memory. */ rid = PCIR_BAR(0); - sc->bxe_res = bus_alloc_resource_any( - sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + sc->bxe_res = bus_alloc_resource_any(sc->dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bxe_res == NULL) { BXE_PRINTF("%s(%d):PCI BAR0 memory allocation failed\n", __FILE__, __LINE__); rc = ENXIO; - goto bxe_alloc_pci_resources_exit; + goto bxe_pci_resources_alloc_exit; } /* Get OS resource handles for BAR0 memory. */ - sc->bxe_btag = rman_get_bustag(sc->bxe_res); - sc->bxe_bhandle = rman_get_bushandle(sc->bxe_res); - sc->bxe_vhandle = (vm_offset_t) rman_get_virtual(sc->bxe_res); + sc->bxe_btag = rman_get_bustag(sc->bxe_res); + sc->bxe_bhandle = rman_get_bushandle(sc->bxe_res); + sc->bxe_vhandle = (vm_offset_t) rman_get_virtual(sc->bxe_res); /* * Allocate PCI memory resources for BAR2. * Doorbell (DB) memory. 
*/ rid = PCIR_BAR(2); - sc->bxe_db_res = bus_alloc_resource_any( - sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + sc->bxe_db_res = bus_alloc_resource_any(sc->dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->bxe_db_res == NULL) { BXE_PRINTF("%s(%d): PCI BAR2 memory allocation failed\n", __FILE__, __LINE__); rc = ENXIO; - goto bxe_alloc_pci_resources_exit; + goto bxe_pci_resources_alloc_exit; } /* Get OS resource handles for BAR2 memory. */ @@ -1815,45 +1850,52 @@ bxe_alloc_pci_resources(struct bxe_softc *sc) sc->bxe_db_bhandle = rman_get_bushandle(sc->bxe_db_res); sc->bxe_db_vhandle = (vm_offset_t) rman_get_virtual(sc->bxe_db_res); -bxe_alloc_pci_resources_exit: +bxe_pci_resources_alloc_exit: DBEXIT(BXE_VERBOSE_LOAD); - return(rc); + return (rc); } /* + * Frees PCI resources allocated in bxe_pci_resources_alloc(). + * * Returns: * None */ static void -bxe_release_pci_resources(struct bxe_softc *sc) +bxe_pci_resources_free(struct bxe_softc *sc) { + DBENTER(BXE_VERBOSE_UNLOAD); + /* Release the PCIe BAR0 mapped memory. */ if (sc->bxe_res != NULL) { - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): Releasing PCI BAR0 memory.\n", __FUNCTION__); - bus_release_resource(sc->dev, - SYS_RES_MEMORY, PCIR_BAR(0), sc->bxe_res); + bus_release_resource(sc->dev, SYS_RES_MEMORY, + PCIR_BAR(0), sc->bxe_res); } /* Release the PCIe BAR2 (doorbell) mapped memory. */ if (sc->bxe_db_res != NULL) { - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): Releasing PCI BAR2 memory.\n", __FUNCTION__); - bus_release_resource(sc->dev, - SYS_RES_MEMORY, PCIR_BAR(2), sc->bxe_db_res); + bus_release_resource(sc->dev, SYS_RES_MEMORY, + PCIR_BAR(2), sc->bxe_db_res); } + + DBENTER(BXE_VERBOSE_UNLOAD); } /* + * Determines the media reported to the OS by examining + * the installed PHY type. + * * Returns: * 0 = Success, !0 = Failure */ static int bxe_media_detect(struct bxe_softc *sc) { - int rc = 0; + int rc; + + rc = 0; /* Identify supported media based on the PHY type. 
*/ switch (XGXS_EXT_PHY_TYPE(sc->link_params.ext_phy_config)) { @@ -1887,8 +1929,6 @@ bxe_media_detect(struct bxe_softc *sc) case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE: case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN: default: - BXE_PRINTF("%s(%d): PHY not supported by driver!\n", - __FILE__, __LINE__); sc->media = 0; rc = ENODEV; } @@ -1915,7 +1955,7 @@ bxe_attach(device_t dev) int rc; sc = device_get_softc(dev); - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + DBENTER(BXE_INFO_LOAD | BXE_INFO_RESET); sc->dev = dev; sc->bxe_unit = device_get_unit(dev); @@ -1923,18 +1963,34 @@ bxe_attach(device_t dev) sc->bxe_flags = 0; sc->state = BXE_STATE_CLOSED; rc = 0; - bxe_set_tunables(sc); - bxe_alloc_mutexes(sc); + DBPRINT(sc, BXE_FATAL, "%s(): ************************\n", + __FUNCTION__); + DBPRINT(sc, BXE_FATAL, "%s(): ** Debug mode enabled **\n", + __FUNCTION__); + DBPRINT(sc, BXE_FATAL, "%s(): ************************\n", + __FUNCTION__); + DBPRINT(sc, BXE_FATAL, "%s(): sc vaddr = 0x%08X:%08X\n", + __FUNCTION__, (uint32_t) U64_HI(sc), (uint32_t) U64_LO(sc)); + + /* Get the user configurable values for driver load. */ + bxe_tunables_set(sc); - /* Prepare the tick routine. */ - callout_init(&sc->bxe_tick_callout, CALLOUT_MPSAFE); + bxe_mutexes_alloc(sc); + + /* Prepare tick routine. */ + callout_init_mtx(&sc->bxe_tick_callout, &sc->bxe_core_mtx, 0); /* Enable bus master capability */ pci_enable_busmaster(dev); - if ((rc = bxe_alloc_pci_resources(sc)) != 0) + /* Enable PCI BAR mapped memory for register access. */ + rc = bxe_pci_resources_alloc(sc); + if (rc != 0) { + BXE_PRINTF("%s(%d): Error allocating PCI resources!\n", + __FILE__, __LINE__); goto bxe_attach_fail; + } /* Put indirect address registers into a sane state. */ pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, @@ -1945,19 +2001,26 @@ bxe_attach(device_t dev) REG_WR(sc, PXP2_REG_PGL_ADDR_94_F0 + BP_PORT(sc) * 16, 0); /* Get hardware info from shared memory and validate data. 
*/ - if (bxe_get_function_hwinfo(sc)) { + rc = bxe_hwinfo_function_get(sc); + if (rc != 0) { DBPRINT(sc, BXE_WARN, "%s(): Failed to get hardware info!\n", __FUNCTION__); - rc = ENODEV; goto bxe_attach_fail; } /* Setup supported media options. */ - if ((rc = bxe_media_detect(sc)) != 0) + rc = bxe_media_detect(sc); + if (rc != 0) { + BXE_PRINTF("%s(%d): Unknown media (PHY) type!\n", + __FILE__, __LINE__); goto bxe_attach_fail; + } + /* Interface entrypoint for media type/status reporting. */ ifmedia_init(&sc->bxe_ifmedia, IFM_IMASK, bxe_ifmedia_upd, bxe_ifmedia_status); + + /* Default interface values. */ ifmedia_add(&sc->bxe_ifmedia, IFM_ETHER | sc->media | IFM_FDX, 0, NULL); ifmedia_add(&sc->bxe_ifmedia, @@ -1967,38 +2030,37 @@ bxe_attach(device_t dev) sc->bxe_ifmedia.ifm_media = sc->bxe_ifmedia.ifm_cur->ifm_media; - /* Set init arrays */ + /* Setup firmware arrays (firmware load comes later). */ rc = bxe_init_firmware(sc); if (rc) { - BXE_PRINTF("%s(%d): Error loading firmware\n", + BXE_PRINTF("%s(%d): Error preparing firmware load!\n", __FILE__, __LINE__); goto bxe_attach_fail; } - #ifdef BXE_DEBUG /* Allocate a memory buffer for grcdump output.*/ sc->grcdump_buffer = malloc(BXE_GRCDUMP_BUF_SIZE, M_TEMP, M_NOWAIT); if (sc->grcdump_buffer == NULL) { - /* Failure is OK, just print a message and continue attach. 
*/ BXE_PRINTF("%s(%d): Failed to allocate grcdump memory " "buffer!\n", __FILE__, __LINE__); + rc = ENOBUFS; } #endif /* Check that NVRAM contents are valid.*/ - if (bxe_nvram_test(sc)) { + rc = bxe_nvram_test(sc); + if (rc != 0) { BXE_PRINTF("%s(%d): Failed NVRAM test!\n", __FILE__, __LINE__); - rc = ENODEV; goto bxe_attach_fail; } /* Allocate the appropriate interrupts.*/ - if (bxe_interrupt_allocate(sc)) { + rc = bxe_interrupt_alloc(sc); + if (rc != 0) { BXE_PRINTF("%s(%d): Interrupt allocation failed!\n", __FILE__, __LINE__); - rc = ENODEV; goto bxe_attach_fail; } @@ -2016,7 +2078,7 @@ bxe_attach(device_t dev) } /* Check if PXE/UNDI is still active and unload it. */ - if (!BP_NOMCP(sc)) + if (!NOMCP(sc)) bxe_undi_unload(sc); /* @@ -2032,6 +2094,7 @@ bxe_attach(device_t dev) sc->rx_ring_size = USABLE_RX_BD; /* Assume receive IP/TCP/UDP checksum is enabled. */ + /* ToDo: Change when IOCTL changes checksum offload? */ sc->rx_csum = 1; /* Disable WoL. */ @@ -2041,10 +2104,10 @@ bxe_attach(device_t dev) sc->mbuf_alloc_size = MCLBYTES; /* Allocate DMA memory resources. */ - if (bxe_dma_alloc(sc->dev)) { + rc = bxe_host_structures_alloc(sc->dev); + if (rc != 0) { BXE_PRINTF("%s(%d): DMA memory allocation failed!\n", __FILE__, __LINE__); - rc = ENOMEM; goto bxe_attach_fail; } @@ -2060,10 +2123,13 @@ bxe_attach(device_t dev) /* Initialize the FreeBSD ifnet interface. */ ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + + /* Written by driver before attach, read-only afterwards. */ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + + /* Driver entrypoints from the network interface. 
*/ ifp->if_ioctl = bxe_ioctl; ifp->if_start = bxe_tx_start; - #if __FreeBSD_version >= 800000 ifp->if_transmit = bxe_tx_mq_start; ifp->if_qflush = bxe_mq_flush; @@ -2077,10 +2143,8 @@ bxe_attach(device_t dev) ifp->if_mtu = ETHERMTU; ifp->if_hwassist = BXE_IF_HWASSIST; ifp->if_capabilities = BXE_IF_CAPABILITIES; - if (TPA_ENABLED(sc)) { - ifp->if_capabilities |= IFCAP_LRO; - } - ifp->if_capenable = ifp->if_capabilities; + /* TPA not enabled by default. */ + ifp->if_capenable = BXE_IF_CAPABILITIES & ~IFCAP_LRO; ifp->if_baudrate = IF_Gbps(10UL); ifp->if_snd.ifq_drv_maxlen = sc->tx_ring_size; @@ -2092,7 +2156,8 @@ bxe_attach(device_t dev) ether_ifattach(ifp, sc->link_params.mac_addr); /* Attach the interrupts to the interrupt handlers. */ - if (bxe_interrupt_attach(sc)) { + rc = bxe_interrupt_attach(sc); + if (rc != 0) { BXE_PRINTF("%s(%d): Interrupt allocation failed!\n", __FILE__, __LINE__); goto bxe_attach_fail; @@ -2108,8 +2173,8 @@ bxe_attach_fail: if (rc != 0) bxe_detach(dev); - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); - return(rc); + DBEXIT(BXE_INFO_LOAD | BXE_INFO_RESET); + return (rc); } @@ -2593,7 +2658,7 @@ bxe_link_settings_requested_exit: * 0 = Success, !0 = Failure */ static int -bxe_get_function_hwinfo(struct bxe_softc *sc) +bxe_hwinfo_function_get(struct bxe_softc *sc) { uint32_t mac_hi, mac_lo, val; int func, rc; @@ -2604,7 +2669,7 @@ bxe_get_function_hwinfo(struct bxe_softc *sc) func = BP_FUNC(sc); /* Get the common hardware configuration first. */ - bxe_get_common_hwinfo(sc); + bxe_hwinfo_common_get(sc); /* Assume no outer VLAN/multi-function support. 
*/ sc->e1hov = sc->e1hmf = 0; @@ -2621,13 +2686,13 @@ bxe_get_function_hwinfo(struct bxe_softc *sc) } else { if (BP_E1HVN(sc)) { rc = EPERM; - goto bxe_get_function_hwinfo_exit; + goto bxe_hwinfo_function_get_exit; } } } - if (!BP_NOMCP(sc)) { - bxe_get_port_hwinfo(sc); + if (!NOMCP(sc)) { + bxe_hwinfo_port_get(sc); sc->fw_seq = SHMEM_RD(sc, func_mb[func].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK; } @@ -2636,7 +2701,7 @@ bxe_get_function_hwinfo(struct bxe_softc *sc) /* * Fetch the factory configured MAC address for multi function * devices. If this is not a multi-function device then the MAC - * address was already read in the bxe_get_port_hwinfo() routine. + * address was already read in the bxe_hwinfo_port_get() routine. * The MAC addresses used by the port are not the same as the MAC * addressed used by the function. */ @@ -2647,6 +2712,7 @@ bxe_get_function_hwinfo(struct bxe_softc *sc) if ((mac_lo == 0) && (mac_hi == 0)) { BXE_PRINTF("%s(%d): Invalid Ethernet address!\n", __FILE__, __LINE__); + rc = ENODEV; } else { sc->link_params.mac_addr[0] = (u_char)(mac_hi >> 8); sc->link_params.mac_addr[1] = (u_char)(mac_hi); @@ -2658,9 +2724,9 @@ bxe_get_function_hwinfo(struct bxe_softc *sc) } -bxe_get_function_hwinfo_exit: +bxe_hwinfo_function_get_exit: DBEXIT(BXE_VERBOSE_LOAD); - return(rc); + return (rc); } @@ -2674,15 +2740,16 @@ bxe_get_function_hwinfo_exit: * for future use. 
* * Returns: - * None + * 0 = Success, !0 = Failure */ -static void -bxe_get_port_hwinfo(struct bxe_softc *sc) +static int +bxe_hwinfo_port_get(struct bxe_softc *sc) { - int i, port; + int i, port, rc; uint32_t val, mac_hi, mac_lo; DBENTER(BXE_VERBOSE_LOAD); + rc = 0; port = BP_PORT(sc); sc->link_params.sc = sc; @@ -2736,6 +2803,7 @@ bxe_get_port_hwinfo(struct bxe_softc *sc) if (mac_lo == 0 && mac_hi == 0) { BXE_PRINTF("%s(%d): No Ethernet address programmed on the " "controller!\n", __FILE__, __LINE__); + rc = ENODEV; } else { sc->link_params.mac_addr[0] = (u_char)(mac_hi >> 8); sc->link_params.mac_addr[1] = (u_char)(mac_hi); @@ -2746,6 +2814,7 @@ bxe_get_port_hwinfo(struct bxe_softc *sc) } DBEXIT(BXE_VERBOSE_LOAD); + return (rc); } @@ -2753,17 +2822,22 @@ bxe_get_port_hwinfo(struct bxe_softc *sc) * Get common hardware configuration. * * Multiple port devices such as the BCM57710 have configuration - * information that is specific to each Ethernet port of the controller. + * information that is shared between all ports of the Ethernet + * controller. This function reads that configuration + * information from the bootcode's shared memory and saves it + * for future use. * * Returns: - * None + * 0 = Success, !0 = Failure */ -static void -bxe_get_common_hwinfo(struct bxe_softc *sc) +static int +bxe_hwinfo_common_get(struct bxe_softc *sc) { uint32_t val; + int rc; DBENTER(BXE_VERBOSE_LOAD); + rc = 0; /* Get the chip revision. */ sc->common.chip_id = sc->link_params.chip_id = @@ -2806,10 +2880,12 @@ bxe_get_common_hwinfo(struct bxe_softc *sc) (sc->common.shmem_base < 0xA0000) || (sc->common.shmem_base > 0xC0000)) { - DBPRINT(sc, BXE_FATAL, "%s(): MCP is not active!\n", - __FUNCTION__); + BXE_PRINTF("%s(%d): MCP is not active!\n", + __FILE__, __LINE__); + /* ToDo: Remove the NOMCP support. */ sc->bxe_flags |= BXE_NO_MCP_FLAG; - goto bxe_get_common_hwinfo_exit; + rc = ENODEV; + goto bxe_hwinfo_common_get_exit; } /* Make sure the shared memory contents are valid. 
*/ @@ -2818,7 +2894,8 @@ bxe_get_common_hwinfo(struct bxe_softc *sc) (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) { BXE_PRINTF("%s(%d): Invalid NVRAM! Bad validity " "signature.\n", __FILE__, __LINE__); - goto bxe_get_common_hwinfo_exit; + rc = ENODEV; + goto bxe_hwinfo_common_get_exit; } /* Read the device configuration from shared memory. */ @@ -2854,11 +2931,13 @@ bxe_get_common_hwinfo(struct bxe_softc *sc) BXE_PRINTF("%s(%d): Warning: This driver needs bootcode " "0x%08X but found 0x%08X, please upgrade!\n", __FILE__, __LINE__, MIN_BXE_BC_VER, sc->common.bc_ver); - goto bxe_get_common_hwinfo_exit; + rc = ENODEV; + goto bxe_hwinfo_common_get_exit; } -bxe_get_common_hwinfo_exit: +bxe_hwinfo_common_get_exit: DBEXIT(BXE_VERBOSE_LOAD); + return (rc); } @@ -2979,51 +3058,45 @@ bxe_undi_unload(struct bxe_softc *sc) * Stops the controller, resets the controller, and releases resources. * * Returns: - * 0 on success, positive value on failure. + * 0 on success, !0 = failure. */ static int bxe_detach(device_t dev) { struct bxe_softc *sc; struct ifnet *ifp; -#ifdef BXE_TASK - struct bxe_fastpath *fp; - int i; -#endif + int rc; sc = device_get_softc(dev); - DBENTER(BXE_VERBOSE_RESET); + DBENTER(BXE_INFO_UNLOAD); + + rc = 0; ifp = sc->bxe_ifp; if (ifp != NULL && ifp->if_vlantrunk != NULL) { BXE_PRINTF("%s(%d): Cannot detach while VLANs are in use.\n", __FILE__, __LINE__); - return(EBUSY); + rc = EBUSY; + goto bxe_detach_exit; } /* Stop and reset the controller if it was open. */ if (sc->state != BXE_STATE_CLOSED) { BXE_CORE_LOCK(sc); - bxe_stop_locked(sc, UNLOAD_CLOSE); + rc = bxe_stop_locked(sc, UNLOAD_CLOSE); BXE_CORE_UNLOCK(sc); } -#ifdef BXE_TASK - /* Free the OS taskqueue resources. 
*/ - for (i = 0; i < sc->num_queues; i++) { - fp = &sc->fp[i]; +#ifdef BXE_DEBUG + /* Free memory buffer for grcdump output.*/ + if (sc->grcdump_buffer != NULL) + free(sc->grcdump_buffer, M_TEMP); +#endif - if (fp->tq) { - taskqueue_drain(fp->tq, &fp->task); - taskqueue_free(fp->tq); - } - } + /* Clean-up any remaining interrupt resources. */ + bxe_interrupt_detach(sc); + bxe_interrupt_free(sc); - if (sc->tq) { - taskqueue_drain(sc->tq, &sc->task); - taskqueue_free(sc->tq); - } -#endif /* Release the network interface. */ if (ifp != NULL) ether_ifdetach(ifp); @@ -3031,8 +3104,15 @@ bxe_detach(device_t dev) /* Release all remaining resources. */ bxe_release_resources(sc); + + /* Free all PCI resources. */ + bxe_pci_resources_free(sc); pci_disable_busmaster(dev); + bxe_mutexes_free(sc); + +bxe_detach_exit: + DBEXIT(BXE_INFO_UNLOAD); return(0); } @@ -3079,9 +3159,8 @@ bxe_stop_leading(struct bxe_softc *sc) uint16_t dsb_sp_prod_idx; int rc, timeout; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - - DBPRINT(sc, BXE_VERBOSE_LOAD, "%s(): Stop client connection " + DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | + BXE_VERBOSE_UNLOAD), "%s(): Stop client connection " "on fp[00].\n", __FUNCTION__); /* Send the ETH_HALT ramrod. */ @@ -3089,26 +3168,24 @@ bxe_stop_leading(struct bxe_softc *sc) bxe_sp_post(sc,RAMROD_CMD_ID_ETH_HALT, 0, 0, sc->fp[0].cl_id, 0); /* Poll for the ETH_HALT ramrod on the leading connection. */ - rc = bxe_wait_ramrod(sc, BXE_FP_STATE_HALTED, 0, &(sc->fp[0].state), 1); - if (rc) + rc = bxe_wait_ramrod(sc, BXE_FP_STATE_HALTED, + 0, &(sc->fp[0].state), 1); + if (rc) { + DBPRINT(sc, BXE_FATAL, "%s(): Timeout waiting for " + "STATE_HALTED ramrod completion!\n", __FUNCTION__); goto bxe_stop_leading_exit; + } + /* Get the default status block SP producer index. */ dsb_sp_prod_idx = *sc->dsb_sp_prod; - /* - * Now that the connection is in the - * HALTED state send PORT_DELETE ramrod. - */ + /* After HALT we send PORT_DELETE ramrod. 
*/ bxe_sp_post(sc, RAMROD_CMD_ID_ETH_PORT_DEL, 0, 0, 0, 1); - /* - * Wait for completion. This can take a * long time if the other port - * is busy. Give the command some time to complete but don't wait for a - * completion since there's nothing we can do. - */ + /* Be patient but don't wait forever. */ timeout = 500; while (dsb_sp_prod_idx == *sc->dsb_sp_prod) { - if (!timeout) { + if (timeout == 0) { DBPRINT(sc, BXE_FATAL, "%s(): Timeout waiting for " "PORT_DEL ramrod completion!\n", __FUNCTION__); rc = EBUSY; @@ -3124,8 +3201,7 @@ bxe_stop_leading(struct bxe_softc *sc) sc->fp[0].state = BXE_FP_STATE_CLOSED; bxe_stop_leading_exit: - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - return(rc); + return (rc); } /* @@ -3140,9 +3216,8 @@ bxe_setup_multi(struct bxe_softc *sc, int index) struct bxe_fastpath *fp; int rc; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - - DBPRINT(sc, BXE_VERBOSE_LOAD, "%s(): Setup client connection " + DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | + BXE_VERBOSE_UNLOAD), "%s(): Setup client connection " "on fp[%02d].\n", __FUNCTION__, index); fp = &sc->fp[index]; @@ -3154,10 +3229,9 @@ bxe_setup_multi(struct bxe_softc *sc, int index) bxe_sp_post(sc, RAMROD_CMD_ID_ETH_CLIENT_SETUP, index, 0, fp->cl_id, 0); /* Wait for the ramrod to complete. 
*/ - rc = bxe_wait_ramrod(sc, BXE_FP_STATE_OPEN, index, &(fp->state), 1); + rc = bxe_wait_ramrod(sc, BXE_FP_STATE_OPEN, index, &fp->state, 1); - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - return(rc); + return (rc); } /* @@ -3175,9 +3249,8 @@ bxe_stop_multi(struct bxe_softc *sc, int index) struct bxe_fastpath *fp; int rc; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - - DBPRINT(sc, BXE_VERBOSE_LOAD, "%s(): Stop client connection " + DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | + BXE_VERBOSE_UNLOAD), "%s(): Stop client connection " "on fp[%02d].\n", __FUNCTION__, index); fp = &sc->fp[index]; @@ -3186,8 +3259,8 @@ bxe_stop_multi(struct bxe_softc *sc, int index) fp->state = BXE_FP_STATE_HALTING; bxe_sp_post(sc, RAMROD_CMD_ID_ETH_HALT, index, 0, fp->cl_id, 0); - /* Wait for the ramrod completion. */ - rc = bxe_wait_ramrod(sc, BXE_FP_STATE_HALTED, index, &(fp->state), 1); + /* Wait for the HALT ramrod completion. */ + rc = bxe_wait_ramrod(sc, BXE_FP_STATE_HALTED, index, &fp->state, 1); if (rc){ BXE_PRINTF("%s(%d): fp[%02d] client ramrod halt failed!\n", __FILE__, __LINE__, index); @@ -3196,12 +3269,11 @@ bxe_stop_multi(struct bxe_softc *sc, int index) /* Delete the CFC entry. */ bxe_sp_post(sc, RAMROD_CMD_ID_ETH_CFC_DEL, index, 0, 0, 1); - /* Poll for the ramrod completion. */ - rc = bxe_wait_ramrod(sc, BXE_FP_STATE_CLOSED, index, &(fp->state), 1); + /* Poll for the DELETE ramrod completion. */ + rc = bxe_wait_ramrod(sc, BXE_FP_STATE_CLOSED, index, &fp->state, 1); bxe_stop_multi_exit: - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - return(rc); + return (rc); } /* @@ -3268,7 +3340,7 @@ bxe__link_reset(struct bxe_softc *sc) { DBENTER(BXE_VERBOSE_PHY); - if (!BP_NOMCP(sc)) { + if (!NOMCP(sc)) { bxe_acquire_phy_lock(sc); bxe_link_reset(&sc->link_params, &sc->link_vars, 1); bxe_release_phy_lock(sc); @@ -3285,7 +3357,7 @@ bxe__link_reset(struct bxe_softc *sc) * Stop the controller. 
* * Returns: - * Nothing. + * 0 = Success, !0 = Failure */ static int bxe_stop_locked(struct bxe_softc *sc, int unload_mode) @@ -3298,18 +3370,20 @@ bxe_stop_locked(struct bxe_softc *sc, int unload_mode) uint8_t entry, *mac_addr; int count, i, port, rc; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); + DBENTER(BXE_INFO_LOAD | BXE_INFO_RESET | BXE_INFO_UNLOAD); + ifp = sc->bxe_ifp; port = BP_PORT(sc), - reset_code = 0; - rc = 0; + rc = reset_code = 0; + + BXE_CORE_LOCK_ASSERT(sc); /* Stop the periodic tick. */ callout_stop(&sc->bxe_tick_callout); sc->state = BXE_STATE_CLOSING_WAIT4_HALT; - /* Stop receiving all types of Ethernet traffic. */ + /* Prevent any further RX traffic. */ sc->rx_mode = BXE_RX_MODE_NONE; bxe_set_storm_rx_mode(sc); @@ -3320,6 +3394,7 @@ bxe_stop_locked(struct bxe_softc *sc, int unload_mode) /* Tell the bootcode to stop watching for a heartbeat. */ SHMEM_WR(sc, func_mb[BP_FUNC(sc)].drv_pulse_mb, (DRV_PULSE_ALWAYS_ALIVE | sc->fw_drv_pulse_wr_seq)); + /* Stop the statistics updates. */ bxe_stats_handle(sc, STATS_EVENT_STOP); @@ -3327,6 +3402,9 @@ bxe_stop_locked(struct bxe_softc *sc, int unload_mode) for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; + if (fp == NULL || fp->tx_pkt_cons_sb == NULL) + break; + count = 1000; while (bxe_has_tx_work(fp)) { @@ -3334,7 +3412,7 @@ bxe_stop_locked(struct bxe_softc *sc, int unload_mode) if (count == 0) { BXE_PRINTF( - "%s(%d): Timeout wating for fp[%d] transmits to complete!\n", + "%s(%d): Timeout wating for fp[%02d] transmits to complete!\n", __FILE__, __LINE__, i); break; } @@ -3351,8 +3429,8 @@ bxe_stop_locked(struct bxe_softc *sc, int unload_mode) /* Disable Interrupts */ bxe_int_disable(sc); - DELAY(1000); + /* Clear the MAC addresses. 
*/ if (CHIP_IS_E1(sc)) { config = BXE_SP(sc, mcast_config); @@ -3376,8 +3454,10 @@ bxe_stop_locked(struct bxe_softc *sc, int unload_mode) REG_WR(sc, MC_HASH_OFFSET(sc, i), 0); REG_WR(sc, MISC_REG_E1HMF_MODE, 0); } + /* Determine if any WoL settings needed. */ if (unload_mode == UNLOAD_NORMAL) + /* Driver initiatied WoL is disabled. */ reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; else if (sc->bxe_flags & BXE_NO_WOL_FLAG) { /* Driver initiated WoL is disabled, use OOB WoL settings. */ @@ -3398,38 +3478,29 @@ bxe_stop_locked(struct bxe_softc *sc, int unload_mode) /* Prevent WoL. */ reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS; } + /* Stop all non-leading client connections. */ for (i = 1; i < sc->num_queues; i++) { if (bxe_stop_multi(sc, i)){ goto bxe_stop_locked_exit; } } + /* Stop the leading client connection. */ rc = bxe_stop_leading(sc); - if (rc) { -#ifdef BXE_DEBUG - if ((sc->state != BXE_STATE_CLOSING_WAIT4_UNLOAD) || - (sc->fp[0].state != BXE_FP_STATE_CLOSED)) { - BXE_PRINTF("%s(%d): Failed to close leading " - "client connection!\n", __FILE__, __LINE__); - } -#endif - } - DELAY(10000); bxe_stop_locked_exit: - - if (BP_NOMCP(sc)) { + if (NOMCP(sc)) { DBPRINT(sc, BXE_INFO, - "%s(): Old No MCP load counts: %d, %d, %d\n", __FUNCTION__, - load_count[0], load_count[1], load_count[2]); + "%s(): Old No MCP load counts: %d, %d, %d\n", + __FUNCTION__, load_count[0], load_count[1], load_count[2]); load_count[0]--; load_count[1 + port]--; DBPRINT(sc, BXE_INFO, - "%s(): New No MCP load counts: %d, %d, %d\n", __FUNCTION__, - load_count[0], load_count[1], load_count[2]); + "%s(): New No MCP load counts: %d, %d, %d\n", + __FUNCTION__, load_count[0], load_count[1], load_count[2]); if (load_count[0] == 0) reset_code = FW_MSG_CODE_DRV_UNLOAD_COMMON; @@ -3454,32 +3525,31 @@ bxe_stop_locked_exit: DELAY(10000); /* Report UNLOAD_DONE to MCP */ - if (!BP_NOMCP(sc)) + if (!NOMCP(sc)) bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE); sc->port.pmf = 0; /* Free RX chains and buffers. 
*/ - bxe_free_rx_chains(sc); + bxe_clear_rx_chains(sc); /* Free TX chains and buffers. */ - bxe_free_tx_chains(sc); + bxe_clear_tx_chains(sc); sc->state = BXE_STATE_CLOSED; bxe_ack_int(sc); - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET |BXE_VERBOSE_UNLOAD); - return(rc); + DBEXIT(BXE_INFO_LOAD | BXE_INFO_RESET |BXE_INFO_UNLOAD); + return (rc); } - /* * Device shutdown function. * * Stops and resets the controller. * * Returns: - * Nothing + * 0 = Success, !0 = Failure */ static int bxe_shutdown(device_t dev) @@ -3487,13 +3557,13 @@ bxe_shutdown(device_t dev) struct bxe_softc *sc; sc = device_get_softc(dev); - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); + DBENTER(BXE_INFO_LOAD | BXE_INFO_RESET | BXE_INFO_UNLOAD); BXE_CORE_LOCK(sc); bxe_stop_locked(sc, UNLOAD_NORMAL); BXE_CORE_UNLOCK(sc); - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); + DBEXIT(BXE_INFO_LOAD | BXE_INFO_RESET | BXE_INFO_UNLOAD); return (0); } @@ -3571,7 +3641,9 @@ bxe__link_status_update(struct bxe_softc *sc) bxe_stats_handle(sc, STATS_EVENT_LINK_UP); else bxe_stats_handle(sc, STATS_EVENT_STOP); + bxe_read_mf_cfg(sc); + /* Indicate link status. 
*/ bxe_link_report(sc); @@ -3630,7 +3702,7 @@ bxe_initial_phy_init(struct bxe_softc *sc) DBENTER(BXE_VERBOSE_PHY); rc = 0; - if (!BP_NOMCP(sc)) { + if (!NOMCP(sc)) { /* * It is recommended to turn off RX flow control for 5771x @@ -3646,6 +3718,7 @@ bxe_initial_phy_init(struct bxe_softc *sc) bxe_release_phy_lock(sc); bxe_calc_fc_adv(sc); + if (sc->link_vars.link_up) { bxe_stats_handle(sc,STATS_EVENT_LINK_UP); bxe_link_report(sc); @@ -3673,9 +3746,10 @@ static int bxe_alloc_buf_rings(struct bxe_softc *sc) { struct bxe_fastpath *fp; - int i, rc = 0; + int i, rc; DBENTER(BXE_VERBOSE_LOAD); + rc = 0; for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; @@ -3685,14 +3759,15 @@ bxe_alloc_buf_rings(struct bxe_softc *sc) M_DEVBUF, M_DONTWAIT, &fp->mtx); if (fp->br == NULL) { rc = ENOMEM; - return(rc); + goto bxe_alloc_buf_rings_exit; } } else BXE_PRINTF("%s(%d): Bug!\n", __FILE__, __LINE__); } +bxe_alloc_buf_rings_exit: DBEXIT(BXE_VERBOSE_LOAD); - return(rc); + return (rc); } /* @@ -3737,9 +3812,9 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) { struct ifnet *ifp; uint32_t load_code; - int i, port; + int error, i, port; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + DBENTER(BXE_INFO_LOAD | BXE_INFO_RESET); BXE_CORE_LOCK_ASSERT(sc); @@ -3753,7 +3828,7 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) /* Check if the driver is still running and bail out if it is. */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - DBPRINT(sc, BXE_INFO, + DBPRINT(sc, BXE_WARN, "%s(): Init called while driver is running!\n", __FUNCTION__); goto bxe_init_locked_exit; @@ -3770,7 +3845,7 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) */ sc->state = BXE_STATE_OPENING_WAIT4_LOAD; - if (BP_NOMCP(sc)) { + if (NOMCP(sc)) { port = BP_PORT(sc); DBPRINT(sc, BXE_INFO, @@ -3817,7 +3892,8 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) sc->intr_sem = 1; /* Initialize hardware. 
*/ - if (bxe_init_hw(sc, load_code)){ + error = bxe_init_hw(sc, load_code); + if (error != 0){ BXE_PRINTF("%s(%d): Hardware initialization failed, " "aborting!\n", __FILE__, __LINE__); goto bxe_init_locked_failed1; @@ -3826,6 +3902,7 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) /* Calculate and save the Ethernet MTU size. */ sc->port.ether_mtu = ifp->if_mtu + ETHER_HDR_LEN + (ETHER_VLAN_ENCAP_LEN * 2) + ETHER_CRC_LEN + 4; + DBPRINT(sc, BXE_INFO, "%s(): Setting MTU = %d\n", __FUNCTION__, sc->port.ether_mtu); @@ -3836,12 +3913,18 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) sc->mbuf_alloc_size = PAGE_SIZE; else sc->mbuf_alloc_size = MJUM9BYTES; + DBPRINT(sc, BXE_INFO, "%s(): mbuf_alloc_size = %d, " "max_frame_size = %d\n", __FUNCTION__, sc->mbuf_alloc_size, sc->port.ether_mtu); /* Setup NIC internals and enable interrupts. */ - bxe_init_nic(sc, load_code); + error = bxe_init_nic(sc, load_code); + if (error != 0) { + BXE_PRINTF("%s(%d): NIC initialization failed, " + "aborting!\n", __FILE__, __LINE__); + goto bxe_init_locked_failed1; + } if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) && (sc->common.shmem2_base)){ @@ -3855,7 +3938,8 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) #if __FreeBSD_version >= 800000 /* Allocate buffer rings for multiqueue operation. */ - if (bxe_alloc_buf_rings(sc)) { + error = bxe_alloc_buf_rings(sc); + if (error != 0) { BXE_PRINTF("%s(%d): Buffer ring initialization failed, " "aborting!\n", __FILE__, __LINE__); goto bxe_init_locked_failed1; @@ -3863,7 +3947,7 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) #endif /* Tell MCP that driver load is done. */ - if (!BP_NOMCP(sc)) { + if (!NOMCP(sc)) { load_code = bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE); if (!load_code) { BXE_PRINTF("%s(%d): Driver load failed! No MCP " @@ -3878,10 +3962,12 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) sc->intr_sem = 0; /* Setup the leading connection for the controller. 
*/ - if (bxe_setup_leading(sc)) + error = bxe_setup_leading(sc); + if (error != 0) { DBPRINT(sc, BXE_FATAL, "%s(): Initial PORT_SETUP ramrod " "failed. State is not OPEN!\n", __FUNCTION__); - + goto bxe_init_locked_failed3; + } if (CHIP_IS_E1H(sc)) { if (sc->mf_config[BP_E1HVN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) { @@ -3917,7 +4003,6 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) else bxe_set_mac_addr_e1h(sc, 1); - DELAY(1000); /* Perform PHY initialization for the primary port. */ @@ -3950,7 +4035,7 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) bxe__link_status_update(sc); DELAY(1000); - /* Tell the stack the driver is running and the TX queue is open. */ + /* Tell the stack the driver is running. */ ifp->if_drv_flags = IFF_DRV_RUNNING; /* Schedule our periodic timer tick. */ @@ -3958,23 +4043,20 @@ bxe_init_locked(struct bxe_softc *sc, int load_mode) /* Everything went OK, go ahead and exit. */ goto bxe_init_locked_exit; - /* Try and gracefully shutdown the device because of a failure. */ bxe_init_locked_failed4: - + /* Try and gracefully shutdown the device because of a failure. 
*/ for (i = 1; i < sc->num_queues; i++) bxe_stop_multi(sc, i); +bxe_init_locked_failed3: bxe_stop_leading(sc); - bxe_stats_handle(sc, STATS_EVENT_STOP); bxe_init_locked_failed2: - bxe_int_disable(sc); bxe_init_locked_failed1: - - if (!BP_NOMCP(sc)) { + if (!NOMCP(sc)) { bxe_fw_command(sc, DRV_MSG_CODE_LOAD_DONE); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP); bxe_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE); @@ -3985,11 +4067,10 @@ bxe_init_locked_failed1: bxe_free_buf_rings(sc); #endif - DBPRINT(sc, BXE_INFO, "%s(): Initialization failed!\n", __FUNCTION__); + DBPRINT(sc, BXE_WARN, "%s(): Initialization failed!\n", __FUNCTION__); bxe_init_locked_exit: - - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + DBEXIT(BXE_INFO_LOAD | BXE_INFO_RESET); } /* @@ -4039,7 +4120,7 @@ bxe_wait_ramrod(struct bxe_softc *sc, int state, int idx, int *state_p, } /* We timed out polling for a completion. */ - DBPRINT(sc, BXE_FATAL, "%s(): Timeout %s for state 0x%08X on fp[%d]. " + DBPRINT(sc, BXE_FATAL, "%s(): Timeout %s for state 0x%08X on fp[%02d]. " "Got 0x%x instead\n", __FUNCTION__, poll ? 
"polling" : "waiting", state, idx, *state_p); @@ -4060,7 +4141,7 @@ bxe_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, uint32_t addr, uint32_t len) { int dmae_wr_max, offset; - DBENTER(BXE_VERBOSE_LOAD); + DBENTER(BXE_INSANE_REGS); dmae_wr_max = DMAE_LEN32_WR_MAX(sc); offset = 0; @@ -4071,7 +4152,7 @@ bxe_write_dmae_phys_len(struct bxe_softc *sc, bus_addr_t phys_addr, len -= dmae_wr_max; } bxe_write_dmae(sc, phys_addr + offset, addr + offset, len); - DBEXIT(BXE_VERBOSE_LOAD); + DBEXIT(BXE_INSANE_REGS); } @@ -4119,17 +4200,17 @@ bxe_init_ind_wr(struct bxe_softc *sc, uint32_t addr, const uint32_t *data, static void bxe_write_big_buf(struct bxe_softc *sc, uint32_t addr, uint32_t len) { - DBENTER(BXE_VERBOSE_LOAD); -#ifdef USE_DMAE + DBENTER(BXE_INSANE_REGS); +#ifdef BXE_USE_DMAE if (sc->dmae_ready) - bxe_write_dmae_phys_len(sc, sc->gunzip_mapping, addr, len); + bxe_write_dmae_phys_len(sc, sc->gz_dma.paddr, addr, len); else - bxe_init_str_wr(sc, addr, sc->gunzip_buf, len); + bxe_init_str_wr(sc, addr, sc->gz, len); #else - bxe_init_str_wr(sc, addr, sc->gunzip_buf, len); + bxe_init_str_wr(sc, addr, sc->gz, len); #endif - DBEXIT(BXE_VERBOSE_LOAD); + DBEXIT(BXE_INSANE_REGS); } /* @@ -4148,9 +4229,9 @@ bxe_init_fill(struct bxe_softc *sc, uint32_t addr, int fill, uint32_t len) DBENTER(BXE_VERBOSE_LOAD); - length = (((len * 4) > FW_BUF_SIZE) ? FW_BUF_SIZE : (len * 4)); + length = (((len * 4) > BXE_FW_BUF_SIZE) ? BXE_FW_BUF_SIZE : (len * 4)); leftovers = length / 4; - memset(sc->gunzip_buf, fill, length); + memset(sc->gz, fill, length); for (i = 0; i < len; i += leftovers) { cur_len = min(leftovers, len - i); @@ -4173,13 +4254,15 @@ bxe_init_wr_64(struct bxe_softc *sc, uint32_t addr, const uint32_t *data, uint32_t buf_len32, cur_len, len; int i; - buf_len32 = FW_BUF_SIZE / 4; + DBENTER(BXE_INSANE_REGS); + + buf_len32 = BXE_FW_BUF_SIZE / 4; len = len64 * 2; /* 64 bit value is in a blob: first low DWORD, then high DWORD. 
*/ data64 = HILO_U64((*(data + 1)), (*data)); - len64 = min((uint32_t)(FW_BUF_SIZE / 8), len64); + len64 = min((uint32_t)(BXE_FW_BUF_SIZE / 8), len64); for (i = 0; i < len64; i++) { - pdata = ((uint64_t *)(sc->gunzip_buf)) + i; + pdata = ((uint64_t *)(sc->gz)) + i; *pdata = data64; } @@ -4187,6 +4270,8 @@ bxe_init_wr_64(struct bxe_softc *sc, uint32_t addr, const uint32_t *data, cur_len = min(buf_len32, len - i); bxe_write_big_buf(sc, addr + i*4, cur_len); } + + DBEXIT(BXE_INSANE_REGS); } @@ -4247,15 +4332,15 @@ static void bxe_write_big_buf_wb(struct bxe_softc *sc, uint32_t addr, uint32_t len) { if (sc->dmae_ready) - bxe_write_dmae_phys_len(sc, sc->gunzip_mapping, addr, len); + bxe_write_dmae_phys_len(sc, sc->gz_dma.paddr, addr, len); else - bxe_init_ind_wr(sc, addr, sc->gunzip_buf, len); + bxe_init_ind_wr(sc, addr, sc->gz, len); } #define VIRT_WR_DMAE_LEN(sc, data, addr, len32, le32_swap) \ do { \ - memcpy(sc->gunzip_buf, data, (len32)*4); \ + memcpy(sc->gz, data, (len32)*4); \ bxe_write_big_buf_wb(sc, addr, len32); \ } while (0) @@ -4271,7 +4356,7 @@ bxe_init_wr_wb(struct bxe_softc *sc, uint32_t addr, const uint32_t *data, { const uint32_t *old_data; - DBENTER(BXE_VERBOSE_LOAD); + DBENTER(BXE_INSANE_REGS); old_data = data; data = (const uint32_t *)bxe_sel_blob(sc, addr, (const uint8_t *)data); if (sc->dmae_ready) { @@ -4282,7 +4367,7 @@ bxe_init_wr_wb(struct bxe_softc *sc, uint32_t addr, const uint32_t *data, } else bxe_init_ind_wr(sc, addr, data, len); - DBEXIT(BXE_VERBOSE_LOAD); + DBEXIT(BXE_INSANE_REGS); } static void @@ -4316,6 +4401,8 @@ bxe_init_block(struct bxe_softc *sc, uint32_t block, uint32_t stage) uint16_t op_end, op_start; int hw_wr; + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + op_start = INIT_OPS_OFFSETS(sc)[BLOCK_OPS_IDX(block, stage, STAGE_START)]; op_end = INIT_OPS_OFFSETS(sc)[BLOCK_OPS_IDX(block, stage, STAGE_END)]; @@ -4370,11 +4457,14 @@ bxe_init_block(struct bxe_softc *sc, uint32_t block, uint32_t stage) break; } } + + 
DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); } /* * Handles controller initialization when called from an unlocked routine. * ifconfig calls this function. + * * Returns: * None. */ @@ -4384,16 +4474,12 @@ bxe_init(void *xsc) struct bxe_softc *sc; sc = xsc; - DBENTER(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); BXE_CORE_LOCK(sc); bxe_init_locked(sc, LOAD_NORMAL); BXE_CORE_UNLOCK(sc); - - DBEXIT(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); } - /* * Release all resources used by the driver. * @@ -4407,7 +4493,6 @@ static void bxe_release_resources(struct bxe_softc *sc) { device_t dev; - int i; DBENTER(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); @@ -4417,57 +4502,14 @@ bxe_release_resources(struct bxe_softc *sc) if (sc->bxe_ifp != NULL) if_free(sc->bxe_ifp); - /* Release interrupt resources. */ - bxe_interrupt_detach(sc); - - if ((sc->bxe_flags & BXE_USING_MSIX_FLAG) && sc->msix_count) { - - for (i = 0; i < sc->msix_count; i++) { - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | - BXE_VERBOSE_INTR), "%s(): Releasing MSI-X[%d] " - "vector.\n", __FUNCTION__, i); - if (sc->bxe_msix_res[i] && sc->bxe_msix_rid[i]) - bus_release_resource(dev, SYS_RES_IRQ, - sc->bxe_msix_rid[i], sc->bxe_msix_res[i]); - } - - pci_release_msi(dev); - - } else if ((sc->bxe_flags & BXE_USING_MSI_FLAG) && sc->msi_count) { - - for (i = 0; i < sc->msi_count; i++) { - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | - BXE_VERBOSE_INTR), "%s(): Releasing MSI[%d] " - "vector.\n", __FUNCTION__, i); - if (sc->bxe_msi_res[i] && sc->bxe_msi_rid[i]) - bus_release_resource(dev, SYS_RES_IRQ, - sc->bxe_msi_rid[i], sc->bxe_msi_res[i]); - } - - pci_release_msi(dev); - - } else { - - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | - BXE_VERBOSE_INTR), "%s(): Releasing legacy interrupt.\n", - __FUNCTION__); - if (sc->bxe_irq_res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, - sc->bxe_irq_rid, sc->bxe_irq_res); - } - /* Free the DMA resources. 
*/ - bxe_dma_free(sc); - - bxe_release_pci_resources(sc); + bxe_host_structures_free(sc); #if __FreeBSD_version >= 800000 /* Free multiqueue buffer rings. */ bxe_free_buf_rings(sc); #endif - /* Free remaining fastpath resources. */ - bxe_free_mutexes(sc); } @@ -4484,7 +4526,7 @@ bxe_release_resources(struct bxe_softc *sc) static void bxe_reg_wr_ind(struct bxe_softc *sc, uint32_t offset, uint32_t val) { - DBPRINT(sc, BXE_INSANE, "%s(); offset = 0x%08X, val = 0x%08X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(); offset = 0x%08X, val = 0x%08X\n", __FUNCTION__, offset, val); pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, offset, 4); @@ -4518,7 +4560,7 @@ bxe_reg_rd_ind(struct bxe_softc *sc, uint32_t offset) pci_write_config(sc->dev, PCICFG_GRC_ADDRESS, PCICFG_VENDOR_ID_OFFSET, 4); - DBPRINT(sc, BXE_INSANE, "%s(); offset = 0x%08X, val = 0x%08X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(); offset = 0x%08X, val = 0x%08X\n", __FUNCTION__, offset, val); return (val); } @@ -4548,7 +4590,7 @@ bxe_post_dmae(struct bxe_softc *sc, struct dmae_command *dmae, int idx) for (i = 0; i < (sizeof(struct dmae_command) / 4); i++) { REG_WR(sc, cmd_offset + i * 4, *(((uint32_t *)dmae) + i)); - DBPRINT(sc, BXE_INSANE, "%s(): DMAE cmd[%d].%d : 0x%08X\n", + DBPRINT(sc, BXE_INSANE_REGS, "%s(): DMAE cmd[%d].%d : 0x%08X\n", __FUNCTION__, idx, i, cmd_offset + i * 4); } @@ -4666,7 +4708,7 @@ bxe_read_dmae(struct bxe_softc *sc, uint32_t src_addr, uint32_t *data, *wb_comp; int i, timeout; - DBENTER(BXE_INSANE); + DBENTER(BXE_INSANE_REGS); wb_comp = BXE_SP(sc, wb_comp); /* Fall back to indirect access if DMAE is not ready. */ @@ -4728,7 +4770,7 @@ bxe_read_dmae(struct bxe_softc *sc, uint32_t src_addr, BXE_DMAE_UNLOCK(sc); bxe_read_dmae_exit: - DBEXIT(BXE_INSANE); + DBEXIT(BXE_INSANE_REGS); } /* @@ -4962,7 +5004,7 @@ bxe_int_enable(struct bxe_softc *sc) port = BP_PORT(sc); hc_addr = port ? 
HC_REG_CONFIG_1 : HC_REG_CONFIG_0; val = REG_RD(sc, hc_addr); - if (sc->bxe_flags & BXE_USING_MSIX_FLAG) { + if (sc->msix_count > 0) { if (sc->msix_count == 1) { /* Single interrupt, multiple queues.*/ @@ -4993,7 +5035,7 @@ bxe_int_enable(struct bxe_softc *sc) HC_CONFIG_0_REG_ATTN_BIT_EN_0); } - } else if (sc->bxe_flags & BXE_USING_MSI_FLAG) { + } else if (sc->msi_count > 0) { if (sc->msi_count == 1) { @@ -5080,7 +5122,7 @@ bxe_int_disable(struct bxe_softc *sc) uint32_t hc_addr, val; int port; - DBENTER(BXE_VERBOSE_INTR); + DBENTER(BXE_VERBOSE_INTR | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); port = BP_PORT(sc); hc_addr = port ? HC_REG_CONFIG_1 : HC_REG_CONFIG_0; @@ -5097,7 +5139,7 @@ bxe_int_disable(struct bxe_softc *sc) __FUNCTION__, val); } - DBEXIT(BXE_VERBOSE_INTR); + DBEXIT(BXE_VERBOSE_INTR | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); } #define BXE_CRC32_RESIDUAL 0xdebb20e3 @@ -5592,10 +5634,6 @@ bxe_ack_sb(struct bxe_softc *sc, uint8_t sb_id, uint8_t storm, uint16_t index, struct igu_ack_register igu_ack; uint32_t hc_addr; - DBPRINT(sc, BXE_VERBOSE_INTR, "%s(): sb_id = %d, storm = %d, " - "index = %d, int_mode = %d, update = %d.\n", __FUNCTION__, sb_id, - storm, index, int_mode, update); - hc_addr = (HC_REG_COMMAND_REG + BP_PORT(sc) * 32 + COMMAND_REG_INT_ACK); igu_ack.status_block_index = index; igu_ack.sb_id_and_flags = @@ -5605,11 +5643,6 @@ bxe_ack_sb(struct bxe_softc *sc, uint8_t sb_id, uint8_t storm, uint16_t index, (int_mode << IGU_ACK_REGISTER_INTERRUPT_MODE_SHIFT)); rmb(); - - DBPRINT(sc, BXE_VERBOSE_INTR, - "%s(): Writing 0x%08X to HC addr 0x%08X\n", __FUNCTION__, - (*(uint32_t *) &igu_ack), hc_addr); - REG_WR(sc, hc_addr, (*(uint32_t *) &igu_ack)); wmb(); } @@ -5618,7 +5651,8 @@ bxe_ack_sb(struct bxe_softc *sc, uint8_t sb_id, uint8_t storm, uint16_t index, * Update fastpath status block index. 
* * Returns: - * 0 + * 0 = Nu completes, 1 = TX completes, 2 = RX completes, + * 3 = RX & TX completes */ static __inline uint16_t bxe_update_fpsb_idx(struct bxe_fastpath *fp) @@ -5686,7 +5720,7 @@ bxe_sp_event(struct bxe_fastpath *fp, union eth_rx_cqe *rr_cqe) cid = SW_CID(rr_cqe->ramrod_cqe.conn_and_cmd_data); command = CQE_CMD(rr_cqe->ramrod_cqe.conn_and_cmd_data); DBPRINT(sc, BXE_VERBOSE_RAMROD, "%s(): CID = %d, ramrod command = %d, " - "device state = 0x%08X, fp[%d].state = 0x%08X, type = %d\n", + "device state = 0x%08X, fp[%02d].state = 0x%08X, type = %d\n", __FUNCTION__, cid, command, sc->state, fp->index, fp->state, rr_cqe->ramrod_cqe.ramrod_type); @@ -5699,13 +5733,13 @@ bxe_sp_event(struct bxe_fastpath *fp, union eth_rx_cqe *rr_cqe) switch (command | fp->state) { case (RAMROD_CMD_ID_ETH_CLIENT_SETUP | BXE_FP_STATE_OPENING): DBPRINT(sc, BXE_VERBOSE_RAMROD, - "%s(): Completed fp[%d] CLIENT_SETUP Ramrod.\n", + "%s(): Completed fp[%02d] CLIENT_SETUP Ramrod.\n", __FUNCTION__, cid); fp->state = BXE_FP_STATE_OPEN; break; case (RAMROD_CMD_ID_ETH_HALT | BXE_FP_STATE_HALTING): DBPRINT(sc, BXE_VERBOSE_RAMROD, - "%s(): Completed fp[%d] ETH_HALT ramrod\n", + "%s(): Completed fp[%02d] ETH_HALT ramrod\n", __FUNCTION__, cid); fp->state = BXE_FP_STATE_HALTED; break; @@ -5734,7 +5768,7 @@ bxe_sp_event(struct bxe_fastpath *fp, union eth_rx_cqe *rr_cqe) break; case (RAMROD_CMD_ID_ETH_CFC_DEL | BXE_STATE_CLOSING_WAIT4_HALT): DBPRINT(sc, BXE_VERBOSE_RAMROD, - "%s(): Completed fp[%d] ETH_CFC_DEL ramrod.\n", + "%s(): Completed fp[%02d] ETH_CFC_DEL ramrod.\n", __FUNCTION__, cid); sc->fp[cid].state = BXE_FP_STATE_CLOSED; break; @@ -5787,7 +5821,7 @@ bxe_acquire_hw_lock(struct bxe_softc *sc, uint32_t resource) /* Validating that the resource is within range. */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { - DBPRINT(sc, BXE_INFO, "%s(): Resource is out of range! " + DBPRINT(sc, BXE_WARN, "%s(): Resource is out of range! 
" "resource(0x%08X) > HW_LOCK_MAX_RESOURCE_VALUE(0x%08X)\n", __FUNCTION__, resource, HW_LOCK_MAX_RESOURCE_VALUE); rc = EINVAL; @@ -5797,7 +5831,7 @@ bxe_acquire_hw_lock(struct bxe_softc *sc, uint32_t resource) /* Validating that the resource is not already taken. */ lock_status = REG_RD(sc, hw_lock_control_reg); if (lock_status & resource_bit) { - DBPRINT(sc, BXE_INFO, "%s(): Failed to acquire lock! " + DBPRINT(sc, BXE_WARN, "%s(): Failed to acquire lock! " "lock_status = 0x%08X, resource_bit = 0x%08X\n", __FUNCTION__, lock_status, resource_bit); rc = EEXIST; @@ -5815,7 +5849,7 @@ bxe_acquire_hw_lock(struct bxe_softc *sc, uint32_t resource) DELAY(5000); } - DBPRINT(sc, BXE_INFO, "%s(): Timeout!\n", __FUNCTION__); + DBPRINT(sc, BXE_WARN, "%s(): Timeout!\n", __FUNCTION__); rc = EAGAIN; bxe_acquire_hw_lock_exit: @@ -5846,7 +5880,7 @@ bxe_release_hw_lock(struct bxe_softc *sc, uint32_t resource) rc = 0; /* Validating that the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { - DBPRINT(sc, BXE_INFO, "%s(): Resource is out of range! " + DBPRINT(sc, BXE_WARN, "%s(): Resource is out of range! " "resource(0x%08X) > HW_LOCK_MAX_RESOURCE_VALUE(0x%08X)\n", __FUNCTION__, resource, HW_LOCK_MAX_RESOURCE_VALUE); rc = EINVAL; @@ -5861,7 +5895,7 @@ bxe_release_hw_lock(struct bxe_softc *sc, uint32_t resource) /* Validating that the resource is currently taken */ lock_status = REG_RD(sc, hw_lock_control_reg); if (!(lock_status & resource_bit)) { - DBPRINT(sc, BXE_INFO, "%s(): The resource is not currently " + DBPRINT(sc, BXE_WARN, "%s(): The resource is not currently " "locked! lock_status = 0x%08X, resource_bit = 0x%08X\n", __FUNCTION__, lock_status, resource_bit); rc = EFAULT; @@ -6045,15 +6079,13 @@ bxe_set_spio(struct bxe_softc *sc, int spio_num, uint32_t mode) uint32_t spio_reg, spio_mask; int rc; - DBENTER(BXE_VERBOSE_MISC); - rc = 0; spio_mask = 1 << spio_num; /* Validate the SPIO. 
*/ if ((spio_num < MISC_REGISTERS_SPIO_4) || (spio_num > MISC_REGISTERS_SPIO_7)) { - DBPRINT(sc, BXE_FATAL, "%s(): Invalid SPIO (%d)!\n", + DBPRINT(sc, BXE_WARN, "%s(): Invalid SPIO (%d)!\n", __FUNCTION__, spio_num); rc = EINVAL; goto bxe_set_spio_exit; @@ -6071,24 +6103,24 @@ bxe_set_spio(struct bxe_softc *sc, int spio_num, uint32_t mode) switch (mode) { case MISC_REGISTERS_SPIO_OUTPUT_LOW : - DBPRINT(sc, BXE_INFO, "%s(): Set SPIO %d -> output low\n", - __FUNCTION__, spio_num); + DBPRINT(sc, BXE_VERBOSE_MISC, "%s(): Set SPIO %d -> " + "output low\n", __FUNCTION__, spio_num); spio_reg &= ~(spio_mask << MISC_REGISTERS_SPIO_FLOAT_POS); spio_reg |= (spio_mask << MISC_REGISTERS_SPIO_CLR_POS); break; case MISC_REGISTERS_SPIO_OUTPUT_HIGH : - DBPRINT(sc, BXE_INFO, "%s(): Set SPIO %d -> output high\n", - __FUNCTION__, spio_num); + DBPRINT(sc, BXE_VERBOSE_MISC, "%s(): Set SPIO %d -> " + "output high\n", __FUNCTION__, spio_num); spio_reg &= ~(spio_mask << MISC_REGISTERS_SPIO_FLOAT_POS); spio_reg |= (spio_mask << MISC_REGISTERS_SPIO_SET_POS); break; case MISC_REGISTERS_SPIO_INPUT_HI_Z: - DBPRINT(sc, BXE_INFO, "%s(): Set SPIO %d -> input\n", - __FUNCTION__, spio_num); + DBPRINT(sc, BXE_VERBOSE_MISC, "%s(): Set SPIO %d -> " + "input\n", __FUNCTION__, spio_num); spio_reg |= (spio_mask << MISC_REGISTERS_SPIO_FLOAT_POS); break; default: - DBPRINT(sc, BXE_FATAL, "%s(): Unknown SPIO mode (0x%08X)!\n", + DBPRINT(sc, BXE_WARN, "%s(): Unknown SPIO mode (0x%08X)!\n", __FUNCTION__, mode); break; } @@ -6101,7 +6133,6 @@ bxe_set_spio(struct bxe_softc *sc, int spio_num, uint32_t mode) } bxe_set_spio_exit: - DBEXIT(BXE_VERBOSE_MISC); return (rc); } @@ -6202,9 +6233,6 @@ bxe_link_attn(struct bxe_softc *sc) bxe_stats_handle(sc, STATS_EVENT_LINK_UP); } - /* Report the new link status. */ - bxe_link_report(sc); - /* Need additional handling for multi-function devices. 
*/ if (IS_E1HMF(sc)) { port = BP_PORT(sc); @@ -6254,11 +6282,9 @@ bxe_pmf_update(struct bxe_softc *sc) uint32_t val; int port; - DBENTER(BXE_VERBOSE_INTR); - /* Record that this driver instance is managing the port. */ sc->port.pmf = 1; - DBPRINT(sc, BXE_INFO, "%s(): Enabling port management function.\n", + DBPRINT(sc, BXE_INFO, "%s(): Enabling this port as PMF.\n", __FUNCTION__); /* Enable NIG attention. */ @@ -6268,8 +6294,6 @@ bxe_pmf_update(struct bxe_softc *sc) REG_WR(sc, HC_REG_LEADING_EDGE_0 + port * 8, val); bxe_stats_handle(sc, STATS_EVENT_PMF); - - DBEXIT(BXE_VERBOSE_INTR); } /* 8073 Download definitions */ @@ -6376,9 +6400,9 @@ bxe_sp_post(struct bxe_softc *sc, int command, int cid, uint32_t data_hi, { int func, rc; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - - DBRUNMSG(BXE_VERBOSE_RAMROD, bxe_decode_ramrod_cmd(sc, command)); + DBRUNMSG((BXE_EXTREME_LOAD | BXE_EXTREME_RESET | + BXE_EXTREME_UNLOAD | BXE_EXTREME_RAMROD), + bxe_decode_ramrod_cmd(sc, command)); DBPRINT(sc, BXE_VERBOSE_RAMROD, "%s(): cid = %d, data_hi = 0x%08X, " "data_low = 0x%08X, remaining spq entries = %d\n", __FUNCTION__, @@ -6437,8 +6461,6 @@ bxe_sp_post(struct bxe_softc *sc, int command, int cid, uint32_t data_hi, bxe_sp_post_exit: BXE_SP_UNLOCK(sc); - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_RAMROD); - return (rc); } @@ -6512,7 +6534,7 @@ bxe_update_dsb_idx(struct bxe_softc *sc) uint16_t rc; rc = 0; - dsb = sc->def_status_block; + dsb = sc->def_sb; /* Read memory barrier since block is written by hardware. 
*/ rmb(); @@ -7130,9 +7152,9 @@ bxe_attn_int(struct bxe_softc* sc) DBENTER(BXE_VERBOSE_INTR); - attn_bits = le32toh(sc->def_status_block->atten_status_block.attn_bits); + attn_bits = le32toh(sc->def_sb->atten_status_block.attn_bits); attn_ack = - le32toh(sc->def_status_block->atten_status_block.attn_bits_ack); + le32toh(sc->def_sb->atten_status_block.attn_bits_ack); attn_state = sc->attn_state; asserted = attn_bits & ~attn_ack & ~attn_state; deasserted = ~attn_bits & attn_ack & attn_state; @@ -7262,7 +7284,7 @@ bxe_attn_int(struct bxe_softc* sc) #ifdef __i386__ #define BITS_PER_LONG 32 -#else /*Only support x86_64(AMD64 and EM64T)*/ +#else #define BITS_PER_LONG 64 #endif @@ -7290,19 +7312,19 @@ static void bxe_stats_storm_post(struct bxe_softc *sc) { struct eth_query_ramrod_data ramrod_data = {0}; - int rc; + int i, rc; DBENTER(BXE_INSANE_STATS); if (!sc->stats_pending) { ramrod_data.drv_counter = sc->stats_counter++; ramrod_data.collect_port = sc->port.pmf ? 1 : 0; - ramrod_data.ctr_id_vector = (1 << BP_CL_ID(sc)); + for (i = 0; i < sc->num_queues; i++) + ramrod_data.ctr_id_vector |= (1 << sc->fp[i].cl_id); rc = bxe_sp_post(sc, RAMROD_CMD_ID_ETH_STAT_QUERY, 0, ((uint32_t *)&ramrod_data)[1], ((uint32_t *)&ramrod_data)[0], 0); - if (rc == 0) { /* Stats ramrod has it's own slot on the SPQ. */ sc->spq_left++; @@ -7313,22 +7335,32 @@ bxe_stats_storm_post(struct bxe_softc *sc) DBEXIT(BXE_INSANE_STATS); } +/* + * Setup the adrress used by the driver to report port-based statistics + * back to the controller. + * + * Returns: + * None. + */ static void bxe_stats_port_base_init(struct bxe_softc *sc) { uint32_t *stats_comp; struct dmae_command *dmae; - if (!sc->port.pmf || !sc->port.port_stx) { + DBENTER(BXE_VERBOSE_STATS); + + /* Only the port management function (PMF) does this work. 
*/ + if ((sc->port.pmf == 0) || !sc->port.port_stx) { BXE_PRINTF("%s(%d): Invalid statistcs port setup!\n", __FILE__, __LINE__); - return; + goto bxe_stats_port_base_init_exit; } stats_comp = BXE_SP(sc, stats_comp); + sc->executer_idx = 0; - sc->executer_idx = 0; /* dmae clients */ - + /* DMA the address of the drivers port statistics block. */ dmae = BXE_SP(sc, dmae[sc->executer_idx++]); dmae->opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC | DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE | @@ -7352,8 +7384,18 @@ bxe_stats_port_base_init(struct bxe_softc *sc) *stats_comp = 0; bxe_stats_hw_post(sc); bxe_stats_comp(sc); + +bxe_stats_port_base_init_exit: + DBEXIT(BXE_VERBOSE_STATS); } +/* + * Setup the adrress used by the driver to report function-based statistics + * back to the controller. + * + * Returns: + * None. + */ static void bxe_stats_func_base_init(struct bxe_softc *sc) { @@ -7361,12 +7403,22 @@ bxe_stats_func_base_init(struct bxe_softc *sc) int vn, vn_max; uint32_t func_stx; + DBENTER(BXE_VERBOSE_STATS); + + /* Only the port management function (PMF) does this work. */ + if ((sc->port.pmf == 0) || !sc->func_stx) { + BXE_PRINTF("%s(%d): Invalid statistcs function setup!\n", + __FILE__, __LINE__); + goto bxe_stats_func_base_init_exit; + } + port = BP_PORT(sc); func_stx = sc->func_stx; vn_max = IS_E1HMF(sc) ? E1HVN_MAX : E1VN_MAX; + /* Initialize each function individually. */ for (vn = VN_0; vn < vn_max; vn++) { - func = 2*vn + port; + func = 2 * vn + port; sc->func_stx = SHMEM_RD(sc, func_mb[func].fw_mb_param); bxe_stats_func_init(sc); bxe_stats_hw_post(sc); @@ -7374,20 +7426,38 @@ bxe_stats_func_base_init(struct bxe_softc *sc) } sc->func_stx = func_stx; + +bxe_stats_func_base_init_exit: + DBEXIT(BXE_VERBOSE_STATS); } +/* + * DMA the function-based statistics to the controller. + * + * Returns: + * None. 
+ */ static void bxe_stats_func_base_update(struct bxe_softc *sc) { uint32_t *stats_comp; struct dmae_command *dmae; + DBENTER(BXE_VERBOSE_STATS); + + /* Only the port management function (PMF) does this work. */ + if ((sc->port.pmf == 0) || !sc->func_stx) { + BXE_PRINTF("%s(%d): Invalid statistcs function update!\n", + __FILE__, __LINE__); + goto bxe_stats_func_base_update_exit; + } + dmae = &sc->stats_dmae; stats_comp = BXE_SP(sc, stats_comp); - sc->executer_idx = 0; memset(dmae, 0, sizeof(struct dmae_command)); + /* DMA the function statistics from the driver to the H/W. */ dmae->opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI | DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE | DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET | @@ -7410,6 +7480,9 @@ bxe_stats_func_base_update(struct bxe_softc *sc) *stats_comp = 0; bxe_stats_hw_post(sc); bxe_stats_comp(sc); + +bxe_stats_func_base_update_exit: + DBEXIT(BXE_VERBOSE_STATS); } @@ -7428,7 +7501,7 @@ bxe_stats_init(struct bxe_softc *sc) DBENTER(BXE_VERBOSE_STATS); if (sc->stats_enable == FALSE) - return; + goto bxe_stats_init_exit; port = BP_PORT(sc); func = BP_FUNC(sc); @@ -7436,19 +7509,21 @@ bxe_stats_init(struct bxe_softc *sc) sc->stats_counter = 0; sc->stats_pending = 0; - /* Fetch the offset of port statistics in shared memory. */ - if (BP_NOMCP(sc)){ + /* Fetch the offset of port & function statistics in shared memory. */ + if (NOMCP(sc)){ sc->port.port_stx = 0; sc->func_stx = 0; } else{ sc->port.port_stx = SHMEM_RD(sc, port_mb[port].port_stx); sc->func_stx = SHMEM_RD(sc, func_mb[func].fw_mb_param); } - /* If this is still 0 then no management firmware running. */ + DBPRINT(sc, BXE_VERBOSE_STATS, "%s(): sc->port.port_stx = 0x%08X\n", __FUNCTION__, sc->port.port_stx); + DBPRINT(sc, BXE_VERBOSE_STATS, "%s(): sc->func_stx = 0x%08X\n", + __FUNCTION__, sc->func_stx); - /* port stats */ + /* Port statistics. 
*/ memset(&(sc->port.old_nig_stats), 0, sizeof(struct nig_stats)); sc->port.old_nig_stats.brb_discard = REG_RD(sc, NIG_REG_STAT0_BRB_DISCARD + port * 0x38); @@ -7459,10 +7534,11 @@ bxe_stats_init(struct bxe_softc *sc) REG_RD_DMAE(sc, NIG_REG_STAT0_EGRESS_MAC_PKT1 + port * 0x50, &(sc->port.old_nig_stats.egress_mac_pkt1_lo), 2); - /* function stats */ + /* Function statistics. */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; - /* Clear function statistics memory. */ + + /* Clear all per-queue statistics. */ memset(&fp->old_tclient, 0, sizeof(struct tstorm_per_client_stats)); memset(&fp->old_uclient, 0, @@ -7473,18 +7549,21 @@ bxe_stats_init(struct bxe_softc *sc) sizeof(struct bxe_q_stats)); } + /* ToDo: Clear any driver specific statistics? */ + sc->stats_state = STATS_STATE_DISABLED; - /* Init port statistics if we're the port management function. */ - if (sc->port.pmf) { - /* Port_stx are in 57710 when ncsi presnt & always in 57711.*/ + if (sc->port.pmf == 1) { + /* Init port & function stats if we're PMF. */ if (sc->port.port_stx) bxe_stats_port_base_init(sc); if (sc->func_stx) bxe_stats_func_base_init(sc); } else if (sc->func_stx) + /* Update function stats if we're not PMF. */ bxe_stats_func_base_update(sc); +bxe_stats_init_exit: DBEXIT(BXE_VERBOSE_STATS); } @@ -7548,9 +7627,10 @@ bxe_stats_hw_post(struct bxe_softc *sc) } /* + * Delay routine which polls for the DMA engine to complete. * * Returns: - * 1 + * 0 = Failure, !0 = Success */ static int bxe_stats_comp(struct bxe_softc *sc) @@ -7562,6 +7642,7 @@ bxe_stats_comp(struct bxe_softc *sc) stats_comp = BXE_SP(sc, stats_comp); cnt = 10; + while (*stats_comp != DMAE_COMP_VAL) { if (!cnt) { BXE_PRINTF("%s(%d): Timeout waiting for statistics " @@ -7573,11 +7654,12 @@ bxe_stats_comp(struct bxe_softc *sc) } DBEXIT(BXE_VERBOSE_STATS); + /* ToDo: Shouldn't this return the value of cnt? */ return (1); } /* - * Initialize port statistics. + * DMA port statistcs from controller to driver. * * Returns: * None. 
@@ -7595,13 +7677,14 @@ bxe_stats_pmf_update(struct bxe_softc *sc) loader_idx = PMF_DMAE_C(sc); /* We shouldn't be here if any of the following are false. */ - if (!IS_E1HMF(sc) || !sc->port.pmf || !sc->port.port_stx) { - DBPRINT(sc, BXE_WARN, "%s(): Bug!\n", __FUNCTION__); + if (!IS_E1HMF(sc) || (sc->port.pmf == 0) || !sc->port.port_stx) { + BXE_PRINTF("%s(%d): Statistics bug!\n", __FILE__, __LINE__); goto bxe_stats_pmf_update_exit; } sc->executer_idx = 0; + /* Instruct DMA engine to copy port statistics from H/W to driver. */ opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI | DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE | DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET | @@ -7638,6 +7721,7 @@ bxe_stats_pmf_update(struct bxe_softc *sc) dmae->comp_addr_hi = U64_HI(BXE_SP_MAPPING(sc, stats_comp)); dmae->comp_val = DMAE_COMP_VAL; + /* Start the DMA and wait for the result. */ *stats_comp = 0; bxe_stats_hw_post(sc); bxe_stats_comp(sc); @@ -7647,7 +7731,10 @@ bxe_stats_pmf_update_exit: } /* - * Prepare the DMAE parameters required for port statistics. + * Prepare the DMAE parameters required for all statistics. + * + * This function should only be called by the driver instance + * that is designated as the port management function (PMF). * * Returns: * None. @@ -7666,8 +7753,8 @@ bxe_stats_port_init(struct bxe_softc *sc) loader_idx = PMF_DMAE_C(sc); stats_comp = BXE_SP(sc, stats_comp); - /* Sanity check. */ - if (!sc->link_vars.link_up || !sc->port.pmf) { + /* Only the port management function (PMF) does this work. */ + if (!sc->link_vars.link_up || (sc->port.pmf == 0)) { BXE_PRINTF("%s(%d): Invalid statistics port setup!\n", __FILE__, __LINE__); goto bxe_stats_port_init_exit; @@ -7675,7 +7762,7 @@ bxe_stats_port_init(struct bxe_softc *sc) sc->executer_idx = 0; - /* Setup statistics reporting to MCP. */ + /* The same opcde is used for multiple DMA operations. 
*/ opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC | DMAE_CMD_C_DST_GRC | DMAE_CMD_C_ENABLE | DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET | @@ -7728,7 +7815,7 @@ bxe_stats_port_init(struct bxe_softc *sc) (vn << DMAE_CMD_E1HVN_SHIFT)); if (sc->link_vars.mac_type == MAC_TYPE_BMAC) { - /* Enable statistics for the BMAC. */ + /* Enable statistics for the 10Gb BMAC. */ mac_addr = (port ? NIG_REG_INGRESS_BMAC1_MEM : NIG_REG_INGRESS_BMAC0_MEM); @@ -7764,7 +7851,7 @@ bxe_stats_port_init(struct bxe_softc *sc) dmae->comp_val = 1; } else if (sc->link_vars.mac_type == MAC_TYPE_EMAC) { - /* Enable statistics for the EMAC. */ + /* Enable statistics for the 1Gb EMAC. */ mac_addr = (port ? GRCBASE_EMAC1 : GRCBASE_EMAC0); @@ -7873,6 +7960,8 @@ bxe_stats_port_init_exit: /* * Prepare the DMAE parameters required for function statistics. * + * This function is called by all driver instances. + * * Returns: * None. */ @@ -7884,15 +7973,14 @@ bxe_stats_func_init(struct bxe_softc *sc) DBENTER(BXE_VERBOSE_STATS); - dmae = &sc->stats_dmae; - stats_comp = BXE_SP(sc, stats_comp); - if (!sc->func_stx) { BXE_PRINTF("%s(%d): Invalid statistics function setup!\n", __FILE__, __LINE__); goto bxe_stats_func_init_exit; } + dmae = &sc->stats_dmae; + stats_comp = BXE_SP(sc, stats_comp); sc->executer_idx = 0; memset(dmae, 0, sizeof(struct dmae_command)); @@ -7924,6 +8012,8 @@ bxe_stats_func_init_exit: } /* + * Starts a statistics update DMA and waits for completion. + * * Returns: * None. */ @@ -7933,9 +8023,8 @@ bxe_stats_start(struct bxe_softc *sc) DBENTER(BXE_VERBOSE_STATS); - if (sc->port.pmf) + if (sc->port.pmf == 1) bxe_stats_port_init(sc); - else if (sc->func_stx) bxe_stats_func_init(sc); @@ -7978,6 +8067,7 @@ bxe_stats_restart(struct bxe_softc *sc) } /* + * Update the Big MAC (10Gb BMAC) statistics. * * Returns: * None. 
@@ -7987,7 +8077,7 @@ bxe_stats_bmac_update(struct bxe_softc *sc) { struct bmac_stats *new; struct host_port_stats *pstats; - struct bxe_eth_stats *estats; + struct bxe_port_stats *estats; struct regpair diff; DBENTER(BXE_INSANE_STATS); @@ -7996,19 +8086,32 @@ bxe_stats_bmac_update(struct bxe_softc *sc) pstats = BXE_SP(sc, port_stats); estats = &sc->eth_stats; - UPDATE_STAT64(rx_stat_grerb, rx_stat_ifhcinbadoctets); - UPDATE_STAT64(rx_stat_grfcs, rx_stat_dot3statsfcserrors); - UPDATE_STAT64(rx_stat_grund, rx_stat_etherstatsundersizepkts); - UPDATE_STAT64(rx_stat_grovr, rx_stat_dot3statsframestoolong); - UPDATE_STAT64(rx_stat_grfrg, rx_stat_etherstatsfragments); - UPDATE_STAT64(rx_stat_grjbr, rx_stat_etherstatsjabbers); - UPDATE_STAT64(rx_stat_grxcf, rx_stat_maccontrolframesreceived); - UPDATE_STAT64(rx_stat_grxpf, rx_stat_xoffstateentered); - UPDATE_STAT64(rx_stat_grxpf, rx_stat_bmac_xpf); - UPDATE_STAT64(tx_stat_gtxpf, tx_stat_outxoffsent); - UPDATE_STAT64(tx_stat_gtxpf, tx_stat_flowcontroldone); - UPDATE_STAT64(tx_stat_gt64, tx_stat_etherstatspkts64octets); - UPDATE_STAT64(tx_stat_gt127, tx_stat_etherstatspkts65octetsto127octets); + UPDATE_STAT64(rx_stat_grerb, + rx_stat_ifhcinbadoctets); + UPDATE_STAT64(rx_stat_grfcs, + rx_stat_dot3statsfcserrors); + UPDATE_STAT64(rx_stat_grund, + rx_stat_etherstatsundersizepkts); + UPDATE_STAT64(rx_stat_grovr, + rx_stat_dot3statsframestoolong); + UPDATE_STAT64(rx_stat_grfrg, + rx_stat_etherstatsfragments); + UPDATE_STAT64(rx_stat_grjbr, + rx_stat_etherstatsjabbers); + UPDATE_STAT64(rx_stat_grxcf, + rx_stat_maccontrolframesreceived); + UPDATE_STAT64(rx_stat_grxpf, + rx_stat_xoffstateentered); + UPDATE_STAT64(rx_stat_grxpf, + rx_stat_bmac_xpf); + UPDATE_STAT64(tx_stat_gtxpf, + tx_stat_outxoffsent); + UPDATE_STAT64(tx_stat_gtxpf, + tx_stat_flowcontroldone); + UPDATE_STAT64(tx_stat_gt64, + tx_stat_etherstatspkts64octets); + UPDATE_STAT64(tx_stat_gt127, + tx_stat_etherstatspkts65octetsto127octets); UPDATE_STAT64(tx_stat_gt255, 
tx_stat_etherstatspkts128octetsto255octets); UPDATE_STAT64(tx_stat_gt511, @@ -8017,19 +8120,23 @@ bxe_stats_bmac_update(struct bxe_softc *sc) tx_stat_etherstatspkts512octetsto1023octets); UPDATE_STAT64(tx_stat_gt1518, tx_stat_etherstatspkts1024octetsto1522octets); - UPDATE_STAT64(tx_stat_gt2047, tx_stat_bmac_2047); - UPDATE_STAT64(tx_stat_gt4095, tx_stat_bmac_4095); - UPDATE_STAT64(tx_stat_gt9216, tx_stat_bmac_9216); - UPDATE_STAT64(tx_stat_gt16383, tx_stat_bmac_16383); + UPDATE_STAT64(tx_stat_gt2047, + tx_stat_bmac_2047); + UPDATE_STAT64(tx_stat_gt4095, + tx_stat_bmac_4095); + UPDATE_STAT64(tx_stat_gt9216, + tx_stat_bmac_9216); + UPDATE_STAT64(tx_stat_gt16383, + tx_stat_bmac_16383); UPDATE_STAT64(tx_stat_gterr, tx_stat_dot3statsinternalmactransmiterrors); - UPDATE_STAT64(tx_stat_gtufl, tx_stat_bmac_ufl); + UPDATE_STAT64(tx_stat_gtufl, + tx_stat_bmac_ufl); estats->pause_frames_received_hi = pstats->mac_stx[1].rx_stat_bmac_xpf_hi; estats->pause_frames_received_lo = pstats->mac_stx[1].rx_stat_bmac_xpf_lo; - estats->pause_frames_sent_hi = pstats->mac_stx[1].tx_stat_outxoffsent_hi; estats->pause_frames_sent_lo = @@ -8039,6 +8146,8 @@ bxe_stats_bmac_update(struct bxe_softc *sc) } /* + * Update the Ethernet MAC (1Gb EMAC) statistics. + * * Returns: * None. 
*/ @@ -8047,7 +8156,7 @@ bxe_stats_emac_update(struct bxe_softc *sc) { struct emac_stats *new; struct host_port_stats *pstats; - struct bxe_eth_stats *estats; + struct bxe_port_stats *estats; DBENTER(BXE_INSANE_STATS); @@ -8092,9 +8201,9 @@ bxe_stats_emac_update(struct bxe_softc *sc) estats->pause_frames_received_lo = pstats->mac_stx[1].rx_stat_xonpauseframesreceived_lo; ADD_64(estats->pause_frames_received_hi, - pstats->mac_stx[1].rx_stat_xoffpauseframesreceived_hi, - estats->pause_frames_received_lo, - pstats->mac_stx[1].rx_stat_xoffpauseframesreceived_lo); + pstats->mac_stx[1].rx_stat_xoffpauseframesreceived_hi, + estats->pause_frames_received_lo, + pstats->mac_stx[1].rx_stat_xoffpauseframesreceived_lo); estats->pause_frames_sent_hi = pstats->mac_stx[1].tx_stat_outxonsent_hi; @@ -8117,7 +8226,7 @@ bxe_stats_hw_update(struct bxe_softc *sc) { struct nig_stats *new, *old; struct host_port_stats *pstats; - struct bxe_eth_stats *estats; + struct bxe_port_stats *estats; struct regpair diff; uint32_t nig_timer_max; int rc; @@ -8162,12 +8271,15 @@ bxe_stats_hw_update(struct bxe_softc *sc) pstats->host_port_stats_start = ++pstats->host_port_stats_end; - nig_timer_max = SHMEM_RD(sc, port_mb[BP_PORT(sc)].stat_nig_timer); - if (nig_timer_max != estats->nig_timer_max) { - estats->nig_timer_max = nig_timer_max; - DBPRINT(sc, BXE_WARN, - "%s(): NIG timer reached max value (%u)!\n", __FUNCTION__, - estats->nig_timer_max); + if (!NOMCP(sc)) { + nig_timer_max = + SHMEM_RD(sc, port_mb[BP_PORT(sc)].stat_nig_timer); + if (nig_timer_max != estats->nig_timer_max) { + estats->nig_timer_max = nig_timer_max; + DBPRINT(sc, BXE_WARN, + "%s(): NIG timer reached max value (%u)!\n", + __FUNCTION__, estats->nig_timer_max); + } } bxe_stats_hw_update_exit: @@ -8179,12 +8291,15 @@ bxe_stats_hw_update_exit: * Returns: * 0 = Success, !0 = Failure. 
*/ +// DRC - Done static int bxe_stats_storm_update(struct bxe_softc *sc) { int rc, i, cl_id; struct eth_stats_query *stats; + struct bxe_port_stats *estats; struct host_func_stats *fstats; + struct bxe_q_stats *qstats; struct tstorm_per_port_stats *tport; struct tstorm_per_client_stats *tclient; struct ustorm_per_client_stats *uclient; @@ -8192,72 +8307,66 @@ bxe_stats_storm_update(struct bxe_softc *sc) struct tstorm_per_client_stats *old_tclient; struct ustorm_per_client_stats *old_uclient; struct xstorm_per_client_stats *old_xclient; - struct bxe_eth_stats *estats; - struct bxe_q_stats *qstats; struct bxe_fastpath * fp; uint32_t diff; DBENTER(BXE_INSANE_STATS); rc = 0; + diff = 0; stats = BXE_SP(sc, fw_stats); tport = &stats->tstorm_common.port_statistics; - fstats = BXE_SP(sc, func_stats); + memcpy(&(fstats->total_bytes_received_hi), &(BXE_SP(sc, func_stats_base)->total_bytes_received_hi), - sizeof(struct host_func_stats) - 2*sizeof(uint32_t)); + sizeof(struct host_func_stats) - 2 * sizeof(uint32_t)); - diff = 0; estats = &sc->eth_stats; estats->no_buff_discard_hi = 0; estats->no_buff_discard_lo = 0; estats->error_bytes_received_hi = 0; estats->error_bytes_received_lo = 0; -/* estats->etherstatsoverrsizepkts_hi = 0; + estats->etherstatsoverrsizepkts_hi = 0; estats->etherstatsoverrsizepkts_lo = 0; -*/ + for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; cl_id = fp->cl_id; tclient = &stats->tstorm_common.client_statistics[cl_id]; - uclient = &stats->ustorm_common.client_statistics[cl_id]; - xclient = &stats->xstorm_common.client_statistics[cl_id]; old_tclient = &fp->old_tclient; + uclient = &stats->ustorm_common.client_statistics[cl_id]; old_uclient = &fp->old_uclient; + xclient = &stats->xstorm_common.client_statistics[cl_id]; old_xclient = &fp->old_xclient; qstats = &fp->eth_q_stats; - /* Are STORM statistics valid? */ + /* Are TSTORM statistics valid? 
*/ if ((uint16_t)(le16toh(tclient->stats_counter) + 1) != sc->stats_counter) { -#if 0 DBPRINT(sc, BXE_WARN, "%s(): Stats not updated by TSTORM " "(tstorm counter (%d) != stats_counter (%d))!\n", __FUNCTION__, tclient->stats_counter, sc->stats_counter); -#endif rc = 1; goto bxe_stats_storm_update_exit; } + /* Are USTORM statistics valid? */ if ((uint16_t)(le16toh(uclient->stats_counter) + 1) != sc->stats_counter) { -#if 0 DBPRINT(sc, BXE_WARN, "%s(): Stats not updated by USTORM " "(ustorm counter (%d) != stats_counter (%d))!\n", __FUNCTION__, uclient->stats_counter, sc->stats_counter); -#endif rc = 2; goto bxe_stats_storm_update_exit; } + /* Are XSTORM statistics valid? */ if ((uint16_t)(le16toh(xclient->stats_counter) + 1) != sc->stats_counter) { -#if 0 DBPRINT(sc, BXE_WARN, "%s(): Stats not updated by XSTORM " "(xstorm counter (%d) != stats_counter (%d))!\n", __FUNCTION__, xclient->stats_counter, sc->stats_counter); -#endif rc = 3; goto bxe_stats_storm_update_exit; } @@ -8313,9 +8422,8 @@ bxe_stats_storm_update(struct bxe_softc *sc) total_multicast_packets_received); UPDATE_EXTEND_TSTAT(rcv_broadcast_pkts, total_broadcast_packets_received); -/* UPDATE_EXTEND_TSTAT(packets_too_big_discard, - etherstatsoverrsizepkts); -*/ + UPDATE_EXTEND_TSTAT(packets_too_big_discard, + etherstatsoverrsizepkts); UPDATE_EXTEND_TSTAT(no_buff_discard, no_buff_discard); SUB_EXTEND_USTAT(ucast_no_buff_pkts, @@ -8329,19 +8437,19 @@ bxe_stats_storm_update(struct bxe_softc *sc) UPDATE_EXTEND_USTAT(bcast_no_buff_pkts, no_buff_discard); qstats->total_bytes_transmitted_hi = - (xclient->unicast_bytes_sent.hi); + le32toh(xclient->unicast_bytes_sent.hi); qstats->total_bytes_transmitted_lo = - (xclient->unicast_bytes_sent.lo); + le32toh(xclient->unicast_bytes_sent.lo); ADD_64(qstats->total_bytes_transmitted_hi, - (xclient->multicast_bytes_sent.hi), + le32toh(xclient->multicast_bytes_sent.hi), qstats->total_bytes_transmitted_lo, - (xclient->multicast_bytes_sent.lo)); + 
le32toh(xclient->multicast_bytes_sent.lo)); ADD_64(qstats->total_bytes_transmitted_hi, - (xclient->broadcast_bytes_sent.hi), + le32toh(xclient->broadcast_bytes_sent.hi), qstats->total_bytes_transmitted_lo, - (xclient->broadcast_bytes_sent.lo)); + le32toh(xclient->broadcast_bytes_sent.lo)); UPDATE_EXTEND_XSTAT(unicast_pkts_sent, total_unicast_packets_transmitted); @@ -8356,63 +8464,72 @@ bxe_stats_storm_update(struct bxe_softc *sc) old_tclient->ttl0_discard = tclient->ttl0_discard; ADD_64(fstats->total_bytes_received_hi, - qstats->total_bytes_received_hi, - fstats->total_bytes_received_lo, - qstats->total_bytes_received_lo); + qstats->total_bytes_received_hi, + fstats->total_bytes_received_lo, + qstats->total_bytes_received_lo); ADD_64(fstats->total_bytes_transmitted_hi, - qstats->total_bytes_transmitted_hi, - fstats->total_bytes_transmitted_lo, - qstats->total_bytes_transmitted_lo); + qstats->total_bytes_transmitted_hi, + fstats->total_bytes_transmitted_lo, + qstats->total_bytes_transmitted_lo); ADD_64(fstats->total_unicast_packets_received_hi, - qstats->total_unicast_packets_received_hi, - fstats->total_unicast_packets_received_lo, - qstats->total_unicast_packets_received_lo); + qstats->total_unicast_packets_received_hi, + fstats->total_unicast_packets_received_lo, + qstats->total_unicast_packets_received_lo); ADD_64(fstats->total_multicast_packets_received_hi, - qstats->total_multicast_packets_received_hi, - fstats->total_multicast_packets_received_lo, - qstats->total_multicast_packets_received_lo); + qstats->total_multicast_packets_received_hi, + fstats->total_multicast_packets_received_lo, + qstats->total_multicast_packets_received_lo); ADD_64(fstats->total_broadcast_packets_received_hi, - qstats->total_broadcast_packets_received_hi, - fstats->total_broadcast_packets_received_lo, - qstats->total_broadcast_packets_received_lo); + qstats->total_broadcast_packets_received_hi, + fstats->total_broadcast_packets_received_lo, + 
qstats->total_broadcast_packets_received_lo); ADD_64(fstats->total_unicast_packets_transmitted_hi, - qstats->total_unicast_packets_transmitted_hi, - fstats->total_unicast_packets_transmitted_lo, - qstats->total_unicast_packets_transmitted_lo); + qstats->total_unicast_packets_transmitted_hi, + fstats->total_unicast_packets_transmitted_lo, + qstats->total_unicast_packets_transmitted_lo); ADD_64(fstats->total_multicast_packets_transmitted_hi, - qstats->total_multicast_packets_transmitted_hi, - fstats->total_multicast_packets_transmitted_lo, - qstats->total_multicast_packets_transmitted_lo); + qstats->total_multicast_packets_transmitted_hi, + fstats->total_multicast_packets_transmitted_lo, + qstats->total_multicast_packets_transmitted_lo); ADD_64(fstats->total_broadcast_packets_transmitted_hi, - qstats->total_broadcast_packets_transmitted_hi, - fstats->total_broadcast_packets_transmitted_lo, - qstats->total_broadcast_packets_transmitted_lo); + qstats->total_broadcast_packets_transmitted_hi, + fstats->total_broadcast_packets_transmitted_lo, + qstats->total_broadcast_packets_transmitted_lo); ADD_64(fstats->valid_bytes_received_hi, - qstats->valid_bytes_received_hi, - fstats->valid_bytes_received_lo, - qstats->valid_bytes_received_lo); + qstats->valid_bytes_received_hi, + fstats->valid_bytes_received_lo, + qstats->valid_bytes_received_lo); ADD_64(estats->error_bytes_received_hi, - qstats->error_bytes_received_hi, - estats->error_bytes_received_lo, - qstats->error_bytes_received_lo); - - ADD_64(estats->no_buff_discard_hi, qstats->no_buff_discard_hi, - estats->no_buff_discard_lo, qstats->no_buff_discard_lo); + qstats->error_bytes_received_hi, + estats->error_bytes_received_lo, + qstats->error_bytes_received_lo); + ADD_64(estats->etherstatsoverrsizepkts_hi, + qstats->etherstatsoverrsizepkts_hi, + estats->etherstatsoverrsizepkts_lo, + qstats->etherstatsoverrsizepkts_lo); + ADD_64(estats->no_buff_discard_hi, + qstats->no_buff_discard_hi, + estats->no_buff_discard_lo, + 
qstats->no_buff_discard_lo); } ADD_64(fstats->total_bytes_received_hi, - estats->rx_stat_ifhcinbadoctets_hi, - fstats->total_bytes_received_lo, - estats->rx_stat_ifhcinbadoctets_lo); + estats->rx_stat_ifhcinbadoctets_hi, + fstats->total_bytes_received_lo, + estats->rx_stat_ifhcinbadoctets_lo); memcpy(estats, &(fstats->total_bytes_received_hi), - sizeof(struct host_func_stats) - 2*sizeof(uint32_t)); + sizeof(struct host_func_stats) - 2 * sizeof(uint32_t)); + ADD_64(estats->etherstatsoverrsizepkts_hi, + estats->rx_stat_dot3statsframestoolong_hi, + estats->etherstatsoverrsizepkts_lo, + estats->rx_stat_dot3statsframestoolong_lo); ADD_64(estats->error_bytes_received_hi, - estats->rx_stat_ifhcinbadoctets_hi, - estats->error_bytes_received_lo, - estats->rx_stat_ifhcinbadoctets_lo); + estats->rx_stat_ifhcinbadoctets_hi, + estats->error_bytes_received_lo, + estats->rx_stat_ifhcinbadoctets_lo); if (sc->port.pmf) { estats->mac_filter_discard = @@ -8431,7 +8548,7 @@ bxe_stats_storm_update(struct bxe_softc *sc) bxe_stats_storm_update_exit: DBEXIT(BXE_INSANE_STATS); - return(rc); + return (rc); } /* @@ -8444,7 +8561,7 @@ static void bxe_stats_net_update(struct bxe_softc *sc) { struct tstorm_per_client_stats *old_tclient; - struct bxe_eth_stats *estats; + struct bxe_port_stats *estats; struct ifnet *ifp; DBENTER(BXE_INSANE_STATS); @@ -8469,7 +8586,6 @@ bxe_stats_net_update(struct bxe_softc *sc) (u_long) estats->no_buff_discard_lo + (u_long) estats->mac_discard + (u_long) estats->rx_stat_etherstatsundersizepkts_lo + - (u_long) estats->jabber_packets_received + (u_long) estats->brb_drop_lo + (u_long) estats->brb_truncate_discard + (u_long) estats->rx_stat_dot3statsfcserrors_lo + @@ -8515,7 +8631,7 @@ bxe_stats_update(struct bxe_softc *sc) goto bxe_stats_update_exit; /* Check for any hardware statistics updates. */ - if (sc->port.pmf) + if (sc->port.pmf == 1) update = (bxe_stats_hw_update(sc) == 0); /* Check for any STORM statistics updates. 
*/ @@ -8637,10 +8753,11 @@ bxe_stats_stop(struct bxe_softc *sc) DBENTER(BXE_VERBOSE_STATS); update = 0; + /* Wait for any pending completions. */ bxe_stats_comp(sc); - if (sc->port.pmf) + if (sc->port.pmf == 1) update = (bxe_stats_hw_update(sc) == 0); update |= (bxe_stats_storm_update(sc) == 0); @@ -8648,7 +8765,7 @@ bxe_stats_stop(struct bxe_softc *sc) if (update) { bxe_stats_net_update(sc); - if (sc->port.pmf) + if (sc->port.pmf == 1) bxe_stats_port_stop(sc); bxe_stats_hw_post(sc); @@ -8667,7 +8784,8 @@ bxe_stats_stop(struct bxe_softc *sc) static void bxe_stats_do_nothing(struct bxe_softc *sc) { - + DBENTER(BXE_VERBOSE_STATS); + DBEXIT(BXE_VERBOSE_STATS); } static const struct { @@ -8701,9 +8819,10 @@ bxe_stats_handle(struct bxe_softc *sc, enum bxe_stats_event event) { enum bxe_stats_state state; - DBENTER(BXE_INSANE_STATS); + DBENTER(BXE_EXTREME_STATS); state = sc->stats_state; + #ifdef BXE_DEBUG if (event != STATS_EVENT_UPDATE) DBPRINT(sc, BXE_VERBOSE_STATS, @@ -8720,7 +8839,7 @@ bxe_stats_handle(struct bxe_softc *sc, enum bxe_stats_event event) __FUNCTION__, sc->stats_state); #endif - DBEXIT(BXE_INSANE_STATS); + DBEXIT(BXE_EXTREME_STATS); } /* @@ -8798,167 +8917,137 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) struct eth_tx_bd *tx_data_bd; struct eth_tx_bd *tx_total_pkt_size_bd; struct eth_tx_start_bd *tx_start_bd; - uint16_t etype, bd_prod, pkt_prod, total_pkt_size; + uint16_t etype, sw_tx_bd_prod, sw_pkt_prod, total_pkt_size; +// uint16_t bd_index, pkt_index; uint8_t mac_type; - int i, e_hlen, error, nsegs, rc, nbds, vlan_off, ovlan; + int i, defragged, e_hlen, error, nsegs, rc, nbds, vlan_off, ovlan; struct bxe_softc *sc; sc = fp->sc; DBENTER(BXE_VERBOSE_SEND); - rc = nbds = ovlan = vlan_off = total_pkt_size = 0; + DBRUN(M_ASSERTPKTHDR(*m_head)); m0 = *m_head; - - tx_total_pkt_size_bd = NULL; + rc = defragged = nbds = ovlan = vlan_off = total_pkt_size = 0; tx_start_bd = NULL; tx_data_bd = NULL; tx_parse_bd = NULL; + tx_total_pkt_size_bd = 
NULL; - pkt_prod = fp->tx_pkt_prod; - bd_prod = TX_BD(fp->tx_bd_prod); + /* Get the H/W pointer (0 to 65535) for packets and BD's. */ + sw_pkt_prod = fp->tx_pkt_prod; + sw_tx_bd_prod = fp->tx_bd_prod; - mac_type = UNICAST_ADDRESS; + /* Create the S/W index (0 to MAX_TX_BD) for packets and BD's. */ +// pkt_index = TX_BD(sw_pkt_prod); +// bd_index = TX_BD(sw_tx_bd_prod); -#ifdef BXE_DEBUG - int debug_prod; - DBRUN(debug_prod = bd_prod); -#endif + mac_type = UNICAST_ADDRESS; /* Map the mbuf into the next open DMAable memory. */ - map = fp->tx_mbuf_map[TX_BD(pkt_prod)]; + map = fp->tx_mbuf_map[TX_BD(sw_pkt_prod)]; error = bus_dmamap_load_mbuf_sg(fp->tx_mbuf_tag, map, m0, segs, &nsegs, BUS_DMA_NOWAIT); - do{ - /* Handle any mapping errors. */ - if(__predict_false(error)){ - fp->tx_dma_mapping_failure++; - if (error == ENOMEM) { - /* Resource issue, try again later. */ - rc = ENOMEM; - }else if (error == EFBIG) { - /* Possibly recoverable. */ - fp->mbuf_defrag_attempts++; - m0 = m_defrag(*m_head, M_DONTWAIT); - if (m0 == NULL) { - fp->mbuf_defrag_failures++; - rc = ENOBUFS; - } else { - /* Defrag was successful, try mapping again.*/ - fp->mbuf_defrag_successes++; - *m_head = m0; - error = - bus_dmamap_load_mbuf_sg( - fp->tx_mbuf_tag, map, m0, - segs, &nsegs, BUS_DMA_NOWAIT); - if (error) { - fp->tx_dma_mapping_failure++; - rc = error; - } - } - }else { - /* Unrecoverable. */ - DBPRINT(sc, BXE_WARN_SEND, - "%s(): Unknown TX mapping error! " - "rc = %d.\n", __FUNCTION__, error); - DBRUN(bxe_dump_mbuf(sc, m0)); - rc = error; - } - - break; - } - - /* Make sure this enough room in the send queue. */ - if (__predict_false((nsegs + 2) > - (USABLE_TX_BD - fp->used_tx_bd))) { - fp->tx_queue_too_full++; - bus_dmamap_unload(fp->tx_mbuf_tag, map); - rc = ENOBUFS; - break; - } - - /* Now make sure it fits in the pkt window */ - if (__predict_false(nsegs > 12)) { - - /* - * The mbuf may be to big for the controller - * to handle. 
If the frame is a TSO frame - * we'll need to do an additional check. - */ - if(m0->m_pkthdr.csum_flags & CSUM_TSO){ - if (bxe_chktso_window(sc,nsegs,segs,m0) == 0) - /* OK to send. */ - break; - else - fp->window_violation_tso++; - } else - fp->window_violation_std++; - - /* - * If this is a standard frame then defrag is - * required. Unmap the mbuf, defrag it, then - * try mapping it again. - */ + /* Handle any mapping errors. */ + if(__predict_false(error != 0)){ + fp->tx_dma_mapping_failure++; + if (error == ENOMEM) { + /* Resource issue, try again later. */ + rc = ENOMEM; + } else if (error == EFBIG) { + /* Possibly recoverable with defragmentation. */ fp->mbuf_defrag_attempts++; - bus_dmamap_unload(fp->tx_mbuf_tag, map); m0 = m_defrag(*m_head, M_DONTWAIT); if (m0 == NULL) { fp->mbuf_defrag_failures++; rc = ENOBUFS; - break; + } else { + /* Defrag successful, try mapping again.*/ + *m_head = m0; + error = bus_dmamap_load_mbuf_sg( + fp->tx_mbuf_tag, map, m0, + segs, &nsegs, BUS_DMA_NOWAIT); + if (error) { + fp->tx_dma_mapping_failure++; + rc = error; + } } + } else { + /* Unknown, unrecoverable mapping error. */ + DBPRINT(sc, BXE_WARN_SEND, + "%s(): Unknown TX mapping error! " + "rc = %d.\n", __FUNCTION__, error); + DBRUN(bxe_dump_mbuf(sc, m0)); + rc = error; + } - /* Defrag was successful, try mapping again. */ - fp->mbuf_defrag_successes++; - *m_head = m0; - error = - bus_dmamap_load_mbuf_sg( - fp->tx_mbuf_tag, map, m0, - segs, &nsegs, BUS_DMA_NOWAIT); - - /* Handle any mapping errors. */ - if (__predict_false(error)) { - fp->tx_dma_mapping_failure++; - rc = error; - break; - } + goto bxe_tx_encap_continue; + } - /* Last try */ - if (m0->m_pkthdr.csum_flags & CSUM_TSO){ - if (bxe_chktso_window(sc,nsegs,segs,m0) == 1) - rc = ENOBUFS; - } else if (nsegs > 12 ){ - rc = ENOBUFS; - } else - rc = 0; - } - }while (0); + /* Make sure there's enough room in the send queue. 
*/ + if (__predict_false((nsegs + 2) > + (USABLE_TX_BD - fp->tx_bd_used))) { + /* Recoverable, try again later. */ + fp->tx_hw_queue_full++; + bus_dmamap_unload(fp->tx_mbuf_tag, map); + rc = ENOMEM; + goto bxe_tx_encap_continue; + } + + /* Capture the current H/W TX chain high watermark. */ + if (__predict_false(fp->tx_hw_max_queue_depth < + fp->tx_bd_used)) + fp->tx_hw_max_queue_depth = fp->tx_bd_used; + + /* Now make sure it fits in the packet window. */ + if (__predict_false(nsegs > 12)) { + /* + * The mbuf may be to big for the controller + * to handle. If the frame is a TSO frame + * we'll need to do an additional check. + */ + if(m0->m_pkthdr.csum_flags & CSUM_TSO){ + if (bxe_chktso_window(sc,nsegs,segs,m0) == 0) + /* OK to send. */ + goto bxe_tx_encap_continue; + else + fp->tx_window_violation_tso++; + } else + fp->tx_window_violation_std++; + /* No sense trying to defrag again, we'll drop the frame. */ + if (defragged > 0) + rc = ENODEV; + } + +bxe_tx_encap_continue: /* Check for errors */ if (rc){ if(rc == ENOMEM){ /* Recoverable try again later */ }else{ - fp->soft_tx_errors++; - DBRUN(fp->tx_mbuf_alloc--); + fp->tx_soft_errors++; + fp->tx_mbuf_alloc--; m_freem(*m_head); *m_head = NULL; } - return (rc); + goto bxe_tx_encap_exit; } - /* We're committed to sending the frame, update the counter. */ - fp->tx_pkt_prod++; + /* Save the mbuf and mapping. */ + fp->tx_mbuf_ptr[TX_BD(sw_pkt_prod)] = m0; + fp->tx_mbuf_map[TX_BD(sw_pkt_prod)] = map; - /* set flag according to packet type (UNICAST_ADDRESS is default)*/ + /* Set flag according to packet type (UNICAST_ADDRESS is default). */ if (m0->m_flags & M_BCAST) mac_type = BROADCAST_ADDRESS; else if (m0->m_flags & M_MCAST) mac_type = MULTICAST_ADDRESS; - /* Prepare the first transmit BD for the mbuf(Get a link from the chain). */ - tx_start_bd = &fp->tx_bd_chain[TX_PAGE(bd_prod)][TX_IDX(bd_prod)].start_bd; + /* Prepare the first transmit (Start) BD for the mbuf. 
*/ + tx_start_bd = &fp->tx_chain[TX_BD(sw_tx_bd_prod)].start_bd; tx_start_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); tx_start_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); @@ -8970,32 +9059,29 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) tx_start_bd->general_data |= (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); - nbds = nsegs + 1; /* Add 1 for parsing bd. Assuming nseg > 0 */ + /* All frames have at least Start BD + Parsing BD. */ + nbds = nsegs + 1; tx_start_bd->nbd = htole16(nbds); if (m0->m_flags & M_VLANTAG) { -// vlan_off += ETHER_VLAN_ENCAP_LEN; tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_VLAN_TAG; tx_start_bd->vlan = htole16(m0->m_pkthdr.ether_vtag); - DBPRINT(sc, BXE_VERBOSE_SEND, "%s(): Inserting VLAN tag %d\n", - __FUNCTION__, m0->m_pkthdr.ether_vtag); - } - else + } else /* * In cases where the VLAN tag is not used the firmware * expects to see a packet counter in the VLAN tag field * Failure to do so will cause an assertion which will * stop the controller. */ - tx_start_bd->vlan = htole16(pkt_prod); + tx_start_bd->vlan = htole16(fp->tx_pkt_prod); /* - * Add a parsing BD from the chain. The parsing bd is always added, - * however, it is only used for tso & chksum. + * Add a parsing BD from the chain. The parsing BD is always added, + * however, it is only used for TSO & chksum. */ - bd_prod = TX_BD(NEXT_TX_BD(bd_prod)); + sw_tx_bd_prod = NEXT_TX_BD(sw_tx_bd_prod); tx_parse_bd = (struct eth_tx_parse_bd *) - &fp->tx_bd_chain[TX_PAGE(bd_prod)][TX_IDX(bd_prod)].parse_bd; + &fp->tx_chain[TX_BD(sw_tx_bd_prod)].parse_bd; memset(tx_parse_bd, 0, sizeof(struct eth_tx_parse_bd)); /* Gather all info about the packet and add to tx_parse_bd */ @@ -9006,7 +9092,7 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) uint16_t flags = 0; struct udphdr *uh = NULL; - /* Map the Ethernet header to find the type & header length. */ + /* Map Ethernet header to find type & header length. 
*/ eh = mtod(m0, struct ether_vlan_header *); /* Handle VLAN encapsulation if present. */ @@ -9024,23 +9110,22 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) ETH_TX_PARSE_BD_LLC_SNAP_EN_SHIFT); switch (etype) { - case ETHERTYPE_IP:{ - /* if mbuf's len < 20bytes, the ip_hdr is in next mbuf*/ + case ETHERTYPE_IP: + /* If mbuf len < 20bytes, IP header is in next mbuf. */ if (m0->m_len < sizeof(struct ip)) - ip = (struct ip *)m0->m_next->m_data; + ip = (struct ip *) m0->m_next->m_data; else - ip = (struct ip *)(m0->m_data + e_hlen); + ip = (struct ip *) (m0->m_data + e_hlen); /* Calculate IP header length (16 bit words). */ tx_parse_bd->ip_hlen = (ip->ip_hl << 1); /* Calculate enet + IP header length (16 bit words). */ - tx_parse_bd->total_hlen = tx_parse_bd->ip_hlen + (e_hlen >> 1); + tx_parse_bd->total_hlen = tx_parse_bd->ip_hlen + + (e_hlen >> 1); if (m0->m_pkthdr.csum_flags & CSUM_IP) { - DBPRINT(sc, BXE_EXTREME_SEND, "%s(): IP checksum " - "enabled.\n", __FUNCTION__); - fp->offload_frames_csum_ip++; + fp->tx_offload_frames_csum_ip++; flags |= ETH_TX_BD_FLAGS_IP_CSUM; } @@ -9048,132 +9133,130 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) if ((m0->m_pkthdr.csum_flags & CSUM_TCP)|| (m0->m_pkthdr.csum_flags & CSUM_TSO)){ - /* Perform TCP checksum offload. */ - DBPRINT(sc, BXE_EXTREME_SEND, "%s(): TCP checksum " - "enabled.\n", __FUNCTION__); - /* Get the TCP header. */ - th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); + th = (struct tcphdr *)((caddr_t)ip + + (ip->ip_hl << 2)); /* Add the TCP checksum offload flag. */ flags |= ETH_TX_BD_FLAGS_L4_CSUM; - fp->offload_frames_csum_tcp++; + fp->tx_offload_frames_csum_tcp++; /* Update the enet + IP + TCP header length. */ - tx_parse_bd->total_hlen += (uint16_t)(th->th_off << 1); + tx_parse_bd->total_hlen += + (uint16_t)(th->th_off << 1); /* Get the pseudo header checksum. 
*/ - tx_parse_bd->tcp_pseudo_csum = ntohs(th->th_sum); + tx_parse_bd->tcp_pseudo_csum = + ntohs(th->th_sum); + } else if (m0->m_pkthdr.csum_flags & CSUM_UDP) { /* - * The hardware doesn't actually support UDP checksum - * offload but we can fake it by doing TCP checksum - * offload and factoring out the extra bytes that are - * different between the TCP header and the UDP header. - * calculation will begin 10 bytes before the actual - * start of the UDP header. To work around this we - * need to calculate the checksum of the 10 bytes - * before the UDP header and factor that out of the - * UDP pseudo header checksum before asking the H/W - * to calculate the full UDP checksum. + * The hardware doesn't actually support UDP + * checksum offload but we can fake it by + * doing TCP checksum offload and factoring + * out the extra bytes that are different + * between the TCP header and the UDP header. + * + * Calculation will begin 10 bytes before the + * actual start of the UDP header. To work + * around this we need to calculate the + * checksum of the 10 bytes before the UDP + * header and factor that out of the UDP + * pseudo header checksum before asking the + * H/W to calculate the full UDP checksum. */ uint16_t tmp_csum; uint32_t *tmp_uh; /* This value is 10. */ - uint8_t fix = (uint8_t) (offsetof(struct tcphdr, th_sum) - - (int) offsetof(struct udphdr, uh_sum)); - - /* Perform UDP checksum offload. */ - DBPRINT(sc, BXE_EXTREME_SEND, "%s(): UDP checksum " - "enabled.\n", __FUNCTION__); + uint8_t fix = (uint8_t) (offsetof(struct tcphdr, th_sum) - + (int) offsetof(struct udphdr, uh_sum)); - /* Add the TCP checksum offload flag for UDP frames too. 
*/ + /* + * Add the TCP checksum offload flag for + * UDP frames too.* + */ flags |= ETH_TX_BD_FLAGS_L4_CSUM; - fp->offload_frames_csum_udp++; - tx_parse_bd->global_data |= ETH_TX_PARSE_BD_UDP_CS_FLG; + fp->tx_offload_frames_csum_udp++; + tx_parse_bd->global_data |= + ETH_TX_PARSE_BD_UDP_CS_FLG; /* Get a pointer to the UDP header. */ - uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); + uh = (struct udphdr *)((caddr_t)ip + + (ip->ip_hl << 2)); - /* Set a pointer 10 bytes before the actual UDP header. */ - tmp_uh = (uint32_t *)((uint8_t *)uh - fix); + /* Set pointer 10 bytes before UDP header. */ + tmp_uh = (uint32_t *)((uint8_t *)uh - + fix); /* - * Calculate a pseudo header checksum over the 10 bytes - * before the UDP header. + * Calculate a pseudo header checksum over + * the 10 bytes before the UDP header. */ tmp_csum = in_pseudo(ntohl(*tmp_uh), - ntohl(*(tmp_uh + 1)), - ntohl((*(tmp_uh + 2)) & 0x0000FFFF)); + ntohl(*(tmp_uh + 1)), + ntohl((*(tmp_uh + 2)) & 0x0000FFFF)); /* Update the enet + IP + UDP header length. */ - tx_parse_bd->total_hlen += (sizeof(struct udphdr) >> 1); - tx_parse_bd->tcp_pseudo_csum = ~in_addword(uh->uh_sum, ~tmp_csum); + tx_parse_bd->total_hlen += + (sizeof(struct udphdr) >> 1); + tx_parse_bd->tcp_pseudo_csum = + ~in_addword(uh->uh_sum, ~tmp_csum); } - /* Update the flags settings for VLAN/Offload. */ + /* Update the offload flags. */ tx_start_bd->bd_flags.as_bitfield |= flags; - break; - } + case ETHERTYPE_IPV6: - fp->unsupported_tso_request_ipv6++; - /* DRC - How to handle this error? */ + fp->tx_unsupported_tso_request_ipv6++; + /* ToDo: Add IPv6 support. */ break; default: - fp->unsupported_tso_request_not_tcp++; - /* DRC - How to handle this error? */ + fp->tx_unsupported_tso_request_not_tcp++; + /* ToDo - How to handle this error? 
*/ } /* Setup the Parsing BD with TSO specific info */ if (m0->m_pkthdr.csum_flags & CSUM_TSO) { - uint16_t hdr_len = tx_parse_bd->total_hlen << 1; - DBPRINT(sc, BXE_EXTREME_SEND, "%s(): TSO is enabled.\n", - __FUNCTION__); - - tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO; + tx_start_bd->bd_flags.as_bitfield |= + ETH_TX_BD_FLAGS_SW_LSO; + fp->tx_offload_frames_tso++; - fp->offload_frames_tso++; - if (__predict_false(tx_start_bd->nbytes > hdr_len)) { + /* ToDo: Does this really help? */ + if (__predict_false(tx_start_bd->nbytes > hdr_len)) { + fp->tx_header_splits++; /* * Split the first BD into 2 BDs to make the - * FW job easy... + * firmwares job easy... */ tx_start_bd->nbd++; DBPRINT(sc, BXE_EXTREME_SEND, "%s(): TSO split headr size is %d (%x:%x) nbds %d\n", - __FUNCTION__, tx_start_bd->nbytes, tx_start_bd->addr_hi, + __FUNCTION__, tx_start_bd->nbytes, + tx_start_bd->addr_hi, tx_start_bd->addr_lo, nbds); - bd_prod = TX_BD(NEXT_TX_BD(bd_prod)); - - /* Get a new transmit BD (after the tx_parse_bd) and fill it. */ - tx_data_bd = &fp->tx_bd_chain[TX_PAGE(bd_prod)][TX_IDX(bd_prod)].reg_bd; - tx_data_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr + hdr_len)); - tx_data_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr + hdr_len)); - tx_data_bd->nbytes = htole16(segs[0].ds_len) - hdr_len; + sw_tx_bd_prod = NEXT_TX_BD(sw_tx_bd_prod); + + /* New transmit BD (after the tx_parse_bd). */ + tx_data_bd = + &fp->tx_chain[TX_BD(sw_tx_bd_prod)].reg_bd; + tx_data_bd->addr_hi = + htole32(U64_HI(segs[0].ds_addr + hdr_len)); + tx_data_bd->addr_lo = + htole32(U64_LO(segs[0].ds_addr + hdr_len)); + tx_data_bd->nbytes = + htole16(segs[0].ds_len) - hdr_len; if (tx_total_pkt_size_bd == NULL) tx_total_pkt_size_bd = tx_data_bd; - - /* - * This indicates that the transmit BD - * has no individual mapping and the - * FW ignores this flag in a BD that is - * not marked with the start flag. 
- */ - - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): TSO split data size is %d (%x:%x)\n", - __FUNCTION__, tx_data_bd->nbytes, - tx_data_bd->addr_hi, tx_data_bd->addr_lo); } /* - * For TSO the controller needs the following info: + * The controller needs the following info for TSO: * MSS, tcp_send_seq, ip_id, and tcp_pseudo_csum. */ tx_parse_bd->lso_mss = htole16(m0->m_pkthdr.tso_segsz); @@ -9190,10 +9273,10 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) } } - /* Prepare Remaining BDs. Start_tx_bd contains first seg(frag). */ + /* Prepare remaining BDs. Start_tx_bd contains first seg (frag). */ for (i = 1; i < nsegs ; i++) { - bd_prod = TX_BD(NEXT_TX_BD(bd_prod)); - tx_data_bd = &fp->tx_bd_chain[TX_PAGE(bd_prod)][TX_IDX(bd_prod)].reg_bd; + sw_tx_bd_prod = NEXT_TX_BD(sw_tx_bd_prod); + tx_data_bd = &fp->tx_chain[TX_BD(sw_tx_bd_prod)].reg_bd; tx_data_bd->addr_lo = htole32(U64_LO(segs[i].ds_addr)); tx_data_bd->addr_hi = htole32(U64_HI(segs[i].ds_addr)); tx_data_bd->nbytes = htole16(segs[i].ds_len); @@ -9205,56 +9288,27 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) if(tx_total_pkt_size_bd != NULL) tx_total_pkt_size_bd->total_pkt_bytes = total_pkt_size; - /* Update bd producer index value for next tx */ - bd_prod = TX_BD(NEXT_TX_BD(bd_prod)); - DBRUNMSG(BXE_EXTREME_SEND, bxe_dump_tx_chain(fp, debug_prod, nbds)); - - /* - * Ensure that the mbuf pointer for this - * transmission is placed at the array index - * of the last descriptor in this chain. - * This is done because a single map is used - * for all segments of the mbuf and we don't - * want to unload the map before all of the - * segments have been freed. - */ - fp->tx_mbuf_ptr[TX_BD(pkt_prod)] = m0; + /* Update TX BD producer index value for next TX */ + sw_tx_bd_prod = NEXT_TX_BD(sw_tx_bd_prod); - fp->used_tx_bd += nbds; + /* Update the used TX BD counter. */ + fp->tx_bd_used += nbds; /* - * Ring the tx doorbell, counting the next - * bd if the packet contains or ends with it. 
+ * If the chain of tx_bd's describing this frame + * is adjacent to or spans an eth_tx_next_bd element + * then we need to increment the nbds value. */ - if(TX_IDX(bd_prod) < nbds) + if(TX_IDX(sw_tx_bd_prod) < nbds) nbds++; -//BXE_PRINTF("nsegs:%d, tpktsz:0x%x\n",nsegs, total_pkt_size) ; - - /* - * Update the buffer descriptor producer count and the packet - * producer count in doorbell data memory (eth_tx_db_data) then - * ring the doorbell. - */ -/* fp->hw_tx_prods->bds_prod = - htole16(le16toh(fp->hw_tx_prods->bds_prod) + nbds); -*/ - - /* Don't allow reordering of writes for nbd and packets. */ mb(); -/* - fp->hw_tx_prods->packets_prod = - htole32(le32toh(fp->hw_tx_prods->packets_prod) + 1); -*/ -// DOORBELL(sc, fp->index, 0); - -// BXE_PRINTF("doorbell: nbd %d bd %u index %d\n", nbds, bd_prod, fp->index); - fp->tx_db.data.prod += nbds; /* Producer points to the next free tx_bd at this point. */ - fp->tx_bd_prod = bd_prod; + fp->tx_pkt_prod++; + fp->tx_bd_prod = sw_tx_bd_prod; DOORBELL(sc, fp->index, fp->tx_db.raw); @@ -9268,8 +9322,9 @@ bxe_tx_encap(struct bxe_fastpath *fp, struct mbuf **m_head) bus_space_barrier(sc->bxe_db_btag, sc->bxe_db_bhandle, 0, 0, BUS_SPACE_BARRIER_READ); +bxe_tx_encap_exit: DBEXIT(BXE_VERBOSE_SEND); - return(rc); + return (rc); } @@ -9291,7 +9346,7 @@ bxe_tx_start(struct ifnet *ifp) /* Exit if the transmit queue is full or link down. */ if (((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) || !sc->link_vars.link_up) { - DBPRINT(sc, BXE_VERBOSE_SEND, + DBPRINT(sc, BXE_WARN, "%s(): No link or TX queue full, ignoring " "transmit request.\n", __FUNCTION__); goto bxe_tx_start_exit; @@ -9336,7 +9391,7 @@ bxe_tx_start_locked(struct ifnet *ifp, struct bxe_fastpath *fp) break; /* The transmit mbuf now belongs to us, keep track of it. */ - DBRUN(fp->tx_mbuf_alloc++); + fp->tx_mbuf_alloc++; /* * Pack the data into the transmit ring. 
If we @@ -9354,8 +9409,8 @@ bxe_tx_start_locked(struct ifnet *ifp, struct bxe_fastpath *fp) */ ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); - DBRUN(fp->tx_mbuf_alloc--); - sc->eth_stats.driver_xoff++; + fp->tx_mbuf_alloc--; + fp->tx_queue_xoff++; } else { } @@ -9375,8 +9430,6 @@ bxe_tx_start_locked(struct ifnet *ifp, struct bxe_fastpath *fp) if (tx_count > 0) /* Reset the TX watchdog timeout timer. */ fp->watchdog_timer = BXE_TX_TIMEOUT; - else - fp->tx_start_called_on_empty_queue++; DBEXIT(BXE_EXTREME_SEND); } @@ -9391,41 +9444,27 @@ bxe_tx_start_locked(struct ifnet *ifp, struct bxe_fastpath *fp) static int bxe_tx_mq_start(struct ifnet *ifp, struct mbuf *m) { - struct bxe_softc *sc; - struct bxe_fastpath *fp; - int fp_index, rc; + struct bxe_softc *sc; + struct bxe_fastpath *fp; + int fp_index, rc; sc = ifp->if_softc; - fp_index = 0; - DBENTER(BXE_EXTREME_SEND); - /* Map the flow ID to a queue number. */ - if ((m->m_flags & M_FLOWID) != 0) { - fp_index = m->m_pkthdr.flowid % sc->num_queues; - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Found flowid %d\n", - __FUNCTION__, fp_index); - } else { - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): No flowid found, using %d\n", - __FUNCTION__, fp_index); - } + fp_index = 0; + /* If using flow ID, assign the TX queue based on the flow ID. */ + if ((m->m_flags & M_FLOWID) != 0) + fp_index = m->m_pkthdr.flowid % sc->num_queues; /* Select the fastpath TX queue for the frame. */ fp = &sc->fp[fp_index]; - /* Exit if the transmit queue is full or link down. */ + /* Skip H/W enqueue if transmit queue is full or link down. */ if (((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) || !sc->link_vars.link_up) { - /* We're stuck with the mbuf. Stash it for now. */ - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): TX queue full/link down, " - "parking mbuf...\n", __FUNCTION__); + /* Stash the mbuf if we can. */ rc = drbr_enqueue(ifp, fp->br, m); - /* DRC - Setup a task to try again. 
*/ - /* taskqueue_enqueue(tq, task); */ goto bxe_tx_mq_start_exit; } @@ -9435,12 +9474,13 @@ bxe_tx_mq_start(struct ifnet *ifp, struct mbuf *m) bxe_tx_mq_start_exit: DBEXIT(BXE_EXTREME_SEND); - return(rc); + return (rc); } /* - * Multiqueue (TSS) transmit routine. + * Multiqueue (TSS) transmit routine. This routine is responsible + * for adding a frame to the hardware's transmit queue. * * Returns: * 0 if transmit succeeds, !0 otherwise. @@ -9451,55 +9491,40 @@ bxe_tx_mq_start_locked(struct ifnet *ifp, { struct bxe_softc *sc; struct mbuf *next; - int depth, rc = 0, tx_count = 0; + int depth, rc, tx_count; sc = fp->sc; - DBENTER(BXE_EXTREME_SEND); + + rc = tx_count = 0; + + /* Fetch the depth of the driver queue. */ depth = drbr_inuse(ifp, fp->br); - if (depth > fp->max_drbr_queue_depth) { - fp->max_drbr_queue_depth = depth; - } - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): fp[%02d], drbr queue depth=%d\n", - __FUNCTION__, fp->index, depth); + if (depth > fp->tx_max_drbr_queue_depth) + fp->tx_max_drbr_queue_depth = depth; BXE_FP_LOCK_ASSERT(fp); if (m == NULL) { - /* Check for any other work. */ - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): No initial work, dequeue mbuf...\n", - __FUNCTION__); + /* No new work, check for pending frames. */ next = drbr_dequeue(ifp, fp->br); } else if (drbr_needs_enqueue(ifp, fp->br)) { - /* Work pending, queue mbuf to maintain packet order. */ - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Found queued data pending...\n", - __FUNCTION__); - if ((rc = drbr_enqueue(ifp, fp->br, m)) != 0) { - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Enqueue failed...\n", - __FUNCTION__); + /* Both new and pending work, maintain packet order. */ + rc = drbr_enqueue(ifp, fp->br, m); + if (rc != 0) { + fp->tx_soft_errors++; goto bxe_tx_mq_start_locked_exit; } - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Dequeueing old mbuf...\n", - __FUNCTION__); next = drbr_dequeue(ifp, fp->br); - } else { - /* Work with the mbuf we have. 
*/ - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Start with current mbuf...\n", - __FUNCTION__); + } else + /* New work only, nothing pending. */ next = m; - } /* Keep adding entries while there are frames to send. */ while (next != NULL) { /* The transmit mbuf now belongs to us, keep track of it. */ - DBRUN(fp->tx_mbuf_alloc++); + fp->tx_mbuf_alloc++; /* * Pack the data into the transmit ring. If we @@ -9507,9 +9532,8 @@ bxe_tx_mq_start_locked(struct ifnet *ifp, * head of the TX queue, set the OACTIVE flag, * and wait for the NIC to drain the chain. */ - if (__predict_false(bxe_tx_encap(fp, &next))) { - DBPRINT(sc, BXE_WARN, "%s(): TX encap failure...\n", - __FUNCTION__); + rc = bxe_tx_encap(fp, &next); + if (__predict_false(rc != 0)) { fp->tx_encap_failures++; /* Very Bad Frames(tm) may have been dropped. */ if (next != NULL) { @@ -9518,12 +9542,11 @@ bxe_tx_mq_start_locked(struct ifnet *ifp, * the frame. */ ifp->if_drv_flags |= IFF_DRV_OACTIVE; - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Save mbuf for another time...\n", - __FUNCTION__); + fp->tx_frame_deferred++; + + /* This may reorder frame. */ rc = drbr_enqueue(ifp, fp->br, next); - DBRUN(fp->tx_mbuf_alloc--); - sc->eth_stats.driver_xoff++; + fp->tx_mbuf_alloc--; } /* Stop looking for more work. */ @@ -9536,27 +9559,27 @@ bxe_tx_mq_start_locked(struct ifnet *ifp, /* Send a copy of the frame to any BPF listeners. */ BPF_MTAP(ifp, next); - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Check for queued mbufs...\n", - __FUNCTION__); + /* Handle any completions if we're running low. */ + if (fp->tx_bd_used >= BXE_TX_CLEANUP_THRESHOLD) + bxe_txeof(fp); + + /* Close TX since there's so little room left. */ + if (fp->tx_bd_used >= BXE_TX_CLEANUP_THRESHOLD) { + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + break; + } + next = drbr_dequeue(ifp, fp->br); } - DBPRINT(sc, BXE_EXTREME_SEND, - "%s(): Enqueued %d mbufs...\n", - __FUNCTION__, tx_count); - /* No TX packets were dequeued. 
*/ - if (tx_count > 0) { + if (tx_count > 0) /* Reset the TX watchdog timeout timer. */ fp->watchdog_timer = BXE_TX_TIMEOUT; - } else { - fp->tx_start_called_on_empty_queue++; - } bxe_tx_mq_start_locked_exit: DBEXIT(BXE_EXTREME_SEND); - return(rc); + return (rc); } @@ -9575,10 +9598,11 @@ bxe_mq_flush(struct ifnet *ifp) for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; - DBPRINT(sc, BXE_VERBOSE_UNLOAD, "%s(): Clearing fp[%02d]...\n", - __FUNCTION__, fp->index); - if (fp->br != NULL) { + DBPRINT(sc, BXE_VERBOSE_UNLOAD, + "%s(): Clearing fp[%02d]...\n", + __FUNCTION__, fp->index); + BXE_FP_LOCK(fp); while ((m = buf_ring_dequeue_sc(fp->br)) != NULL) m_freem(m); @@ -9607,7 +9631,7 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) int error, mask, reinit; sc = ifp->if_softc; - DBENTER(BXE_EXTREME_MISC); + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_MISC); ifr = (struct ifreq *)data; error = 0; @@ -9616,72 +9640,65 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) switch (command) { case SIOCSIFMTU: /* Set the MTU. */ - DBPRINT(sc, BXE_EXTREME_MISC, "%s(): Received SIOCSIFMTU\n", + DBPRINT(sc, BXE_VERBOSE_MISC, "%s(): Received SIOCSIFMTU\n", __FUNCTION__); /* Check that the MTU setting is supported. */ if ((ifr->ifr_mtu < BXE_MIN_MTU) || - (ifr->ifr_mtu > BXE_JUMBO_MTU)) { - DBPRINT(sc, BXE_WARN, "%s(): Unsupported MTU " - "(%d < %d < %d)!\n", __FUNCTION__, BXE_MIN_MTU, - ifr->ifr_mtu, BXE_JUMBO_MTU); + (ifr->ifr_mtu > BXE_JUMBO_MTU)) { error = EINVAL; break; } BXE_CORE_LOCK(sc); ifp->if_mtu = ifr->ifr_mtu; - bxe_change_mtu(sc, ifp->if_drv_flags & IFF_DRV_RUNNING); BXE_CORE_UNLOCK(sc); + + reinit = 1; break; case SIOCSIFFLAGS: /* Toggle the interface state up or down. */ - DBPRINT(sc, BXE_EXTREME_MISC, "%s(): Received SIOCSIFFLAGS\n", + DBPRINT(sc, BXE_VERBOSE_MISC, "%s(): Received SIOCSIFFLAGS\n", __FUNCTION__); BXE_CORE_LOCK(sc); - /* Check if the interface is up. 
*/ if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - /* - * Change the promiscuous/multicast flags as - * necessary. - */ + /* Set promiscuous/multicast flags. */ bxe_set_rx_mode(sc); } else { /* Start the HW */ bxe_init_locked(sc, LOAD_NORMAL); } } else { - /* - * The interface is down. Check if the driver is - * running. - */ + /* Bring down the interface. */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) bxe_stop_locked(sc, UNLOAD_NORMAL); } BXE_CORE_UNLOCK(sc); + break; case SIOCADDMULTI: case SIOCDELMULTI: /* Add/Delete multicast addresses. */ - DBPRINT(sc, BXE_EXTREME_MISC, + DBPRINT(sc, BXE_VERBOSE_MISC, "%s(): Received SIOCADDMULTI/SIOCDELMULTI\n", __FUNCTION__); BXE_CORE_LOCK(sc); - - /* Don't bother unless the driver's running. */ + /* Check if the interface is up. */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) + /* Set receive mode flags. */ bxe_set_rx_mode(sc); - BXE_CORE_UNLOCK(sc); + break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: /* Set/Get Interface media */ - DBPRINT(sc, BXE_EXTREME_MISC, + DBPRINT(sc, BXE_VERBOSE_MISC, "%s(): Received SIOCSIFMEDIA/SIOCGIFMEDIA\n", __FUNCTION__); + error = ifmedia_ioctl(ifp, ifr, &sc->bxe_ifmedia, command); break; case SIOCSIFCAP: @@ -9697,13 +9714,13 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* Toggle the LRO capabilites enable flag. */ if (mask & IFCAP_LRO) { - if (TPA_ENABLED(sc)) { - ifp->if_capenable ^= IFCAP_LRO; - sc->bxe_flags ^= BXE_TPA_ENABLE_FLAG; - DBPRINT(sc, BXE_INFO_MISC, - "%s(): Toggling LRO (bxe_flags = " - "0x%08X).\n", __FUNCTION__, sc->bxe_flags); - } + ifp->if_capenable ^= IFCAP_LRO; + sc->bxe_flags ^= BXE_TPA_ENABLE_FLAG; + DBPRINT(sc, BXE_INFO_MISC, + "%s(): Toggling LRO (bxe_flags = " + "0x%08X).\n", __FUNCTION__, sc->bxe_flags); + + /* LRO requires different buffer setup. */ reinit = 1; } @@ -9735,6 +9752,7 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* Toggle VLAN_MTU capabilities enable flag. 
*/ if (mask & IFCAP_VLAN_MTU) { + /* ToDo: Is this really true? */ BXE_PRINTF("%s(%d): Changing VLAN_MTU not supported.\n", __FILE__, __LINE__); error = EINVAL; @@ -9742,6 +9760,7 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* Toggle VLANHWTAG capabilities enabled flag. */ if (mask & IFCAP_VLAN_HWTAGGING) { + /* ToDo: Is this really true? */ BXE_PRINTF( "%s(%d): Changing VLAN_HWTAGGING not supported!\n", __FILE__, __LINE__); @@ -9758,27 +9777,22 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) /* Toggle TSO6 capabilities enabled flag. */ if (mask & IFCAP_TSO6) { - DBPRINT(sc, BXE_VERBOSE_MISC, - "%s(): Toggling IFCAP_TSO6.\n", __FUNCTION__); - - ifp->if_capenable ^= IFCAP_TSO6; - } - - /* Handle any other capabilities. */ - if (mask & ~(IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | - IFCAP_RXCSUM | IFCAP_TXCSUM)) { - BXE_PRINTF("%s(%d): Unsupported capability!\n", + /* ToDo: Add TSO6 support. */ + BXE_PRINTF( + "%s(%d): Changing TSO6 not supported!\n", __FILE__, __LINE__); - error = EINVAL; - } - - /* Restart the controller with the new capabilities. */ - if (reinit) { - bxe_stop_locked(sc, UNLOAD_NORMAL); - bxe_init_locked(sc, LOAD_NORMAL); } - BXE_CORE_UNLOCK(sc); + + /* + * ToDo: Look into supporting: + * VLAN_HWFILTER + * VLAN_HWCSUM + * VLAN_HWTSO + * POLLING + * WOL[_UCAST|_MCAST|_MAGIC] + * + */ break; default: /* We don't know how to handle the IOCTL, pass it on. */ @@ -9786,7 +9800,15 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) break; } - DBEXIT(BXE_EXTREME_MISC); + /* Restart the controller with the new capabilities. */ + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && (reinit != 0)) { + BXE_CORE_LOCK(sc); + bxe_stop_locked(sc, UNLOAD_NORMAL); + bxe_init_locked(sc, LOAD_NORMAL); + BXE_CORE_UNLOCK(sc); + } + + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_MISC); return (error); } @@ -9798,7 +9820,7 @@ bxe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) * caller. 
* * Returns: - * The adjusted value of *fp->rx_cons_sb. + * The adjusted value of *fp->rx_cons_sb. */ static __inline uint16_t bxe_rx_cq_cons(struct bxe_fastpath *fp) @@ -9816,7 +9838,7 @@ bxe_rx_cq_cons(struct bxe_fastpath *fp) * need to adjust the value accordingly. */ if ((rx_cq_cons_sb & USABLE_RCQ_ENTRIES_PER_PAGE) == - USABLE_RCQ_ENTRIES_PER_PAGE) + USABLE_RCQ_ENTRIES_PER_PAGE) rx_cq_cons_sb++; return (rx_cq_cons_sb); @@ -9827,7 +9849,7 @@ bxe_has_tx_work(struct bxe_fastpath *fp) { rmb(); - return (((fp->tx_pkt_prod != le16toh(*fp->tx_cons_sb)) || \ + return (((fp->tx_pkt_prod != le16toh(*fp->tx_pkt_cons_sb)) || \ (fp->tx_pkt_prod != fp->tx_pkt_cons))); } @@ -9836,8 +9858,8 @@ bxe_has_tx_work(struct bxe_fastpath *fp) * completion queue. * * Returns: - * 0 = No received frames pending, !0 = Received frames - * pending + * 0 = No received frames pending, !0 = Received frames + * pending */ static __inline int bxe_has_rx_work(struct bxe_fastpath *fp) @@ -9860,7 +9882,6 @@ bxe_task_sp(void *xsc, int pending) uint32_t sp_status; sc = xsc; - DBENTER(BXE_EXTREME_INTR); DBPRINT(sc, BXE_EXTREME_INTR, "%s(): pending = %d.\n", __FUNCTION__, pending); @@ -9897,8 +9918,6 @@ bxe_task_sp(void *xsc, int pending) IGU_INT_NOP, 1); bxe_ack_sb(sc, DEF_SB_ID, TSTORM_ID, le16toh(sc->def_t_idx), IGU_INT_ENABLE, 1); - - DBEXIT(BXE_EXTREME_INTR); } @@ -9931,9 +9950,6 @@ bxe_intr_legacy(void *xsc) if (fp_status == 0) goto bxe_intr_legacy_exit; - /* Need to weed out calls due to shared interrupts. */ - DBENTER(BXE_EXTREME_INTR); - /* Handle the fastpath interrupt. */ /* * sb_id = 0 for ustorm, 1 for cstorm. @@ -9945,9 +9961,8 @@ bxe_intr_legacy(void *xsc) */ mask = (0x2 << fp->sb_id); - DBPRINT(sc, BXE_EXTREME_INTR, - "%s(): fp_status = 0x%08X, mask = 0x%08X\n", __FUNCTION__, - fp_status, mask); + DBPRINT(sc, BXE_INSANE_INTR, "%s(): fp_status = 0x%08X, mask = " + "0x%08X\n", __FUNCTION__, fp_status, mask); /* CSTORM event means fastpath completion. 
*/ if (fp_status & mask) { @@ -10004,7 +10019,9 @@ bxe_intr_sp(void *xsc) struct bxe_softc *sc; sc = xsc; - DBENTER(BXE_EXTREME_INTR); + + DBPRINT(sc, BXE_INSANE_INTR, "%s(%d): Slowpath interrupt.\n", + __FUNCTION__, curcpu); /* Don't handle any interrupts if we're not ready. */ if (__predict_false(sc->intr_sem != 0)) @@ -10021,7 +10038,7 @@ bxe_intr_sp(void *xsc) #endif bxe_intr_sp_exit: - DBEXIT(BXE_EXTREME_INTR); + return; } /* @@ -10041,10 +10058,8 @@ bxe_intr_fp (void *xfp) fp = xfp; sc = fp->sc; - DBENTER(BXE_EXTREME_INTR); - - DBPRINT(sc, BXE_VERBOSE_INTR, - "%s(%d): MSI-X vector on fp[%d].sb_id = %d\n", + DBPRINT(sc, BXE_INSANE_INTR, + "%s(%d): fp[%02d].sb_id = %d interrupt.\n", __FUNCTION__, curcpu, fp->index, fp->sb_id); /* Don't handle any interrupts if we're not ready. */ @@ -10060,7 +10075,7 @@ bxe_intr_fp (void *xfp) #endif bxe_intr_fp_exit: - DBEXIT(BXE_EXTREME_INTR); + return; } /* @@ -10080,12 +10095,7 @@ bxe_task_fp (void *xfp, int pending) fp = xfp; sc = fp->sc; - DBENTER(BXE_EXTREME_INTR); - - DBPRINT(sc, BXE_EXTREME_INTR, "%s(): pending = %d.\n", __FUNCTION__, - pending); - - DBPRINT(sc, BXE_EXTREME_INTR, "%s(%d): Fastpath task on fp[%d]" + DBPRINT(sc, BXE_EXTREME_INTR, "%s(%d): Fastpath task on fp[%02d]" ".sb_id = %d\n", __FUNCTION__, curcpu, fp->index, fp->sb_id); /* Update the fast path indices */ @@ -10105,8 +10115,6 @@ bxe_task_fp (void *xfp, int pending) /* Acknowledge the fastpath status block indices. 
*/ bxe_ack_sb(sc, fp->sb_id, USTORM_ID, fp->fp_u_idx, IGU_INT_NOP, 1); bxe_ack_sb(sc, fp->sb_id, CSTORM_ID, fp->fp_c_idx, IGU_INT_ENABLE, 1); - - DBEXIT(BXE_EXTREME_INTR); } /* @@ -10120,12 +10128,8 @@ bxe_zero_sb(struct bxe_softc *sc, int sb_id) { int port; - port = BP_PORT(sc); - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_INTR); - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): Clearing sb_id = %d on port %d.\n", __FUNCTION__, sb_id, - port); + port = BP_PORT(sc); /* "CSTORM" */ bxe_init_fill(sc, CSEM_REG_FAST_MEMORY + @@ -10151,13 +10155,14 @@ bxe_init_sb(struct bxe_softc *sc, struct host_status_block *sb, uint64_t section; int func, index, port; + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_INTR); + port = BP_PORT(sc); func = BP_FUNC(sc); - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_INTR); DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_INTR), - "%s(): Initializing sb_id = %d on port %d, function %d.\n", - __FUNCTION__, sb_id, port, func); + "%s(): Initializing sb_id = %d on port %d, function %d.\n", + __FUNCTION__, sb_id, port, func); /* Setup the USTORM status block. */ section = ((uint64_t)mapping) + offsetof(struct host_status_block, @@ -10418,300 +10423,580 @@ bxe_update_coalesce(struct bxe_softc *sc) } /* - * Free memory buffers from the TPA pool. + * Allocate an mbuf and assign it to the TPA pool. 
* * Returns: - * None + * 0 = Success, !0 = Failure + * + * Modifies: + * fp->tpa_mbuf_ptr[queue] + * fp->tpa_mbuf_map[queue] + * fp->tpa_mbuf_segs[queue] */ -static __inline void -bxe_free_tpa_pool(struct bxe_fastpath *fp, int last) +static int +bxe_alloc_tpa_mbuf(struct bxe_fastpath *fp, int queue) { struct bxe_softc *sc; - int j; + bus_dma_segment_t segs[1]; + bus_dmamap_t map; + struct mbuf *m; + int nsegs, rc; sc = fp->sc; + DBENTER(BXE_INSANE_TPA); + rc = 0; + + DBRUNIF((fp->disable_tpa == TRUE), + BXE_PRINTF("%s(): fp[%02d] TPA disabled!\n", + __FUNCTION__, fp->index)); + #ifdef BXE_DEBUG - int tpa_pool_max; + /* Simulate an mbuf allocation failure. */ + if (DB_RANDOMTRUE(bxe_debug_mbuf_allocation_failure)) { + sc->debug_sim_mbuf_alloc_failed++; + fp->mbuf_tpa_alloc_failed++; + rc = ENOMEM; + goto bxe_alloc_tpa_mbuf_exit; + } +#endif - tpa_pool_max = CHIP_IS_E1H(sc) ? ETH_MAX_AGGREGATION_QUEUES_E1H : - ETH_MAX_AGGREGATION_QUEUES_E1; - DBRUNIF((last > tpa_pool_max), DBPRINT(sc, BXE_FATAL, - "%s(): Index value out of range (%d > %d)!\n", __FUNCTION__, last, - tpa_pool_max)); + /* Allocate the new TPA mbuf. */ + m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->mbuf_alloc_size); + if (__predict_false(m == NULL)) { + fp->mbuf_tpa_alloc_failed++; + rc = ENOBUFS; + goto bxe_alloc_tpa_mbuf_exit; + } + + DBRUN(fp->tpa_mbuf_alloc++); + + /* Initialize the mbuf buffer length. */ + m->m_pkthdr.len = m->m_len = sc->mbuf_alloc_size; + +#ifdef BXE_DEBUG + /* Simulate an mbuf mapping failure. */ + if (DB_RANDOMTRUE(bxe_debug_dma_map_addr_failure)) { + sc->debug_sim_mbuf_map_failed++; + fp->mbuf_tpa_mapping_failed++; + m_freem(m); + DBRUN(fp->tpa_mbuf_alloc--); + rc = ENOMEM; + goto bxe_alloc_tpa_mbuf_exit; + } #endif - if (!(TPA_ENABLED(sc))) - return; + /* Map the TPA mbuf into non-paged pool. 
*/ + rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, + fp->tpa_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); + if (__predict_false(rc != 0)) { + fp->mbuf_tpa_mapping_failed++; + m_free(m); + DBRUN(fp->tpa_mbuf_alloc--); + goto bxe_alloc_tpa_mbuf_exit; + } - for (j = 0; j < last; j++) { - if (fp->rx_mbuf_tag) { - if (fp->tpa_mbuf_map[j] != NULL) { - bus_dmamap_sync(fp->rx_mbuf_tag, - fp->tpa_mbuf_map[j], BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(fp->rx_mbuf_tag, - fp->tpa_mbuf_map[j]); - } + /* All mubfs must map to a single segment. */ + KASSERT(nsegs == 1, ("%s(): Too many segments (%d) returned!", + __FUNCTION__, nsegs)); - if (fp->tpa_mbuf_ptr[j] != NULL) { - m_freem(fp->tpa_mbuf_ptr[j]); - DBRUN(fp->tpa_mbuf_alloc--); - fp->tpa_mbuf_ptr[j] = NULL; - } else { - DBPRINT(sc, BXE_FATAL, - "%s(): TPA bin %d empty on free!\n", - __FUNCTION__, j); - } - } + /* Release any existing TPA mbuf mapping. */ + if (fp->tpa_mbuf_map[queue] != NULL) { + bus_dmamap_sync(fp->rx_mbuf_tag, + fp->tpa_mbuf_map[queue], BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(fp->rx_mbuf_tag, + fp->tpa_mbuf_map[queue]); } + + /* Save the mbuf and mapping info for the TPA mbuf. */ + map = fp->tpa_mbuf_map[queue]; + fp->tpa_mbuf_map[queue] = fp->tpa_mbuf_spare_map; + fp->tpa_mbuf_spare_map = map; + bus_dmamap_sync(fp->rx_mbuf_tag, + fp->tpa_mbuf_map[queue], BUS_DMASYNC_PREREAD); + fp->tpa_mbuf_ptr[queue] = m; + fp->tpa_mbuf_segs[queue] = segs[0]; + +bxe_alloc_tpa_mbuf_exit: + DBEXIT(BXE_INSANE_TPA); + return (rc); } /* - * Free an entry in the receive scatter gather list. + * Allocate mbufs for a fastpath TPA pool. * * Returns: - * None + * 0 = Success, !0 = Failure. 
+ * + * Modifies: + * fp->tpa_state[] + * fp->disable_tpa */ -static __inline void -bxe_free_rx_sge(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t index) +static int +bxe_fill_tpa_pool(struct bxe_fastpath *fp) { - struct eth_rx_sge *sge; + struct bxe_softc *sc; + int max_agg_queues, queue, rc; - sge = &fp->rx_sge_chain[RX_SGE_PAGE(index)][RX_SGE_IDX(index)]; - /* Skip "next page" elements */ - if (!sge) - return; + sc = fp->sc; + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + rc = 0; - if (fp->rx_sge_buf_tag) { - if (fp->rx_sge_buf_map[index]) { - bus_dmamap_sync(fp->rx_sge_buf_tag, - fp->rx_sge_buf_map[index], BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(fp->rx_sge_buf_tag, - fp->rx_sge_buf_map[index]); - } + if (!TPA_ENABLED(sc)) { + fp->disable_tpa = TRUE; + goto bxe_fill_tpa_pool_exit; + } - if (fp->rx_sge_buf_ptr[index]) { - DBRUN(fp->sge_mbuf_alloc--); - m_freem(fp->rx_sge_buf_ptr[index]); - fp->rx_sge_buf_ptr[index] = NULL; - } + max_agg_queues = CHIP_IS_E1(sc) ? ETH_MAX_AGGREGATION_QUEUES_E1 : + ETH_MAX_AGGREGATION_QUEUES_E1H; + + /* Assume the fill operation worked. */ + fp->disable_tpa = FALSE; - sge->addr_hi = sge->addr_lo = 0; + /* Fill the TPA pool. */ + for (queue = 0; queue < max_agg_queues; queue++) { + rc = bxe_alloc_tpa_mbuf(fp, queue); + if (rc != 0) { + BXE_PRINTF( + "%s(%d): fp[%02d] TPA disabled!\n", + __FILE__, __LINE__, fp->index); + fp->disable_tpa = TRUE; + break; + } + fp->tpa_state[queue] = BXE_TPA_STATE_STOP; } + +bxe_fill_tpa_pool_exit: + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + return (rc); } /* - * Free a range of scatter gather elements from the ring. + * Free all mbufs from a fastpath TPA pool. 
* * Returns: * None + * + * Modifies: + * fp->tpa_mbuf_ptr[] + * fp->tpa_mbuf_map[] + * fp->tpa_mbuf_alloc */ -static __inline void -bxe_free_rx_sge_range(struct bxe_softc *sc, struct bxe_fastpath *fp, int last) +static void +bxe_free_tpa_pool(struct bxe_fastpath *fp) { - int i; + struct bxe_softc *sc; + int i, max_agg_queues; + + sc = fp->sc; + DBENTER(BXE_INSANE_LOAD | BXE_INSANE_UNLOAD | BXE_INSANE_TPA); - for (i = 0; i < last; i++) - bxe_free_rx_sge(sc, fp, i); + if (fp->rx_mbuf_tag == NULL) + goto bxe_free_tpa_pool_exit; + + max_agg_queues = CHIP_IS_E1H(sc) ? + ETH_MAX_AGGREGATION_QUEUES_E1H : + ETH_MAX_AGGREGATION_QUEUES_E1; + + /* Release all mbufs and and all DMA maps in the TPA pool. */ + for (i = 0; i < max_agg_queues; i++) { + if (fp->tpa_mbuf_map[i] != NULL) { + bus_dmamap_sync(fp->rx_mbuf_tag, fp->tpa_mbuf_map[i], + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(fp->rx_mbuf_tag, fp->tpa_mbuf_map[i]); + } + + if (fp->tpa_mbuf_ptr[i] != NULL) { + m_freem(fp->tpa_mbuf_ptr[i]); + DBRUN(fp->tpa_mbuf_alloc--); + fp->tpa_mbuf_ptr[i] = NULL; + } + } + +bxe_free_tpa_pool_exit: + DBEXIT(BXE_INSANE_LOAD | BXE_INSANE_UNLOAD | BXE_INSANE_TPA); } /* - * Allocate an mbuf of the specified size for the caller. + * Allocate an mbuf and assign it to the receive scatter gather chain. + * The caller must take care to save a copy of the existing mbuf in the + * SG mbuf chain. * * Returns: - * NULL on failure or an mbuf pointer on success. + * 0 = Success, !0= Failure. 
+ * + * Modifies: + * fp->sg_chain[index] + * fp->rx_sge_buf_ptr[index] + * fp->rx_sge_buf_map[index] + * fp->rx_sge_spare_map */ -static struct mbuf* -bxe_alloc_mbuf(struct bxe_fastpath *fp, int size) +static int +bxe_alloc_rx_sge_mbuf(struct bxe_fastpath *fp, uint16_t index) { struct bxe_softc *sc; - struct mbuf *m_new; + struct eth_rx_sge *sge; + bus_dma_segment_t segs[1]; + bus_dmamap_t map; + struct mbuf *m; + int nsegs, rc; sc = fp->sc; - DBENTER(BXE_INSANE); + DBENTER(BXE_INSANE_TPA); + rc = 0; #ifdef BXE_DEBUG /* Simulate an mbuf allocation failure. */ if (DB_RANDOMTRUE(bxe_debug_mbuf_allocation_failure)) { - DBPRINT(sc, BXE_WARN, - "%s(): Simulated mbuf allocation failure!\n", __FUNCTION__); - fp->mbuf_alloc_failed++; - sc->debug_mbuf_sim_alloc_failed++; - m_new = NULL; - goto bxe_alloc_mbuf_exit; + sc->debug_sim_mbuf_alloc_failed++; + fp->mbuf_sge_alloc_failed++; + rc = ENOMEM; + goto bxe_alloc_rx_sge_mbuf_exit; } #endif - /* Allocate a new mbuf with memory attached. */ - if (size <= MCLBYTES) - m_new = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); - else - m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size); - - /* Check whether the allocation succeeded and handle a failure. */ - if (__predict_false(m_new == NULL)) { - DBPRINT(sc, BXE_WARN, "%s(): Failed to allocate %d byte " - "mbuf on fp[%02d]!\n", __FUNCTION__, size, fp->index); - fp->mbuf_alloc_failed++; - goto bxe_alloc_mbuf_exit; + /* Allocate a new SGE mbuf. */ + m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, SGE_PAGE_SIZE); + if (__predict_false(m == NULL)) { + fp->mbuf_sge_alloc_failed++; + rc = ENOMEM; + goto bxe_alloc_rx_sge_mbuf_exit; } - /* Do a little extra error checking when debugging. */ - DBRUN(M_ASSERTPKTHDR(m_new)); + DBRUN(fp->sge_mbuf_alloc++); /* Initialize the mbuf buffer length. */ - m_new->m_pkthdr.len = m_new->m_len = size; - DBRUN(sc->debug_memory_allocated += size); + m->m_pkthdr.len = m->m_len = SGE_PAGE_SIZE; + +#ifdef BXE_DEBUG + /* Simulate an mbuf mapping failure. 
*/ + if (DB_RANDOMTRUE(bxe_debug_dma_map_addr_failure)) { + sc->debug_sim_mbuf_map_failed++; + fp->mbuf_sge_mapping_failed++; + m_freem(m); + DBRUN(fp->sge_mbuf_alloc--); + rc = ENOMEM; + goto bxe_alloc_rx_sge_mbuf_exit; + } +#endif + + /* Map the SGE mbuf into non-paged pool. */ + rc = bus_dmamap_load_mbuf_sg(fp->rx_sge_buf_tag, + fp->rx_sge_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); + if (__predict_false(rc != 0)) { + fp->mbuf_sge_mapping_failed++; + m_freem(m); + DBRUN(fp->sge_mbuf_alloc--); + goto bxe_alloc_rx_sge_mbuf_exit; + } -bxe_alloc_mbuf_exit: - return (m_new); + /* All mubfs must map to a single segment. */ + KASSERT(nsegs == 1, ("%s(): Too many segments (%d) returned!", + __FUNCTION__, nsegs)); + + /* Unload any existing SGE mbuf mapping. */ + if (fp->rx_sge_buf_map[index] != NULL) { + bus_dmamap_sync(fp->rx_sge_buf_tag, + fp->rx_sge_buf_map[index], BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(fp->rx_sge_buf_tag, + fp->rx_sge_buf_map[index]); + } + + /* Add the new SGE mbuf to the SGE ring. */ + map = fp->rx_sge_buf_map[index]; + fp->rx_sge_buf_map[index] = fp->rx_sge_spare_map; + fp->rx_sge_spare_map = map; + bus_dmamap_sync(fp->rx_sge_buf_tag, + fp->rx_sge_buf_map[index], BUS_DMASYNC_PREREAD); + fp->rx_sge_buf_ptr[index] = m; + sge = &fp->sg_chain[index]; + sge->addr_hi = htole32(U64_HI(segs[0].ds_addr)); + sge->addr_lo = htole32(U64_LO(segs[0].ds_addr)); + +bxe_alloc_rx_sge_mbuf_exit: + DBEXIT(BXE_INSANE_TPA); + return (rc); } /* - * Map an mbuf into non-paged memory for the caller. + * Allocate mbufs for a SGE chain. * * Returns: * 0 = Success, !0 = Failure. * - * Side-effects: - * The mbuf passed will be released if a mapping failure occurs. - * The segment mapping will be udpated if the mapping is successful. 
+ * Modifies: + * fp->disable_tpa + * fp->rx_sge_prod */ static int -bxe_map_mbuf(struct bxe_fastpath *fp, struct mbuf *m, bus_dma_tag_t tag, - bus_dmamap_t map, bus_dma_segment_t *seg) +bxe_fill_sg_chain(struct bxe_fastpath *fp) { struct bxe_softc *sc; - bus_dma_segment_t segs[4]; - int nsegs, rc; + uint16_t index; + int i, rc; + sc = fp->sc; + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); rc = 0; - DBENTER(BXE_INSANE); - -#ifdef BXE_DEBUG - /* Simulate an mbuf mapping failure. */ - if (DB_RANDOMTRUE(bxe_debug_dma_map_addr_failure)) { - DBPRINT(sc, BXE_WARN, "%s(): Simulated mbuf mapping failure!\n", - __FUNCTION__); - sc->debug_mbuf_sim_map_failed++; - fp->mbuf_alloc_failed++; - sc->debug_memory_allocated -= m->m_len; - m_freem(m); - rc = EINVAL; - goto bxe_map_mbuf_exit; + if (!TPA_ENABLED(sc)) { + fp->disable_tpa = TRUE; + goto bxe_fill_sg_chain_exit; } -#endif - /* Map the buffer memory into non-paged memory. */ - rc = bus_dmamap_load_mbuf_sg(tag, map, m, segs, &nsegs, BUS_DMA_NOWAIT); + /* Assume the fill operation works. */ + fp->disable_tpa = FALSE; - /* Handle any mapping errors. */ - if (__predict_false(rc)) { - DBPRINT(sc, BXE_WARN, "%s(): mbuf mapping failure (%d) on " - "fp[%02d]!\n", __FUNCTION__, rc, fp->index); - fp->mbuf_alloc_failed++; - DBRUN(sc->debug_memory_allocated -= m->m_len); - m_freem(m); - goto bxe_map_mbuf_exit; + /* Fill the RX SGE chain. */ + index = 0; + for (i = 0; i < USABLE_RX_SGE; i++) { + rc = bxe_alloc_rx_sge_mbuf(fp, index); + if (rc != 0) { + BXE_PRINTF( + "%s(%d): fp[%02d] SGE memory allocation failure!\n", + __FILE__, __LINE__, fp->index); + index = 0; + fp->disable_tpa = TRUE; + break; + } + index = NEXT_SGE_IDX(index); } - /* All mubfs must map to a single segment. */ - KASSERT(nsegs == 1, ("%s(): Too many segments (%d) returned!", - __FUNCTION__, nsegs)); - - /* Save the DMA mapping tag for this memory buffer. */ - *seg = segs[0]; + /* Update the driver's copy of the RX SGE producer index. 
*/ + fp->rx_sge_prod = index; -bxe_map_mbuf_exit: - DBEXIT(BXE_INSANE); +bxe_fill_sg_chain_exit: + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); return (rc); } /* - * Allocate an mbuf for the TPA pool. + * Free all elements from the receive scatter gather chain. * * Returns: - * NULL on failure or an mbuf pointer on success. + * None + * + * Modifies: + * fp->rx_sge_buf_ptr[] + * fp->rx_sge_buf_map[] + * fp->sge_mbuf_alloc */ -static struct mbuf * -bxe_alloc_tpa_mbuf(struct bxe_fastpath *fp, int index, int size) +static void +bxe_free_sg_chain(struct bxe_fastpath *fp) { - bus_dma_segment_t seg; - struct mbuf *m; - int rc; - - /* Allocate the new mbuf. */ - if ((m = bxe_alloc_mbuf(fp, size)) == NULL) - goto bxe_alloc_tpa_mbuf_exit; + struct bxe_softc *sc; + int i; - /* Map the mbuf into non-paged pool. */ - rc = bxe_map_mbuf(fp, m, fp->rx_mbuf_tag, fp->tpa_mbuf_map[index], - &seg); + sc = fp->sc; + DBENTER(BXE_INSANE_TPA); - if (rc) { - m = NULL; - goto bxe_alloc_tpa_mbuf_exit; - } + if (fp->rx_sge_buf_tag == NULL) + goto bxe_free_sg_chain_exit; - DBRUN(fp->tpa_mbuf_alloc++); + /* Free all mbufs and unload all maps. */ + for (i = 0; i < TOTAL_RX_SGE; i++) { + /* Free the map and the mbuf if they're allocated. */ + if (fp->rx_sge_buf_map[i] != NULL) { + bus_dmamap_sync(fp->rx_sge_buf_tag, + fp->rx_sge_buf_map[i], BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(fp->rx_sge_buf_tag, + fp->rx_sge_buf_map[i]); + } - /* Save the mapping info for the mbuf. */ - fp->tpa_mbuf_segs[index] = seg; + if (fp->rx_sge_buf_ptr[i] != NULL) { + m_freem(fp->rx_sge_buf_ptr[i]); + DBRUN(fp->sge_mbuf_alloc--); + fp->rx_sge_buf_ptr[i] = NULL; + } + } -bxe_alloc_tpa_mbuf_exit: - return (m); +bxe_free_sg_chain_exit: + DBEXIT(BXE_INSANE_TPA); } /* - * Allocate a receive scatter gather entry + * Allocate an mbuf, if necessary, and add it to the receive chain. * * Returns: - * 0 = Success, != Failure. + * 0 = Success, !0 = Failure. 
*/ static int -bxe_alloc_rx_sge(struct bxe_softc *sc, struct bxe_fastpath *fp, - uint16_t ring_prod) +bxe_alloc_rx_bd_mbuf(struct bxe_fastpath *fp, uint16_t index) { - struct eth_rx_sge *sge; - bus_dma_segment_t seg; + struct bxe_softc *sc; + struct eth_rx_bd *rx_bd; + bus_dma_segment_t segs[1]; + bus_dmamap_t map; struct mbuf *m; - int rc; + int nsegs, rc; - sge = &fp->rx_sge_chain[RX_SGE_PAGE(ring_prod)][RX_SGE_IDX(ring_prod)]; + sc = fp->sc; + DBENTER(BXE_INSANE_LOAD | BXE_INSANE_RESET | BXE_INSANE_RECV); rc = 0; - /* Allocate a new mbuf. */ - if ((m = bxe_alloc_mbuf(fp, PAGE_SIZE)) == NULL) { +#ifdef BXE_DEBUG + /* Simulate an mbuf allocation failure. */ + if (DB_RANDOMTRUE(bxe_debug_mbuf_allocation_failure)) { + sc->debug_sim_mbuf_alloc_failed++; + fp->mbuf_rx_bd_alloc_failed++; rc = ENOMEM; - goto bxe_alloc_rx_sge_exit; + goto bxe_alloc_rx_bd_mbuf_exit; } +#endif - /* Map the mbuf into non-paged pool. */ - rc = bxe_map_mbuf(fp, m, fp->rx_sge_buf_tag, - fp->rx_sge_buf_map[ring_prod], &seg); + /* Allocate the new RX BD mbuf. */ + m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->mbuf_alloc_size); + if (__predict_false(m == NULL)) { + fp->mbuf_rx_bd_alloc_failed++; + rc = ENOBUFS; + goto bxe_alloc_rx_bd_mbuf_exit; + } - if (rc) - goto bxe_alloc_rx_sge_exit; + DBRUN(fp->rx_mbuf_alloc++); - DBRUN(fp->sge_mbuf_alloc++); + /* Initialize the mbuf buffer length. */ + m->m_pkthdr.len = m->m_len = sc->mbuf_alloc_size; - /* Add the SGE buffer to the SGE ring. */ - sge->addr_hi = htole32(U64_HI(seg.ds_addr)); - sge->addr_lo = htole32(U64_LO(seg.ds_addr)); - fp->rx_sge_buf_ptr[ring_prod] = m; +#ifdef BXE_DEBUG + /* Simulate an mbuf mapping failure. */ + if (DB_RANDOMTRUE(bxe_debug_dma_map_addr_failure)) { + sc->debug_sim_mbuf_map_failed++; + fp->mbuf_rx_bd_mapping_failed++; + m_freem(m); + DBRUN(fp->rx_mbuf_alloc--); + rc = ENOMEM; + goto bxe_alloc_rx_bd_mbuf_exit; + } +#endif -bxe_alloc_rx_sge_exit: + /* Map the TPA mbuf into non-paged pool. 
*/ + rc = bus_dmamap_load_mbuf_sg(fp->rx_mbuf_tag, + fp->rx_mbuf_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT); + if (__predict_false(rc != 0)) { + fp->mbuf_rx_bd_mapping_failed++; + m_freem(m); + DBRUN(fp->rx_mbuf_alloc--); + goto bxe_alloc_rx_bd_mbuf_exit; + } + + /* All mubfs must map to a single segment. */ + KASSERT(nsegs == 1, ("%s(): Too many segments (%d) returned!", + __FUNCTION__, nsegs)); + + /* Release any existing RX BD mbuf mapping. */ + if (fp->rx_mbuf_map[index] != NULL) { + bus_dmamap_sync(fp->rx_mbuf_tag, + fp->rx_mbuf_map[index], BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(fp->rx_mbuf_tag, + fp->rx_mbuf_map[index]); + } + + /* Save the mbuf and mapping info. */ + map = fp->rx_mbuf_map[index]; + fp->rx_mbuf_map[index] = fp->rx_mbuf_spare_map; + fp->rx_mbuf_spare_map = map; + bus_dmamap_sync(fp->rx_mbuf_tag, + fp->rx_mbuf_map[index], BUS_DMASYNC_PREREAD); + fp->rx_mbuf_ptr[index] = m; + rx_bd = &fp->rx_chain[index]; + rx_bd->addr_hi = htole32(U64_HI(segs[0].ds_addr)); + rx_bd->addr_lo = htole32(U64_LO(segs[0].ds_addr)); + +bxe_alloc_rx_bd_mbuf_exit: + DBEXIT(BXE_INSANE_LOAD | BXE_INSANE_RESET | BXE_INSANE_RECV); return (rc); } + +/* + * Allocate mbufs for a receive chain. + * + * Returns: + * 0 = Success, !0 = Failure. + * + * Modifies: + * fp->rx_bd_prod + */ +static int +bxe_fill_rx_bd_chain(struct bxe_fastpath *fp) +{ + struct bxe_softc *sc; + uint16_t index; + int i, rc; + + sc = fp->sc; + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + rc = index = 0; + + /* Allocate buffers for all the RX BDs in RX BD Chain. */ + for (i = 0; i < USABLE_RX_BD; i++) { + rc = bxe_alloc_rx_bd_mbuf(fp, index); + if (rc != 0) { + BXE_PRINTF( + "%s(%d): Memory allocation failure! Cannot fill fp[%02d] RX chain.\n", + __FILE__, __LINE__, fp->index); + index = 0; + break; + } + index = NEXT_RX_BD(index); + } + + fp->rx_bd_prod = index; + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + return (rc); +} + +/* + * Free all buffers from the receive chain. 
+ * + * Returns: + * None + * + * Modifies: + * fp->rx_mbuf_ptr[] + * fp->rx_mbuf_map[] + * fp->rx_mbuf_alloc + */ +static void +bxe_free_rx_bd_chain(struct bxe_fastpath *fp) +{ + struct bxe_softc *sc; + int i; + + sc = fp->sc; + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + + if (fp->rx_mbuf_tag == NULL) + goto bxe_free_rx_bd_chain_exit; + + /* Free all mbufs and unload all maps. */ + for (i = 0; i < TOTAL_RX_BD; i++) { + if (fp->rx_mbuf_map[i] != NULL) { + bus_dmamap_sync(fp->rx_mbuf_tag, fp->rx_mbuf_map[i], + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(fp->rx_mbuf_tag, fp->rx_mbuf_map[i]); + } + + if (fp->rx_mbuf_ptr[i] != NULL) { + m_freem(fp->rx_mbuf_ptr[i]); + DBRUN(fp->rx_mbuf_alloc--); + fp->rx_mbuf_ptr[i] = NULL; + } + } + +bxe_free_rx_bd_chain_exit: + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); +} + /* + * Setup mutexes used by the driver. + * * Returns: * None. */ static void -bxe_alloc_mutexes(struct bxe_softc *sc) +bxe_mutexes_alloc(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; @@ -10726,7 +11011,7 @@ bxe_alloc_mutexes(struct bxe_softc *sc) BXE_PRINT_LOCK_INIT(sc, "bxe_print_lock"); /* Allocate one mutex for each fastpath structure. */ - for (i=0; i < sc->num_queues; i++ ) { + for (i = 0; i < sc->num_queues; i++ ) { fp = &sc->fp[i]; /* Allocate per fastpath mutexes. */ @@ -10739,23 +11024,25 @@ bxe_alloc_mutexes(struct bxe_softc *sc) } /* + * Free mutexes used by the driver. + * * Returns: * None. */ static void -bxe_free_mutexes(struct bxe_softc *sc) +bxe_mutexes_free(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i; DBENTER(BXE_VERBOSE_UNLOAD); - for (i=0; i < sc->num_queues; i++ ) { + for (i = 0; i < sc->num_queues; i++ ) { fp = &sc->fp[i]; /* Release per fastpath mutexes. 
*/ - if (mtx_initialized(&(fp->mtx))) - mtx_destroy(&(fp->mtx)); + if (mtx_initialized(&fp->mtx)) + mtx_destroy(&fp->mtx); } BXE_PRINT_LOCK_DESTROY(sc); @@ -10769,7 +11056,42 @@ bxe_free_mutexes(struct bxe_softc *sc) } +/* + * Free memory and clear the RX data structures. + * + * Returns: + * Nothing. + */ +static void +bxe_clear_rx_chains(struct bxe_softc *sc) +{ + struct bxe_fastpath *fp; + int i; + DBENTER(BXE_VERBOSE_RESET); + + for (i = 0; i < sc->num_queues; i++) { + fp = &sc->fp[i]; + + /* Free all RX buffers. */ + bxe_free_rx_bd_chain(fp); + bxe_free_tpa_pool(fp); + bxe_free_sg_chain(fp); + + /* Check if any mbufs lost in the process. */ + DBRUNIF((fp->tpa_mbuf_alloc), DBPRINT(sc, BXE_FATAL, + "%s(): Memory leak! Lost %d mbufs from fp[%02d] TPA pool!\n", + __FUNCTION__, fp->tpa_mbuf_alloc, fp->index)); + DBRUNIF((fp->sge_mbuf_alloc), DBPRINT(sc, BXE_FATAL, + "%s(): Memory leak! Lost %d mbufs from fp[%02d] SGE chain!\n", + __FUNCTION__, fp->sge_mbuf_alloc, fp->index)); + DBRUNIF((fp->rx_mbuf_alloc), DBPRINT(sc, BXE_FATAL, + "%s(): Memory leak! Lost %d mbufs from fp[%02d] RX chain!\n", + __FUNCTION__, fp->rx_mbuf_alloc, fp->index)); + } + + DBEXIT(BXE_VERBOSE_RESET); +} /* * Initialize the receive rings. @@ -10777,69 +11099,26 @@ bxe_free_mutexes(struct bxe_softc *sc) * Returns: * None. */ -static void +static int bxe_init_rx_chains(struct bxe_softc *sc) { struct bxe_fastpath *fp; - struct eth_rx_sge *sge; - struct eth_rx_bd *rx_bd; - struct eth_rx_cqe_next_page *nextpg; - uint16_t rx_bd_prod, rx_sge_prod; - int func, i, j, rcq_idx, rx_idx, rx_sge_idx, max_agg_queues; + int func, i, rc; DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); - + rc = 0; func = BP_FUNC(sc); - max_agg_queues = CHIP_IS_E1(sc) ? ETH_MAX_AGGREGATION_QUEUES_E1 : - ETH_MAX_AGGREGATION_QUEUES_E1H; - - sc->rx_buf_size = sc->mbuf_alloc_size; - - /* Allocate memory for the TPA pool. 
*/ - if (TPA_ENABLED(sc)) { - DBPRINT(sc, (BXE_INFO_LOAD | BXE_INFO_RESET), - "%s(): mtu = %d, rx_buf_size = %d\n", __FUNCTION__, - (int)sc->bxe_ifp->if_mtu, sc->rx_buf_size); - - for (i = 0; i < sc->num_queues; i++) { - fp = &sc->fp[i]; - DBPRINT(sc, (BXE_INSANE_LOAD | BXE_INSANE_RESET), - "%s(): Initializing fp[%02d] TPA pool.\n", - __FUNCTION__, i); - - for (j = 0; j < max_agg_queues; j++) { - DBPRINT(sc, - (BXE_INSANE_LOAD | BXE_INSANE_RESET), - "%s(): Initializing fp[%02d] TPA " - "pool[%d].\n", __FUNCTION__, i, j); - - fp->disable_tpa = 0; - fp->tpa_mbuf_ptr[j] = bxe_alloc_tpa_mbuf(fp, j, - sc->mbuf_alloc_size); - - if (fp->tpa_mbuf_ptr[j] == NULL) { - fp->tpa_mbuf_alloc_failed++; - BXE_PRINTF("TPA disabled on " - "fp[%02d]!\n", i); - bxe_free_tpa_pool(fp, j); - fp->disable_tpa = 1; - break; - } - fp->tpa_state[j] = BXE_TPA_STATE_STOP; - } - } - } /* Allocate memory for RX and CQ chains. */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): Initializing fp[%d] RX chain.\n", __FUNCTION__, i); + "%s(): Initializing fp[%02d] RX chain.\n", __FUNCTION__, i); fp->rx_bd_cons = fp->rx_bd_prod = 0; fp->rx_cq_cons = fp->rx_cq_prod = 0; - /* Status block's completion queue consumer index. */ + /* Pointer to status block's CQ consumer index. 
*/ fp->rx_cq_cons_sb = &fp->status_block-> u_status_block.index_values[HC_INDEX_U_ETH_RX_CQ_CONS]; @@ -10847,138 +11126,30 @@ bxe_init_rx_chains(struct bxe_softc *sc) fp->rx_bd_cons_sb = &fp->status_block-> u_status_block.index_values[HC_INDEX_U_ETH_RX_BD_CONS]; - if (TPA_ENABLED(sc)) { - DBPRINT(sc, (BXE_INSANE_LOAD | BXE_INSANE_RESET), - "%s(): Linking fp[%d] SGE rings.\n", __FUNCTION__, - i); - - /* Link the SGE Ring Pages to form SGE chain */ - for (j = 0; j < NUM_RX_SGE_PAGES; j++) { - rx_sge_idx = ((j + 1) % NUM_RX_SGE_PAGES); - sge = &fp->rx_sge_chain[j][MAX_RX_SGE_CNT]; - - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): fp[%02d].rx_sge_chain[%02d][0x%04X]=0x%jX\n", - __FUNCTION__, i, j, - (uint16_t) MAX_RX_SGE_CNT, - (uintmax_t) fp->rx_sge_chain_paddr[rx_sge_idx]); - - sge->addr_hi = - htole32(U64_HI(fp->rx_sge_chain_paddr[rx_sge_idx])); - sge->addr_lo = - htole32(U64_LO(fp->rx_sge_chain_paddr[rx_sge_idx])); - } - - bxe_init_sge_ring_bit_mask(fp); - } - - DBPRINT(sc, (BXE_INSANE_LOAD | BXE_INSANE_RESET), - "%s(): Linking fp[%d] RX chain pages.\n", __FUNCTION__, i); - - /* Link the pages to form the RX BD Chain. 
*/ - for (j = 0; j < NUM_RX_PAGES; j++) { - rx_idx = ((j + 1) % NUM_RX_PAGES); - rx_bd = &fp->rx_bd_chain[j][USABLE_RX_BD_PER_PAGE]; - - DBPRINT(sc, (BXE_EXTREME_LOAD), - "%s(): fp[%02d].rx_bd_chain[%02d][0x%04X]=0x%jX\n", - __FUNCTION__, i, j, - (uint16_t) USABLE_RX_BD_PER_PAGE, - (uintmax_t) fp->rx_bd_chain_paddr[rx_idx]); - - rx_bd->addr_hi = - htole32(U64_HI(fp->rx_bd_chain_paddr[rx_idx])); - rx_bd->addr_lo = - htole32(U64_LO(fp->rx_bd_chain_paddr[rx_idx])); - } - - DBPRINT(sc, (BXE_INSANE_LOAD | BXE_INSANE_RESET), - "%s(): Linking fp[%d] RX completion chain pages.\n", - __FUNCTION__, i); - - /* Link the pages to form the RX Completion Queue.*/ - for (j = 0; j < NUM_RCQ_PAGES; j++) { - rcq_idx = ((j + 1) % NUM_RCQ_PAGES); - nextpg = (struct eth_rx_cqe_next_page *) - &fp->rx_cq_chain[j][USABLE_RCQ_ENTRIES_PER_PAGE]; - - DBPRINT(sc, (BXE_EXTREME_LOAD), - "%s(): fp[%02d].rx_cq_chain[%02d][0x%04X]=0x%jX\n", - __FUNCTION__, i, j, - (uint16_t) USABLE_RCQ_ENTRIES_PER_PAGE, - (uintmax_t) fp->rx_cq_chain_paddr[rcq_idx]); - - nextpg->addr_hi = - htole32(U64_HI(fp->rx_cq_chain_paddr[rcq_idx])); - nextpg->addr_lo = - htole32(U64_LO(fp->rx_cq_chain_paddr[rcq_idx])); - } - - if (TPA_ENABLED(sc)) { - /* Allocate SGEs and initialize the ring elements. */ - rx_sge_prod = 0; - - while (rx_sge_prod < sc->rx_ring_size) { - if (bxe_alloc_rx_sge(sc, fp, rx_sge_prod) != 0) { - fp->tpa_mbuf_alloc_failed++; - BXE_PRINTF( - "%s(%d): Memory allocation failure! " - "Disabling TPA for fp[%02d].\n", - __FILE__, __LINE__, i); - - /* Cleanup already allocated elements */ - bxe_free_rx_sge_range(sc, fp, - rx_sge_prod); - fp->disable_tpa = 1; - rx_sge_prod = 0; - break; - } - rx_sge_prod = NEXT_SGE_IDX(rx_sge_prod); - } - - fp->rx_sge_prod = rx_sge_prod; - } - - /* - * Allocate buffers for all the RX BDs in RX BD Chain. 
- */ - rx_bd_prod = 0; - DBRUN(fp->free_rx_bd = sc->rx_ring_size); - - for (j = 0; j < sc->rx_ring_size; j++) { - if (bxe_get_buf(fp, NULL, rx_bd_prod)) { - BXE_PRINTF( - "%s(%d): Memory allocation failure! Cannot fill fp[%d] RX chain.\n", - __FILE__, __LINE__, i); - break; - } - rx_bd_prod = NEXT_RX_BD(rx_bd_prod); - } - - /* Update the driver's copy of the producer indices. */ - fp->rx_bd_prod = rx_bd_prod; fp->rx_cq_prod = TOTAL_RCQ_ENTRIES; - fp->rx_pkts = fp->rx_calls = 0; + fp->rx_pkts = fp->rx_tpa_pkts = fp->rx_soft_errors = 0; - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): USABLE_RX_BD=0x%04X, USABLE_RCQ_ENTRIES=0x%04X\n", - __FUNCTION__, (uint16_t) USABLE_RX_BD, - (uint16_t) USABLE_RCQ_ENTRIES); - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): fp[%02d]->rx_bd_prod=0x%04X, rx_cq_prod=0x%04X\n", - __FUNCTION__, i, fp->rx_bd_prod, fp->rx_cq_prod); + /* Allocate memory for the receive chain. */ + rc = bxe_fill_rx_bd_chain(fp); + if (rc != 0) + goto bxe_init_rx_chains_exit; + + /* Allocate memory for TPA pool. */ + rc = bxe_fill_tpa_pool(fp); + if (rc != 0) + goto bxe_init_rx_chains_exit; + /* Allocate memory for scatter-gather chain. */ + rc = bxe_fill_sg_chain(fp); + if (rc != 0) + goto bxe_init_rx_chains_exit; - /* Prepare the recevie BD and CQ buffers for DMA access. */ - for (j = 0; j < NUM_RX_PAGES; j++) - bus_dmamap_sync(fp->rx_bd_chain_tag, - fp->rx_bd_chain_map[j], BUS_DMASYNC_PREREAD | - BUS_DMASYNC_PREWRITE); + /* Prepare the receive BD and CQ buffers for DMA access. 
*/ + bus_dmamap_sync(fp->rx_dma.tag, fp->rx_dma.map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - for (j = 0; j < NUM_RCQ_PAGES; j++) - bus_dmamap_sync(fp->rx_cq_chain_tag, - fp->rx_cq_chain_map[j], BUS_DMASYNC_PREREAD | - BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(fp->rcq_dma.tag, fp->rcq_dma.map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Tell the controller that we have rx_bd's and CQE's @@ -10989,6 +11160,7 @@ bxe_init_rx_chains(struct bxe_softc *sc) bxe_update_rx_prod(sc, fp, fp->rx_bd_prod, fp->rx_cq_prod, fp->rx_sge_prod); + /* ToDo - Move to dma_alloc(). */ /* * Tell controller where the receive CQ * chains start in physical memory. @@ -10996,214 +11168,123 @@ bxe_init_rx_chains(struct bxe_softc *sc) if (i == 0) { REG_WR(sc, BAR_USTORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func), - U64_LO(fp->rx_cq_chain_paddr[0])); + U64_LO(fp->rcq_dma.paddr)); REG_WR(sc, BAR_USTORM_INTMEM + USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4, - U64_HI(fp->rx_cq_chain_paddr[0])); + U64_HI(fp->rcq_dma.paddr)); } } - /* - * ToDo: Need a cleanup path if memory allocation - * fails during initializtion. This is especially - * easy if multiqueue is used on a system with - * jumbo frames and many CPUs. On my 16GB system - * with 8 CPUs I get the following defaults: - * - * kern.ipc.nmbjumbo16: 3200 - * kern.ipc.nmbjumbo9: 6400 - * kern.ipc.nmbjumbop: 12800 - * kern.ipc.nmbclusters: 25600 - */ - - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); -} - -/* - * Initialize the transmit chain. - * - * Returns: - * None. 
- */ -static void -bxe_init_tx_chains(struct bxe_softc *sc) -{ - struct bxe_fastpath *fp; - struct eth_tx_next_bd *tx_n_bd; - int i, j; - - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); - - for (i = 0; i < sc->num_queues; i++) { - fp = &sc->fp[i]; - - DBPRINT(sc, (BXE_INSANE_LOAD | BXE_INSANE_RESET), - "%s(): Linking fp[%d] TX chain pages.\n", __FUNCTION__, i); - - for (j = 0; j < NUM_TX_PAGES; j++) { - tx_n_bd = - &fp->tx_bd_chain[j][USABLE_TX_BD_PER_PAGE].next_bd; - - DBPRINT(sc, (BXE_INSANE_LOAD | BXE_INSANE_RESET), - "%s(): Linking fp[%d] TX BD chain page[%d].\n", - __FUNCTION__, i, j); - - tx_n_bd->addr_hi = - htole32(U64_HI(fp->tx_bd_chain_paddr[(j + 1) % - NUM_TX_PAGES])); - tx_n_bd->addr_lo = - htole32(U64_LO(fp->tx_bd_chain_paddr[(j + 1) % - NUM_TX_PAGES])); - } - - fp->tx_db.data.header.header = DOORBELL_HDR_DB_TYPE; - fp->tx_db.data.zero_fill1 = 0; - fp->tx_db.data.prod = 0; - - fp->tx_pkt_prod = 0; - fp->tx_pkt_cons = 0; - fp->tx_bd_prod = 0; - fp->tx_bd_cons = 0; - fp->used_tx_bd = 0; - - /* - * Copy of TX BD Chain completion queue Consumer Index - * from the Status Block. - */ - fp->tx_cons_sb = - &fp->status_block->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX]; - - fp->tx_pkts = 0; - } +bxe_init_rx_chains_exit: + /* Release memory if an error occurred. */ + if (rc != 0) + bxe_clear_rx_chains(sc); DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + return (rc); } /* - * Free memory and clear the RX data structures. + * Free memory and clear the TX data structures. * * Returns: * Nothing. */ static void -bxe_free_rx_chains(struct bxe_softc *sc) +bxe_clear_tx_chains(struct bxe_softc *sc) { struct bxe_fastpath *fp; - int i, j, max_agg_queues; + int i, j; DBENTER(BXE_VERBOSE_RESET); for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; - if (fp->rx_mbuf_tag) { - /* Free any mbufs still in the RX mbuf chain. 
*/ - for (j = 0; j < TOTAL_RX_BD; j++) { - if (fp->rx_mbuf_ptr[j] != NULL) { - if (fp->rx_mbuf_map[j] != NULL) - bus_dmamap_sync(fp->rx_mbuf_tag, - fp->rx_mbuf_map[j], - BUS_DMASYNC_POSTREAD); - DBRUN(fp->rx_mbuf_alloc--); - m_freem(fp->rx_mbuf_ptr[j]); - fp->rx_mbuf_ptr[j] = NULL; - } - } - - /* Clear each RX chain page. */ - for (j = 0; j < NUM_RX_PAGES; j++) { - if (fp->rx_bd_chain[j] != NULL) - bzero((char *)fp->rx_bd_chain[j], - BXE_RX_CHAIN_PAGE_SZ); - } - /* Clear each RX completion queue page. */ - for (j = 0; j < NUM_RCQ_PAGES; j++) { - if (fp->rx_cq_chain[j] != NULL) - bzero((char *)fp->rx_cq_chain[j], - BXE_RX_CHAIN_PAGE_SZ); - } - - if (TPA_ENABLED(sc)) { - max_agg_queues = CHIP_IS_E1H(sc) ? - ETH_MAX_AGGREGATION_QUEUES_E1H : - ETH_MAX_AGGREGATION_QUEUES_E1; - - /* Free the TPA Pool mbufs. */ - bxe_free_tpa_pool(fp, max_agg_queues); - - /* - * Free any mbufs still in the RX SGE - * buf chain. - */ - bxe_free_rx_sge_range(fp->sc, fp, MAX_RX_SGE); - - /* Clear each RX SGE page. */ - for (j = 0; j < NUM_RX_SGE_PAGES; j++) { - if (fp->rx_sge_chain[j] != NULL) - bzero( - (char *)fp->rx_sge_chain[j], - BXE_RX_CHAIN_PAGE_SZ); + /* Free all mbufs and unload all maps. */ + if (fp->tx_mbuf_tag) { + for (j = 0; j < TOTAL_TX_BD; j++) { + if (fp->tx_mbuf_ptr[j] != NULL) { + bus_dmamap_sync(fp->tx_mbuf_tag, + fp->tx_mbuf_map[j], + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(fp->tx_mbuf_tag, + fp->tx_mbuf_map[j]); + m_freem(fp->tx_mbuf_ptr[j]); + fp->tx_mbuf_alloc--; + fp->tx_mbuf_ptr[j] = NULL; } } } /* Check if we lost any mbufs in the process. */ - DBRUNIF((fp->rx_mbuf_alloc), DBPRINT(sc, BXE_FATAL, - "%s(): Memory leak! Lost %d mbufs from fp[%d] RX chain!\n", - __FUNCTION__, fp->rx_mbuf_alloc, fp->index)); + DBRUNIF((fp->tx_mbuf_alloc), DBPRINT(sc, BXE_FATAL, + "%s(): Memory leak! Lost %d mbufs from fp[%02d] TX chain!\n", + __FUNCTION__, fp->tx_mbuf_alloc, fp->index)); } DBEXIT(BXE_VERBOSE_RESET); } /* - * Free memory and clear the TX data structures. 
+ * Initialize the transmit chain. * * Returns: - * Nothing. + * None. */ static void -bxe_free_tx_chains(struct bxe_softc *sc) +bxe_init_tx_chains(struct bxe_softc *sc) { struct bxe_fastpath *fp; int i, j; - DBENTER(BXE_VERBOSE_RESET); + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; - if (fp->tx_mbuf_tag) { - /* - * Unmap, unload, and free any mbufs in the - * TX mbuf chain. - */ - for (j = 0; j < TOTAL_TX_BD; j++) { - if (fp->tx_mbuf_ptr[j] != NULL) { - if (fp->tx_mbuf_map[j] != NULL) - bus_dmamap_sync(fp->tx_mbuf_tag, - fp->tx_mbuf_map[j], - BUS_DMASYNC_POSTWRITE); - DBRUN(fp->tx_mbuf_alloc--); - m_freem(fp->tx_mbuf_ptr[j]); - fp->tx_mbuf_ptr[j] = NULL; - } - } - /* Clear each TX chain page. */ - for (j = 0; j < NUM_TX_PAGES; j++) { - if (fp->tx_bd_chain[j] != NULL) - bzero((char *)fp->tx_bd_chain[j], - BXE_TX_CHAIN_PAGE_SZ); - } + /* Initialize transmit doorbell. */ + fp->tx_db.data.header.header = DOORBELL_HDR_DB_TYPE; + fp->tx_db.data.zero_fill1 = 0; + fp->tx_db.data.prod = 0; + + /* Initialize tranmsit producer/consumer indices. */ + fp->tx_pkt_prod = fp->tx_pkt_cons = 0; + fp->tx_bd_prod = fp->tx_bd_cons = 0; + fp->tx_bd_used = 0; - /* Check if we lost any mbufs in the process. */ - DBRUNIF((fp->tx_mbuf_alloc), DBPRINT(sc, BXE_FATAL, - "%s(): Memory leak! Lost %d mbufs from fp[%d] TX chain!\n", - __FUNCTION__, fp->tx_mbuf_alloc, fp->index)); + /* Pointer to TX packet consumer in status block. */ + fp->tx_pkt_cons_sb = + &fp->status_block->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX]; + + /* Soft TX counters. 
*/ + fp->tx_pkts = 0; + fp->tx_soft_errors = 0; + fp->tx_offload_frames_csum_ip = 0; + fp->tx_offload_frames_csum_tcp = 0; + fp->tx_offload_frames_csum_udp = 0; + fp->tx_offload_frames_tso = 0; + fp->tx_header_splits = 0; + fp->tx_encap_failures = 0; + fp->tx_hw_queue_full = 0; + fp->tx_hw_max_queue_depth = 0; + fp->tx_dma_mapping_failure = 0; + fp->tx_max_drbr_queue_depth = 0; + fp->tx_window_violation_std = 0; + fp->tx_window_violation_tso = 0; + fp->tx_unsupported_tso_request_ipv6 = 0; + fp->tx_unsupported_tso_request_not_tcp = 0; + fp->tx_chain_lost_mbuf = 0; + fp->tx_frame_deferred = 0; + fp->tx_queue_xoff = 0; + + /* Clear all TX mbuf pointers. */ + for (j = 0; j < TOTAL_TX_BD; j++) { + fp->tx_mbuf_ptr[j] = NULL; } } - DBEXIT(BXE_VERBOSE_RESET); + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); } /* @@ -11232,9 +11313,9 @@ bxe_init_sp_ring(struct bxe_softc *sc) /* Tell the controller the address of the slowpath ring. */ REG_WR(sc, XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PAGE_BASE_OFFSET(func), - U64_LO(sc->spq_paddr)); + U64_LO(sc->spq_dma.paddr)); REG_WR(sc, XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PAGE_BASE_OFFSET(func) + 4, - U64_HI(sc->spq_paddr)); + U64_HI(sc->spq_dma.paddr)); REG_WR(sc, XSEM_REG_FAST_MEMORY + XSTORM_SPQ_PROD_OFFSET(func), sc->spq_prod_idx); @@ -11283,27 +11364,33 @@ bxe_init_context(struct bxe_softc *sc) context->ustorm_st_context.common.mc_alignment_log_size = 8; /* Set the size of the receive buffers. */ context->ustorm_st_context.common.bd_buff_size = - sc->rx_buf_size; + sc->mbuf_alloc_size; /* Set the address of the receive chain base page. */ context->ustorm_st_context.common.bd_page_base_hi = - U64_HI(fp->rx_bd_chain_paddr[0]); + U64_HI(fp->rx_dma.paddr); context->ustorm_st_context.common.bd_page_base_lo = - U64_LO(fp->rx_bd_chain_paddr[0]); + U64_LO(fp->rx_dma.paddr); - if (TPA_ENABLED(sc) && !(fp->disable_tpa)) { + if (TPA_ENABLED(sc) && (fp->disable_tpa == FALSE)) { /* Enable TPA and SGE chain support. 
*/ context->ustorm_st_context.common.flags |= USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_TPA; + /* Set the size of the SGE buffer. */ context->ustorm_st_context.common.sge_buff_size = - (uint16_t) (PAGES_PER_SGE * BCM_PAGE_SIZE); + (uint16_t) (SGE_PAGE_SIZE * PAGES_PER_SGE); + /* Set the address of the SGE chain base page. */ context->ustorm_st_context.common.sge_page_base_hi = - U64_HI(fp->rx_sge_chain_paddr[0]); + U64_HI(fp->sg_dma.paddr); context->ustorm_st_context.common.sge_page_base_lo = - U64_LO(fp->rx_sge_chain_paddr[0]); + U64_LO(fp->sg_dma.paddr); + + DBPRINT(sc, BXE_VERBOSE_TPA, "%s(): MTU = %d\n", + __FUNCTION__, (int) sc->bxe_ifp->if_mtu); + /* Describe MTU to SGE alignment. */ context->ustorm_st_context.common.max_sges_for_packet = SGE_PAGE_ALIGN(sc->bxe_ifp->if_mtu) >> SGE_PAGE_SHIFT; @@ -11311,6 +11398,10 @@ bxe_init_context(struct bxe_softc *sc) ((context->ustorm_st_context.common. max_sges_for_packet + PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >> PAGES_PER_SGE_SHIFT; + + DBPRINT(sc, BXE_VERBOSE_TPA, + "%s(): max_sges_for_packet = %d\n", __FUNCTION__, + context->ustorm_st_context.common.max_sges_for_packet); } /* Update USTORM context. */ @@ -11325,9 +11416,9 @@ bxe_init_context(struct bxe_softc *sc) /* Set the address of the transmit chain base page. */ context->xstorm_st_context.tx_bd_page_base_hi = - U64_HI(fp->tx_bd_chain_paddr[0]); + U64_HI(fp->tx_dma.paddr); context->xstorm_st_context.tx_bd_page_base_lo = - U64_LO(fp->tx_bd_chain_paddr[0]); + U64_LO(fp->tx_dma.paddr); /* Enable XSTORM statistics. */ context->xstorm_st_context.statistics_data = (cl_id | @@ -11592,7 +11683,7 @@ bxe_init_internal_func(struct bxe_softc *sc) } /* Enable TPA if needed */ - if (sc->bxe_flags & BXE_TPA_ENABLE_FLAG) + if (TPA_ENABLED(sc)) tstorm_config.config_flags |= TSTORM_ETH_FUNCTION_COMMON_CONFIG_ENABLE_TPA; @@ -11693,21 +11784,24 @@ bxe_init_internal_func(struct bxe_softc *sc) } /* Init completion queue mapping and TPA aggregation size. 
*/ - max_agg_size = min((uint32_t)(sc->rx_buf_size + 8 * BCM_PAGE_SIZE * - PAGES_PER_SGE), (uint32_t)0xffff); + max_agg_size = min((uint32_t)(sc->mbuf_alloc_size + + (8 * BCM_PAGE_SIZE * PAGES_PER_SGE)), (uint32_t)0xffff); + + DBPRINT(sc, BXE_VERBOSE_TPA, "%s(): max_agg_size = 0x%08X\n", + __FUNCTION__, max_agg_size); for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; nextpg = (struct eth_rx_cqe_next_page *) - &fp->rx_cq_chain[0][USABLE_RCQ_ENTRIES_PER_PAGE]; + &fp->rcq_chain[USABLE_RCQ_ENTRIES_PER_PAGE]; /* Program the completion queue address. */ REG_WR(sc, BAR_USTORM_INTMEM + USTORM_CQE_PAGE_BASE_OFFSET(port, fp->cl_id), - U64_LO(fp->rx_cq_chain_paddr[0])); + U64_LO(fp->rcq_dma.paddr)); REG_WR(sc, BAR_USTORM_INTMEM + USTORM_CQE_PAGE_BASE_OFFSET(port, fp->cl_id) + 4, - U64_HI(fp->rx_cq_chain_paddr[0])); + U64_HI(fp->rcq_dma.paddr)); /* Program the first CQ next page address. */ REG_WR(sc, BAR_USTORM_INTMEM + @@ -11735,7 +11829,7 @@ bxe_init_internal_func(struct bxe_softc *sc) for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; - if (!fp->disable_tpa) { + if (fp->disable_tpa == FALSE) { rx_pause.sge_thr_low = 150; rx_pause.sge_thr_high = 250; } @@ -11818,18 +11912,18 @@ bxe_init_internal(struct bxe_softc *sc, uint32_t load_code) * Returns: * None */ -static void +static int bxe_init_nic(struct bxe_softc *sc, uint32_t load_code) { struct bxe_fastpath *fp; - int i; + int i, rc; DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); /* Intialize fastpath structures and the status block. 
*/ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; - fp->disable_tpa = 1; + fp->disable_tpa = TRUE; bzero((char *)fp->status_block, BXE_STATUS_BLK_SZ); fp->fp_u_idx = 0; @@ -11851,29 +11945,31 @@ bxe_init_nic(struct bxe_softc *sc, uint32_t load_code) fp->sb_id = fp->cl_id; DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): fp[%d]: cl_id = %d, sb_id = %d\n", + "%s(): fp[%02d]: cl_id = %d, sb_id = %d\n", __FUNCTION__, fp->index, fp->cl_id, fp->sb_id); /* Initialize the fastpath status block. */ - bxe_init_sb(sc, fp->status_block, fp->status_block_paddr, + bxe_init_sb(sc, fp->status_block, fp->sb_dma.paddr, fp->sb_id); bxe_update_fpsb_idx(fp); } rmb(); - bzero((char *)sc->def_status_block, BXE_DEF_STATUS_BLK_SZ); + bzero((char *)sc->def_sb, BXE_DEF_STATUS_BLK_SZ); /* Initialize the Default Status Block. */ - bxe_init_def_sb(sc, sc->def_status_block, sc->def_status_block_paddr, - DEF_SB_ID); + bxe_init_def_sb(sc, sc->def_sb, sc->def_sb_dma.paddr, DEF_SB_ID); bxe_update_dsb_idx(sc); /* Initialize the coalescence parameters. */ bxe_update_coalesce(sc); - /* Intiialize the Receive BD Chain and Receive Completion Chain. */ - bxe_init_rx_chains(sc); + /* Initialize receive chains. */ + rc = bxe_init_rx_chains(sc); + if (rc != 0) { + goto bxe_init_nic_exit; + } /* Initialize the Transmit BD Chain. 
*/ bxe_init_tx_chains(sc); @@ -11895,46 +11991,7 @@ bxe_init_nic(struct bxe_softc *sc, uint32_t load_code) /* Disable the interrupts from device until init is complete.*/ bxe_int_disable(sc); - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); -} - -/* -* -* Returns: -* 0 = Success, !0 = Failure -*/ -static int -bxe_gunzip_init(struct bxe_softc *sc) -{ - int rc; - - rc = 0; - - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); - - bxe_dmamem_alloc(sc, sc->gunzip_tag, sc->gunzip_map, sc->gunzip_buf, - FW_BUF_SIZE, &sc->gunzip_mapping); - - if (sc->gunzip_buf == NULL) - goto bxe_gunzip_init_nomem1; - - sc->strm = malloc(sizeof(*sc->strm), M_DEVBUF, M_NOWAIT); - if (sc->strm == NULL) - goto bxe_gunzip_init_nomem2; - - goto bxe_gunzip_init_exit; - -bxe_gunzip_init_nomem2: - bxe_dmamem_free(sc, sc->gunzip_tag, sc->gunzip_buf, sc->gunzip_map); - sc->gunzip_buf = NULL; - -bxe_gunzip_init_nomem1: - BXE_PRINTF( - "%s(%d): Cannot allocate firmware buffer for decompression!\n", - __FILE__, __LINE__); - rc = ENOMEM; - -bxe_gunzip_init_exit: +bxe_init_nic_exit: DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); return (rc); } @@ -11948,14 +12005,14 @@ bxe_gunzip_init_exit: static void bxe_lb_pckt(struct bxe_softc *sc) { -#ifdef USE_DMAE +#ifdef BXE_USE_DMAE uint32_t wb_write[3]; #endif DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); /* Ethernet source and destination addresses. */ -#ifdef USE_DMAE +#ifdef BXE_USE_DMAE wb_write[0] = 0x55555555; wb_write[1] = 0x55555555; wb_write[2] = 0x20; /* SOP */ @@ -11967,7 +12024,7 @@ bxe_lb_pckt(struct bxe_softc *sc) #endif /* NON-IP protocol. 
*/ -#ifdef USE_DMAE +#ifdef BXE_USE_DMAE wb_write[0] = 0x09000000; wb_write[1] = 0x55555555; wb_write[2] = 0x10; /* EOP */ @@ -12130,7 +12187,8 @@ bxe_int_mem_test(struct bxe_softc *sc) val = REG_RD(sc, NIG_REG_INGRESS_EOP_LB_EMPTY); if (val != 1) { - DBPRINT(sc, BXE_INFO, "clear of NIG failed\n"); + DBPRINT(sc, BXE_INFO, "%s(): Unable to clear NIG!\n", + __FUNCTION__); rc = 6; goto bxe_int_mem_test_exit; } @@ -12495,7 +12553,7 @@ bxe_setup_fan_failure_detection(struct bxe_softc *sc) int is_required, port; is_required = 0; - if (BP_NOMCP(sc)) + if (NOMCP(sc)) return; val = SHMEM_RD(sc, dev_info.shared_hw_config.config2) & @@ -12787,7 +12845,7 @@ bxe_init_common(struct bxe_softc *sc) bxe_enable_blocks_attention(sc); - if (!BP_NOMCP(sc)) { + if (!NOMCP(sc)) { bxe_acquire_phy_lock(sc); bxe_common_init_phy(sc, sc->common.shmem_base); bxe_release_phy_lock(sc); @@ -12813,7 +12871,7 @@ bxe_init_port(struct bxe_softc *sc) uint32_t val, low, high; uint32_t swap_val, swap_override, aeu_gpio_mask, offset; uint32_t reg_addr; - int i, init_stage, port; + int init_stage, port; port = BP_PORT(sc); init_stage = port ? PORT1_STAGE : PORT0_STAGE; @@ -12855,14 +12913,6 @@ bxe_init_port(struct bxe_softc *sc) REG_WR(sc, BRB1_REG_PAUSE_LOW_THRESHOLD_0 + port * 4, low); REG_WR(sc, BRB1_REG_PAUSE_HIGH_THRESHOLD_0 + port * 4, high); - if (sc->bxe_flags & BXE_SAFC_TX_FLAG) { - REG_WR(sc, BRB1_REG_HIGH_LLFC_LOW_THRESHOLD_0 + port * 4, 0xa0); - REG_WR(sc, BRB1_REG_HIGH_LLFC_HIGH_THRESHOLD_0 + port * 4, - 0xd8); - REG_WR(sc, BRB1_REG_LOW_LLFC_LOW_THRESHOLD_0 + port *4, 0xa0); - REG_WR(sc, BRB1_REG_LOW_LLFC_HIGH_THRESHOLD_0 + port * 4, 0xd8); - } - /* Port PRS comes here. 
*/ bxe_init_block(sc, PRS_BLOCK, init_stage); @@ -12901,6 +12951,7 @@ bxe_init_port(struct bxe_softc *sc) REG_WR(sc, HC_REG_LEADING_EDGE_0 + port * 8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port * 8, 0); } + bxe_init_block(sc, HC_BLOCK, init_stage); bxe_init_block(sc, MISC_AEU_BLOCK, init_stage); @@ -12927,33 +12978,12 @@ bxe_init_port(struct bxe_softc *sc) /* Enable outer VLAN support if required. */ REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK_MF + port * 4, (IS_E1HOV(sc) ? 0x1 : 0x2)); - - if (sc->bxe_flags & BXE_SAFC_TX_FLAG){ - high = 0; - for (i = 0; i < BXE_MAX_PRIORITY; i++) { - if (sc->pri_map[i] == 1) - high |= (1 << i); - } - REG_WR(sc, NIG_REG_LLFC_HIGH_PRIORITY_CLASSES_0 + - port * 4, high); - low = 0; - for (i = 0; i < BXE_MAX_PRIORITY; i++) { - if (sc->pri_map[i] == 0) - low |= (1 << i); - } - REG_WR(sc, NIG_REG_LLFC_LOW_PRIORITY_CLASSES_0 + - port * 4, low); - - REG_WR(sc, NIG_REG_PAUSE_ENABLE_0 + port * 4, 0); - REG_WR(sc, NIG_REG_LLFC_ENABLE_0 + port * 4, 1); - REG_WR(sc, NIG_REG_LLFC_OUT_EN_0 + port * 4, 1); - } else { - REG_WR(sc, NIG_REG_LLFC_ENABLE_0 + port * 4, 0); - REG_WR(sc, NIG_REG_LLFC_OUT_EN_0 + port * 4, 0); - REG_WR(sc, NIG_REG_PAUSE_ENABLE_0 + port * 4, 1); - } } + REG_WR(sc, NIG_REG_LLFC_ENABLE_0 + port * 4, 0); + REG_WR(sc, NIG_REG_LLFC_OUT_EN_0 + port * 4, 0); + REG_WR(sc, NIG_REG_PAUSE_ENABLE_0 + port * 4, 1); + bxe_init_block(sc, MCP_BLOCK, init_stage); bxe_init_block(sc, DMAE_BLOCK, init_stage); @@ -13127,7 +13157,6 @@ bxe_init_hw(struct bxe_softc *sc, uint32_t load_code) DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); sc->dmae_ready = 0; - bxe_gunzip_init(sc); switch (load_code) { case FW_MSG_CODE_DRV_LOAD_COMMON: rc = bxe_init_common(sc); @@ -13154,21 +13183,19 @@ bxe_init_hw(struct bxe_softc *sc, uint32_t load_code) } /* Fetch additional config data if the bootcode is running. */ - if (!BP_NOMCP(sc)) { + if (!NOMCP(sc)) { func = BP_FUNC(sc); /* Fetch the pulse sequence number. 
*/ sc->fw_drv_pulse_wr_seq = (SHMEM_RD(sc, func_mb[func].drv_pulse_mb) & DRV_PULSE_SEQ_MASK); } - /* This needs to be done before gunzip end. */ + /* Clear the default status block. */ bxe_zero_def_sb(sc); for (i = 0; i < sc->num_queues; i++) bxe_zero_sb(sc, BP_L_ID(sc) + i); bxe_init_hw_exit: - bxe_gunzip_end(sc); - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); return (rc); @@ -13194,8 +13221,6 @@ bxe_fw_command(struct bxe_softc *sc, uint32_t command) rc = 0; cnt = 1; - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); - DBRUNMSG(BXE_VERBOSE, bxe_decode_mb_msgs(sc, (command | seq), 0)); BXE_FWMB_LOCK(sc); @@ -13225,321 +13250,285 @@ bxe_fw_command(struct bxe_softc *sc, uint32_t command) } BXE_FWMB_UNLOCK(sc); - - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); return (rc); } /* - * Free any DMA memory owned by the driver. - * - * Scans through each data structre that requires DMA memory and frees - * the memory if allocated. + * Allocate a block of memory and map it for DMA. No partial + * completions allowed, release any resources acquired if we + * can't acquire all resources. * * Returns: - * Nothing. + * 0 = Success, !0 = Failure + * + * Modifies: + * dma->paddr + * dma->vaddr + * dma->tag + * dma->map + * dma->size + * */ -static void -bxe_dma_free(struct bxe_softc *sc) +static int +bxe_dma_malloc(struct bxe_softc *sc, bus_size_t size, + struct bxe_dma *dma, int mapflags, const char *msg) { - struct bxe_fastpath *fp; - int i, j; + int rc; DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); - if (sc->parent_tag != NULL) { + DBRUNIF(dma->size > 0, + BXE_PRINTF("%s(): Called for %s with size > 0 (%05d)!\n", + __FUNCTION__, msg, (int) dma->size)); - for (i = 0; i < sc->num_queues; i++) { - fp = &sc->fp[i]; - /* Trust no one! */ - if (fp) { - /* Free, unmap, and destroy the status block. 
*/ - if (fp->status_block_tag != NULL) { - if (fp->status_block_map != NULL) { - if (fp->status_block != NULL) - bus_dmamem_free( - fp->status_block_tag, - fp->status_block, - fp->status_block_map); - - bus_dmamap_unload( - fp->status_block_tag, - fp->status_block_map); - bus_dmamap_destroy( - fp->status_block_tag, - fp->status_block_map); - } + rc = bus_dma_tag_create( + sc->parent_tag, /* parent */ + BCM_PAGE_SIZE, /* alignment for segs */ + BXE_DMA_BOUNDARY, /* cannot cross */ + BUS_SPACE_MAXADDR, /* restricted low */ + BUS_SPACE_MAXADDR, /* restricted hi */ + NULL, NULL, /* filter f(), arg */ + size, /* max size for this tag */ + 1, /* # of discontinuities */ + size, /* max seg size */ + BUS_DMA_ALLOCNOW, /* flags */ + NULL, NULL, /* lock f(), arg */ + &dma->tag); - bus_dma_tag_destroy( - fp->status_block_tag); - } + if (rc != 0) { + BXE_PRINTF("%s(%d): bus_dma_tag_create() " + "failed (rc = %d) for %s!\n", + __FILE__, __LINE__, rc, msg); + goto bxe_dma_malloc_fail_create; + } - /* - * Free, unmap and destroy all TX BD - * chain pages. 
- */ - if (fp->tx_bd_chain_tag != NULL) { - for (j = 0; j < NUM_TX_PAGES; j++ ) { - if (fp->tx_bd_chain_map[j] != NULL) { - if (fp->tx_bd_chain[j] != NULL) - bus_dmamem_free(fp->tx_bd_chain_tag, - fp->tx_bd_chain[j], - fp->tx_bd_chain_map[j]); - - bus_dmamap_unload(fp->tx_bd_chain_tag, - fp->tx_bd_chain_map[j]); - bus_dmamap_destroy(fp->tx_bd_chain_tag, - fp->tx_bd_chain_map[j]); - } - } + rc = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, + BUS_DMA_NOWAIT, &dma->map); + if (rc != 0) { + BXE_PRINTF("%s(%d): bus_dmamem_alloc() " + "failed (rc = %d) for %s!\n", + __FILE__, __LINE__, rc, msg); + goto bxe_dma_malloc_fail_alloc; + } - bus_dma_tag_destroy(fp->tx_bd_chain_tag); - } + rc = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, + bxe_dma_map_addr, &dma->paddr, mapflags | BUS_DMA_NOWAIT); + if (rc != 0) { + BXE_PRINTF("%s(%d): bus_dmamap_load() " + "failed (rc = %d) for %s!\n", + __FILE__, __LINE__, rc, msg); + goto bxe_dma_malloc_fail_load; + } - /* Free, unmap and destroy all RX BD chain pages. */ - if (fp->rx_bd_chain_tag != NULL) { - - for (j = 0; j < NUM_RX_PAGES; j++ ) { - if (fp->rx_bd_chain_map[j] != NULL) { - if (fp->rx_bd_chain[j] != NULL) - bus_dmamem_free(fp->rx_bd_chain_tag, - fp->rx_bd_chain[j], - fp->rx_bd_chain_map[j]); - - bus_dmamap_unload(fp->rx_bd_chain_tag, - fp->rx_bd_chain_map[j]); - bus_dmamap_destroy(fp->rx_bd_chain_tag, - fp->rx_bd_chain_map[j]); - } - } + dma->size = size; - bus_dma_tag_destroy(fp->rx_bd_chain_tag); - } + DBPRINT(sc, BXE_VERBOSE, "%s(): size=%06d, vaddr=0x%p, " + "paddr=0x%jX - %s\n", __FUNCTION__, (int) dma->size, + dma->vaddr, (uintmax_t) dma->paddr, msg); - /* - * Free, unmap and destroy all RX CQ - * chain pages. 
- */ - if (fp->rx_cq_chain_tag != NULL) { - for (j = 0; j < NUM_RCQ_PAGES; j++ ) { - if (fp->rx_cq_chain_map[j] != NULL) { - if (fp->rx_cq_chain[j] != NULL) - bus_dmamem_free(fp->rx_cq_chain_tag, - fp->rx_cq_chain[j], - fp->rx_cq_chain_map[j]); - - bus_dmamap_unload(fp->rx_cq_chain_tag, - fp->rx_cq_chain_map[j]); - bus_dmamap_destroy(fp->rx_cq_chain_tag, - fp->rx_cq_chain_map[j]); - } - } + goto bxe_dma_malloc_exit; - bus_dma_tag_destroy(fp->rx_cq_chain_tag); - } +bxe_dma_malloc_fail_load: + bus_dmamem_free(dma->tag, dma->vaddr, dma->map); - /* Unload and destroy the TX mbuf maps. */ - if (fp->tx_mbuf_tag != NULL) { - for (j = 0; j < TOTAL_TX_BD; j++) { - if (fp->tx_mbuf_map[j] != NULL) { - bus_dmamap_unload(fp->tx_mbuf_tag, - fp->tx_mbuf_map[j]); - bus_dmamap_destroy(fp->tx_mbuf_tag, - fp->tx_mbuf_map[j]); - } - } +bxe_dma_malloc_fail_alloc: + bus_dma_tag_destroy(dma->tag); + dma->vaddr = NULL; - bus_dma_tag_destroy(fp->tx_mbuf_tag); - } +bxe_dma_malloc_fail_create: + dma->map = NULL; + dma->tag = NULL; + dma->size = 0; +bxe_dma_malloc_exit: + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + return (rc); +} - if (TPA_ENABLED(sc)) { - int tpa_pool_max = CHIP_IS_E1H(sc) ? - ETH_MAX_AGGREGATION_QUEUES_E1H : - ETH_MAX_AGGREGATION_QUEUES_E1; +/* + * Release a block of DMA memory associated tag/map. + * + * Returns: + * None + */ +static void +bxe_dma_free(struct bxe_softc *sc, struct bxe_dma *dma) +{ + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_UNLOAD); - /* Unload and destroy the TPA pool mbuf maps. 
*/ - if (fp->rx_mbuf_tag != NULL) { + if (dma->size > 0) { + bus_dmamap_sync(dma->tag, dma->map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(dma->tag, dma->map); + bus_dmamem_free(dma->tag, dma->vaddr, dma->map); + bus_dma_tag_destroy(dma->tag); + dma->size = 0; + } - for (j = 0; j < tpa_pool_max; j++) { + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_UNLOAD); +} - if (fp->tpa_mbuf_map[j] != NULL) { - bus_dmamap_unload(fp->rx_mbuf_tag, - fp->tpa_mbuf_map[j]); - bus_dmamap_destroy(fp->rx_mbuf_tag, - fp->tpa_mbuf_map[j]); - } - } - } +/* + * Free any DMA memory owned by the driver. + * + * Scans through each data structre that requires DMA memory and frees + * the memory if allocated. + * + * Returns: + * Nothing. + */ +static void +bxe_host_structures_free(struct bxe_softc *sc) +{ + struct bxe_fastpath *fp; + int i, j, max_agg_queues; - /* Free, unmap and destroy all RX SGE chain pages. */ - if (fp->rx_sge_chain_tag != NULL) { - for (j = 0; j < NUM_RX_SGE_PAGES; j++ ) { - if (fp->rx_sge_chain_map[j] != NULL) { - if (fp->rx_sge_chain[j] != NULL) - bus_dmamem_free(fp->rx_sge_chain_tag, - fp->rx_sge_chain[j], - fp->rx_sge_chain_map[j]); - - bus_dmamap_unload(fp->rx_sge_chain_tag, - fp->rx_sge_chain_map[j]); - bus_dmamap_destroy(fp->rx_sge_chain_tag, - fp->rx_sge_chain_map[j]); - } - } - - bus_dma_tag_destroy(fp->rx_sge_chain_tag); - } + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); + max_agg_queues = CHIP_IS_E1H(sc) ? + ETH_MAX_AGGREGATION_QUEUES_E1H : + ETH_MAX_AGGREGATION_QUEUES_E1; - /* Unload and destroy the SGE Buf maps. 
*/ - if (fp->rx_sge_buf_tag != NULL) { + if (sc->parent_tag == NULL) + goto bxe_host_structures_free_exit; - for (j = 0; j < TOTAL_RX_SGE; j++) { - if (fp->rx_sge_buf_map[j] != NULL) { - bus_dmamap_unload(fp->rx_sge_buf_tag, - fp->rx_sge_buf_map[j]); - bus_dmamap_destroy(fp->rx_sge_buf_tag, - fp->rx_sge_buf_map[j]); - } - } + for (i = 0; i < sc->num_queues; i++) { + fp = &sc->fp[i]; - bus_dma_tag_destroy(fp->rx_sge_buf_tag); - } - } + /* Trust no one! */ + if (fp == NULL) + break; - /* Unload and destroy the RX mbuf maps. */ - if (fp->rx_mbuf_tag != NULL) { - for (j = 0; j < TOTAL_RX_BD; j++) { - if (fp->rx_mbuf_map[j] != NULL) { - bus_dmamap_unload(fp->rx_mbuf_tag, - fp->rx_mbuf_map[j]); - bus_dmamap_destroy(fp->rx_mbuf_tag, - fp->rx_mbuf_map[j]); - } - } + /* Status block. */ + bxe_dma_free(sc, &fp->sb_dma); - bus_dma_tag_destroy(fp->rx_mbuf_tag); - } + /* TX chain. */ + bxe_dma_free(sc, &fp->tx_dma); + fp->tx_chain = NULL; - } - } + /* RX chain */ + bxe_dma_free(sc, &fp->rx_dma); + fp->rx_chain = NULL; - /* Destroy the def_status block. */ - if (sc->def_status_block_tag != NULL) { - if (sc->def_status_block_map != NULL) { - if (sc->def_status_block != NULL) - bus_dmamem_free( - sc->def_status_block_tag, - sc->def_status_block, - sc->def_status_block_map); - - bus_dmamap_unload(sc->def_status_block_tag, - sc->def_status_block_map); - bus_dmamap_destroy(sc->def_status_block_tag, - sc->def_status_block_map); - } + /* RCQ chain */ + bxe_dma_free(sc, &fp->rcq_dma); + fp->rcq_chain = NULL; - bus_dma_tag_destroy(sc->def_status_block_tag); - } + /* SG chain */ + bxe_dma_free(sc, &fp->sg_dma); + fp->sg_chain = NULL; - /* Destroy the statistics block. */ - if (sc->stats_tag != NULL) { - if (sc->stats_map != NULL) { - if (sc->stats_block != NULL) - bus_dmamem_free(sc->stats_tag, - sc->stats_block, sc->stats_map); - bus_dmamap_unload(sc->stats_tag, sc->stats_map); - bus_dmamap_destroy(sc->stats_tag, - sc->stats_map); + /* Unload and destroy the TX mbuf maps. 
*/ + if (fp->tx_mbuf_tag != NULL) { + for (j = 0; j < TOTAL_TX_BD; j++) { + if (fp->tx_mbuf_map[j] != NULL) { + bus_dmamap_unload( + fp->tx_mbuf_tag, + fp->tx_mbuf_map[j]); + bus_dmamap_destroy( + fp->tx_mbuf_tag, + fp->tx_mbuf_map[j]); + } } - bus_dma_tag_destroy(sc->stats_tag); + bus_dma_tag_destroy(fp->tx_mbuf_tag); } - /* Destroy the Slow Path block. */ - if (sc->slowpath_tag != NULL) { - if (sc->slowpath_map != NULL) { - if (sc->slowpath != NULL) - bus_dmamem_free(sc->slowpath_tag, - sc->slowpath, sc->slowpath_map); - - bus_dmamap_unload(sc->slowpath_tag, - sc->slowpath_map); - bus_dmamap_destroy(sc->slowpath_tag, - sc->slowpath_map); + /* Unload and destroy the TPA pool mbuf maps. */ + if (fp->rx_mbuf_tag != NULL) { + if (fp->tpa_mbuf_spare_map != NULL) { + bus_dmamap_unload( + fp->rx_mbuf_tag, + fp->tpa_mbuf_spare_map); + bus_dmamap_destroy( + fp->rx_mbuf_tag, + fp->tpa_mbuf_spare_map); } - bus_dma_tag_destroy(sc->slowpath_tag); + for (j = 0; j < max_agg_queues; j++) { + if (fp->tpa_mbuf_map[j] != NULL) { + bus_dmamap_unload( + fp->rx_mbuf_tag, + fp->tpa_mbuf_map[j]); + bus_dmamap_destroy( + fp->rx_mbuf_tag, + fp->tpa_mbuf_map[j]); + } + } } - /* Destroy the Slow Path Ring. */ - if (sc->spq_tag != NULL) { - if (sc->spq_map != NULL) { - if (sc->spq != NULL) - bus_dmamem_free(sc->spq_tag, sc->spq, - sc->spq_map); + /* Unload and destroy the SGE Buf maps. 
*/ + if (fp->rx_sge_buf_tag != NULL) { + if (fp->rx_sge_spare_map != NULL) { + bus_dmamap_unload( + fp->rx_sge_buf_tag, + fp->rx_sge_spare_map); + bus_dmamap_destroy( + fp->rx_sge_buf_tag, + fp->rx_sge_spare_map); + } - bus_dmamap_unload(sc->spq_tag, sc->spq_map); - bus_dmamap_destroy(sc->spq_tag, sc->spq_map); + for (j = 0; j < TOTAL_RX_SGE; j++) { + if (fp->rx_sge_buf_map[j] != NULL) { + bus_dmamap_unload( + fp->rx_sge_buf_tag, + fp->rx_sge_buf_map[j]); + bus_dmamap_destroy( + fp->rx_sge_buf_tag, + fp->rx_sge_buf_map[j]); + } } - bus_dma_tag_destroy(sc->spq_tag); + bus_dma_tag_destroy(fp->rx_sge_buf_tag); } + /* Unload and destroy the RX mbuf maps. */ + if (fp->rx_mbuf_tag != NULL) { + if (fp->rx_mbuf_spare_map != NULL) { + bus_dmamap_unload(fp->rx_mbuf_tag, + fp->rx_mbuf_spare_map); + bus_dmamap_destroy(fp->rx_mbuf_tag, + fp->rx_mbuf_spare_map); + } - free(sc->strm, M_DEVBUF); - sc->strm = NULL; - - if (sc->gunzip_tag != NULL) { - if (sc->gunzip_map != NULL) { - if (sc->gunzip_buf != NULL) - bus_dmamem_free(sc->gunzip_tag, - sc->gunzip_buf, sc->gunzip_map); - - bus_dmamap_unload(sc->gunzip_tag, - sc->gunzip_map); - bus_dmamap_destroy(sc->gunzip_tag, - sc->gunzip_map); + for (j = 0; j < TOTAL_RX_BD; j++) { + if (fp->rx_mbuf_map[j] != NULL) { + bus_dmamap_unload( + fp->rx_mbuf_tag, + fp->rx_mbuf_map[j]); + bus_dmamap_destroy( + fp->rx_mbuf_tag, + fp->rx_mbuf_map[j]); + } } - bus_dma_tag_destroy(sc->gunzip_tag); + bus_dma_tag_destroy(fp->rx_mbuf_tag); } - - bus_dma_tag_destroy(sc->parent_tag); } - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); -} + /* Destroy the default status block */ + bxe_dma_free(sc, &sc->def_sb_dma); + sc->def_sb = NULL; -/* - * Free paged pool memory maps and tags. - * - * Returns: - * Nothing. 
- */ - -static void -bxe_dmamem_free(struct bxe_softc *sc, bus_dma_tag_t tag, caddr_t buf, - bus_dmamap_t map) -{ + /* Destroy the statistics block */ + bxe_dma_free(sc, &sc->stats_dma); + sc->stats = NULL; - DBENTER(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); - - if (tag) { - if (sc->gunzip_buf != NULL) - bus_dmamem_free(tag, buf, map); + /* Destroy the slowpath block. */ + bxe_dma_free(sc, &sc->slowpath_dma); + sc->slowpath = NULL; - if (map != NULL) { - bus_dmamap_unload(tag, map); - bus_dmamap_destroy(tag, map); - } - - if (tag != NULL) - bus_dma_tag_destroy(tag); - } + /* Destroy the slowpath queue. */ + bxe_dma_free(sc, &sc->spq_dma); + sc->spq = NULL; + /* Destroy the slowpath queue. */ + bxe_dma_free(sc, &sc->gz_dma); + sc->gz = NULL; + free(sc->strm, M_DEVBUF); + sc->strm = NULL; - DBEXIT(BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); +bxe_host_structures_free_exit: + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); } /* @@ -13575,31 +13564,30 @@ bxe_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) /* * Allocate any non-paged DMA memory needed by the driver. * - * Allocates DMA memory needed for the various global structures which are - * read or written by the hardware. - * * Returns: * 0 = Success, !0 = Failure. */ static int -bxe_dma_alloc(device_t dev) +bxe_host_structures_alloc(device_t dev) { struct bxe_softc *sc; struct bxe_fastpath *fp; - int error, rc; + int rc; bus_addr_t busaddr; bus_size_t max_size, max_seg_size; int i, j, max_segments; sc = device_get_softc(dev); - rc = 0; - DBENTER(BXE_VERBOSE_RESET); + rc = 0; + int max_agg_queues = CHIP_IS_E1H(sc) ? + ETH_MAX_AGGREGATION_QUEUES_E1H : + ETH_MAX_AGGREGATION_QUEUES_E1; /* * Allocate the parent bus DMA tag appropriate for PCI. 
*/ - if (bus_dma_tag_create(NULL, /* parent tag */ + rc = bus_dma_tag_create(NULL, /* parent tag */ 1, /* alignment for segs */ BXE_DMA_BOUNDARY, /* cannot cross */ BUS_SPACE_MAXADDR, /* restricted low */ @@ -13612,136 +13600,112 @@ bxe_dma_alloc(device_t dev) 0, /* flags */ NULL, /* lock f() */ NULL, /* lock f() arg */ - &sc->parent_tag) /* dma tag */ - ) { + &sc->parent_tag); /* dma tag */ + if (rc != 0) { BXE_PRINTF("%s(%d): Could not allocate parent DMA tag!\n", __FILE__, __LINE__); rc = ENOMEM; - goto bxe_dma_alloc_exit; + goto bxe_host_structures_alloc_exit; } /* Allocate DMA memory for each fastpath structure. */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): fp[%d] virtual address = %p, size = %lu\n", - __FUNCTION__, i, fp, - (long unsigned int)sizeof(struct bxe_fastpath)); /* - * Create a DMA tag for the status block, allocate and - * clear the memory, map the memory into DMA space, and - * fetch the physical address of the block. - */ + * Allocate status block* + */ + rc = bxe_dma_malloc(sc, BXE_STATUS_BLK_SZ, + &fp->sb_dma, BUS_DMA_NOWAIT, "fp status block"); + /* ToDo: Only using 32 bytes out of 4KB allocation! 
*/ + if (rc != 0) + goto bxe_host_structures_alloc_exit; + fp->status_block = + (struct host_status_block *) fp->sb_dma.vaddr; - if (bus_dma_tag_create(sc->parent_tag, - BCM_PAGE_SIZE, /* alignment for segs */ - BXE_DMA_BOUNDARY, /* cannot cross */ - BUS_SPACE_MAXADDR, /* restricted low */ - BUS_SPACE_MAXADDR, /* restricted hi */ - NULL, /* filter f() */ - NULL, /* filter f() arg */ - BXE_STATUS_BLK_SZ, /* max map for this tag */ - 1, /* # of discontinuities */ - BXE_STATUS_BLK_SZ, /* max seg size */ - 0, /* flags */ - NULL, /* lock f() */ - NULL, /* lock f() arg */ - &fp->status_block_tag)) { - BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] status block DMA tag!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - if (bus_dmamem_alloc(fp->status_block_tag, - (void **)&fp->status_block, BUS_DMA_NOWAIT, - &fp->status_block_map)) { - BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] status block DMA memory!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; + /* + * Allocate TX chain. + */ + rc = bxe_dma_malloc(sc, BXE_TX_CHAIN_PAGE_SZ * + NUM_TX_PAGES, &fp->tx_dma, BUS_DMA_NOWAIT, + "tx chain pages"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + fp->tx_chain = (union eth_tx_bd_types *) fp->tx_dma.vaddr; + + /* Link the TX chain pages. 
*/ + for (j = 1; j <= NUM_TX_PAGES; j++) { + struct eth_tx_next_bd *tx_n_bd = + &fp->tx_chain[TOTAL_TX_BD_PER_PAGE * j - 1].next_bd; + + busaddr = fp->tx_dma.paddr + + BCM_PAGE_SIZE * (j % NUM_TX_PAGES); + tx_n_bd->addr_hi = htole32(U64_HI(busaddr)); + tx_n_bd->addr_lo = htole32(U64_LO(busaddr)); } - bzero((char *)fp->status_block, BXE_STATUS_BLK_SZ); - - error = bus_dmamap_load(fp->status_block_tag, - fp->status_block_map, fp->status_block, BXE_STATUS_BLK_SZ, - bxe_dma_map_addr, &busaddr, BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF( - "%s(%d): Could not map fp[%d] status block DMA memory!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; + /* + * Allocate RX chain. + */ + rc = bxe_dma_malloc(sc, BXE_RX_CHAIN_PAGE_SZ * + NUM_RX_PAGES, &fp->rx_dma, BUS_DMA_NOWAIT, + "rx chain pages"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + fp->rx_chain = (struct eth_rx_bd *) fp->rx_dma.vaddr; + + /* Link the RX chain pages. */ + for (j = 1; j <= NUM_RX_PAGES; j++) { + struct eth_rx_bd *rx_bd = + &fp->rx_chain[TOTAL_RX_BD_PER_PAGE * j - 2]; + + busaddr = fp->rx_dma.paddr + + BCM_PAGE_SIZE * (j % NUM_RX_PAGES); + rx_bd->addr_hi = htole32(U64_HI(busaddr)); + rx_bd->addr_lo = htole32(U64_LO(busaddr)); } - /* Physical address of Status Block */ - fp->status_block_paddr = busaddr; - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): fp[%d] status block physical address = 0x%jX\n", - __FUNCTION__, i, (uintmax_t) fp->status_block_paddr); - /* - * Create a DMA tag for the TX buffer descriptor chain, - * allocate and clear the memory, and fetch the - * physical address of the block. + * Allocate CQ chain. 
*/ - if (bus_dma_tag_create(sc->parent_tag, - BCM_PAGE_SIZE, /* alignment for segs */ - BXE_DMA_BOUNDARY, /* cannot cross */ - BUS_SPACE_MAXADDR, /* restricted low */ - BUS_SPACE_MAXADDR, /* restricted hi */ - NULL, /* filter f() */ - NULL, /* filter f() arg */ - BXE_TX_CHAIN_PAGE_SZ,/* max map for this tag */ - 1, /* # of discontinuities */ - BXE_TX_CHAIN_PAGE_SZ,/* max seg size */ - 0, /* flags */ - NULL, /* lock f() */ - NULL, /* lock f() arg */ - &fp->tx_bd_chain_tag)) { - BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] TX descriptor chain DMA tag!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; + rc = bxe_dma_malloc(sc, BXE_RX_CHAIN_PAGE_SZ * + NUM_RCQ_PAGES, &fp->rcq_dma, BUS_DMA_NOWAIT, + "rcq chain pages"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + fp->rcq_chain = (union eth_rx_cqe *) fp->rcq_dma.vaddr; + + /* Link the CQ chain pages. */ + for (j = 1; j <= NUM_RCQ_PAGES; j++) { + struct eth_rx_cqe_next_page *nextpg = + (struct eth_rx_cqe_next_page *) + &fp->rcq_chain[TOTAL_RCQ_ENTRIES_PER_PAGE * j - 1]; + + busaddr = fp->rcq_dma.paddr + + BCM_PAGE_SIZE * (j % NUM_RCQ_PAGES); + nextpg->addr_hi = htole32(U64_HI(busaddr)); + nextpg->addr_lo = htole32(U64_LO(busaddr)); } - for (j = 0; j < NUM_TX_PAGES; j++) { - if (bus_dmamem_alloc(fp->tx_bd_chain_tag, - (void **)&fp->tx_bd_chain[j], BUS_DMA_NOWAIT, - &fp->tx_bd_chain_map[j])) { - BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] TX descriptor chain DMA memory!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - bzero((char *)fp->tx_bd_chain[j], BXE_TX_CHAIN_PAGE_SZ); - - error = bus_dmamap_load(fp->tx_bd_chain_tag, - fp->tx_bd_chain_map[j], fp->tx_bd_chain[j], - BXE_TX_CHAIN_PAGE_SZ, bxe_dma_map_addr, - &busaddr, BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF( - "%s(%d): Could not map fp[%d] TX descriptor chain DMA memory!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* Physical Address of each page in the Tx 
BD Chain. */ - fp->tx_bd_chain_paddr[j] = busaddr; - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): fp[%d]->tx_bd_chain_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, j, (uintmax_t)busaddr); + /* + * Allocate SG chain. + */ + rc = bxe_dma_malloc(sc, BXE_RX_CHAIN_PAGE_SZ * + NUM_RX_SGE_PAGES, &fp->sg_dma, BUS_DMA_NOWAIT, + "sg chain pages"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + fp->sg_chain = (struct eth_rx_sge *) fp->sg_dma.vaddr; + + /* Link the SG chain pages. */ + for (j = 1; j <= NUM_RX_SGE_PAGES; j++) { + struct eth_rx_sge *nextpg = + &fp->sg_chain[TOTAL_RX_SGE_PER_PAGE * j - 2]; + + busaddr = fp->sg_dma.paddr + + BCM_PAGE_SIZE * (j % NUM_RX_SGE_PAGES); + nextpg->addr_hi = htole32(U64_HI(busaddr)); + nextpg->addr_lo = htole32(U64_LO(busaddr)); } /* @@ -13773,84 +13737,25 @@ bxe_dma_alloc(device_t dev) NULL, /* lock f() arg */ &fp->tx_mbuf_tag)) { BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] TX mbuf DMA tag!\n", + "%s(%d): Could not allocate fp[%d] " + "TX mbuf DMA tag!\n", __FILE__, __LINE__, i); rc = ENOMEM; - goto bxe_dma_alloc_exit; + goto bxe_host_structures_alloc_exit; } /* Create DMA maps for each the TX mbuf cluster(ext buf). */ for (j = 0; j < TOTAL_TX_BD; j++) { if (bus_dmamap_create(fp->tx_mbuf_tag, BUS_DMA_NOWAIT, - &(fp->tx_mbuf_map[j]))) { - BXE_PRINTF( - "%s(%d): Unable to create fp[%d] TX mbuf DMA map!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - } - - /* - * Create a DMA tag for the RX buffer - * descriptor chain, allocate and clear - * the memory, and fetch the physical - * address of the blocks. 
- */ - if (bus_dma_tag_create(sc->parent_tag, - BCM_PAGE_SIZE, /* alignment for segs */ - BXE_DMA_BOUNDARY, /* cannot cross */ - BUS_SPACE_MAXADDR, /* restricted low */ - BUS_SPACE_MAXADDR, /* restricted hi */ - NULL, /* filter f() */ - NULL, /* filter f() arg */ - BXE_RX_CHAIN_PAGE_SZ,/* max map for this tag */ - 1, /* # of discontinuities */ - BXE_RX_CHAIN_PAGE_SZ,/* max seg size */ - 0, /* flags */ - NULL, /* lock f() */ - NULL, /* lock f() arg */ - &fp->rx_bd_chain_tag)) { - BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX BD chain DMA tag!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - for (j = 0; j < NUM_RX_PAGES; j++) { - if (bus_dmamem_alloc(fp->rx_bd_chain_tag, - (void **)&fp->rx_bd_chain[j], BUS_DMA_NOWAIT, - &fp->rx_bd_chain_map[j])) { + &fp->tx_mbuf_map[j])) { BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX BD chain[%d] DMA memory!\n", + "%s(%d): Unable to create fp[%02d]." + "tx_mbuf_map[%d] DMA map!\n", __FILE__, __LINE__, i, j); rc = ENOMEM; - goto bxe_dma_alloc_exit; + goto bxe_host_structures_alloc_exit; } - - bzero((char *)fp->rx_bd_chain[j], BXE_RX_CHAIN_PAGE_SZ); - - error = bus_dmamap_load(fp->rx_bd_chain_tag, - fp->rx_bd_chain_map[j], fp->rx_bd_chain[j], - BXE_RX_CHAIN_PAGE_SZ, bxe_dma_map_addr, &busaddr, - BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF( - "%s(%d): Could not map fp[%d] RX BD chain[%d] DMA memory!\n", - __FILE__, __LINE__, i, j); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* Physical address of each page in the RX BD chain */ - fp->rx_bd_chain_paddr[j] = busaddr; - - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): fp[%d]->rx_bd_chain_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, j, (uintmax_t)busaddr); } /* @@ -13871,431 +13776,152 @@ bxe_dma_alloc(device_t dev) NULL, /* lock f() arg */ &fp->rx_mbuf_tag)) { BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX mbuf DMA tag!\n", + "%s(%d): Could not allocate fp[%02d] " + "RX mbuf DMA tag!\n", __FILE__, __LINE__, i); rc = 
ENOMEM; - goto bxe_dma_alloc_exit; + goto bxe_host_structures_alloc_exit; } /* Create DMA maps for the RX mbuf clusters. */ + if (bus_dmamap_create(fp->rx_mbuf_tag, + BUS_DMA_NOWAIT, &fp->rx_mbuf_spare_map)) { + BXE_PRINTF( + "%s(%d): Unable to create fp[%02d]." + "rx_mbuf_spare_map DMA map!\n", + __FILE__, __LINE__, i); + rc = ENOMEM; + goto bxe_host_structures_alloc_exit; + } + for (j = 0; j < TOTAL_RX_BD; j++) { if (bus_dmamap_create(fp->rx_mbuf_tag, - BUS_DMA_NOWAIT, &(fp->rx_mbuf_map[j]))) { + BUS_DMA_NOWAIT, &fp->rx_mbuf_map[j])) { BXE_PRINTF( - "%s(%d): Unable to create fp[%d] RX mbuf DMA map!\n", - __FILE__, __LINE__, i); + "%s(%d): Unable to create fp[%02d]." + "rx_mbuf_map[%d] DMA map!\n", + __FILE__, __LINE__, i, j); rc = ENOMEM; - goto bxe_dma_alloc_exit; + goto bxe_host_structures_alloc_exit; } } /* - * Create a DMA tag for the RX Completion - * Queue, allocate and clear the memory, - * map the memory into DMA space, and fetch - * the physical address of the block. + * Create a DMA tag for RX SGE bufs. 
*/ - if (bus_dma_tag_create(sc->parent_tag, - BCM_PAGE_SIZE, /* alignment for segs */ - BXE_DMA_BOUNDARY, /* cannot cross */ - BUS_SPACE_MAXADDR, /* restricted low */ - BUS_SPACE_MAXADDR, /* restricted hi */ - NULL, /* filter f() */ - NULL, /* filter f() arg */ - BXE_RX_CHAIN_PAGE_SZ,/* max map for this tag */ - 1, /* # of discontinuities */ - BXE_RX_CHAIN_PAGE_SZ,/* max seg size */ - 0, /* flags */ - NULL, /* lock f() */ - NULL, /* lock f() arg */ - &fp->rx_cq_chain_tag)) { + if (bus_dma_tag_create(sc->parent_tag, 1, + BXE_DMA_BOUNDARY, BUS_SPACE_MAXADDR, + BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE, 1, + PAGE_SIZE, 0, NULL, NULL, &fp->rx_sge_buf_tag)) { BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX Completion Queue DMA tag!\n", + "%s(%d): Could not allocate fp[%02d] " + "RX SGE mbuf DMA tag!\n", __FILE__, __LINE__, i); rc = ENOMEM; - goto bxe_dma_alloc_exit; + goto bxe_host_structures_alloc_exit; } - for (j = 0; j < NUM_RCQ_PAGES; j++) { - if (bus_dmamem_alloc(fp->rx_cq_chain_tag, - (void **)&fp->rx_cq_chain[j], BUS_DMA_NOWAIT, - &fp->rx_cq_chain_map[j])) { - BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX Completion Queue DMA memory!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - bzero((char *)fp->rx_cq_chain[j], - BXE_RX_CHAIN_PAGE_SZ); - - error = bus_dmamap_load(fp->rx_cq_chain_tag, - fp->rx_cq_chain_map[j], fp->rx_cq_chain[j], - BXE_RX_CHAIN_PAGE_SZ, bxe_dma_map_addr, &busaddr, - BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF( - "%s(%d): Could not map fp[%d] RX Completion Queue DMA memory!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* - * Physical address of each page in the RX - * Completion Chain. - */ - fp->rx_cq_chain_paddr[j] = busaddr; - - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): fp[%d]->rx_cq_chain_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, j, (uintmax_t)busaddr); + /* Create DMA maps for the SGE mbuf clusters. 
*/ + if (bus_dmamap_create(fp->rx_sge_buf_tag, + BUS_DMA_NOWAIT, &fp->rx_sge_spare_map)) { + BXE_PRINTF( + "%s(%d): Unable to create fp[%02d]." + "rx_sge_spare_map DMA map!\n", + __FILE__, __LINE__, i); + rc = ENOMEM; + goto bxe_host_structures_alloc_exit; } - if (TPA_ENABLED(sc)) { - int tpa_pool_max = CHIP_IS_E1H(sc) ? - ETH_MAX_AGGREGATION_QUEUES_E1H : - ETH_MAX_AGGREGATION_QUEUES_E1; - - /* - * Create a DMA tag for the RX SGE Ring, - * allocate and clear the memory, map the - * memory into DMA space, and fetch the - * physical address of the block. - */ - if (bus_dma_tag_create(sc->parent_tag, - BCM_PAGE_SIZE, /* alignment for segs */ - BXE_DMA_BOUNDARY, /* cannot cross */ - BUS_SPACE_MAXADDR, /* restricted low */ - BUS_SPACE_MAXADDR, /* restricted hi */ - NULL, /* filter f() */ - NULL, /* filter f() arg */ - BXE_RX_CHAIN_PAGE_SZ,/* max map for this tag */ - 1, /* # of discontinuities */ - BXE_RX_CHAIN_PAGE_SZ,/* max seg size */ - 0, /* flags */ - NULL, /* lock f() */ - NULL, /* lock f() arg */ - &fp->rx_sge_chain_tag)) { + for (j = 0; j < TOTAL_RX_SGE; j++) { + if (bus_dmamap_create(fp->rx_sge_buf_tag, + BUS_DMA_NOWAIT, &fp->rx_sge_buf_map[j])) { BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX SGE descriptor chain DMA tag!\n", - __FILE__, __LINE__, i); + "%s(%d): Unable to create fp[%02d]." 
+ "rx_sge_buf_map[%d] DMA map!\n", + __FILE__, __LINE__, i, j); rc = ENOMEM; - goto bxe_dma_alloc_exit; + goto bxe_host_structures_alloc_exit; } + } - for (j = 0; j < NUM_RX_SGE_PAGES; j++) { - if (bus_dmamem_alloc(fp->rx_sge_chain_tag, - (void **)&fp->rx_sge_chain[j], - BUS_DMA_NOWAIT, &fp->rx_sge_chain_map[j])) { - BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX SGE chain[%d] DMA memory!\n", - __FILE__, __LINE__, i, j); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - bzero((char *)fp->rx_sge_chain[j], - BXE_RX_CHAIN_PAGE_SZ); - - error = bus_dmamap_load(fp->rx_sge_chain_tag, - fp->rx_sge_chain_map[j], - fp->rx_sge_chain[j], BXE_RX_CHAIN_PAGE_SZ, - bxe_dma_map_addr, &busaddr, BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF( - "%s(%d): Could not map fp[%d] RX SGE chain[%d] DMA memory!\n", - __FILE__, __LINE__, i, j); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* - * Physical address of each page in the RX - * SGE chain. - */ - DBPRINT(sc, - (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): fp[%d]->rx_sge_chain_paddr[%d] = 0x%jX\n", - __FUNCTION__, i, j, (uintmax_t)busaddr); - fp->rx_sge_chain_paddr[j] = busaddr; - } + /* Create DMA maps for the TPA pool mbufs. */ + if (bus_dmamap_create(fp->rx_mbuf_tag, + BUS_DMA_NOWAIT, &fp->tpa_mbuf_spare_map)) { + BXE_PRINTF( + "%s(%d): Unable to create fp[%02d]." + "tpa_mbuf_spare_map DMA map!\n", + __FILE__, __LINE__, i); + rc = ENOMEM; + goto bxe_host_structures_alloc_exit; + } - /* - * Create a DMA tag for RX SGE bufs. - */ - if (bus_dma_tag_create(sc->parent_tag, 1, - BXE_DMA_BOUNDARY, BUS_SPACE_MAXADDR, - BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE, 1, - PAGE_SIZE, 0, NULL, NULL, &fp->rx_sge_buf_tag)) { + for (j = 0; j < max_agg_queues; j++) { + if (bus_dmamap_create(fp->rx_mbuf_tag, + BUS_DMA_NOWAIT, &fp->tpa_mbuf_map[j])) { BXE_PRINTF( - "%s(%d): Could not allocate fp[%d] RX SGE mbuf DMA tag!\n", - __FILE__, __LINE__, i); + "%s(%d): Unable to create fp[%02d]." 
+ "tpa_mbuf_map[%d] DMA map!\n", + __FILE__, __LINE__, i, j); rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* Create DMA maps for the SGE mbuf clusters. */ - for (j = 0; j < TOTAL_RX_SGE; j++) { - if (bus_dmamap_create(fp->rx_sge_buf_tag, - BUS_DMA_NOWAIT, &(fp->rx_sge_buf_map[j]))) { - BXE_PRINTF( - "%s(%d): Unable to create fp[%d] RX SGE mbuf DMA map!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - } - - /* Create DMA maps for the TPA pool mbufs. */ - for (j = 0; j < tpa_pool_max; j++) { - if (bus_dmamap_create(fp->rx_mbuf_tag, - BUS_DMA_NOWAIT, &(fp->tpa_mbuf_map[j]))) { - BXE_PRINTF( - "%s(%d): Unable to create fp[%d] TPA DMA map!\n", - __FILE__, __LINE__, i); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } + goto bxe_host_structures_alloc_exit; } } + + bxe_init_sge_ring_bit_mask(fp); } /* - * Create a DMA tag for the def_status block, allocate and clear the - * memory, map the memory into DMA space, and fetch the physical - * address of the block. + * Allocate default status block. 
*/ - if (bus_dma_tag_create(sc->parent_tag, BCM_PAGE_SIZE, BXE_DMA_BOUNDARY, - BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - BXE_DEF_STATUS_BLK_SZ, 1, BXE_DEF_STATUS_BLK_SZ, 0, NULL, NULL, - &sc->def_status_block_tag)) { - BXE_PRINTF( - "%s(%d): Could not allocate def_status block DMA tag!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - if (bus_dmamem_alloc(sc->def_status_block_tag, - (void **)&sc->def_status_block, BUS_DMA_NOWAIT, - &sc->def_status_block_map)) { - BXE_PRINTF( - "%s(%d): Could not allocate def_status block DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - bzero((char *)sc->def_status_block, BXE_DEF_STATUS_BLK_SZ); - - error = bus_dmamap_load(sc->def_status_block_tag, - sc->def_status_block_map, sc->def_status_block, - BXE_DEF_STATUS_BLK_SZ, bxe_dma_map_addr, &busaddr, BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF( - "%s(%d): Could not map def_status block DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* Physical Address of Default Status Block. */ - sc->def_status_block_paddr = busaddr; - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): Default status block physical address = 0x%08X\n", - __FUNCTION__, (uint32_t)sc->def_status_block_paddr); + rc = bxe_dma_malloc(sc, BXE_DEF_STATUS_BLK_SZ, &sc->def_sb_dma, + BUS_DMA_NOWAIT, "default status block"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + sc->def_sb = (struct host_def_status_block *) sc->def_sb_dma.vaddr; /* - * Create a DMA tag for the statistics block, allocate and clear the - * memory, map the memory into DMA space, and fetch the physical - * address of the block. + * Allocate statistics block. 
*/ - if (bus_dma_tag_create(sc->parent_tag, BXE_DMA_ALIGN, BXE_DMA_BOUNDARY, - BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BXE_STATS_BLK_SZ, - 1, BXE_STATS_BLK_SZ, 0, NULL, NULL, &sc->stats_tag)) { - BXE_PRINTF( - "%s(%d): Could not allocate statistics block DMA tag!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - if (bus_dmamem_alloc(sc->stats_tag, (void **)&sc->stats_block, - BUS_DMA_NOWAIT, &sc->stats_map)) { - BXE_PRINTF( - "%s(%d): Could not allocate statistics block DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - bzero((char *)sc->stats_block, BXE_STATS_BLK_SZ); - - error = bus_dmamap_load(sc->stats_tag, sc->stats_map, sc->stats_block, - BXE_STATS_BLK_SZ, bxe_dma_map_addr, &busaddr, BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF( - "%s(%d): Could not map statistics block DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* Physical Address of Statistics Block. */ - sc->stats_block_paddr = busaddr; - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): Statistics block physical address = 0x%08X\n", - __FUNCTION__, (uint32_t)sc->stats_block_paddr); + rc = bxe_dma_malloc(sc, BXE_STATS_BLK_SZ, &sc->stats_dma, + BUS_DMA_NOWAIT, "statistics block"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + sc->stats = (struct statistics_block *) sc->stats_dma.vaddr; /* - * Create a DMA tag for slowpath memory, allocate and clear the - * memory, map the memory into DMA space, and fetch the physical - * address of the block. + * Allocate slowpath block. 
*/ - if (bus_dma_tag_create(sc->parent_tag, BCM_PAGE_SIZE, BXE_DMA_BOUNDARY, - BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BXE_SLOWPATH_SZ, - 1, BXE_SLOWPATH_SZ, 0, NULL, NULL, &sc->slowpath_tag)) { - BXE_PRINTF( - "%s(%d): Could not allocate slowpath DMA tag!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - if (bus_dmamem_alloc(sc->slowpath_tag, (void **)&sc->slowpath, - BUS_DMA_NOWAIT, &sc->slowpath_map)) { - BXE_PRINTF( - "%s(%d): Could not allocate slowpath DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - bzero((char *)sc->slowpath, BXE_SLOWPATH_SZ); - - error = bus_dmamap_load(sc->slowpath_tag, sc->slowpath_map, - sc->slowpath, BXE_SLOWPATH_SZ, bxe_dma_map_addr, &busaddr, - BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF("%s(%d): Could not map slowpath DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* Physical Address For Slow Path Context. */ - sc->slowpath_paddr = busaddr; - DBPRINT(sc, (BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET), - "%s(): Slowpath context physical address = 0x%08X\n", - __FUNCTION__, (uint32_t)sc->slowpath_paddr); + rc = bxe_dma_malloc(sc, BXE_SLOWPATH_SZ, &sc->slowpath_dma, + BUS_DMA_NOWAIT, "slowpath block"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + sc->slowpath = (struct bxe_slowpath *) sc->slowpath_dma.vaddr; /* - * Create a DMA tag for the Slow Path Queue, allocate and clear the - * memory, map the memory into DMA space, and fetch the physical - * address of the block. + * Allocate slowpath queue. 
*/ - if (bus_dma_tag_create(sc->parent_tag, BCM_PAGE_SIZE, BXE_DMA_BOUNDARY, - BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BXE_SPQ_SZ, 1, - BXE_SPQ_SZ, 0, NULL, NULL, &sc->spq_tag)) { - BXE_PRINTF("%s(%d): Could not allocate SPQ DMA tag!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - if (bus_dmamem_alloc(sc->spq_tag, (void **)&sc->spq, BUS_DMA_NOWAIT, - &sc->spq_map)) { - BXE_PRINTF("%s(%d): Could not allocate SPQ DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - bzero((char *)sc->spq, BXE_SPQ_SZ); - - error = bus_dmamap_load(sc->spq_tag, sc->spq_map, sc->spq, BXE_SPQ_SZ, - bxe_dma_map_addr, &busaddr, BUS_DMA_NOWAIT); - - if (error) { - BXE_PRINTF("%s(%d): Could not map SPQ DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - - /* Physical address of slow path queue. */ - sc->spq_paddr = busaddr; - DBPRINT(sc, (BXE_EXTREME_LOAD | BXE_EXTREME_RESET), - "%s(): Slowpath queue physical address = 0x%08X\n", - __FUNCTION__, (uint32_t)sc->spq_paddr); - - if (bxe_gunzip_init(sc)) { - rc = ENOMEM; - goto bxe_dma_alloc_exit; - } - -bxe_dma_alloc_exit: - DBEXIT(BXE_VERBOSE_RESET); - return (rc); -} - -/* - * Allocate DMA memory used for the firmware gunzip memory. - * - * Returns: - * 0 for success, !0 = Failure. - */ - -static int -bxe_dmamem_alloc(struct bxe_softc *sc, bus_dma_tag_t tag, bus_dmamap_t map, - void *buf, uint32_t buflen, bus_addr_t *busaddr) -{ - int rc; - - rc = 0; - - DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); + rc = bxe_dma_malloc(sc, BXE_SPQ_SZ, &sc->spq_dma, + BUS_DMA_NOWAIT, "slowpath queue"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + sc->spq = (struct eth_spe *) sc->spq_dma.vaddr; /* - * Create a DMA tag for the block, allocate and clear the - * memory, map the memory into DMA space, and fetch the physical - * address of the block. + * Allocate firmware decompression buffer. 
*/ - if (bus_dma_tag_create(sc->parent_tag, BXE_DMA_ALIGN, BXE_DMA_BOUNDARY, - BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, buflen, 1, buflen, - 0, NULL, NULL, &sc->gunzip_tag)) { - BXE_PRINTF("%s(%d): Could not allocate DMA tag!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dmamem_alloc_exit; - } - - if (bus_dmamem_alloc(sc->gunzip_tag, (void **)&sc->gunzip_buf, - BUS_DMA_NOWAIT, &sc->gunzip_map)) { - BXE_PRINTF("%s(%d): Could not allocate DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - goto bxe_dmamem_alloc_exit; + rc = bxe_dma_malloc(sc, BXE_FW_BUF_SIZE, &sc->gz_dma, + BUS_DMA_NOWAIT, "gunzip buffer"); + if (rc != 0) + goto bxe_host_structures_alloc_exit; + sc->gz = sc->gz_dma.vaddr; + if (sc->strm == NULL) { + goto bxe_host_structures_alloc_exit; } - bzero((char *)sc->gunzip_buf, buflen); - - if (bus_dmamap_load(sc->gunzip_tag, sc->gunzip_map, sc->gunzip_buf, - buflen, bxe_dma_map_addr, busaddr, BUS_DMA_NOWAIT)) { - BXE_PRINTF("%s(%d): Could not map DMA memory!\n", - __FILE__, __LINE__); - rc = ENOMEM; - } + sc->strm = malloc(sizeof(*sc->strm), M_DEVBUF, M_NOWAIT); -bxe_dmamem_alloc_exit: - DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET); +bxe_host_structures_alloc_exit: + DBEXIT(BXE_VERBOSE_RESET); return (rc); } @@ -14313,7 +13939,7 @@ bxe_set_mac_addr_e1(struct bxe_softc *sc, int set) uint8_t *eaddr; int port; - DBENTER(BXE_VERBOSE_MISC); + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); config = BXE_SP(sc, mac_config); port = BP_PORT(sc); @@ -14371,7 +13997,7 @@ bxe_set_mac_addr_e1(struct bxe_softc *sc, int set) U64_HI(BXE_SP_MAPPING(sc, mac_config)), U64_LO(BXE_SP_MAPPING(sc, mac_config)), 0); - DBEXIT(BXE_VERBOSE_MISC); + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); } /* @@ -14388,7 +14014,7 @@ bxe_set_mac_addr_e1h(struct bxe_softc *sc, int set) uint8_t *eaddr; int func, port; - DBENTER(BXE_VERBOSE_MISC); + DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); config = (struct 
mac_configuration_cmd_e1h *)BXE_SP(sc, mac_config); port = BP_PORT(sc); @@ -14428,7 +14054,7 @@ bxe_set_mac_addr_e1h(struct bxe_softc *sc, int set) config_table->flags = MAC_CONFIGURATION_ENTRY_E1H_ACTION_TYPE; - DBPRINT(sc, BXE_VERBOSE_MISC, + DBPRINT(sc, BXE_VERBOSE, "%s(): %s MAC (%04x:%04x:%04x), E1HOV = %d, CLID = %d\n", __FUNCTION__, (set ? "Setting" : "Clearing"), config_table->msb_mac_addr, config_table->middle_mac_addr, @@ -14439,7 +14065,7 @@ bxe_set_mac_addr_e1h(struct bxe_softc *sc, int set) U64_LO(BXE_SP_MAPPING(sc, mac_config)), 0); bxe_set_mac_addr_e1h_exit: - DBEXIT(BXE_VERBOSE_MISC); + DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); } /* @@ -14481,24 +14107,14 @@ bxe_set_rx_mode(struct bxe_softc *sc) * multicast address filtering. */ if (ifp->if_flags & IFF_PROMISC) { - DBPRINT(sc, BXE_VERBOSE_MISC, - "%s(): Enabling promiscuous mode.\n", __FUNCTION__); - /* Enable promiscuous mode. */ rx_mode = BXE_RX_MODE_PROMISC; } else if (ifp->if_flags & IFF_ALLMULTI || ifp->if_amcount > BXE_MAX_MULTICAST) { - DBPRINT(sc, BXE_VERBOSE_MISC, - "%s(): Enabling all multicast mode.\n", __FUNCTION__); - /* Enable all multicast addresses. */ rx_mode = BXE_RX_MODE_ALLMULTI; } else { /* Enable selective multicast mode. */ - DBPRINT(sc, BXE_VERBOSE_MISC, - "%s(): Enabling selective multicast mode.\n", - __FUNCTION__); - if (CHIP_IS_E1(sc)) { i = 0; config = BXE_SP(sc, mcast_config); @@ -14608,7 +14224,6 @@ bxe_reset_func(struct bxe_softc *sc) /* Configure IGU. */ REG_WR(sc, HC_REG_LEADING_EDGE_0 + port * 8, 0); REG_WR(sc, HC_REG_TRAILING_EDGE_0 + port * 8, 0); - REG_WR(sc, HC_REG_CONFIG_0 + (port * 4), 0x1000); /* Clear ILT. 
*/ @@ -14670,8 +14285,10 @@ bxe_reset_common(struct bxe_softc *sc) DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); - REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, 0xd3ffff7f); - REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, 0x1403); + REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR, + 0xd3ffff7f); + REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, + 0x1403); DBEXIT(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); } @@ -14687,7 +14304,6 @@ bxe_reset_chip(struct bxe_softc *sc, uint32_t reset_code) { DBENTER(BXE_VERBOSE_LOAD | BXE_VERBOSE_RESET | BXE_VERBOSE_UNLOAD); - DBRUNLV(BXE_INFO, bxe_decode_mb_msgs(sc, 0, reset_code)); switch (reset_code) { case FW_MSG_CODE_DRV_UNLOAD_COMMON: @@ -14712,10 +14328,12 @@ bxe_reset_chip(struct bxe_softc *sc, uint32_t reset_code) } /* - * Called by the OS to set media options (link, speed, etc.). + * Called by the OS to set media options (link, speed, etc.) + * when the user specifies "ifconfig bxe media XXX" or + * "ifconfig bxe mediaopt XXX". * * Returns: - * 0 = Success, positive value for failure. + * 0 = Success, !0 = Failure */ static int bxe_ifmedia_upd(struct ifnet *ifp) @@ -14730,44 +14348,32 @@ bxe_ifmedia_upd(struct ifnet *ifp) ifm = &sc->bxe_ifmedia; rc = 0; - /* This is an Ethernet controller. */ + /* We only support Ethernet media type. */ if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) { rc = EINVAL; goto bxe_ifmedia_upd_exit; } - BXE_CORE_LOCK(sc); - switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: - DBPRINT(sc, BXE_VERBOSE_PHY, - "%s(): Media set to IFM_AUTO, restarting autonegotiation.\n", - __FUNCTION__); + /* ToDo: What to do here? */ + /* Doing nothing translates to success here. 
*/ break; case IFM_10G_CX4: - DBPRINT(sc, BXE_VERBOSE_PHY, - "%s(): Media set to IFM_10G_CX4, forced mode.\n", __FUNCTION__); - break; + /* Fall-through */ case IFM_10G_SR: - DBPRINT(sc, BXE_VERBOSE_PHY, - "%s(): Media set to IFM_10G_SR, forced mode.\n", __FUNCTION__); - break; + /* Fall-through */ case IFM_10G_T: - DBPRINT(sc, BXE_VERBOSE_PHY, - "%s(): Media set to IFM_10G_T, forced mode.\n", __FUNCTION__); - break; + /* Fall-through */ case IFM_10G_TWINAX: - DBPRINT(sc, BXE_VERBOSE_PHY, - "%s(): Media set to IFM_10G_TWINAX, forced mode.\n", __FUNCTION__); - break; + /* Fall-through */ default: + /* We don't support channging the media type. */ DBPRINT(sc, BXE_WARN, "%s(): Invalid media type!\n", __FUNCTION__); rc = EINVAL; } - BXE_CORE_UNLOCK(sc); - bxe_ifmedia_upd_exit: DBENTER(BXE_VERBOSE_PHY); return (rc); @@ -14789,7 +14395,7 @@ bxe_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr) DBENTER(BXE_EXTREME_LOAD | BXE_EXTREME_RESET); /* Report link down if the driver isn't running. */ - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { ifmr->ifm_active |= IFM_NONE; goto bxe_ifmedia_status_exit; } @@ -14824,13 +14430,13 @@ bxe_ifmedia_status_exit: * None. 
*/ static __inline void -bxe_update_last_max_sge(struct bxe_fastpath *fp, uint16_t idx) +bxe_update_last_max_sge(struct bxe_fastpath *fp, uint16_t index) { uint16_t last_max; last_max = fp->last_max_sge; - if (SUB_S16(idx, last_max) > 0) - fp->last_max_sge = idx; + if (SUB_S16(index, last_max) > 0) + fp->last_max_sge = index; } /* @@ -14842,13 +14448,13 @@ bxe_update_last_max_sge(struct bxe_fastpath *fp, uint16_t idx) static void bxe_clear_sge_mask_next_elems(struct bxe_fastpath *fp) { - int i, idx, j; + int i, index, j; - for (i = 1; i <= NUM_RX_SGE_PAGES; i++) { - idx = RX_SGE_CNT * i - 1; + for (i = 0; i < NUM_RX_SGE_PAGES; i++) { + index = i * TOTAL_RX_SGE_PER_PAGE + USABLE_RX_SGE_PER_PAGE; for (j = 0; j < 2; j++) { - SGE_MASK_CLEAR_BIT(fp, idx); - idx--; + SGE_MASK_CLEAR_BIT(fp, index); + index++; } } } @@ -14864,7 +14470,7 @@ bxe_update_sge_prod(struct bxe_fastpath *fp, struct eth_fast_path_rx_cqe *fp_cqe) { struct bxe_softc *sc; - uint16_t delta, last_max, last_elem, first_elem, sge_len; + uint16_t delta, first_elem, last_max, last_elem, sge_len; int i; sc = fp->sc; @@ -14874,7 +14480,7 @@ bxe_update_sge_prod(struct bxe_fastpath *fp, sge_len = SGE_PAGE_ALIGN(le16toh(fp_cqe->pkt_len) - le16toh(fp_cqe->len_on_bd)) >> SGE_PAGE_SHIFT; if (!sge_len) - return; + goto bxe_update_sge_prod_exit; /* First mark all used pages. */ for (i = 0; i < sge_len; i++) @@ -14893,10 +14499,10 @@ bxe_update_sge_prod(struct bxe_fastpath *fp, /* Now update the producer index. */ for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) { - if (fp->sge_mask[i]) + if (fp->rx_sge_mask[i]) break; - fp->sge_mask[i] = RX_SGE_MASK_ELEM_ONE_MASK; + fp->rx_sge_mask[i] = RX_SGE_MASK_ELEM_ONE_MASK; delta += RX_SGE_MASK_ELEM_SZ; } @@ -14906,16 +14512,18 @@ bxe_update_sge_prod(struct bxe_fastpath *fp, bxe_clear_sge_mask_next_elems(fp); } +bxe_update_sge_prod_exit: DBEXIT(BXE_EXTREME_RECV); } /* * Initialize scatter gather ring bitmask. 
* - * Elements may be taken from the scatter gather ring out of order since - * TCP frames may be out of order or intermingled among multiple TCP - * flows on the wire. The SGE bitmask tracks which elements are used - * or available. + * Each entry in the SGE is associated with an aggregation in process. + * Since there is no guarantee that all Ethernet frames associated with + * a partciular TCP flow will arrive at the adapter and be placed into + * the SGE chain contiguously, we maintain a bitmask for each SGE element + * that identifies which aggregation an Ethernet frame belongs to. * * Returns: * None @@ -14925,13 +14533,15 @@ bxe_init_sge_ring_bit_mask(struct bxe_fastpath *fp) { /* Set the mask to all 1s, it's faster to compare to 0 than to 0xf. */ - memset(fp->sge_mask, 0xff, + memset(fp->rx_sge_mask, 0xff, (TOTAL_RX_SGE >> RX_SGE_MASK_ELEM_SHIFT) * sizeof(uint64_t)); /* - * Clear the two last indices in the page to 1. These are the - * indices that correspond to the "next" element which will - * never be indicated and should be removed from calculations. + * The SGE chain is formatted just like the RX chain. + * The last two elements are reserved as a "next page pointer" + * to the next page of SGE elements. Clear the last two + * elements in each SGE chain page since they will never be + * used to track an aggregation. */ bxe_clear_sge_mask_next_elems(fp); } @@ -14948,32 +14558,55 @@ static void bxe_tpa_start(struct bxe_fastpath *fp, uint16_t queue, uint16_t cons, uint16_t prod) { - struct bxe_softc *sc = fp->sc; + struct bxe_softc *sc; struct mbuf *m_temp; struct eth_rx_bd *rx_bd; bus_dmamap_t map_temp; + int max_agg_queues; sc = fp->sc; - DBENTER(BXE_EXTREME_RECV); + DBENTER(BXE_INSANE_RECV | BXE_INSANE_TPA); - /* Move the empty mbuf and mapping from the TPA pool. */ + + + DBPRINT(sc, BXE_EXTREME_TPA, + "%s(): fp[%02d].tpa[%02d], cons=0x%04X, prod=0x%04X\n", + __FUNCTION__, fp->index, queue, cons, prod); + + max_agg_queues = CHIP_IS_E1(sc) ? 
ETH_MAX_AGGREGATION_QUEUES_E1 : + ETH_MAX_AGGREGATION_QUEUES_E1H; + + DBRUNIF((queue > max_agg_queues), + BXE_PRINTF("%s(): fp[%02d] illegal aggregation (%d > %d)!\n", + __FUNCTION__, fp->index, queue, max_agg_queues)); + + DBRUNIF((fp->tpa_state[queue] != BXE_TPA_STATE_STOP), + BXE_PRINTF("%s(): Starting aggregation on " + "fp[%02d].tpa[%02d] even though queue is not in the " + "TPA_STOP state!\n", __FUNCTION__, fp->index, queue)); + + /* Remove the existing mbuf and mapping from the TPA pool. */ m_temp = fp->tpa_mbuf_ptr[queue]; map_temp = fp->tpa_mbuf_map[queue]; + /* Only the paranoid survive! */ + if(m_temp == NULL) { + BXE_PRINTF("%s(%d): fp[%02d].tpa[%02d] not allocated!\n", + __FILE__, __LINE__, fp->index, queue); + /* ToDo: Additional error handling! */ + goto bxe_tpa_start_exit; + } + /* Move received mbuf and mapping to TPA pool. */ fp->tpa_mbuf_ptr[queue] = fp->rx_mbuf_ptr[cons]; fp->tpa_mbuf_map[queue] = fp->rx_mbuf_map[cons]; - DBRUNIF((fp->tpa_state[queue] != BXE_TPA_STATE_STOP), - DBPRINT(sc, BXE_FATAL, "%s(): Starting bin[%d] even though queue " - "is not in the TPA_STOP state!\n", __FUNCTION__, queue)); - /* Place the TPA bin into the START state. */ fp->tpa_state[queue] = BXE_TPA_STATE_START; DBRUN(fp->tpa_queue_used |= (1 << queue)); /* Get the rx_bd for the next open entry on the receive chain. */ - rx_bd = &fp->rx_bd_chain[RX_PAGE(prod)][RX_IDX(prod)]; + rx_bd = &fp->rx_chain[prod]; /* Update the rx_bd with the empty mbuf from the TPA pool. */ rx_bd->addr_hi = htole32(U64_HI(fp->tpa_mbuf_segs[queue].ds_addr)); @@ -14981,13 +14614,14 @@ bxe_tpa_start(struct bxe_fastpath *fp, uint16_t queue, uint16_t cons, fp->rx_mbuf_ptr[prod] = m_temp; fp->rx_mbuf_map[prod] = map_temp; - DBEXIT(BXE_EXTREME_RECV); +bxe_tpa_start_exit: + DBEXIT(BXE_INSANE_RECV | BXE_INSANE_TPA); } /* * When a TPA aggregation is completed, loop through the individual mbufs * of the aggregation, combining them into a single mbuf which will be sent - * up the stack. 
Refill all mbufs freed as we go along. + * up the stack. Refill all freed SGEs with mbufs as we go along. * * Returns: * 0 = Success, !0 = Failure. @@ -14996,22 +14630,27 @@ static int bxe_fill_frag_mbuf(struct bxe_softc *sc, struct bxe_fastpath *fp, struct mbuf *m, struct eth_fast_path_rx_cqe *fp_cqe, uint16_t cqe_idx) { + struct mbuf *m_frag; uint32_t frag_len, frag_size, pages, i; uint16_t sge_idx, len_on_bd; - int rc, j; + int j, rc; - DBENTER(BXE_EXTREME_RECV); + DBENTER(BXE_EXTREME_RECV | BXE_EXTREME_TPA); rc = 0; len_on_bd = le16toh(fp_cqe->len_on_bd); frag_size = le16toh(fp_cqe->pkt_len) - len_on_bd; pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT; + DBPRINT(sc, BXE_VERBOSE_TPA, + "%s(): len_on_bd=%d, frag_size=%d, pages=%d\n", + __FUNCTION__, len_on_bd, frag_size, pages); + /* Make sure the aggregated frame is not too big to handle. */ if (pages > 8 * PAGES_PER_SGE) { DBPRINT(sc, BXE_FATAL, - "%s(): SGL length (%d) is too long! CQE index is %d\n", - __FUNCTION__, pages, cqe_idx); + "%s(): fp[%02d].rx_sge[0x%04X] has too many pages (%d)!\n", + __FUNCTION__, fp->index, cqe_idx, pages); DBPRINT(sc, BXE_FATAL, "%s(): fp_cqe->pkt_len = %d fp_cqe->len_on_bd = %d\n", __FUNCTION__, le16toh(fp_cqe->pkt_len), len_on_bd); @@ -15021,7 +14660,7 @@ bxe_fill_frag_mbuf(struct bxe_softc *sc, struct bxe_fastpath *fp, } /* - * Run through the scatter gather list, pulling the individual + * Scan through the scatter gather list, pulling individual * mbufs into a single mbuf for the host stack. */ for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) { @@ -15035,38 +14674,37 @@ bxe_fill_frag_mbuf(struct bxe_softc *sc, struct bxe_fastpath *fp, frag_len = min(frag_size, (uint32_t)(BCM_PAGE_SIZE * PAGES_PER_SGE)); - /* Update the mbuf with the fragment length. */ - fp->rx_sge_buf_ptr[sge_idx]->m_len = frag_len; + DBPRINT(sc, BXE_VERBOSE_TPA, + "%s(): i=%d, j=%d, frag_size=%d, frag_len=%d\n", + __FUNCTION__, i, j, frag_size, frag_len); - /* Unmap the mbuf from DMA space. 
*/ - bus_dmamap_sync(fp->rx_sge_buf_tag, fp->rx_sge_buf_map[sge_idx], - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(fp->rx_sge_buf_tag, - fp->rx_sge_buf_map[sge_idx]); + m_frag = fp->rx_sge_buf_ptr[sge_idx]; - /* Concatenate the current fragment to the aggregated mbuf. */ - m_cat(m, fp->rx_sge_buf_ptr[sge_idx]); + /* Allocate a new mbuf for the SGE. */ + rc = bxe_alloc_rx_sge_mbuf(fp, sge_idx); + if (rc) { + /* + * Leave all remaining SGEs in the ring. + */ + goto bxe_fill_frag_mbuf_exit; + } - /* The SGE mbuf was freed in the call to m_cat(). */ - DBRUN(fp->sge_mbuf_alloc--); - fp->rx_sge_buf_ptr[sge_idx] = NULL; + /* Update the fragment its length. */ + m_frag->m_len = frag_len; - /* - * Try an allocate a new mbuf for the SGE that was just - * released. If an allocation error occurs stop where we - * are and drop the whole frame. - */ - rc = bxe_alloc_rx_sge(sc, fp, sge_idx); - if (rc) - goto bxe_fill_frag_mbuf_exit; + /* Concatenate the fragment to the head mbuf. */ + m_cat(m, m_frag); + DBRUN(fp->sge_mbuf_alloc--); + /* Update TPA mbuf size and remaining fragment size. 
*/ m->m_pkthdr.len += frag_len; - frag_size -= frag_len; } bxe_fill_frag_mbuf_exit: - DBEXIT(BXE_EXTREME_RECV); + DBPRINT(sc, BXE_VERBOSE_TPA, + "%s(): frag_size=%d\n", __FUNCTION__, frag_size); + DBEXIT(BXE_EXTREME_RECV | BXE_EXTREME_TPA); return (rc); } @@ -15082,102 +14720,70 @@ static void bxe_tpa_stop(struct bxe_softc *sc, struct bxe_fastpath *fp, uint16_t queue, int pad, int len, union eth_rx_cqe *cqe, uint16_t cqe_idx) { - struct mbuf *m_old, *m_new; - struct ip *ip; + struct mbuf *m; struct ifnet *ifp; - struct ether_vlan_header *eh; - bus_dma_segment_t seg; - int rc, e_hlen; + int rc; - DBENTER(BXE_EXTREME_RECV); - DBPRINT(sc, BXE_VERBOSE_RECV, - "%s(): fp[%d], tpa queue = %d, len = %d, pad = %d\n", __FUNCTION__, - fp->index, queue, len, pad); + DBENTER(BXE_INSANE_RECV | BXE_INSANE_TPA); + DBPRINT(sc, (BXE_EXTREME_RECV | BXE_EXTREME_TPA), + "%s(): fp[%02d].tpa[%02d], len=%d, pad=%d\n", + __FUNCTION__, fp->index, queue, len, pad); rc = 0; ifp = sc->bxe_ifp; - /* Unmap m_old from DMA space. */ - m_old = fp->tpa_mbuf_ptr[queue]; - bus_dmamap_sync(fp->rx_mbuf_tag, fp->tpa_mbuf_map[queue], - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(fp->rx_mbuf_tag, fp->tpa_mbuf_map[queue]); + m = fp->tpa_mbuf_ptr[queue]; - /* Skip over the pad when passing the data up the stack. */ - m_adj(m_old, pad); + /* Allocate a replacement before modifying existing mbuf. */ + rc = bxe_alloc_tpa_mbuf(fp, queue); + if (rc) { + /* Drop the frame and log a soft error. */ + fp->rx_soft_errors++; + goto bxe_tpa_stop_exit; + } - /* Adjust the packet length to match the received data. */ - m_old->m_pkthdr.len = m_old->m_len = len; + /* We have a replacement, fixup the current mbuf. */ + m_adj(m, pad); + m->m_pkthdr.len = m->m_len = len; - /* Validate the checksum if offload enabled. */ - m_old->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID | + /* Mark the checksums valid (taken care of by firmware). 
*/ + m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m_old->m_pkthdr.csum_data = 0xffff; - - /* Map the header and find the Ethernet type & header length. */ - eh = mtod(m_old, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) - e_hlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - else - e_hlen = ETHER_HDR_LEN; - - /* Get the IP header pointer. */ - ip = (struct ip *)(m_old->m_data + e_hlen); + m->m_pkthdr.csum_data = 0xffff; - ip->ip_sum = 0; - ip->ip_sum = in_cksum_hdr(ip); - - /* Try and aggregate all of the receive mbufs into a single mbuf. */ - if (!bxe_fill_frag_mbuf(sc, fp, m_old, &cqe->fast_path_cqe, cqe_idx)) { - /* - * We have an aggregated frame. If the frame has a vlan tag - * attach that information to the mbuf. - */ + /* Aggregate all of the SGEs into a single mbuf. */ + rc = bxe_fill_frag_mbuf(sc, fp, m, &cqe->fast_path_cqe, cqe_idx); + if (rc) { + /* Drop the packet and log an error. */ + fp->rx_soft_errors++; + m_freem(m); + } else { + /* Find VLAN tag and send frame up to the stack. */ if ((le16toh(cqe->fast_path_cqe.pars_flags.flags) & PARSING_FLAGS_VLAN)) { - m_old->m_pkthdr.ether_vtag = + m->m_pkthdr.ether_vtag = cqe->fast_path_cqe.vlan_tag; - m_old->m_flags |= M_VLANTAG; + m->m_flags |= M_VLANTAG; } - /* Send the packet to the appropriate interface. */ - m_old->m_pkthdr.rcvif = ifp; + /* Assign packet to the appropriate interface. */ + m->m_pkthdr.rcvif = ifp; - /* Pass the packet up to the stack. */ - fp->ipackets++; - DBRUN(fp->tpa_pkts++); - (*ifp->if_input)(ifp, m_old); - } else { - DBPRINT(sc, BXE_WARN, - "%s(): Failed to allocate new SGE page, dropping frame!\n", - __FUNCTION__); - fp->soft_rx_errors++; - m_freem(m_old); + /* Update packet statistics. */ + fp->rx_tpa_pkts++; + ifp->if_ipackets++; + + /* ToDo: Any potential locking issues here? */ + /* Pass the frame to the stack. 
*/ + (*ifp->if_input)(ifp, m); } - /* We passed m_old up the stack or dropped the frame. */ + /* We passed mbuf up the stack or dropped the frame. */ DBRUN(fp->tpa_mbuf_alloc--); - /* Allocate a replacement mbuf. */ - if (__predict_false((m_new = bxe_alloc_mbuf(fp, - sc->mbuf_alloc_size)) == NULL)) - goto bxe_tpa_stop_exit; - - /* Map the new mbuf and place it in the pool. */ - rc = bxe_map_mbuf(fp, m_new, fp->rx_mbuf_tag, - fp->tpa_mbuf_map[queue], &seg); - if (rc) - goto bxe_tpa_stop_exit; - - DBRUN(fp->tpa_mbuf_alloc++); - - fp->tpa_mbuf_ptr[queue] = m_new; - fp->tpa_mbuf_segs[queue] = seg; - bxe_tpa_stop_exit: fp->tpa_state[queue] = BXE_TPA_STATE_STOP; DBRUN(fp->tpa_queue_used &= ~(1 << queue)); - - DBEXIT(BXE_EXTREME_RECV); + DBEXIT(BXE_INSANE_RECV | BXE_INSANE_TPA); } /* @@ -15195,7 +14801,7 @@ bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, int i; /* Update producers. */ - rx_prods.bd_prod = bd_prod; + rx_prods.bd_prod = bd_prod; rx_prods.cqe_prod = cqe_prod; rx_prods.sge_prod = sge_prod; @@ -15213,7 +14819,7 @@ bxe_update_rx_prod(struct bxe_softc *sc, struct bxe_fastpath *fp, } /* - * Handles received frame interrupt events. + * Processes received frames. * * Returns: * Nothing. @@ -15228,6 +14834,7 @@ bxe_rxeof(struct bxe_fastpath *fp) uint16_t rx_cq_cons, rx_cq_cons_idx; uint16_t rx_cq_prod, rx_cq_cons_sb; unsigned long rx_pkts = 0; + int rc; sc = fp->sc; ifp = sc->bxe_ifp; @@ -15240,8 +14847,8 @@ bxe_rxeof(struct bxe_fastpath *fp) /* * Get working copies of the driver's view of the * RX indices. These are 16 bit values that are - * expected to increment from from 0 to 65535 - * and then wrap-around to 0 again. + * expected to increment from 0 to 65535 and then + * wrap-around to 0 again. 
*/ rx_bd_cons = fp->rx_bd_cons; rx_bd_prod = fp->rx_bd_prod; @@ -15249,7 +14856,7 @@ bxe_rxeof(struct bxe_fastpath *fp) rx_cq_prod = fp->rx_cq_prod; DBPRINT(sc, (BXE_EXTREME_RECV), - "%s(%d): BEFORE: fp[%d], rx_bd_cons = 0x%04X, rx_bd_prod = 0x%04X, " + "%s(%d): BEFORE: fp[%02d], rx_bd_cons = 0x%04X, rx_bd_prod = 0x%04X, " "rx_cq_cons_sw = 0x%04X, rx_cq_prod_sw = 0x%04X\n", __FUNCTION__, curcpu, fp->index, rx_bd_cons, rx_bd_prod, rx_cq_cons, rx_cq_prod); @@ -15271,33 +14878,24 @@ bxe_rxeof(struct bxe_fastpath *fp) /* * Convert the 16 bit indices used by hardware - * into values that map to the arrays used by - * the driver (i.e. an index). + * into array indices used by the driver. */ - rx_cq_cons_idx = RCQ_ENTRY(rx_cq_cons); + rx_cq_cons_idx = RCQ_ENTRY(rx_cq_cons); rx_bd_prod_idx = RX_BD(rx_bd_prod); rx_bd_cons_idx = RX_BD(rx_bd_cons); wmb(); - /* Fetch the cookie. */ + /* Fetch the completion queue entry (i.e. cookie). */ cqe = (union eth_rx_cqe *) - &fp->rx_cq_chain[RCQ_PAGE(rx_cq_cons_idx)][RCQ_IDX(rx_cq_cons_idx)]; + &fp->rcq_chain[rx_cq_cons_idx]; cqe_fp_flags = cqe->fast_path_cqe.type_error_flags; -#ifdef BXE_DEBUG - /* Simulate an error on the received frame. */ - if (DB_RANDOMTRUE(bxe_debug_received_frame_error)) { - DBPRINT(sc, BXE_WARN, - "%s(): Simulated CQE error flags!\n", __FUNCTION__); - cqe_fp_flags |= ETH_RX_ERROR_FLAGS; - sc->debug_received_frame_error++; + /* Sanity check the cookie flags. */ + if (__predict_false(cqe_fp_flags == 0)) { + fp->rx_null_cqe_flags++; + DBRUN(bxe_dump_cqe(fp, rx_cq_cons_idx, cqe)); + /* ToDo: What error handling can be done here? */ } -#endif - - DBRUNIF((cqe_fp_flags == 0), - fp->null_cqe_flags++; - bxe_dump_cqe(fp, rx_cq_cons_idx, cqe)); - /* DRC - ANything else to do here? */ /* Check the CQE type for slowpath or fastpath completion. 
*/ if (__predict_false(CQE_TYPE(cqe_fp_flags) == @@ -15314,7 +14912,8 @@ bxe_rxeof(struct bxe_fastpath *fp) pad = cqe->fast_path_cqe.placement_offset; /* Check if the completion is for TPA. */ - if ((!fp->disable_tpa) && (TPA_TYPE(cqe_fp_flags) != + if ((fp->disable_tpa == FALSE) && + (TPA_TYPE(cqe_fp_flags) != (TPA_TYPE_START | TPA_TYPE_END))) { uint16_t queue = cqe->fast_path_cqe.queue_index; @@ -15325,21 +14924,19 @@ bxe_rxeof(struct bxe_fastpath *fp) * the frames. */ - /* - * Check if a TPA aggregation has been started. - */ + /* Check if TPA aggregation has started. */ if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_START) { - bxe_tpa_start(fp, queue, - rx_bd_cons_idx, rx_bd_prod_idx); + bxe_tpa_start(fp, queue, rx_bd_cons_idx, + rx_bd_prod_idx); goto bxe_rxeof_next_rx; } - /* Check if a TPA aggregation has completed. */ + /* Check if TPA aggregation has completed. */ if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_END) { - if (!BXE_RX_SUM_FIX(cqe)) - DBPRINT(sc, BXE_FATAL, - "%s(): STOP on non-TCP data.\n", - __FUNCTION__); + DBRUNIF(!BXE_RX_SUM_FIX(cqe), + DBPRINT(sc, BXE_FATAL, + "%s(): STOP on non-TCP data.\n", + __FUNCTION__)); /* * This is the size of the linear @@ -15359,108 +14956,39 @@ bxe_rxeof(struct bxe_fastpath *fp) } } - /* Remove the mbuf from the RX chain. */ m = fp->rx_mbuf_ptr[rx_bd_cons_idx]; - fp->rx_mbuf_ptr[rx_bd_cons_idx] = NULL; - DBRUN(fp->free_rx_bd++); - DBRUNIF((fp->free_rx_bd > USABLE_RX_BD), - DBPRINT(sc, BXE_FATAL, - "%s(): fp[%d] - Too many free rx_bd's (0x%04X)!\n", - __FUNCTION__, fp->index, fp->free_rx_bd)); - - /* Unmap the mbuf from DMA space. */ - bus_dmamap_sync(fp->rx_mbuf_tag, - fp->rx_mbuf_map[rx_bd_cons_idx], - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(fp->rx_mbuf_tag, - fp->rx_mbuf_map[rx_bd_cons_idx]); + /* Allocate a replacement before modifying existing mbuf. */ + rc = bxe_alloc_rx_bd_mbuf(fp, rx_bd_prod_idx); + if (rc) { + /* Drop the frame and log a soft error. 
*/ + fp->rx_soft_errors++; + goto bxe_rxeof_next_rx; + } /* Check if the received frame has any errors. */ if (__predict_false(cqe_fp_flags & ETH_RX_ERROR_FLAGS)) { DBPRINT(sc, BXE_WARN , - "%s(): Found error flags (0x%08X) " - "set in received frame on fp[%d]!\n", - __FUNCTION__, cqe_fp_flags, fp->index); - - fp->soft_rx_errors++; - - /* Reuse the mbuf for a new frame. */ - if (bxe_get_buf(fp, m, rx_bd_prod_idx)) { - DBPRINT(sc, BXE_FATAL, - "%s(): Can't reuse RX mbuf!\n", - __FUNCTION__); - DBRUN(bxe_breakpoint(sc)); + "%s(): fp[%02d].cqe[0x%04X] has errors " + "(0x%08X)!\n", __FUNCTION__, fp->index, + rx_cq_cons, cqe_fp_flags); - /* ToDo: Find alterntive to panic(). */ - panic("bxe%d: Can't reuse RX mbuf!\n", - sc->bxe_unit); - } - - /* Go handle any additional received frames. */ + fp->rx_soft_errors++; goto bxe_rxeof_next_rx; } - /* - * The high level logic used here is to - * immediatley replace each receive buffer - * as it is used so that the receive chain - * is full at all times. First we try to - * allocate a new receive buffer, but if - * that fails then we will reuse the - * existing mbuf and log an error for the - * lost packet. - */ - - /* Allocate a new mbuf for the receive chain. */ - if (__predict_false(bxe_get_buf(fp, - NULL, rx_bd_prod_idx))) { - /* - * Drop the current frame if we can't get - * a new mbuf. - */ - fp->soft_rx_errors++; - - /* - * Place the current mbuf back in the - * receive chain. - */ - if (__predict_false(bxe_get_buf(fp, m, - rx_bd_prod_idx))) { - /* This is really bad! */ - DBPRINT(sc, BXE_FATAL, - "%s(): Can't reuse RX mbuf!\n", - __FUNCTION__); - DBRUN(bxe_breakpoint(sc)); - - /* ToDo: Find alterntive to panic(). */ - panic( - "bxe%d: Double mbuf allocation failure!\n", - sc->bxe_unit); - } - - /* Go handle any additional received frames. */ - goto bxe_rxeof_next_rx; - } - - /* - * Skip over the pad when passing the data up the stack. - */ + /* We have a replacement, fixup the current mbuf. 
*/ m_adj(m, pad); - - /* - * Adjust the packet length to match the received data. - */ m->m_pkthdr.len = m->m_len = len; - /* Send the packet to the appropriate interface. */ + /* Assign packet to the appropriate interface. */ m->m_pkthdr.rcvif = ifp; - /* Assume no hardware checksum. */ + /* Assume no hardware checksum complated. */ m->m_pkthdr.csum_flags = 0; - /* Validate the checksum if offload enabled. */ + /* Validate checksum if offload enabled. */ if (ifp->if_capenable & IFCAP_RXCSUM) { /* Check whether IP checksummed or not. */ if (sc->rx_csum && @@ -15517,8 +15045,9 @@ bxe_rxeof(struct bxe_fastpath *fp) /* Last chance to check for problems. */ DBRUN(bxe_validate_rx_packet(fp, rx_cq_cons, cqe, m)); - /* Pass the mbuf off to the upper layers. */ + /* Update packet statistics. */ ifp->if_ipackets++; + rx_pkts++; /* ToDo: Any potential locking issues here? */ /* Pass the frame to the stack. */ @@ -15530,7 +15059,6 @@ bxe_rxeof(struct bxe_fastpath *fp) bxe_rxeof_next_rx: rx_bd_prod = NEXT_RX_BD(rx_bd_prod); rx_bd_cons = NEXT_RX_BD(rx_bd_cons); - rx_pkts++; bxe_rxeof_next_cqe: rx_cq_prod = NEXT_RCQ_IDX(rx_cq_prod); @@ -15543,14 +15071,14 @@ bxe_rxeof_next_cqe: rmb(); } - /* Update the driver copy of the fastpath indices. */ + /* Update driver copy of the fastpath indices. */ fp->rx_bd_cons = rx_bd_cons; fp->rx_bd_prod = rx_bd_prod; fp->rx_cq_cons = rx_cq_cons; fp->rx_cq_prod = rx_cq_prod; DBPRINT(sc, (BXE_EXTREME_RECV), - "%s(%d): AFTER: fp[%d], rx_bd_cons = 0x%04X, rx_bd_prod = 0x%04X, " + "%s(%d): AFTER: fp[%02d], rx_bd_cons = 0x%04X, rx_bd_prod = 0x%04X, " "rx_cq_cons_sw = 0x%04X, rx_cq_prod_sw = 0x%04X\n", __FUNCTION__, curcpu, fp->index, rx_bd_cons, rx_bd_prod, rx_cq_cons, rx_cq_prod); @@ -15561,12 +15089,11 @@ bxe_rxeof_next_cqe: BUS_SPACE_BARRIER_READ); fp->rx_pkts += rx_pkts; - fp->rx_calls++; DBEXIT(BXE_EXTREME_RECV); } /* - * Handles transmit completion interrupt events. + * Processes transmit completions. * * Returns: * Nothing. 
@@ -15577,92 +15104,60 @@ bxe_txeof(struct bxe_fastpath *fp) struct bxe_softc *sc; struct ifnet *ifp; struct eth_tx_start_bd *txbd; - uint16_t hw_pkt_cons, sw_pkt_cons, sw_tx_bd_cons, sw_tx_chain_cons; - uint16_t pkt_cons, nbds; + uint16_t hw_pkt_cons, sw_pkt_cons, sw_tx_bd_cons; + uint16_t bd_index, pkt_index, nbds; int i; sc = fp->sc; ifp = sc->bxe_ifp; DBENTER(BXE_EXTREME_SEND); - DBPRINT(sc, BXE_EXTREME_SEND, "%s(): Servicing fp[%d]\n", - __FUNCTION__, fp->index); /* Get the hardware's view of the TX packet consumer index. */ - hw_pkt_cons = le16toh(*fp->tx_cons_sb); + hw_pkt_cons = le16toh(*fp->tx_pkt_cons_sb); sw_pkt_cons = fp->tx_pkt_cons; sw_tx_bd_cons = fp->tx_bd_cons; /* Cycle through any completed TX chain page entries. */ while (sw_pkt_cons != hw_pkt_cons) { - txbd = NULL; - sw_tx_chain_cons = TX_BD(sw_tx_bd_cons); - pkt_cons = TX_BD(sw_pkt_cons); + bd_index = TX_BD(sw_tx_bd_cons); + pkt_index = TX_BD(sw_pkt_cons); -#ifdef BXE_DEBUG - if (sw_tx_chain_cons > MAX_TX_BD) { - BXE_PRINTF( - "%s(): TX chain consumer out of range! 0x%04X > 0x%04X\n", - __FUNCTION__, sw_tx_chain_cons, (int)MAX_TX_BD); - bxe_breakpoint(sc); - } -#endif - - txbd = -&fp->tx_bd_chain[TX_PAGE(sw_tx_chain_cons)][TX_IDX(sw_tx_chain_cons)].start_bd; - -#ifdef BXE_DEBUG - if (txbd == NULL) { - BXE_PRINTF("%s(): Unexpected NULL tx_bd[0x%04X]!\n", - __FUNCTION__, sw_tx_chain_cons); - bxe_breakpoint(sc); - } -#endif - - /* - * Find the number of BD's that were used in the completed pkt. - */ + txbd = &fp->tx_chain[bd_index].start_bd; nbds = txbd->nbd; - /* - * Free the ext mbuf cluster from the mbuf of the completed - * frame. - */ - if (__predict_true(fp->tx_mbuf_ptr[pkt_cons] != NULL)) { - /* Unmap it from the mbuf. */ + /* Free the completed frame's mbuf. */ + if (__predict_true(fp->tx_mbuf_ptr[pkt_index] != NULL)) { + /* Unmap the mbuf from non-paged memory. 
*/ bus_dmamap_unload(fp->tx_mbuf_tag, - fp->tx_mbuf_map[pkt_cons]); + fp->tx_mbuf_map[pkt_index]); - /* Return the mbuf to the stack. */ - DBRUN(fp->tx_mbuf_alloc--); - m_freem(fp->tx_mbuf_ptr[pkt_cons]); - fp->tx_mbuf_ptr[pkt_cons] = NULL; + /* Return the mbuf to the system. */ + m_freem(fp->tx_mbuf_ptr[pkt_index]); + fp->tx_mbuf_alloc--; + fp->tx_mbuf_ptr[pkt_index] = NULL; fp->opackets++; } else { fp->tx_chain_lost_mbuf++; } - /* Skip over the remaining used buffer descriptors. */ - fp->used_tx_bd -= nbds; + /* Updated packet consumer value. */ + sw_pkt_cons++; + /* Skip over the remaining used buffer descriptors. */ + fp->tx_bd_used -= nbds; for (i = 0; i < nbds; i++) sw_tx_bd_cons = NEXT_TX_BD(sw_tx_bd_cons); - /* Increment the software copy of packet consumer index */ - sw_pkt_cons++; - - /* - * Refresh the hw packet consumer index to see if there's - * new work. - */ - hw_pkt_cons = le16toh(*fp->tx_cons_sb); + /* Check for new work since we started. */ + hw_pkt_cons = le16toh(*fp->tx_pkt_cons_sb); rmb(); } /* Enable new transmits if we've made enough room. */ - if (fp->used_tx_bd < BXE_TX_CLEANUP_THRESHOLD) { + if (fp->tx_bd_used < BXE_TX_CLEANUP_THRESHOLD) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - if (fp->used_tx_bd == 0) { + if (fp->tx_bd_used == 0) { /* * Clear the watchdog timer if we've emptied * the TX chain. @@ -15684,78 +15179,6 @@ bxe_txeof(struct bxe_fastpath *fp) } /* - * Encapsulate an mbuf cluster into the rx_bd. - * - * This routine will map an mbuf cluster into 1 rx_bd - * - * Returns: - * 0 for success, positive value for failure. - */ -static int -bxe_get_buf(struct bxe_fastpath *fp, struct mbuf *m, uint16_t prod) -{ - struct bxe_softc *sc; - bus_dma_segment_t seg; - struct mbuf *m_new; - struct eth_rx_bd *rx_bd; - int rc; - - sc = fp->sc; - m_new = NULL; - rc = 0; - - DBENTER(BXE_INSANE_LOAD | BXE_INSANE_RESET | BXE_INSANE_RECV); - - /* Make sure the inputs are valid. 
*/ - DBRUNIF((prod > MAX_RX_BD), - BXE_PRINTF("%s(): RX producer out of range: 0x%04X > 0x%04X\n", - __FUNCTION__, prod, (uint16_t) MAX_RX_BD)); - - /* Check whether this is a new mbuf allocation. */ - if (m == NULL) { - if ((m_new = bxe_alloc_mbuf(fp, sc->mbuf_alloc_size)) == NULL) { - rc = ENOBUFS; - goto bxe_get_buf_exit; - } - - DBRUN(fp->rx_mbuf_alloc++); - } else { - /* Reuse the existing mbuf. */ - m_new = m; - m_new->m_pkthdr.len = m_new->m_len = sc->mbuf_alloc_size; - } - - /* Do some additional sanity checks on the mbuf. */ - DBRUN(m_sanity(m_new, FALSE)); - - rc = bxe_map_mbuf(fp, m_new, fp->rx_mbuf_tag, - fp->rx_mbuf_map[prod], &seg); - - if (__predict_false(rc)) { - DBRUN(fp->rx_mbuf_alloc--); - rc = ENOBUFS; - goto bxe_get_buf_exit; - } - - /* Setup the rx_bd for the first segment. */ - rx_bd = &fp->rx_bd_chain[RX_PAGE(prod)][RX_IDX(prod)]; - rx_bd->addr_lo = htole32(U64_LO(seg.ds_addr)); - rx_bd->addr_hi = htole32(U64_HI(seg.ds_addr)); - - /* Save the mbuf and update our counter. */ - fp->rx_mbuf_ptr[prod] = m_new; - - DBRUN(fp->free_rx_bd--); - DBRUNIF((fp->free_rx_bd > USABLE_RX_BD), - DBPRINT(sc, BXE_FATAL, "%s(): fp[%d] - Too many free rx_bd's " - "(0x%04X)!\n", __FUNCTION__, fp->index, fp->free_rx_bd)); - -bxe_get_buf_exit: - DBEXIT(BXE_INSANE_LOAD | BXE_INSANE_RESET | BXE_INSANE_RECV); - return (rc); -} - -/* * Transmit timeout handler. * * Returns: @@ -15764,9 +15187,10 @@ bxe_get_buf_exit: static int bxe_watchdog(struct bxe_fastpath *fp) { - struct bxe_softc *sc = fp->sc; + struct bxe_softc *sc; int rc = 0; + sc = fp->sc; DBENTER(BXE_INSANE_SEND); BXE_FP_LOCK(fp); @@ -15795,39 +15219,10 @@ bxe_watchdog(struct bxe_fastpath *fp) bxe_watchdog_exit: DBEXIT(BXE_INSANE_SEND); - return(rc); -} - - -/* - * Change the MTU size for the port. The MTU should be validated before - * calling this routine. - * - * Returns: - * 0 = Success, !0 = Failure. 
- */ -static int -bxe_change_mtu(struct bxe_softc *sc, int if_drv_running) -{ - struct ifnet *ifp; - int rc; - - BXE_CORE_LOCK_ASSERT(sc); - - rc = 0; - ifp = sc->bxe_ifp; - sc->bxe_ifp->if_mtu = ifp->if_mtu; - if (if_drv_running) { - DBPRINT(sc, BXE_INFO_IOCTL, "%s(): Changing the MTU to %d.\n", - __FUNCTION__, sc->bxe_ifp->if_mtu); - - bxe_stop_locked(sc, UNLOAD_NORMAL); - bxe_init_locked(sc, LOAD_NORMAL); - } - return (rc); } + /* * The periodic timer tick routine. * @@ -15850,21 +15245,22 @@ bxe_tick(void *xsc) sc = xsc; DBENTER(BXE_INSANE_MISC); + /* Check for TX timeouts on any fastpath. */ for (i = 0; i < sc->num_queues; i++) { fp = &sc->fp[i]; + if (bxe_watchdog(fp) != 0) break; } - BXE_CORE_LOCK(sc); func = BP_FUNC(sc); /* Schedule the next tick. */ callout_reset(&sc->bxe_tick_callout, hz, bxe_tick, sc); #if 0 - if (!BP_NOMCP(sc)) { + if (!NOMCP(sc)) { func = BP_FUNC(sc); ++sc->fw_drv_pulse_wr_seq; @@ -15894,8 +15290,6 @@ bxe_tick(void *xsc) if ((sc->state == BXE_STATE_OPEN) || (sc->state == BXE_STATE_DISABLED)) bxe_stats_handle(sc, STATS_EVENT_UPDATE); - - BXE_CORE_UNLOCK(sc); } #ifdef BXE_DEBUG @@ -16155,7 +15549,7 @@ bxe_add_sysctls(struct bxe_softc *sc) device_get_sysctl_ctx(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); - struct bxe_eth_stats *estats = &sc->eth_stats; + struct bxe_port_stats *estats = &sc->eth_stats; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "estats_total_bytes_received_hi", @@ -16275,95 +15669,110 @@ bxe_add_sysctls(struct bxe_softc *sc) namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); + /* + * Receive related fastpath statistics.* + */ SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_pkts", CTLFLAG_RD, &fp->rx_pkts, "Received packets"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "tx_pkts", - CTLFLAG_RD, &fp->tx_pkts, - "Transmitted packets"); - - SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "mbuf_alloc_failed", - CTLFLAG_RD, &fp->mbuf_alloc_failed, 
- "Mbuf allocation failure count"); + "rx_tpa_pkts", + CTLFLAG_RD, &fp->rx_tpa_pkts, + "Received TPA packets"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "tpa_mbuf_alloc_failed", - CTLFLAG_RD, &fp->tpa_mbuf_alloc_failed, - "TPA mbuf allocation failure count"); + "rx_null_cqe_flags", + CTLFLAG_RD, &fp->rx_null_cqe_flags, + "CQEs with NULL flags count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "mbuf_defrag_attempts", - CTLFLAG_RD, &fp->mbuf_defrag_attempts, - "Mbuf defrag attempt count"); + "rx_soft_errors", + CTLFLAG_RD, &fp->rx_soft_errors, + "Received frames dropped by driver count"); + /* + * Transmit related fastpath statistics.* + */ SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "mbuf_defrag_failures", - CTLFLAG_RD, &fp->mbuf_defrag_failures, - "Mbuf defrag failure count"); + "tx_pkts", + CTLFLAG_RD, &fp->tx_pkts, + "Transmitted packets"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "mbuf_defrag_successes", - CTLFLAG_RD, &fp->mbuf_defrag_successes, - "Mbuf defrag success count"); + "tx_soft_errors", + CTLFLAG_RD, &fp->tx_soft_errors, + "Transmit frames dropped by driver count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "offload_frames_csum_ip", - CTLFLAG_RD, &fp->offload_frames_csum_ip, + "tx_offload_frames_csum_ip", + CTLFLAG_RD, &fp->tx_offload_frames_csum_ip, "IP checksum offload frame count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "offload_frames_csum_tcp", - CTLFLAG_RD, &fp->offload_frames_csum_tcp, + "tx_offload_frames_csum_tcp", + CTLFLAG_RD, &fp->tx_offload_frames_csum_tcp, "TCP checksum offload frame count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "offload_frames_csum_udp", - CTLFLAG_RD, &fp->offload_frames_csum_udp, + "tx_offload_frames_csum_udp", + CTLFLAG_RD, &fp->tx_offload_frames_csum_udp, "UDP checksum offload frame count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "offload_frames_tso", - CTLFLAG_RD, &fp->offload_frames_tso, + "tx_offload_frames_tso", + CTLFLAG_RD, &fp->tx_offload_frames_tso, "TSO offload 
frame count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "tx_header_splits", + CTLFLAG_RD, &fp->tx_header_splits, + "TSO frame header/data split count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_encap_failures", CTLFLAG_RD, &fp->tx_encap_failures, "TX encapsulation failure count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "tx_start_called_on_empty_queue", - CTLFLAG_RD, &fp->tx_start_called_on_empty_queue, - "TX start function called on empty " - "TX queue count"); + "tx_hw_queue_full", + CTLFLAG_RD, &fp->tx_hw_queue_full, + "TX H/W queue too full to add a frame count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "tx_queue_too_full", - CTLFLAG_RD, &fp->tx_queue_too_full, - "TX queue too full to add a TX frame count"); + "tx_hw_max_queue_depth", + CTLFLAG_RD, &fp->tx_hw_max_queue_depth, + "TX H/W maximum queue depth count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "window_violation_std", - CTLFLAG_RD, &fp->window_violation_std, + "tx_dma_mapping_failure", + CTLFLAG_RD, &fp->tx_dma_mapping_failure, + "TX DMA mapping failure"); + + SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, + "tx_max_drbr_queue_depth", + CTLFLAG_RD, &fp->tx_max_drbr_queue_depth, + 0, "TX S/W queue maximum depth"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "tx_window_violation_std", + CTLFLAG_RD, &fp->tx_window_violation_std, "Standard frame TX BD window violation count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "window_violation_tso", - CTLFLAG_RD, &fp->window_violation_tso, + "tx_window_violation_tso", + CTLFLAG_RD, &fp->tx_window_violation_tso, "TSO frame TX BD window violation count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "unsupported_tso_request_ipv6", - CTLFLAG_RD, &fp->unsupported_tso_request_ipv6, + "tx_unsupported_tso_request_ipv6", + CTLFLAG_RD, &fp->tx_unsupported_tso_request_ipv6, "TSO frames with unsupported IPv6 protocol count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "unsupported_tso_request_not_tcp", - CTLFLAG_RD, 
&fp->unsupported_tso_request_not_tcp, + "tx_unsupported_tso_request_not_tcp", + CTLFLAG_RD, &fp->tx_unsupported_tso_request_not_tcp, "TSO frames with unsupported protocol count"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, @@ -16371,17 +15780,58 @@ bxe_add_sysctls(struct bxe_softc *sc) CTLFLAG_RD, &fp->tx_chain_lost_mbuf, "Mbufs lost on TX chain count"); - SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, - "max_drbr_queue_depth", - CTLFLAG_RD, &fp->max_drbr_queue_depth, - 0, "Driver queue maximum dpeth"); + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "tx_frame_deferred", + CTLFLAG_RD, &fp->tx_frame_deferred, + "TX frame deferred from H/W queue to S/W queue count"); -#ifdef BXE_DEBUG SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, - "null_cqe_flags", - CTLFLAG_RD, &fp->null_cqe_flags, - "CQEs with NULL flags count"); -#endif + "tx_queue_xoff", + CTLFLAG_RD, &fp->tx_queue_xoff, + "TX queue full count"); + + /* + * Memory related fastpath statistics.* + */ + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_rx_bd_alloc_failed", + CTLFLAG_RD, &fp->mbuf_rx_bd_alloc_failed, + "RX BD mbuf allocation failure count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_rx_bd_mapping_failed", + CTLFLAG_RD, &fp->mbuf_rx_bd_mapping_failed, + "RX BD mbuf mapping failure count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_tpa_alloc_failed", + CTLFLAG_RD, &fp->mbuf_tpa_alloc_failed, + "TPA mbuf allocation failure count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_tpa_mapping_failed", + CTLFLAG_RD, &fp->mbuf_tpa_mapping_failed, + "TPA mbuf mapping failure count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_sge_alloc_failed", + CTLFLAG_RD, &fp->mbuf_sge_alloc_failed, + "SGE mbuf allocation failure count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_sge_mapping_failed", + CTLFLAG_RD, &fp->mbuf_sge_mapping_failed, + "SGE mbuf mapping failure count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_defrag_attempts", + 
CTLFLAG_RD, &fp->mbuf_defrag_attempts, + "Mbuf defrag attempt count"); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, + "mbuf_defrag_failures", + CTLFLAG_RD, &fp->mbuf_defrag_failures, + "Mbuf defrag failure count"); } } while (0); @@ -16560,13 +16010,13 @@ bxe_dump_debug_reg_wread(struct bxe_softc *sc, uint32_t *index) pwreg_addrs = NULL; /* Read different registers for different controllers. */ - if (CHIP_IS_E1H(sc)) { - wregs_count = wregs_count_e1h; - pwreg_addrs = &wreg_addrs_e1h[0]; - } else { - wregs_count = wregs_count_e1; - pwreg_addrs = &wreg_addrs_e1[0]; - } + if (CHIP_IS_E1H(sc)) { + wregs_count = wregs_count_e1h; + pwreg_addrs = &wreg_addrs_e1h[0]; + } else { + wregs_count = wregs_count_e1; + pwreg_addrs = &wreg_addrs_e1[0]; + } for (reg_addrs_index = 0; reg_addrs_index < wregs_count; reg_addrs_index++) { @@ -16646,22 +16096,23 @@ bxe_grcdump(struct bxe_softc *sc, int log) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_validate_rx_packet(struct bxe_fastpath *fp, uint16_t comp_cons, union eth_rx_cqe *cqe, struct mbuf *m) { struct bxe_softc *sc; + int error; sc = fp->sc; - /* Check that the mbuf is sane. */ - m_sanity(m, FALSE); - /* Make sure the packet has a valid length. */ - if ((m->m_len < ETHER_HDR_LEN) | - (m->m_len > ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD)) { + /* Check that the mbuf is sane. */ + error = m_sanity(m, FALSE); + if (error != 1 || ((m->m_len < ETHER_HDR_LEN) | + (m->m_len > ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD))) { m_print(m, 128); bxe_dump_enet(sc, m); bxe_dump_cqe(fp, comp_cons, cqe); + /* Make sure the packet has a valid length. */ } } @@ -16673,7 +16124,7 @@ void bxe_validate_rx_packet(struct bxe_fastpath *fp, uint16_t comp_cons, * Returns: * Nothing. 
*/ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_enet(struct bxe_softc *sc, struct mbuf *m) { struct ether_vlan_header *eh; @@ -16803,7 +16254,7 @@ bxe_dump_mbuf_data(struct mbuf *m, int len) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m) { if (m == NULL) { @@ -16868,17 +16319,19 @@ void bxe_dump_mbuf(struct bxe_softc *sc, struct mbuf *m) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_rxbd(struct bxe_fastpath *fp, int idx, struct eth_rx_bd *rx_bd) { - struct bxe_softc *sc = fp->sc; + struct bxe_softc *sc; + + sc = fp->sc; /* Check if index out of range. */ if (idx > MAX_RX_BD) { BXE_PRINTF("fp[%02d].rx_bd[0x%04X] XX: Invalid rx_bd index!\n", fp->index, idx); - } else if ((idx & RX_DESC_MASK) >= USABLE_RX_BD_PER_PAGE) { + } else if ((idx & RX_BD_PER_PAGE_MASK) >= USABLE_RX_BD_PER_PAGE) { /* RX Chain page pointer. */ BXE_PRINTF("fp[%02d].rx_bd[0x%04X] NP: haddr=0x%08X:%08X\n", fp->index, idx, rx_bd->addr_hi, rx_bd->addr_lo); @@ -16894,11 +16347,13 @@ void bxe_dump_rxbd(struct bxe_fastpath *fp, int idx, * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_cqe(struct bxe_fastpath *fp, int idx, union eth_rx_cqe *cqe) { - struct bxe_softc *sc = fp->sc; + struct bxe_softc *sc; + + sc = fp->sc; if (idx > MAX_RCQ_ENTRIES) { /* Index out of range. */ @@ -16931,26 +16386,28 @@ void bxe_dump_cqe(struct bxe_fastpath *fp, int idx, * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_tx_parsing_bd(struct bxe_fastpath *fp, int idx, struct eth_tx_parse_bd *p_bd) { - struct bxe_softc *sc = fp->sc; - - if (idx > MAX_TX_BD){ - /* Index out of range. 
*/ - BXE_PRINTF("fp[%02d].tx_bd[0x%04X] XX: Invalid tx_bd index!\n", - fp->index, idx); - } else { - BXE_PRINTF("fp[%02d]:tx_bd[0x%04X] PB: global_data=0x%b, " - "tcp_flags=0x%b, ip_hlen=%04d, total_hlen=%04d, " - "tcp_pseudo_csum=0x%04X, lso_mss=0x%04X, ip_id=0x%04X, " - "tcp_send_seq=0x%08X\n", fp->index, idx, - p_bd->global_data, BXE_ETH_TX_PARSE_BD_GLOBAL_DATA_PRINTFB, - p_bd->tcp_flags, BXE_ETH_TX_PARSE_BD_TCP_FLAGS_PRINTFB, - p_bd->ip_hlen, p_bd->total_hlen, p_bd->tcp_pseudo_csum, - p_bd->lso_mss, p_bd->ip_id, p_bd->tcp_send_seq); - } + struct bxe_softc *sc; + + sc = fp->sc; + + if (idx > MAX_TX_BD){ + /* Index out of range. */ + BXE_PRINTF("fp[%02d].tx_bd[0x%04X] XX: Invalid tx_bd index!\n", + fp->index, idx); + } else { + BXE_PRINTF("fp[%02d]:tx_bd[0x%04X] PB: global_data=0x%b, " + "tcp_flags=0x%b, ip_hlen=%04d, total_hlen=%04d, " + "tcp_pseudo_csum=0x%04X, lso_mss=0x%04X, ip_id=0x%04X, " + "tcp_send_seq=0x%08X\n", fp->index, idx, + p_bd->global_data, BXE_ETH_TX_PARSE_BD_GLOBAL_DATA_PRINTFB, + p_bd->tcp_flags, BXE_ETH_TX_PARSE_BD_TCP_FLAGS_PRINTFB, + p_bd->ip_hlen, p_bd->total_hlen, p_bd->tcp_pseudo_csum, + p_bd->lso_mss, p_bd->ip_id, p_bd->tcp_send_seq); + } } /* @@ -16959,11 +16416,13 @@ void bxe_dump_tx_parsing_bd(struct bxe_fastpath *fp, int idx, * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_txbd(struct bxe_fastpath *fp, int idx, union eth_tx_bd_types *tx_bd) { - struct bxe_softc *sc = fp->sc; + struct bxe_softc *sc; + + sc = fp->sc; if (idx > MAX_TX_BD){ /* Index out of range. */ @@ -17002,24 +16461,26 @@ void bxe_dump_txbd(struct bxe_fastpath *fp, int idx, * Returns: * Nothing. 
*/ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_tx_chain(struct bxe_fastpath * fp, int tx_bd_prod, int count) { - struct bxe_softc *sc = fp->sc; + struct bxe_softc *sc; union eth_tx_bd_types *tx_bd; uint32_t val_hi, val_lo; int i, parsing_bd = 0; + sc = fp->sc; + /* First some info about the tx_bd chain structure. */ BXE_PRINTF( "----------------------------" " tx_bd chain " "----------------------------\n"); - val_hi = U64_HI(fp->tx_bd_chain_paddr); - val_lo = U64_LO(fp->tx_bd_chain_paddr); + val_hi = U64_HI(fp->tx_dma.paddr); + val_lo = U64_LO(fp->tx_dma.paddr); BXE_PRINTF( - "0x%08X:%08X - (fp[%02d]->tx_bd_chain_paddr) TX Chain physical address\n", + "0x%08X:%08X - (fp[%02d]->tx_dma.paddr) TX Chain physical address\n", val_hi, val_lo, fp->index); BXE_PRINTF( "page size = 0x%08X, tx chain pages = 0x%08X\n", @@ -17037,12 +16498,11 @@ void bxe_dump_tx_chain(struct bxe_fastpath * fp, int tx_bd_prod, int count) /* Now print out the tx_bd's themselves. */ for (i = 0; i < count; i++) { - tx_bd = - &fp->tx_bd_chain[TX_PAGE(tx_bd_prod)][TX_IDX(tx_bd_prod)]; + tx_bd = &fp->tx_chain[tx_bd_prod]; if (parsing_bd) { struct eth_tx_parse_bd *p_bd; p_bd = (struct eth_tx_parse_bd *) - &fp->tx_bd_chain[TX_PAGE(tx_bd_prod)][TX_IDX(tx_bd_prod)].parse_bd; + &fp->tx_chain[tx_bd_prod].parse_bd; bxe_dump_tx_parsing_bd(fp, tx_bd_prod, p_bd); parsing_bd = 0; } else { @@ -17071,23 +16531,23 @@ void bxe_dump_tx_chain(struct bxe_fastpath * fp, int tx_bd_prod, int count) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_rx_cq_chain(struct bxe_fastpath *fp, int rx_cq_prod, int count) { - struct bxe_softc *sc = fp->sc; + struct bxe_softc *sc; union eth_rx_cqe *cqe; int i; + sc = fp->sc; + /* First some info about the tx_bd chain structure. 
*/ BXE_PRINTF( "----------------------------" " CQE Chain " "----------------------------\n"); - for (i=0; i< NUM_RCQ_PAGES; i++) { - BXE_PRINTF("fp[%02d]->rx_cq_chain_paddr[%d] = 0x%jX\n", - fp->index, i, (uintmax_t) fp->rx_cq_chain_paddr[i]); - } + BXE_PRINTF("fp[%02d]->rcq_dma.paddr = 0x%jX\n", + fp->index, (uintmax_t) fp->rcq_dma.paddr); BXE_PRINTF("page size = 0x%08X, cq chain pages " " = 0x%08X\n", @@ -17107,9 +16567,10 @@ void bxe_dump_rx_cq_chain(struct bxe_fastpath *fp, int rx_cq_prod, int count) "----------------------------\n"); for (i = 0; i < count; i++) { - cqe = (union eth_rx_cqe *)&fp->rx_cq_chain - [RCQ_PAGE(rx_cq_prod)][RCQ_IDX(rx_cq_prod)]; + cqe = (union eth_rx_cqe *)&fp->rcq_chain[rx_cq_prod]; + bxe_dump_cqe(fp, rx_cq_prod, cqe); + /* Don't skip next page pointers. */ rx_cq_prod = ((rx_cq_prod + 1) & MAX_RCQ_ENTRIES); } @@ -17126,8 +16587,8 @@ void bxe_dump_rx_cq_chain(struct bxe_fastpath *fp, int rx_cq_prod, int count) * Returns: * Nothing. */ -static __attribute__ ((noinline)) -void bxe_dump_rx_bd_chain(struct bxe_fastpath *fp, int rx_prod, int count) +static __noinline +void bxe_dump_rx_bd_chain(struct bxe_fastpath *fp, int prod, int count) { struct bxe_softc *sc; struct eth_rx_bd *rx_bd; @@ -17135,6 +16596,7 @@ void bxe_dump_rx_bd_chain(struct bxe_fastpath *fp, int rx_prod, int count) int i; sc = fp->sc; + /* First some info about the tx_bd chain structure. */ BXE_PRINTF( "----------------------------" @@ -17144,8 +16606,8 @@ void bxe_dump_rx_bd_chain(struct bxe_fastpath *fp, int rx_prod, int count) BXE_PRINTF( "----- RX_BD Chain -----\n"); - BXE_PRINTF("fp[%02d]->rx_cq_chain_paddr[0] = 0x%jX\n", - fp->index, (uintmax_t) fp->rx_cq_chain_paddr[0]); + BXE_PRINTF("fp[%02d]->rx_dma.paddr = 0x%jX\n", + fp->index, (uintmax_t) fp->rx_dma.paddr); BXE_PRINTF( "page size = 0x%08X, rx chain pages = 0x%08X\n", @@ -17166,15 +16628,14 @@ void bxe_dump_rx_bd_chain(struct bxe_fastpath *fp, int rx_prod, int count) /* Now print out the rx_bd's themselves. 
*/ for (i = 0; i < count; i++) { - rx_bd = (struct eth_rx_bd *) - (&fp->rx_bd_chain[RX_PAGE(rx_prod)][RX_IDX(rx_prod)]); - m = sc->fp->rx_mbuf_ptr[rx_prod]; + rx_bd = (struct eth_rx_bd *) (&fp->rx_chain[prod]); + m = sc->fp->rx_mbuf_ptr[prod]; - bxe_dump_rxbd(fp, rx_prod, rx_bd); + bxe_dump_rxbd(fp, prod, rx_bd); bxe_dump_mbuf(sc, m); /* Don't skip next page pointers. */ - rx_prod = ((rx_prod + 1) & MAX_RX_BD); + prod = ((prod + 1) & MAX_RX_BD); } BXE_PRINTF( @@ -17189,7 +16650,7 @@ void bxe_dump_rx_bd_chain(struct bxe_fastpath *fp, int rx_prod, int count) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_hw_state(struct bxe_softc *sc) { int i; @@ -17216,7 +16677,7 @@ void bxe_dump_hw_state(struct bxe_softc *sc) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_rx_mbuf_chain(struct bxe_softc *sc, int chain_prod, int count) { struct mbuf *m; @@ -17246,7 +16707,7 @@ void bxe_dump_rx_mbuf_chain(struct bxe_softc *sc, int chain_prod, int count) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_tx_mbuf_chain(struct bxe_softc *sc, int chain_prod, int count) { struct mbuf *m; @@ -17276,15 +16737,15 @@ void bxe_dump_tx_mbuf_chain(struct bxe_softc *sc, int chain_prod, int count) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_status_block(struct bxe_softc *sc) { struct bxe_fastpath *fp; - struct host_def_status_block *dsb; + struct host_def_status_block *def_sb; struct host_status_block *fpsb; int i; - dsb = sc->def_status_block; + def_sb = sc->def_sb; BXE_PRINTF( "----------------------------" " Status Block " @@ -17359,92 +16820,92 @@ void bxe_dump_status_block(struct bxe_softc *sc) /* Print attention information. 
*/ BXE_PRINTF( " 0x%02X - Status Block ID\n", - dsb->atten_status_block.status_block_id); + def_sb->atten_status_block.status_block_id); BXE_PRINTF( "0x%08X - Attn Bits\n", - dsb->atten_status_block.attn_bits); + def_sb->atten_status_block.attn_bits); BXE_PRINTF( "0x%08X - Attn Bits Ack\n", - dsb->atten_status_block.attn_bits_ack); + def_sb->atten_status_block.attn_bits_ack); BXE_PRINTF( " 0x%04X - Attn Block Index\n", - le16toh(dsb->atten_status_block.attn_bits_index)); + le16toh(def_sb->atten_status_block.attn_bits_index)); /* Print the USTORM fields (HC_USTORM_DEF_SB_NUM_INDICES). */ BXE_PRINTF( " 0x%02X - USTORM Status Block ID\n", - dsb->u_def_status_block.status_block_id); + def_sb->u_def_status_block.status_block_id); BXE_PRINTF( " 0x%04X - USTORM Status Block Index\n", - le16toh(dsb->u_def_status_block.status_block_index)); + le16toh(def_sb->u_def_status_block.status_block_index)); BXE_PRINTF( " 0x%04X - USTORM [ETH_RDMA_RX_CQ_CONS]\n", - le16toh(dsb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_RDMA_RX_CQ_CONS])); + le16toh(def_sb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_RDMA_RX_CQ_CONS])); BXE_PRINTF( " 0x%04X - USTORM [ETH_ISCSI_RX_CQ_CONS]\n", - le16toh(dsb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_ISCSI_RX_CQ_CONS])); + le16toh(def_sb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_ISCSI_RX_CQ_CONS])); BXE_PRINTF( " 0x%04X - USTORM [ETH_RDMA_RX_BD_CONS]\n", - le16toh(dsb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_RDMA_RX_BD_CONS])); + le16toh(def_sb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_RDMA_RX_BD_CONS])); BXE_PRINTF( " 0x%04X - USTORM [ETH_ISCSI_RX_BD_CONS]\n", - le16toh(dsb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_ISCSI_RX_BD_CONS])); + le16toh(def_sb->u_def_status_block.index_values[HC_INDEX_DEF_U_ETH_ISCSI_RX_BD_CONS])); /* Print the CSTORM fields (HC_CSTORM_DEF_SB_NUM_INDICES). 
*/ BXE_PRINTF( " 0x%02X - CSTORM Status Block ID\n", - dsb->c_def_status_block.status_block_id); + def_sb->c_def_status_block.status_block_id); BXE_PRINTF( " 0x%04X - CSTORM Status Block Index\n", - le16toh(dsb->c_def_status_block.status_block_index)); + le16toh(def_sb->c_def_status_block.status_block_index)); BXE_PRINTF( " 0x%04X - CSTORM [RDMA_EQ_CONS]\n", - le16toh(dsb->c_def_status_block.index_values[HC_INDEX_DEF_C_RDMA_EQ_CONS])); + le16toh(def_sb->c_def_status_block.index_values[HC_INDEX_DEF_C_RDMA_EQ_CONS])); BXE_PRINTF( " 0x%04X - CSTORM [RDMA_NAL_PROD]\n", - le16toh(dsb->c_def_status_block.index_values[HC_INDEX_DEF_C_RDMA_NAL_PROD])); + le16toh(def_sb->c_def_status_block.index_values[HC_INDEX_DEF_C_RDMA_NAL_PROD])); BXE_PRINTF( " 0x%04X - CSTORM [ETH_FW_TX_CQ_CONS]\n", - le16toh(dsb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_FW_TX_CQ_CONS])); + le16toh(def_sb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_FW_TX_CQ_CONS])); BXE_PRINTF( " 0x%04X - CSTORM [ETH_SLOW_PATH]\n", - le16toh(dsb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_SLOW_PATH])); + le16toh(def_sb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_SLOW_PATH])); BXE_PRINTF( " 0x%04X - CSTORM [ETH_RDMA_CQ_CONS]\n", - le16toh(dsb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_RDMA_CQ_CONS])); + le16toh(def_sb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_RDMA_CQ_CONS])); BXE_PRINTF( " 0x%04X - CSTORM [ETH_ISCSI_CQ_CONS]\n", - le16toh(dsb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_ISCSI_CQ_CONS])); + le16toh(def_sb->c_def_status_block.index_values[HC_INDEX_DEF_C_ETH_ISCSI_CQ_CONS])); BXE_PRINTF( " 0x%04X - CSTORM [UNUSED]\n", - le16toh(dsb->c_def_status_block.index_values[6])); + le16toh(def_sb->c_def_status_block.index_values[6])); BXE_PRINTF( " 0x%04X - CSTORM [UNUSED]\n", - le16toh(dsb->c_def_status_block.index_values[7])); + le16toh(def_sb->c_def_status_block.index_values[7])); /* Print the TSTORM fields (HC_TSTORM_DEF_SB_NUM_INDICES). 
*/ BXE_PRINTF( " 0x%02X - TSTORM Status Block ID\n", - dsb->t_def_status_block.status_block_id); + def_sb->t_def_status_block.status_block_id); BXE_PRINTF( " 0x%04X - TSTORM Status Block Index\n", - le16toh(dsb->t_def_status_block.status_block_index)); + le16toh(def_sb->t_def_status_block.status_block_index)); for (i = 0; i < HC_TSTORM_DEF_SB_NUM_INDICES; i++) BXE_PRINTF( " 0x%04X - TSTORM [UNUSED]\n", - le16toh(dsb->t_def_status_block.index_values[i])); + le16toh(def_sb->t_def_status_block.index_values[i])); /* Print the XSTORM fields (HC_XSTORM_DEF_SB_NUM_INDICES). */ BXE_PRINTF( " 0x%02X - XSTORM Status Block ID\n", - dsb->x_def_status_block.status_block_id); + def_sb->x_def_status_block.status_block_id); BXE_PRINTF( " 0x%04X - XSTORM Status Block Index\n", - le16toh(dsb->x_def_status_block.status_block_index)); + le16toh(def_sb->x_def_status_block.status_block_index)); for (i = 0; i < HC_XSTORM_DEF_SB_NUM_INDICES; i++) BXE_PRINTF( " 0x%04X - XSTORM [UNUSED]\n", - le16toh(dsb->x_def_status_block.index_values[i])); + le16toh(def_sb->x_def_status_block.index_values[i])); BXE_PRINTF( "----------------------------" @@ -17459,7 +16920,7 @@ void bxe_dump_status_block(struct bxe_softc *sc) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_stats_block(struct bxe_softc *sc) { @@ -17471,7 +16932,7 @@ void bxe_dump_stats_block(struct bxe_softc *sc) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_fp_state(struct bxe_fastpath *fp) { struct bxe_softc *sc; @@ -17501,9 +16962,6 @@ void bxe_dump_fp_state(struct bxe_fastpath *fp) /* Receive state. 
*/ BXE_PRINTF( - " 0x%04X - (fp[%02d]->free_rx_bd)\n", - fp->free_rx_bd, fp->index); - BXE_PRINTF( " 0x%04X - (fp[%02d]->rx_bd_prod)\n", fp->rx_bd_prod, fp->index); BXE_PRINTF( @@ -17525,13 +16983,13 @@ void bxe_dump_fp_state(struct bxe_fastpath *fp) " %16lu - (fp[%02d]->ipackets)\n", fp->ipackets, fp->index); BXE_PRINTF( - " %16lu - (fp[%02d]->soft_rx_errors)\n", - fp->soft_rx_errors, fp->index); + " %16lu - (fp[%02d]->rx_soft_errors)\n", + fp->rx_soft_errors, fp->index); /* Transmit state. */ BXE_PRINTF( - " 0x%04X - (fp[%02d]->used_tx_bd)\n", - fp->used_tx_bd, fp->index); + " 0x%04X - (fp[%02d]->tx_bd_used)\n", + fp->tx_bd_used, fp->index); BXE_PRINTF( " 0x%04X - (fp[%02d]->tx_bd_prod)\n", fp->tx_bd_prod, fp->index); @@ -17554,14 +17012,14 @@ void bxe_dump_fp_state(struct bxe_fastpath *fp) " %16lu - (fp[%02d]->opackets)\n", fp->opackets, fp->index); BXE_PRINTF( - " %16lu - (fp[%02d]->soft_tx_errors)\n", - fp->soft_tx_errors, fp->index); + " %16lu - (fp[%02d]->tx_soft_errors)\n", + fp->tx_soft_errors, fp->index); /* TPA state. */ if (TPA_ENABLED(sc)) { BXE_PRINTF( - " %16lu - (fp[%02d]->tpa_pkts)\n", - fp->tpa_pkts, fp->index); + " %16lu - (fp[%02d]->rx_tpa_pkts)\n", + fp->rx_tpa_pkts, fp->index); BXE_PRINTF( " 0x%08X - (fp[%02d]->tpa_mbuf_alloc)\n", fp->tpa_mbuf_alloc, fp->index); @@ -17592,7 +17050,7 @@ void bxe_dump_fp_state(struct bxe_fastpath *fp) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_port_state_locked(struct bxe_softc *sc) { @@ -17622,7 +17080,7 @@ void bxe_dump_port_state_locked(struct bxe_softc *sc) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_link_vars_state_locked(struct bxe_softc *sc) { BXE_PRINTF( @@ -17685,7 +17143,7 @@ void bxe_dump_link_vars_state_locked(struct bxe_softc *sc) * Returns: * Nothing. 
*/ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_link_params_state_locked(struct bxe_softc *sc) { BXE_PRINTF( @@ -17739,7 +17197,7 @@ void bxe_dump_link_params_state_locked(struct bxe_softc *sc) * Returns: * Nothing. */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_driver_state(struct bxe_softc *sc) { uint32_t val_hi, val_lo; @@ -17773,12 +17231,10 @@ void bxe_dump_driver_state(struct bxe_softc *sc) sc->rx_lane_swap); BXE_PRINTF(" 0x%08X - (sc->tx_lane_swap) TX XAUI lane swap\n", sc->tx_lane_swap); - BXE_PRINTF(" %16lu - (sc->debug_mbuf_sim_alloc_failed)\n", - sc->debug_mbuf_sim_alloc_failed); - BXE_PRINTF(" %16lu - (sc->debug_mbuf_sim_map_failed)\n", - sc->debug_mbuf_sim_map_failed); - BXE_PRINTF(" %16lu - (sc->debug_memory_allocated)\n", - sc->debug_memory_allocated); + BXE_PRINTF(" %16lu - (sc->debug_sim_mbuf_alloc_failed)\n", + sc->debug_sim_mbuf_alloc_failed); + BXE_PRINTF(" %16lu - (sc->debug_sim_mbuf_map_failed)\n", + sc->debug_sim_mbuf_map_failed); BXE_PRINTF( "----------------------------" @@ -17791,44 +17247,39 @@ void bxe_dump_driver_state(struct bxe_softc *sc) } /* - * Dump bootcode debug buffer to the console. + * Dump bootcode (MCP) debug buffer to the console. 
* * Returns: * None */ -static __attribute__ ((noinline)) +static __noinline void bxe_dump_fw(struct bxe_softc *sc) { - uint32_t data[9], mark, offset; - int word; + uint32_t addr, mark, data[9], offset; + int word; - mark = REG_RD(sc, MCP_REG_MCPR_SCRATCH + 0xf104); - mark = ((mark + 0x3) & ~0x3); + addr = sc->common.shmem_base - 0x0800 + 4; + mark = REG_RD(sc, addr); + mark = MCP_REG_MCPR_SCRATCH + ((mark + 0x3) & ~0x3) - 0x08000000; BXE_PRINTF( - "----------------------------" - " Bootcode State " - "----------------------------\n"); - BXE_PRINTF("Begin MCP bootcode dump (mark = 0x%08X)\n", mark); - BXE_PRINTF( - "----------------------------" - "----------------" - "----------------------------\n"); + "---------------------------" + " MCP Debug Buffer " + "---------------------------\n"); - for (offset = mark - 0x08000000; offset <= 0xF900; + /* Read from "mark" to the end of the buffer. */ + for (offset = mark; offset <= sc->common.shmem_base; offset += (0x8 * 4)) { for (word = 0; word < 8; word++) - data[word] = htonl(REG_RD(sc, MCP_REG_MCPR_SCRATCH + - offset + 4 * word)); + data[word] = htonl(REG_RD(sc, offset + 4 * word)); data[8] = 0x0; printf("%s", (char *) data); } - for (offset = 0xF108; offset <= mark - 0x08000000; - offset += (0x8 * 4)) { + /* Read from the start of the buffer to "mark". */ + for (offset = addr + 4; offset <= mark; offset += (0x8 * 4)) { for (word = 0; word < 8; word++) - data[word] = htonl(REG_RD(sc, MCP_REG_MCPR_SCRATCH + - offset + 4 * word)); + data[word] = htonl(REG_RD(sc, offset + 4 * word)); data[8] = 0x0; printf("%s", (char *) data); } @@ -18129,26 +17580,9 @@ bxe_breakpoint(struct bxe_softc *sc) bxe_dump_fp_state(&sc->fp[i]); bxe_dump_status_block(sc); + bxe_dump_fw(sc); /* Call the OS debugger. */ breakpoint(); } #endif - -/* - * - * Returns: - * Nothing. 
- */ -static void -bxe_gunzip_end(struct bxe_softc *sc) -{ - free(sc->strm, M_DEVBUF); - sc->strm = NULL; - - if (sc->gunzip_buf) { - bxe_dmamem_free(sc, sc->gunzip_tag, sc->gunzip_buf, - sc->gunzip_map); - sc->gunzip_buf = NULL; - } -} diff --git a/sys/dev/bxe/if_bxe.h b/sys/dev/bxe/if_bxe.h index 8da3db4..a5af0bd 100644 --- a/sys/dev/bxe/if_bxe.h +++ b/sys/dev/bxe/if_bxe.h @@ -251,20 +251,22 @@ struct bxe_type { #define SGE_PAGE_SHIFT PAGE_SHIFT #define SGE_PAGE_ALIGN(addr) PAGE_ALIGN(addr) -/* SGE ring related macros */ +/* NUM_RX_SGE_PAGES must be a power of 2. */ #define NUM_RX_SGE_PAGES 2 -#define RX_SGE_CNT (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge)) -#define MAX_RX_SGE_CNT (RX_SGE_CNT - 2) +#define TOTAL_RX_SGE_PER_PAGE (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge)) /* 512 */ +#define USABLE_RX_SGE_PER_PAGE (TOTAL_RX_SGE_PER_PAGE - 2) /* 510 */ +#define RX_SGE_PER_PAGE_MASK (TOTAL_RX_SGE_PER_PAGE - 1) /* 511 */ +#define TOTAL_RX_SGE (TOTAL_RX_SGE_PER_PAGE * NUM_RX_SGE_PAGES) /* 1024 */ +#define USABLE_RX_SGE (USABLE_RX_SGE_PER_PAGE * NUM_RX_SGE_PAGES) /* 1020 */ +#define MAX_RX_SGE (TOTAL_RX_SGE - 1) /* 1023 */ + -/* RX_SGE_CNT is required to be a power of 2 */ -#define RX_SGE_MASK (RX_SGE_CNT - 1) -#define TOTAL_RX_SGE (RX_SGE_CNT * NUM_RX_SGE_PAGES) -#define MAX_RX_SGE (TOTAL_RX_SGE - 1) #define NEXT_SGE_IDX(x) \ - ((((x) & RX_SGE_MASK) == (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1) + ((((x) & RX_SGE_PER_PAGE_MASK) == (USABLE_RX_SGE_PER_PAGE - 1)) \ + ? (x) + 3 : (x) + 1) #define RX_SGE(x) ((x) & MAX_RX_SGE) -#define RX_SGE_PAGE(x) (((x) & ~RX_SGE_MASK) >> 9) -#define RX_SGE_IDX(x) ((x) & RX_SGE_MASK) +#define RX_SGE_PAGE(x) (((x) & ~RX_SGE_PER_PAGE_MASK) >> 9) +#define RX_SGE_IDX(x) ((x) & RX_SGE_PER_PAGE_MASK) /* SGE producer mask related macros. */ /* Number of bits in one sge_mask array element. */ @@ -282,23 +284,23 @@ struct bxe_type { /* Number of uint64_t elements in SGE mask array. 
*/ #define RX_SGE_MASK_LEN \ - ((NUM_RX_SGE_PAGES * RX_SGE_CNT) / RX_SGE_MASK_ELEM_SZ) + ((NUM_RX_SGE_PAGES * TOTAL_RX_SGE_PER_PAGE) / RX_SGE_MASK_ELEM_SZ) #define RX_SGE_MASK_LEN_MASK (RX_SGE_MASK_LEN - 1) #define NEXT_SGE_MASK_ELEM(el) (((el) + 1) & RX_SGE_MASK_LEN_MASK) + /* * Transmit Buffer Descriptor (tx_bd) definitions* */ -/* ToDo: Tune this value based on multi-queue/RSS enable/disable. */ -#define NUM_TX_PAGES 2 +/* NUM_TX_PAGES must be a power of 2. */ +#define NUM_TX_PAGES 1 +#define TOTAL_TX_BD_PER_PAGE (BCM_PAGE_SIZE / sizeof(union eth_tx_bd_types)) /* 256 */ +#define USABLE_TX_BD_PER_PAGE (TOTAL_TX_BD_PER_PAGE - 1) /* 255 */ +#define TOTAL_TX_BD (TOTAL_TX_BD_PER_PAGE * NUM_TX_PAGES) /* 512 */ +#define USABLE_TX_BD (USABLE_TX_BD_PER_PAGE * NUM_TX_PAGES) /* 510 */ +#define MAX_TX_BD (TOTAL_TX_BD - 1) /* 511 */ -#define TOTAL_TX_BD_PER_PAGE (BCM_PAGE_SIZE / sizeof(union eth_tx_bd_types)) -#define USABLE_TX_BD_PER_PAGE (TOTAL_TX_BD_PER_PAGE - 1) -#define TOTAL_TX_BD (TOTAL_TX_BD_PER_PAGE * NUM_TX_PAGES) -#define USABLE_TX_BD (USABLE_TX_BD_PER_PAGE * NUM_TX_PAGES) -#define MAX_TX_AVAIL (USABLE_TX_BD_PER_PAGE * NUM_TX_PAGES - 2) -#define MAX_TX_BD (TOTAL_TX_BD - 1) #define NEXT_TX_BD(x) \ ((((x) & USABLE_TX_BD_PER_PAGE) == \ (USABLE_TX_BD_PER_PAGE - 1)) ? (x) + 2 : (x) + 1) @@ -309,55 +311,33 @@ struct bxe_type { /* * Receive Buffer Descriptor (rx_bd) definitions* */ -#define NUM_RX_PAGES 2 - -/* 512 (0x200) of 8 byte bds in 4096 byte page. */ -#define TOTAL_RX_BD_PER_PAGE (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd)) - -/* 510 (0x1fe) = 512 - 2 */ -#define USABLE_RX_BD_PER_PAGE (TOTAL_RX_BD_PER_PAGE - 2) - -/* 1024 (0x400) */ -#define TOTAL_RX_BD (TOTAL_RX_BD_PER_PAGE * NUM_RX_PAGES) -/* 1020 (0x3fc) = 1024 - 4 */ -#define USABLE_RX_BD (USABLE_RX_BD_PER_PAGE * NUM_RX_PAGES) - -/* 1023 (0x3ff) = 1024 -1 */ -#define MAX_RX_BD (TOTAL_RX_BD - 1) - -/* 511 (0x1ff) = 512 - 1 */ -#define RX_DESC_MASK (TOTAL_RX_BD_PER_PAGE - 1) +/* NUM_RX_PAGES must be a power of 2. 
*/ +#define NUM_RX_PAGES 1 +#define TOTAL_RX_BD_PER_PAGE (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd)) /* 512 */ +#define USABLE_RX_BD_PER_PAGE (TOTAL_RX_BD_PER_PAGE - 2) /* 510 */ +#define RX_BD_PER_PAGE_MASK (TOTAL_RX_BD_PER_PAGE - 1) /* 511 */ +#define TOTAL_RX_BD (TOTAL_RX_BD_PER_PAGE * NUM_RX_PAGES) /* 1024 */ +#define USABLE_RX_BD (USABLE_RX_BD_PER_PAGE * NUM_RX_PAGES) /* 1020 */ +#define MAX_RX_BD (TOTAL_RX_BD - 1) /* 1023 */ #define NEXT_RX_BD(x) \ - ((((x) & RX_DESC_MASK) == \ + ((((x) & RX_BD_PER_PAGE_MASK) == \ (USABLE_RX_BD_PER_PAGE - 1)) ? (x) + 3 : (x) + 1) /* x & 0x3ff */ #define RX_BD(x) ((x) & MAX_RX_BD) -#define RX_PAGE(x) (((x) & ~RX_DESC_MASK) >> 9) -#define RX_IDX(x) ((x) & RX_DESC_MASK) +#define RX_PAGE(x) (((x) & ~RX_BD_PER_PAGE_MASK) >> 9) +#define RX_IDX(x) ((x) & RX_BD_PER_PAGE_MASK) /* * Receive Completion Queue definitions* */ - -/* CQEs (32 bytes) are 4 times larger than rx_bd's (8 bytes). */ #define NUM_RCQ_PAGES (NUM_RX_PAGES * 4) - -/* 128 (0x80) */ -#define TOTAL_RCQ_ENTRIES_PER_PAGE (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe)) - -/* 127 (0x7f)for the next page RCQ bd */ -#define USABLE_RCQ_ENTRIES_PER_PAGE (TOTAL_RCQ_ENTRIES_PER_PAGE - 1) - -/* 1024 (0x400) */ -#define TOTAL_RCQ_ENTRIES (TOTAL_RCQ_ENTRIES_PER_PAGE * NUM_RCQ_PAGES) - -/* 1016 (0x3f8) */ -#define USABLE_RCQ_ENTRIES (USABLE_RCQ_ENTRIES_PER_PAGE * NUM_RCQ_PAGES) - -/* 1023 (0x3ff) */ -#define MAX_RCQ_ENTRIES (TOTAL_RCQ_ENTRIES - 1) +#define TOTAL_RCQ_ENTRIES_PER_PAGE (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe)) /* 128 */ +#define USABLE_RCQ_ENTRIES_PER_PAGE (TOTAL_RCQ_ENTRIES_PER_PAGE - 1) /* 127 */ +#define TOTAL_RCQ_ENTRIES (TOTAL_RCQ_ENTRIES_PER_PAGE * NUM_RCQ_PAGES) /* 1024 */ +#define USABLE_RCQ_ENTRIES (USABLE_RCQ_ENTRIES_PER_PAGE * NUM_RCQ_PAGES) /* 1016 */ +#define MAX_RCQ_ENTRIES (TOTAL_RCQ_ENTRIES - 1) /* 1023 */ #define NEXT_RCQ_IDX(x) \ ((((x) & USABLE_RCQ_ENTRIES_PER_PAGE) == \ @@ -383,11 +363,11 @@ struct bxe_type { } while (0) #define SGE_MASK_SET_BIT(fp, idx) 
\ - __SGE_MASK_SET_BIT(fp->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \ + __SGE_MASK_SET_BIT(fp->rx_sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \ ((idx) & RX_SGE_MASK_ELEM_MASK)) #define SGE_MASK_CLEAR_BIT(fp, idx) \ - __SGE_MASK_CLEAR_BIT(fp->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \ + __SGE_MASK_CLEAR_BIT(fp->rx_sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \ ((idx) & RX_SGE_MASK_ELEM_MASK)) #define BXE_TX_TIMEOUT 5 @@ -418,14 +398,14 @@ struct bxe_type { * IFCAP_TSO6, IFCAP_WOL_UCAST. */ #if __FreeBSD_version < 700000 -#define BXE_IF_CAPABILITIES \ +#define BXE_IF_CAPABILITIES \ (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_HWCSUM | \ IFCAP_JUMBO_MTU) #else - /* TSO was introduced in FreeBSD 7 */ -#define BXE_IF_CAPABILITIES \ + /* TSO/LRO was introduced in FreeBSD 7 */ +#define BXE_IF_CAPABILITIES \ (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_HWCSUM | \ - IFCAP_JUMBO_MTU | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM) + IFCAP_JUMBO_MTU | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM | IFCAP_LRO) #endif /* Some typical Ethernet frame sizes */ @@ -449,6 +429,10 @@ struct bxe_type { /* Resolution of the rate shaping timer - 100 usec */ #define RS_PERIODIC_TIMEOUT_USEC 100 +#define BXE_MBUF_ALLOC_RETRY_COUNT 10 +#define BXE_MBUF_MAPPING_RETRY_COUNT 10 +#define BXE_MBUF_RETRY_DELAY 100 + /* * Resolution of fairness algorithm, in usecs. * Coefficient for calculating the actual t_fair. @@ -546,7 +530,8 @@ enum bxe_stats_state { STATS_STATE_MAX }; -struct bxe_eth_stats { +/* Statistics for an Ethernet port. */ +struct bxe_port_stats { uint32_t total_bytes_received_hi; uint32_t total_bytes_received_lo; uint32_t total_bytes_transmitted_hi; @@ -567,6 +552,12 @@ struct bxe_eth_stats { uint32_t valid_bytes_received_lo; uint32_t error_bytes_received_hi; uint32_t error_bytes_received_lo; + uint32_t etherstatsoverrsizepkts_hi; + uint32_t etherstatsoverrsizepkts_lo; + uint32_t no_buff_discard_hi; + uint32_t no_buff_discard_lo; + + /* Layout must match struct mac_stx. 
*/ uint32_t rx_stat_ifhcinbadoctets_hi; uint32_t rx_stat_ifhcinbadoctets_lo; uint32_t tx_stat_ifhcoutbadoctets_hi; @@ -643,39 +634,33 @@ struct bxe_eth_stats { uint32_t tx_stat_dot3statsinternalmactransmiterrors_lo; uint32_t tx_stat_bmac_ufl_hi; uint32_t tx_stat_bmac_ufl_lo; - uint32_t brb_drop_hi; - uint32_t brb_drop_lo; - uint32_t brb_truncate_hi; - uint32_t brb_truncate_lo; + /* End of mac_stx. */ + uint32_t pause_frames_received_hi; uint32_t pause_frames_received_lo; uint32_t pause_frames_sent_hi; uint32_t pause_frames_sent_lo; - uint32_t jabber_packets_received; - uint32_t etherstatspkts1024octetsto1522octets_hi; uint32_t etherstatspkts1024octetsto1522octets_lo; uint32_t etherstatspktsover1522octets_hi; uint32_t etherstatspktsover1522octets_lo; - - uint32_t no_buff_discard_hi; - uint32_t no_buff_discard_lo; - + uint32_t brb_drop_hi; + uint32_t brb_drop_lo; + uint32_t brb_truncate_hi; + uint32_t brb_truncate_lo; uint32_t mac_filter_discard; uint32_t xxoverflow_discard; uint32_t brb_truncate_discard; uint32_t mac_discard; - uint32_t driver_xoff; uint32_t rx_err_discard_pkt; uint32_t rx_skb_alloc_failed; uint32_t hw_csum_err; - uint32_t nig_timer_max; }; #define STATS_OFFSET32(stat_name) \ - (offsetof(struct bxe_eth_stats, stat_name) / 4) + (offsetof(struct bxe_port_stats, stat_name) / 4) #define MAX_CONTEXT 16 @@ -841,6 +826,18 @@ struct bxe_port { #define PMF_DMAE_C(sc) \ (BP_PORT(sc) * MAX_DMAE_C_PER_PORT + E1HVN_MAX) + +/* Used to manage DMA allocations. */ +struct bxe_dma { + bus_addr_t paddr; + void *vaddr; + bus_dma_tag_t tag; + bus_dmamap_t map; + bus_dma_segment_t seg; + bus_size_t size; + int nseg; +}; + /* * This is the slowpath data structure. It is mapped into non-paged memory * so that the hardware can access it's contents directly and must be page @@ -884,7 +881,7 @@ struct bxe_slowpath { #define BXE_SP(sc, var) (&sc->slowpath->var) #define BXE_SP_CHECK(sc, var) ((sc->slowpath) ? 
(&sc->slowpath->var) : NULL) #define BXE_SP_MAPPING(sc, var) \ - (sc->slowpath_paddr + offsetof(struct bxe_slowpath, var)) + (sc->slowpath_dma.paddr + offsetof(struct bxe_slowpath, var)) union db_prod { struct doorbell_set_prod data; @@ -933,51 +930,33 @@ struct bxe_fastpath { struct mtx mtx; char mtx_name[16]; - /* Hardware maintained status block. */ - bus_dma_tag_t status_block_tag; - bus_dmamap_t status_block_map; + /* Status block. */ + struct bxe_dma sb_dma; struct host_status_block *status_block; - bus_addr_t status_block_paddr; -#ifdef notyet - /* - * In this implementation the doorbell data block - * (eth_tx_db_data) is mapped into memory immediately - * following the status block and is part of the same - * memory allocation. - */ - struct eth_tx_db_data *hw_tx_prods; - bus_addr_t tx_prods_paddr; -#endif - /* Hardware maintained TX buffer descriptor chains. */ - bus_dma_tag_t tx_bd_chain_tag; - bus_dmamap_t tx_bd_chain_map[NUM_TX_PAGES]; + /* Transmit chain. */ + struct bxe_dma tx_dma; + union eth_tx_bd_types *tx_chain; + + /* Receive chain. */ + struct bxe_dma rx_dma; + struct eth_rx_bd *rx_chain; - union eth_tx_bd_types *tx_bd_chain[NUM_TX_PAGES]; - bus_addr_t tx_bd_chain_paddr[NUM_TX_PAGES]; + /* Receive completion queue chain. */ + struct bxe_dma rcq_dma; + union eth_rx_cqe *rcq_chain; - /* Bus resource tag for TX mbufs. */ + /* Bus resource tag, map, and mbufs for TX chain. */ bus_dma_tag_t tx_mbuf_tag; bus_dmamap_t tx_mbuf_map[TOTAL_TX_BD]; struct mbuf *tx_mbuf_ptr[TOTAL_TX_BD]; - /* Hardware maintained RX buffer descriptor chains. */ - bus_dma_tag_t rx_bd_chain_tag; - bus_dmamap_t rx_bd_chain_map[NUM_RX_PAGES]; - struct eth_rx_bd *rx_bd_chain[NUM_RX_PAGES]; - bus_addr_t rx_bd_chain_paddr[NUM_RX_PAGES]; - - /* Bus resource tag for RX mbufs. */ + /* Bus resource tag, map, and mbufs for RX chain. 
*/ bus_dma_tag_t rx_mbuf_tag; bus_dmamap_t rx_mbuf_map[TOTAL_RX_BD]; + bus_dmamap_t rx_mbuf_spare_map; struct mbuf *rx_mbuf_ptr[TOTAL_RX_BD]; - /* Hardware maintained Completion Queue (CQ) chains. */ - bus_dma_tag_t rx_cq_chain_tag; - bus_dmamap_t rx_cq_chain_map[NUM_RCQ_PAGES]; - union eth_rx_cqe *rx_cq_chain[NUM_RCQ_PAGES]; - bus_addr_t rx_cq_chain_paddr[NUM_RCQ_PAGES]; - /* Ticks until chip reset. */ int watchdog_timer; @@ -1014,8 +993,6 @@ struct bxe_fastpath { /* Transmit packet producer index (used in eth_tx_bd). */ uint16_t tx_pkt_prod; - - /* Transmit packet consumer index. */ uint16_t tx_pkt_cons; /* Transmit buffer descriptor prod/cons indices. */ @@ -1044,25 +1021,27 @@ struct bxe_fastpath { uint16_t *rx_bd_cons_sb; /* Pointer to the transmit consumer in the status block. */ - uint16_t *tx_cons_sb; + uint16_t *tx_pkt_cons_sb; - /* Free/used buffer descriptor counters. */ - uint16_t used_tx_bd; + /* Used TX buffer descriptor counters. */ + uint16_t tx_bd_used; /* Begin: TPA Related data structure. */ - /* Hardware maintained RX Scatter Gather Entry chains. */ - bus_dma_tag_t rx_sge_chain_tag; - bus_dmamap_t rx_sge_chain_map[NUM_RX_SGE_PAGES]; - struct eth_rx_sge *rx_sge_chain[NUM_RX_SGE_PAGES]; - bus_addr_t rx_sge_chain_paddr[NUM_RX_SGE_PAGES]; + struct bxe_dma sg_dma; + struct eth_rx_sge *sg_chain; /* Bus tag for RX SGE bufs. */ bus_dma_tag_t rx_sge_buf_tag; bus_dmamap_t rx_sge_buf_map[TOTAL_RX_SGE]; + bus_dmamap_t rx_sge_spare_map; struct mbuf *rx_sge_buf_ptr[TOTAL_RX_SGE]; - uint64_t sge_mask[RX_SGE_MASK_LEN]; + /* + * Bitmask for each SGE element indicating which + * aggregation that element is a part of. + */ + uint64_t rx_sge_mask[RX_SGE_MASK_LEN]; uint16_t rx_sge_prod; /* The last maximal completed SGE. */ @@ -1072,6 +1051,7 @@ struct bxe_fastpath { /* Use the larger supported size for TPA queue length. 
*/ bus_dmamap_t tpa_mbuf_map[ETH_MAX_AGGREGATION_QUEUES_E1H]; + bus_dmamap_t tpa_mbuf_spare_map; struct mbuf *tpa_mbuf_ptr[ETH_MAX_AGGREGATION_QUEUES_E1H]; bus_dma_segment_t tpa_mbuf_segs[ETH_MAX_AGGREGATION_QUEUES_E1H]; @@ -1088,21 +1068,46 @@ struct bxe_fastpath { struct xstorm_per_client_stats old_xclient; struct bxe_q_stats eth_q_stats; - uint16_t free_rx_bd; - #if __FreeBSD_version >= 800000 struct buf_ring *br; #endif - /* Recieve/transmit packet counters. */ + /* Receive path driver statistics. */ unsigned long rx_pkts; + unsigned long rx_tpa_pkts; + unsigned long rx_null_cqe_flags; + unsigned long rx_soft_errors; + + /* Transmit path driver statistics. */ unsigned long tx_pkts; - unsigned long tpa_pkts; - unsigned long rx_calls; - unsigned long mbuf_alloc_failed; + unsigned long tx_soft_errors; + unsigned long tx_offload_frames_csum_ip; + unsigned long tx_offload_frames_csum_tcp; + unsigned long tx_offload_frames_csum_udp; + unsigned long tx_offload_frames_tso; + unsigned long tx_header_splits; + unsigned long tx_encap_failures; + unsigned long tx_hw_queue_full; + unsigned long tx_hw_max_queue_depth; + unsigned long tx_dma_mapping_failure; + int tx_max_drbr_queue_depth; + unsigned long tx_window_violation_std; + unsigned long tx_window_violation_tso; + unsigned long tx_unsupported_tso_request_ipv6; + unsigned long tx_unsupported_tso_request_not_tcp; + unsigned long tx_chain_lost_mbuf; + unsigned long tx_frame_deferred; + unsigned long tx_queue_xoff; + + /* Memory path driver statistics. */ unsigned long mbuf_defrag_attempts; unsigned long mbuf_defrag_failures; - unsigned long mbuf_defrag_successes; + unsigned long mbuf_rx_bd_alloc_failed; + unsigned long mbuf_rx_bd_mapping_failed; + unsigned long mbuf_tpa_alloc_failed; + unsigned long mbuf_tpa_mapping_failed; + unsigned long mbuf_sge_alloc_failed; + unsigned long mbuf_sge_mapping_failed; /* Track the number of enqueued mbufs. 
*/ int tx_mbuf_alloc; @@ -1110,29 +1115,9 @@ struct bxe_fastpath { int sge_mbuf_alloc; int tpa_mbuf_alloc; - int max_drbr_queue_depth; - uint64_t tpa_queue_used; - unsigned long null_cqe_flags; - unsigned long offload_frames_csum_ip; - unsigned long offload_frames_csum_tcp; - unsigned long offload_frames_csum_udp; - unsigned long offload_frames_tso; - unsigned long tx_encap_failures; - unsigned long tx_start_called_on_empty_queue; - unsigned long tx_queue_too_full; - unsigned long tx_dma_mapping_failure; - unsigned long window_violation_tso; - unsigned long window_violation_std; - unsigned long unsupported_tso_request_ipv6; - unsigned long unsupported_tso_request_not_tcp; - unsigned long tpa_mbuf_alloc_failed; - unsigned long tx_chain_lost_mbuf; - /* FreeBSD interface statistics. */ - unsigned long soft_rx_errors; - unsigned long soft_tx_errors; unsigned long ipackets; unsigned long opackets; @@ -1144,7 +1129,7 @@ struct bxe_fastpath { #define BXE_STATUS_BLK_SZ \ sizeof(struct host_status_block) /* +sizeof(struct eth_tx_db_data) */ #define BXE_DEF_STATUS_BLK_SZ sizeof(struct host_def_status_block) -#define BXE_STATS_BLK_SZ sizeof(struct bxe_eth_stats) +#define BXE_STATS_BLK_SZ sizeof(struct bxe_port_stats) #define BXE_SLOWPATH_SZ sizeof(struct bxe_slowpath) #define BXE_SPQ_SZ BCM_PAGE_SIZE #define BXE_TX_CHAIN_PAGE_SZ BCM_PAGE_SIZE @@ -1165,14 +1150,13 @@ struct bxe_softc { /* Bus tag for the bxe controller. */ bus_dma_tag_t parent_tag; + /* OS resources for BAR0 memory. */ struct resource *bxe_res; bus_space_tag_t bxe_btag; bus_space_handle_t bxe_bhandle; vm_offset_t bxe_vhandle; - /* OS resources for BAR2 memory. */ - /* OS resources for BAR1 doorbell memory. */ #define BXE_DB_SIZE (16 * 2048) struct resource *bxe_db_res; @@ -1216,7 +1200,6 @@ struct bxe_softc { struct taskqueue *tq; /* RX Driver parameters*/ uint32_t rx_csum; - int rx_buf_size; /* ToDo: Replace with OS specific defintions. 
*/ #define ETH_HLEN 14 @@ -1225,11 +1208,8 @@ struct bxe_softc { #define ETH_MAX_PACKET_SIZE 1500 #define ETH_MAX_JUMBO_PACKET_SIZE 9600 - /* Hardware Maintained Host Default Status Block. */ - bus_dma_tag_t def_status_block_tag; - bus_dmamap_t def_status_block_map; - struct host_def_status_block *def_status_block; - bus_addr_t def_status_block_paddr; + struct bxe_dma def_sb_dma; + struct host_def_status_block *def_sb; #define DEF_SB_ID 16 uint16_t def_c_idx; @@ -1241,23 +1221,15 @@ struct bxe_softc { uint32_t attn_state; struct attn_route attn_group[MAX_DYNAMIC_ATTN_GRPS]; - /* H/W maintained statistics block. */ - bus_dma_tag_t stats_tag; - bus_dmamap_t stats_map; - struct statistics_block *stats_block; - bus_addr_t stats_block_paddr; + struct bxe_dma stats_dma; + struct statistics_block *stats; - /* H/W maintained slow path. */ - bus_dma_tag_t slowpath_tag; - bus_dmamap_t slowpath_map; + struct bxe_dma slowpath_dma; struct bxe_slowpath *slowpath; - bus_addr_t slowpath_paddr; - /* Slow path ring. */ - bus_dma_tag_t spq_tag; - bus_dmamap_t spq_map; + struct bxe_dma spq_dma; struct eth_spe *spq; - bus_addr_t spq_paddr; + uint16_t spq_prod_idx; struct eth_spe *spq_prod_bd; struct eth_spe *spq_last_bd; @@ -1273,17 +1245,15 @@ struct bxe_softc { /* Device flags. 
*/ uint32_t bxe_flags; -#define BXE_ONE_PORT_FLAG 0x00000004 -#define BXE_NO_WOL_FLAG 0x00000008 -#define BXE_USING_DAC_FLAG 0x00000010 -#define BXE_USING_MSIX_FLAG 0x00000020 -#define BXE_USING_MSI_FLAG 0x00000040 -#define BXE_TPA_ENABLE_FLAG 0x00000080 -#define BXE_NO_MCP_FLAG 0x00000100 -#define BP_NOMCP(sc) (sc->bxe_flags & BXE_NO_MCP_FLAG) -#define BXE_SAFC_TX_FLAG 0x00000200 +#define BXE_ONE_PORT_FLAG 0x00000001 +#define BXE_NO_WOL_FLAG 0x00000002 +#define BXE_USING_DAC_FLAG 0x00000004 +#define BXE_TPA_ENABLE_FLAG 0x00000008 +#define BXE_NO_MCP_FLAG 0x00000010 #define TPA_ENABLED(sc) (sc->bxe_flags & BXE_TPA_ENABLE_FLAG) +#define NOMCP(sc) (sc->bxe_flags & BXE_NO_MCP_FLAG) + /* PCI Express function number for the device. */ int bxe_func; @@ -1386,8 +1356,6 @@ struct bxe_softc { int mrrs; int dcc_enable; -#define BXE_NUM_QUEUES(cos) \ - ((bxe_qs_per_cos & (0xff << (cos * 8))) >> (cos * 8)) #define BXE_MAX_QUEUES(sc) \ (IS_E1HMF(sc) ? (MAX_CONTEXT / E1HVN_MAX) : MAX_CONTEXT) @@ -1396,18 +1364,6 @@ struct bxe_softc { #define BXE_MAX_PRIORITY 8 #define BXE_MAX_ENTRIES_PER_PRI 16 - /* Number of queues per class of service. */ - uint8_t qs_per_cos[BXE_MAX_COS]; - - /* Priority to class of service mapping. */ - uint8_t pri_map[BXE_MAX_PRIORITY]; - - /* min rate per cos */ - uint16_t cos_min_rate[BXE_MAX_COS]; - - /* Class of service to queue mapping. */ - uint8_t cos_map[BXE_MAX_COS]; - /* Used for multiple function devices. */ uint32_t mf_config[E1HVN_MAX]; @@ -1449,15 +1405,13 @@ struct bxe_softc { /* Statistics. */ uint16_t stats_counter; - struct bxe_eth_stats eth_stats; + struct bxe_port_stats eth_stats; + /* Support for DMAE and compressed firmware. 
*/ z_streamp strm; - bus_dma_tag_t gunzip_tag; - bus_dmamap_t gunzip_map; - void *gunzip_buf; - bus_addr_t gunzip_mapping; - int gunzip_outlen; -#define FW_BUF_SIZE 0x40000 + struct bxe_dma gz_dma; + void *gz; +#define BXE_FW_BUF_SIZE 0x40000 struct raw_op *init_ops; /* Init blocks offsets inside init_ops */ @@ -1500,10 +1454,9 @@ struct bxe_softc { uint8_t intr_sem; #ifdef BXE_DEBUG - unsigned long debug_mbuf_sim_alloc_failed; - unsigned long debug_mbuf_sim_map_failed; + unsigned long debug_sim_mbuf_alloc_failed; + unsigned long debug_sim_mbuf_map_failed; unsigned long debug_received_frame_error; - unsigned long debug_memory_allocated; /* A buffer for hardware/firmware state information (grcdump). */ uint32_t *grcdump_buffer; @@ -1763,7 +1716,7 @@ struct bxe_softc { (&fp->status_block->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX]) #define BXE_SP_DSB_INDEX \ - &sc->def_status_block->c_def_status_block.index_values[C_DEF_SB_SP_INDEX] + &sc->def_sb->c_def_status_block.index_values[C_DEF_SB_SP_INDEX] #define BXE_RX_SB_INDEX_NUM \ (((U_SB_ETH_RX_CQ_INDEX << \ diff --git a/sys/dev/cardbus/cardbus_cis.c b/sys/dev/cardbus/cardbus_cis.c index 2cfea19..3352a56 100644 --- a/sys/dev/cardbus/cardbus_cis.c +++ b/sys/dev/cardbus/cardbus_cis.c @@ -324,7 +324,7 @@ decode_tuple_bar(device_t cbdev, device_t child, int id, * hint when the cardbus bridge is a child of pci0 (the main * bus). The PC Card spec seems to indicate that this should * only be done on x86 based machines, which suggests that on - * non-x86 machines the adddresses can be anywhere. Since the + * non-x86 machines the addresses can be anywhere. Since the * hardware can do it on non-x86 machines, it should be able * to do it on x86 machines too. Therefore, we can and should * ignore this hint. 
Furthermore, the PC Card spec recommends @@ -430,7 +430,6 @@ cardbus_read_tuple_finish(device_t cbdev, device_t child, int rid, { if (res != CIS_CONFIG_SPACE) { bus_release_resource(child, SYS_RES_MEMORY, rid, res); - bus_delete_resource(child, SYS_RES_MEMORY, rid); } } @@ -467,7 +466,7 @@ cardbus_read_tuple_init(device_t cbdev, device_t child, uint32_t *start, } /* allocate the memory space to read CIS */ - res = bus_alloc_resource(child, SYS_RES_MEMORY, rid, 0, ~0, 1, + res = bus_alloc_resource_any(child, SYS_RES_MEMORY, rid, rman_make_alignment_flags(4096) | RF_ACTIVE); if (res == NULL) { device_printf(cbdev, "Unable to allocate resource " diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 6b48325..8624fc1 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -110,6 +110,9 @@ enum { FW_IQ_QSIZE = 256, FW_IQ_ESIZE = 64, /* At least 64 mandated by the firmware spec */ + INTR_IQ_QSIZE = 64, + INTR_IQ_ESIZE = 64, /* Handles some CPLs too, do not reduce */ + CTRL_EQ_QSIZE = 128, CTRL_EQ_ESIZE = 64, @@ -141,7 +144,7 @@ enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), - INTR_FWD = (1 << 2), + INTR_SHARED = (1 << 2), /* one set of intrq's for all ports */ CXGBE_BUSY = (1 << 9), @@ -294,7 +297,7 @@ struct sge_eq { uint16_t pidx; /* producer idx (desc idx) */ uint16_t pending; /* # of descriptors used since last doorbell */ uint16_t iqid; /* iq that gets egr_update for the eq */ - uint32_t cntxt_id; /* SGE context id for the eq */ + unsigned int cntxt_id; /* SGE context id for the eq */ }; struct sge_fl { @@ -384,17 +387,16 @@ struct sge_ctrlq { /* stats for common events first */ - uint64_t total_wrs; /* # of work requests sent down this queue */ /* stats for not-that-common events */ uint32_t no_desc; /* out of hardware descriptors */ - uint32_t too_long; /* WR longer than hardware max */ } __aligned(CACHE_LINE_SIZE); struct sge { uint16_t timer_val[SGE_NTIMERS]; uint8_t counter_val[SGE_NCOUNTERS]; + int 
fl_starve_threshold; int nrxq; /* total rx queues (all ports and the rest) */ int ntxq; /* total tx queues (all ports and the rest) */ @@ -403,7 +405,7 @@ struct sge { struct sge_iq fwq; /* Firmware event queue */ struct sge_ctrlq *ctrlq;/* Control queues */ - struct sge_iq *fiq; /* Forwarded interrupt queues (INTR_FWD) */ + struct sge_iq *intrq; /* Interrupt queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ @@ -445,6 +447,7 @@ struct adapter { struct port_info *port[MAX_NPORTS]; uint8_t chan_map[NCHAN]; + struct l2t_data *l2t; /* L2 table */ struct tid_info tids; int registered_device_map; @@ -456,7 +459,9 @@ struct adapter { struct t4_virt_res vres; struct sysctl_ctx_list ctx; /* from first_port_up to last_port_down */ + struct sysctl_oid *oid_fwq; struct sysctl_oid *oid_ctrlq; + struct sysctl_oid *oid_intrq; struct mtx sc_lock; char lockname[16]; @@ -502,7 +507,10 @@ struct adapter { rxq = &pi->adapter->sge.rxq[pi->first_rxq]; \ for (iter = 0; iter < pi->nrxq; ++iter, ++rxq) -#define NFIQ(sc) ((sc)->intr_count > 1 ? (sc)->intr_count - 1 : 1) +/* One for errors, one for firmware events */ +#define T4_EXTRA_INTR 2 +#define NINTRQ(sc) ((sc)->intr_count > T4_EXTRA_INTR ? 
\ + (sc)->intr_count - T4_EXTRA_INTR : 1) static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) @@ -599,12 +607,9 @@ int t4_teardown_adapter_queues(struct adapter *); int t4_setup_eth_queues(struct port_info *); int t4_teardown_eth_queues(struct port_info *); void t4_intr_all(void *); -void t4_intr_fwd(void *); +void t4_intr(void *); void t4_intr_err(void *); void t4_intr_evt(void *); -void t4_intr_data(void *); -void t4_evt_rx(void *); -void t4_eth_rx(void *); int t4_mgmt_tx(struct adapter *, struct mbuf *); int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *); void t4_update_fl_bufsize(struct ifnet *); diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h index fa5ac9f..913be9b 100644 --- a/sys/dev/cxgbe/common/common.h +++ b/sys/dev/cxgbe/common/common.h @@ -54,7 +54,7 @@ enum { #define FW_VERSION_MAJOR 1 #define FW_VERSION_MINOR 3 -#define FW_VERSION_MICRO 8 +#define FW_VERSION_MICRO 10 struct port_stats { u64 tx_octets; /* total # of octets in good frames */ diff --git a/sys/dev/cxgbe/common/jhash.h b/sys/dev/cxgbe/common/jhash.h new file mode 100644 index 0000000..4546b7b --- /dev/null +++ b/sys/dev/cxgbe/common/jhash.h @@ -0,0 +1,140 @@ +#ifndef _JHASH_H +#define _JHASH_H + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup2.c, by Bob Jenkins, December 1996, Public Domain. + * hash(), hash2(), hash3, and mix() are externally useful functions. + * Routines to test the hash are included if SELF_TEST is defined. + * You can use this free for any purpose. It has no warranty. + * + * $FreeBSD$ + */ + +/* NOTE: Arguments are modified. 
*/ +#define __jhash_mix(a, b, c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* The golden ratio: an arbitrary value */ +#define JHASH_GOLDEN_RATIO 0x9e3779b9 + +/* The most generic version, hashes an arbitrary sequence + * of bytes. No alignment or length assumptions are made about + * the input key. + */ +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c, len; + const u8 *k = key; + + len = length; + a = b = JHASH_GOLDEN_RATIO; + c = initval; + + while (len >= 12) { + a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24)); + b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24)); + c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24)); + + __jhash_mix(a,b,c); + + k += 12; + len -= 12; + } + + c += length; + switch (len) { + case 11: c += ((u32)k[10]<<24); + case 10: c += ((u32)k[9]<<16); + case 9 : c += ((u32)k[8]<<8); + case 8 : b += ((u32)k[7]<<24); + case 7 : b += ((u32)k[6]<<16); + case 6 : b += ((u32)k[5]<<8); + case 5 : b += k[4]; + case 4 : a += ((u32)k[3]<<24); + case 3 : a += ((u32)k[2]<<16); + case 2 : a += ((u32)k[1]<<8); + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + +/* A special optimized version that handles 1 or more of u32s. + * The length parameter here is the number of u32s in the key.
+ */ +static inline u32 jhash2(u32 *k, u32 length, u32 initval) +{ + u32 a, b, c, len; + + a = b = JHASH_GOLDEN_RATIO; + c = initval; + len = length; + + while (len >= 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + k += 3; len -= 3; + } + + c += length * 4; + + switch (len) { + case 2 : b += k[1]; + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + + +/* Special ultra-optimized versions that know they are hashing exactly + * 3, 2 or 1 word(s). + * + * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally + * done at the end is not done here. + */ +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += initval; + + __jhash_mix(a, b, c); + + return c; +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return jhash_3words(a, b, 0, initval); +} + +static inline u32 jhash_1word(u32 a, u32 initval) +{ + return jhash_3words(a, 0, 0, initval); +} + +#endif /* _JHASH_H */ diff --git a/sys/dev/cxgbe/common/t4fw_interface.h b/sys/dev/cxgbe/common/t4fw_interface.h index 88126be..3eb9615 100644 --- a/sys/dev/cxgbe/common/t4fw_interface.h +++ b/sys/dev/cxgbe/common/t4fw_interface.h @@ -43,6 +43,7 @@ enum fw_retval { FW_ENOMEM = 12, /* out of memory */ FW_EFAULT = 14, /* bad address; fw bad */ FW_EBUSY = 16, /* resource busy */ + FW_EEXIST = 17, /* File exists */ FW_EINVAL = 22, /* invalid argument */ FW_ENOSYS = 38, /* functionality not implemented */ FW_EPROTO = 71, /* protocol error */ @@ -59,6 +60,8 @@ enum fw_retval { FW_FCOE_NO_XCHG = 136, /* */ FW_SCSI_RSP_ERR = 137, /* */ FW_ERR_RDEV_IMPL_LOGO = 138, /* */ + FW_SCSI_UNDER_FLOW_ERR = 139, /* */ + FW_SCSI_OVER_FLOW_ERR = 140, /* */ }; /****************************************************************************** @@ -85,7 +88,8 @@ enum fw_wr_opcodes { FW_RI_FR_NSMR_WR = 0x19, FW_RI_INV_LSTAG_WR = 0x1a, FW_RI_WR = 0x0d, - FW_LASTC2E_WR = 0x4a + FW_ISCSI_NODE_WR = 0x4a, +
FW_LASTC2E_WR = 0x4b }; /* @@ -514,7 +518,7 @@ struct fw_eth_tx_pkts_wr { __be32 r3; __be16 plen; __u8 npkt; - __u8 r4; + __u8 type; }; struct fw_eq_flush_wr { @@ -1465,6 +1469,65 @@ struct fw_ri_wr { #define G_FW_RI_WR_P2PTYPE(x) \ (((x) >> S_FW_RI_WR_P2PTYPE) & M_FW_RI_WR_P2PTYPE) +#ifdef FOISCSI +struct fw_iscsi_node_wr { + __u8 opcode; + __u8 subop; + __u8 node_attr_to_compl; + __u8 len16; + __u8 status; + __u8 r2; + __be16 immd_len; + __be64 cookie; + __be32 node_id; + __be32 ctrl_handle; + __be32 io_handle; + __be32 r3; +}; + +#define S_FW_ISCSI_NODE_WR_NODE_ATTR 7 +#define M_FW_ISCSI_NODE_WR_NODE_ATTR 0x1 +#define V_FW_ISCSI_NODE_WR_NODE_ATTR(x) ((x) << S_FW_ISCSI_NODE_WR_NODE_ATTR) +#define G_FW_ISCSI_NODE_WR_NODE_ATTR(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_NODE_ATTR) & M_FW_ISCSI_NODE_WR_NODE_ATTR) +#define F_FW_ISCSI_NODE_WR_NODE_ATTR V_FW_ISCSI_NODE_WR_NODE_ATTR(1U) + +#define S_FW_ISCSI_NODE_WR_SESS_ATTR 6 +#define M_FW_ISCSI_NODE_WR_SESS_ATTR 0x1 +#define V_FW_ISCSI_NODE_WR_SESS_ATTR(x) ((x) << S_FW_ISCSI_NODE_WR_SESS_ATTR) +#define G_FW_ISCSI_NODE_WR_SESS_ATTR(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_SESS_ATTR) & M_FW_ISCSI_NODE_WR_SESS_ATTR) +#define F_FW_ISCSI_NODE_WR_SESS_ATTR V_FW_ISCSI_NODE_WR_SESS_ATTR(1U) + +#define S_FW_ISCSI_NODE_WR_CONN_ATTR 5 +#define M_FW_ISCSI_NODE_WR_CONN_ATTR 0x1 +#define V_FW_ISCSI_NODE_WR_CONN_ATTR(x) ((x) << S_FW_ISCSI_NODE_WR_CONN_ATTR) +#define G_FW_ISCSI_NODE_WR_CONN_ATTR(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_CONN_ATTR) & M_FW_ISCSI_NODE_WR_CONN_ATTR) +#define F_FW_ISCSI_NODE_WR_CONN_ATTR V_FW_ISCSI_NODE_WR_CONN_ATTR(1U) + +#define S_FW_ISCSI_NODE_WR_TGT_ATTR 4 +#define M_FW_ISCSI_NODE_WR_TGT_ATTR 0x1 +#define V_FW_ISCSI_NODE_WR_TGT_ATTR(x) ((x) << S_FW_ISCSI_NODE_WR_TGT_ATTR) +#define G_FW_ISCSI_NODE_WR_TGT_ATTR(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_TGT_ATTR) & M_FW_ISCSI_NODE_WR_TGT_ATTR) +#define F_FW_ISCSI_NODE_WR_TGT_ATTR V_FW_ISCSI_NODE_WR_TGT_ATTR(1U) + +#define S_FW_ISCSI_NODE_WR_NODE_TYPE 3 +#define 
M_FW_ISCSI_NODE_WR_NODE_TYPE 0x1 +#define V_FW_ISCSI_NODE_WR_NODE_TYPE(x) ((x) << S_FW_ISCSI_NODE_WR_NODE_TYPE) +#define G_FW_ISCSI_NODE_WR_NODE_TYPE(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_NODE_TYPE) & M_FW_ISCSI_NODE_WR_NODE_TYPE) +#define F_FW_ISCSI_NODE_WR_NODE_TYPE V_FW_ISCSI_NODE_WR_NODE_TYPE(1U) + +#define S_FW_ISCSI_NODE_WR_COMPL 0 +#define M_FW_ISCSI_NODE_WR_COMPL 0x1 +#define V_FW_ISCSI_NODE_WR_COMPL(x) ((x) << S_FW_ISCSI_NODE_WR_COMPL) +#define G_FW_ISCSI_NODE_WR_COMPL(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_COMPL) & M_FW_ISCSI_NODE_WR_COMPL) +#define F_FW_ISCSI_NODE_WR_COMPL V_FW_ISCSI_NODE_WR_COMPL(1U) + +#endif /****************************************************************************** * C O M M A N D s @@ -1511,6 +1574,7 @@ enum fw_cmd_opcodes { FW_RSS_VI_CONFIG_CMD = 0x23, FW_SCHED_CMD = 0x24, FW_DEVLOG_CMD = 0x25, + FW_NETIF_CMD = 0x26, FW_LASTC2E_CMD = 0x40, FW_ERROR_CMD = 0x80, FW_DEBUG_CMD = 0x81, @@ -1941,6 +2005,8 @@ enum fw_caps_config_iscsi { FW_CAPS_CONFIG_ISCSI_TARGET_PDU = 0x00000002, FW_CAPS_CONFIG_ISCSI_INITIATOR_CNXOFLD = 0x00000004, FW_CAPS_CONFIG_ISCSI_TARGET_CNXOFLD = 0x00000008, + FW_CAPS_CONFIG_ISCSI_INITIATOR_SSNOFLD = 0x00000010, + FW_CAPS_CONFIG_ISCSI_TARGET_SSNOFLD = 0x00000020, }; enum fw_caps_config_fcoe { @@ -3941,6 +4007,39 @@ enum fw_port_cap { FW_PORT_CAP_TECHKX4 = 0x2000, }; +#define S_FW_PORT_AUXLINFO_MDI 3 +#define M_FW_PORT_AUXLINFO_MDI 0x3 +#define V_FW_PORT_AUXLINFO_MDI(x) ((x) << S_FW_PORT_AUXLINFO_MDI) +#define G_FW_PORT_AUXLINFO_MDI(x) \ + (((x) >> S_FW_PORT_AUXLINFO_MDI) & M_FW_PORT_AUXLINFO_MDI) + +#define S_FW_PORT_AUXLINFO_KX4 2 +#define M_FW_PORT_AUXLINFO_KX4 0x1 +#define V_FW_PORT_AUXLINFO_KX4(x) ((x) << S_FW_PORT_AUXLINFO_KX4) +#define G_FW_PORT_AUXLINFO_KX4(x) \ + (((x) >> S_FW_PORT_AUXLINFO_KX4) & M_FW_PORT_AUXLINFO_KX4) +#define F_FW_PORT_AUXLINFO_KX4 V_FW_PORT_AUXLINFO_KX4(1U) + +#define S_FW_PORT_AUXLINFO_KR 1 +#define M_FW_PORT_AUXLINFO_KR 0x1 +#define V_FW_PORT_AUXLINFO_KR(x) ((x) << 
S_FW_PORT_AUXLINFO_KR) +#define G_FW_PORT_AUXLINFO_KR(x) \ + (((x) >> S_FW_PORT_AUXLINFO_KR) & M_FW_PORT_AUXLINFO_KR) +#define F_FW_PORT_AUXLINFO_KR V_FW_PORT_AUXLINFO_KR(1U) + +#define S_FW_PORT_AUXLINFO_FEC 0 +#define M_FW_PORT_AUXLINFO_FEC 0x1 +#define V_FW_PORT_AUXLINFO_FEC(x) ((x) << S_FW_PORT_AUXLINFO_FEC) +#define G_FW_PORT_AUXLINFO_FEC(x) \ + (((x) >> S_FW_PORT_AUXLINFO_FEC) & M_FW_PORT_AUXLINFO_FEC) +#define F_FW_PORT_AUXLINFO_FEC V_FW_PORT_AUXLINFO_FEC(1U) + +#define S_FW_PORT_RCAP_AUX 11 +#define M_FW_PORT_RCAP_AUX 0x7 +#define V_FW_PORT_RCAP_AUX(x) ((x) << S_FW_PORT_RCAP_AUX) +#define G_FW_PORT_RCAP_AUX(x) \ + (((x) >> S_FW_PORT_RCAP_AUX) & M_FW_PORT_RCAP_AUX) + #define S_FW_PORT_CAP_SPEED 0 #define M_FW_PORT_CAP_SPEED 0x3f #define V_FW_PORT_CAP_SPEED(x) ((x) << S_FW_PORT_CAP_SPEED) @@ -4002,11 +4101,23 @@ enum fw_port_l2cfg_ctlbf { FW_PORT_L2_CTLBF_MTU = 0x40 }; +enum fw_port_dcb_cfg { + FW_PORT_DCB_CFG_PG = 0x01, + FW_PORT_DCB_CFG_PFC = 0x02, + FW_PORT_DCB_CFG_APPL = 0x04 +}; + +enum fw_port_dcb_cfg_rc { + FW_PORT_DCB_CFG_SUCCESS = 0x0, + FW_PORT_DCB_CFG_ERROR = 0x1 +}; + enum fw_port_dcb_type { FW_PORT_DCB_TYPE_PGID = 0x00, FW_PORT_DCB_TYPE_PGRATE = 0x01, FW_PORT_DCB_TYPE_PRIORATE = 0x02, - FW_PORT_DCB_TYPE_PFC = 0x03 + FW_PORT_DCB_TYPE_PFC = 0x03, + FW_PORT_DCB_TYPE_APP_ID = 0x04, }; struct fw_port_cmd { @@ -4038,7 +4149,7 @@ struct fw_port_cmd { __be16 acap; __be16 mtu; __u8 cbllen; - __u8 r7; + __u8 auxlinfo; __be32 r8; __be64 r9; } info; @@ -4068,6 +4179,14 @@ struct fw_port_cmd { __be16 r10[3]; __be64 r11; } pfc; + struct fw_port_app_priority { + __u8 type; + __u8 r10_lo[3]; + __u8 prio; + __u8 sel; + __be16 protocolid; + __u8 r12[8]; + } app_priority; } dcb; } u; }; @@ -5232,6 +5351,116 @@ struct fw_devlog_cmd { (((x) >> S_FW_DEVLOG_CMD_MEMADDR16_DEVLOG) & \ M_FW_DEVLOG_CMD_MEMADDR16_DEVLOG) +struct fw_netif_cmd { + __be32 op_portid; + __be32 retval_to_len16; + __be32 add_to_ipv4gw; + __be32 vlanid_mtuval; + __be32 gwaddr; + __be32 addr; + 
__be32 nmask; + __be32 bcaddr; +}; + +#define S_FW_NETIF_CMD_PORTID 0 +#define M_FW_NETIF_CMD_PORTID 0xf +#define V_FW_NETIF_CMD_PORTID(x) ((x) << S_FW_NETIF_CMD_PORTID) +#define G_FW_NETIF_CMD_PORTID(x) \ + (((x) >> S_FW_NETIF_CMD_PORTID) & M_FW_NETIF_CMD_PORTID) + +#define S_FW_NETIF_CMD_RETVAL 24 +#define M_FW_NETIF_CMD_RETVAL 0xff +#define V_FW_NETIF_CMD_RETVAL(x) ((x) << S_FW_NETIF_CMD_RETVAL) +#define G_FW_NETIF_CMD_RETVAL(x) \ + (((x) >> S_FW_NETIF_CMD_RETVAL) & M_FW_NETIF_CMD_RETVAL) + +#define S_FW_NETIF_CMD_IFIDX 16 +#define M_FW_NETIF_CMD_IFIDX 0xff +#define V_FW_NETIF_CMD_IFIDX(x) ((x) << S_FW_NETIF_CMD_IFIDX) +#define G_FW_NETIF_CMD_IFIDX(x) \ + (((x) >> S_FW_NETIF_CMD_IFIDX) & M_FW_NETIF_CMD_IFIDX) + +#define S_FW_NETIF_CMD_LEN16 0 +#define M_FW_NETIF_CMD_LEN16 0xff +#define V_FW_NETIF_CMD_LEN16(x) ((x) << S_FW_NETIF_CMD_LEN16) +#define G_FW_NETIF_CMD_LEN16(x) \ + (((x) >> S_FW_NETIF_CMD_LEN16) & M_FW_NETIF_CMD_LEN16) + +#define S_FW_NETIF_CMD_ADD 31 +#define M_FW_NETIF_CMD_ADD 0x1 +#define V_FW_NETIF_CMD_ADD(x) ((x) << S_FW_NETIF_CMD_ADD) +#define G_FW_NETIF_CMD_ADD(x) \ + (((x) >> S_FW_NETIF_CMD_ADD) & M_FW_NETIF_CMD_ADD) +#define F_FW_NETIF_CMD_ADD V_FW_NETIF_CMD_ADD(1U) + +#define S_FW_NETIF_CMD_LINK 30 +#define M_FW_NETIF_CMD_LINK 0x1 +#define V_FW_NETIF_CMD_LINK(x) ((x) << S_FW_NETIF_CMD_LINK) +#define G_FW_NETIF_CMD_LINK(x) \ + (((x) >> S_FW_NETIF_CMD_LINK) & M_FW_NETIF_CMD_LINK) +#define F_FW_NETIF_CMD_LINK V_FW_NETIF_CMD_LINK(1U) + +#define S_FW_NETIF_CMD_VLAN 29 +#define M_FW_NETIF_CMD_VLAN 0x1 +#define V_FW_NETIF_CMD_VLAN(x) ((x) << S_FW_NETIF_CMD_VLAN) +#define G_FW_NETIF_CMD_VLAN(x) \ + (((x) >> S_FW_NETIF_CMD_VLAN) & M_FW_NETIF_CMD_VLAN) +#define F_FW_NETIF_CMD_VLAN V_FW_NETIF_CMD_VLAN(1U) + +#define S_FW_NETIF_CMD_MTU 28 +#define M_FW_NETIF_CMD_MTU 0x1 +#define V_FW_NETIF_CMD_MTU(x) ((x) << S_FW_NETIF_CMD_MTU) +#define G_FW_NETIF_CMD_MTU(x) \ + (((x) >> S_FW_NETIF_CMD_MTU) & M_FW_NETIF_CMD_MTU) +#define F_FW_NETIF_CMD_MTU 
V_FW_NETIF_CMD_MTU(1U) + +#define S_FW_NETIF_CMD_DHCP 27 +#define M_FW_NETIF_CMD_DHCP 0x1 +#define V_FW_NETIF_CMD_DHCP(x) ((x) << S_FW_NETIF_CMD_DHCP) +#define G_FW_NETIF_CMD_DHCP(x) \ + (((x) >> S_FW_NETIF_CMD_DHCP) & M_FW_NETIF_CMD_DHCP) +#define F_FW_NETIF_CMD_DHCP V_FW_NETIF_CMD_DHCP(1U) + +#define S_FW_NETIF_CMD_IPV4BCADDR 3 +#define M_FW_NETIF_CMD_IPV4BCADDR 0x1 +#define V_FW_NETIF_CMD_IPV4BCADDR(x) ((x) << S_FW_NETIF_CMD_IPV4BCADDR) +#define G_FW_NETIF_CMD_IPV4BCADDR(x) \ + (((x) >> S_FW_NETIF_CMD_IPV4BCADDR) & M_FW_NETIF_CMD_IPV4BCADDR) +#define F_FW_NETIF_CMD_IPV4BCADDR V_FW_NETIF_CMD_IPV4BCADDR(1U) + +#define S_FW_NETIF_CMD_IPV4NMASK 2 +#define M_FW_NETIF_CMD_IPV4NMASK 0x1 +#define V_FW_NETIF_CMD_IPV4NMASK(x) ((x) << S_FW_NETIF_CMD_IPV4NMASK) +#define G_FW_NETIF_CMD_IPV4NMASK(x) \ + (((x) >> S_FW_NETIF_CMD_IPV4NMASK) & M_FW_NETIF_CMD_IPV4NMASK) +#define F_FW_NETIF_CMD_IPV4NMASK V_FW_NETIF_CMD_IPV4NMASK(1U) + +#define S_FW_NETIF_CMD_IPV4ADDR 1 +#define M_FW_NETIF_CMD_IPV4ADDR 0x1 +#define V_FW_NETIF_CMD_IPV4ADDR(x) ((x) << S_FW_NETIF_CMD_IPV4ADDR) +#define G_FW_NETIF_CMD_IPV4ADDR(x) \ + (((x) >> S_FW_NETIF_CMD_IPV4ADDR) & M_FW_NETIF_CMD_IPV4ADDR) +#define F_FW_NETIF_CMD_IPV4ADDR V_FW_NETIF_CMD_IPV4ADDR(1U) + +#define S_FW_NETIF_CMD_IPV4GW 0 +#define M_FW_NETIF_CMD_IPV4GW 0x1 +#define V_FW_NETIF_CMD_IPV4GW(x) ((x) << S_FW_NETIF_CMD_IPV4GW) +#define G_FW_NETIF_CMD_IPV4GW(x) \ + (((x) >> S_FW_NETIF_CMD_IPV4GW) & M_FW_NETIF_CMD_IPV4GW) +#define F_FW_NETIF_CMD_IPV4GW V_FW_NETIF_CMD_IPV4GW(1U) + +#define S_FW_NETIF_CMD_VLANID 16 +#define M_FW_NETIF_CMD_VLANID 0xfff +#define V_FW_NETIF_CMD_VLANID(x) ((x) << S_FW_NETIF_CMD_VLANID) +#define G_FW_NETIF_CMD_VLANID(x) \ + (((x) >> S_FW_NETIF_CMD_VLANID) & M_FW_NETIF_CMD_VLANID) + +#define S_FW_NETIF_CMD_MTUVAL 0 +#define M_FW_NETIF_CMD_MTUVAL 0xffff +#define V_FW_NETIF_CMD_MTUVAL(x) ((x) << S_FW_NETIF_CMD_MTUVAL) +#define G_FW_NETIF_CMD_MTUVAL(x) \ + (((x) >> S_FW_NETIF_CMD_MTUVAL) & M_FW_NETIF_CMD_MTUVAL) + enum 
fw_error_type { FW_ERROR_TYPE_EXCEPTION = 0x0, FW_ERROR_TYPE_HWMODULE = 0x1, diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h index fa58853..f31b840 100644 --- a/sys/dev/cxgbe/offload.h +++ b/sys/dev/cxgbe/offload.h @@ -31,6 +31,24 @@ #ifndef __T4_OFFLOAD_H__ #define __T4_OFFLOAD_H__ +/* CPL message priority levels */ +enum { + CPL_PRIORITY_DATA = 0, /* data messages */ + CPL_PRIORITY_SETUP = 1, /* connection setup messages */ + CPL_PRIORITY_TEARDOWN = 0, /* connection teardown messages */ + CPL_PRIORITY_LISTEN = 1, /* listen start/stop messages */ + CPL_PRIORITY_ACK = 1, /* RX ACK messages */ + CPL_PRIORITY_CONTROL = 1 /* control messages */ +}; + +#define INIT_TP_WR(w, tid) do { \ + (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \ + V_FW_WR_IMMDLEN(sizeof(*w) - sizeof(w->wr))); \ + (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*w), 16)) | \ + V_FW_WR_FLOWID(tid)); \ + (w)->wr.wr_lo = cpu_to_be64(0); \ +} while (0) + /* * Max # of ATIDs. The absolute HW max is 16K but we keep it lower. 
*/ diff --git a/sys/dev/cxgbe/osdep.h b/sys/dev/cxgbe/osdep.h index 438a434..bde1eb4 100644 --- a/sys/dev/cxgbe/osdep.h +++ b/sys/dev/cxgbe/osdep.h @@ -82,6 +82,7 @@ typedef boolean_t bool; #define DIV_ROUND_UP(x, y) howmany(x, y) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define container_of(p, s, f) ((s *)(((uint8_t *)(p)) - offsetof(s, f))) #define swab16(x) bswap16(x) #define swab32(x) bswap32(x) diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h index 8f1d133..ecc2c3d 100644 --- a/sys/dev/cxgbe/t4_ioctl.h +++ b/sys/dev/cxgbe/t4_ioctl.h @@ -46,6 +46,7 @@ enum { T4_GET_FILTER, /* get information about a filter */ T4_SET_FILTER, /* program a filter */ T4_DEL_FILTER, /* delete a filter */ + T4_GET_SGE_CONTEXT, /* get SGE context for a queue */ }; struct t4_reg { @@ -178,10 +179,26 @@ struct t4_filter_specification { struct t4_filter { uint32_t idx; + uint16_t l2tidx; + uint16_t smtidx; uint64_t hits; struct t4_filter_specification fs; }; +#define T4_SGE_CONTEXT_SIZE 24 +enum { + SGE_CONTEXT_EGRESS, + SGE_CONTEXT_INGRESS, + SGE_CONTEXT_FLM, + SGE_CONTEXT_CNM +}; + +struct t4_sge_context { + uint32_t mem_id; + uint32_t cid; + uint32_t data[T4_SGE_CONTEXT_SIZE / 4]; +}; + #define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg) #define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg) #define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump) @@ -190,4 +207,6 @@ struct t4_filter { #define CHELSIO_T4_GET_FILTER _IOWR('f', T4_GET_FILTER, struct t4_filter) #define CHELSIO_T4_SET_FILTER _IOW('f', T4_SET_FILTER, struct t4_filter) #define CHELSIO_T4_DEL_FILTER _IOW('f', T4_DEL_FILTER, struct t4_filter) +#define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \ + struct t4_sge_context) #endif diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c new file mode 100644 index 0000000..31197b8 --- /dev/null +++ b/sys/dev/cxgbe/t4_l2t.c @@ -0,0 +1,361 @@ +/*- + * Copyright (c) 2011 Chelsio Communications, Inc. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <net/if.h> +#include <net/ethernet.h> +#include <net/if_vlan_var.h> +#include <net/if_dl.h> +#include <net/if_llatbl.h> +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/if_ether.h> + +#include "common/common.h" +#include "common/jhash.h" +#include "common/t4_msg.h" +#include "offload.h" +#include "t4_l2t.h" + +/* identifies sync vs async L2T_WRITE_REQs */ +#define S_SYNC_WR 12 +#define V_SYNC_WR(x) ((x) << S_SYNC_WR) +#define F_SYNC_WR V_SYNC_WR(1) + +enum { + L2T_STATE_VALID, /* entry is up to date */ + L2T_STATE_STALE, /* entry may be used but needs revalidation */ + L2T_STATE_RESOLVING, /* entry needs address resolution */ + L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */ + + /* when state is one of the below the entry is not hashed */ + L2T_STATE_SWITCHING, /* entry is being used by a switching filter */ + L2T_STATE_UNUSED /* entry not in use */ +}; + +struct l2t_data { + struct rwlock lock; + volatile int nfree; /* number of free entries */ + struct l2t_entry *rover;/* starting point for next allocation */ + struct l2t_entry l2tab[L2T_SIZE]; +}; + +/* + * Module locking notes: There is a RW lock protecting the L2 table as a + * whole plus a spinlock per L2T entry. Entry lookups and allocations happen + * under the protection of the table lock, individual entry changes happen + * while holding that entry's spinlock. The table lock nests outside the + * entry locks. Allocations of new entries take the table lock as writers so + * no other lookups can happen while allocating new entries. Entry updates + * take the table lock as readers so multiple entries can be updated in + * parallel. 
An L2T entry can be dropped by decrementing its reference count + * and therefore can happen in parallel with entry allocation but no entry + * can change state or increment its ref count during allocation as both of + * these perform lookups. + * + * Note: We do not take references to ifnets in this module because both + * the TOE and the sockets already hold references to the interfaces and the + * lifetime of an L2T entry is fully contained in the lifetime of the TOE. + */ +static inline unsigned int +vlan_prio(const struct l2t_entry *e) +{ + return e->vlan >> 13; +} + +static inline void +l2t_hold(struct l2t_data *d, struct l2t_entry *e) +{ + if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ + atomic_add_int(&d->nfree, -1); +} + +/* + * To avoid having to check address families we do not allow v4 and v6 + * neighbors to be on the same hash chain. We keep v4 entries in the first + * half of available hash buckets and v6 in the second. + */ +enum { + L2T_SZ_HALF = L2T_SIZE / 2, + L2T_HASH_MASK = L2T_SZ_HALF - 1 +}; + +static inline unsigned int +arp_hash(const uint32_t *key, int ifindex) +{ + return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK; +} + +static inline unsigned int +ipv6_hash(const uint32_t *key, int ifindex) +{ + uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3]; + + return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK); +} + +static inline unsigned int +addr_hash(const uint32_t *addr, int addr_len, int ifindex) +{ + return addr_len == 4 ? arp_hash(addr, ifindex) : + ipv6_hash(addr, ifindex); +} + +/* + * Checks if an L2T entry is for the given IP/IPv6 address. It does not check + * whether the L2T entry and the address are of the same address family. + * Callers ensure an address is only checked against L2T entries of the same + * family, something made trivial by the separation of IP and IPv6 hash chains + * mentioned above.
Returns 0 if there's a match, nonzero otherwise. + */ +static inline int +addreq(const struct l2t_entry *e, const uint32_t *addr) +{ + if (e->v6) + return (e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) | + (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]); + return e->addr[0] ^ addr[0]; +} + +/* + * Write an L2T entry. Must be called with the entry locked (XXX: really?).
+ */ +static struct l2t_entry * +alloc_l2e(struct l2t_data *d) +{ + struct l2t_entry *end, *e, **p; + + rw_assert(&d->lock, RA_WLOCKED); + + if (!atomic_load_acq_int(&d->nfree)) + return (NULL); + + /* there's definitely a free entry */ + for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e) + if (atomic_load_acq_int(&e->refcnt) == 0) + goto found; + + for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ; +found: + d->rover = e + 1; + atomic_add_int(&d->nfree, -1); + + /* + * The entry we found may be an inactive entry that is + * presently in the hash table. We need to remove it. + */ + if (e->state < L2T_STATE_SWITCHING) { + for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) { + if (*p == e) { + *p = e->next; + e->next = NULL; + break; + } + } + } + + e->state = L2T_STATE_UNUSED; + return e; +} + +/* + * Called when an L2T entry has no more users. The entry is left in the hash + * table since it is likely to be reused but we also bump nfree to indicate + * that the entry can be reallocated for a different neighbor. We also drop + * the existing neighbor reference in case the neighbor is going away and is + * waiting on our reference. + * + * Because entries can be reallocated to other neighbors once their ref count + * drops to 0 we need to take the entry's lock to avoid races with a new + * incarnation. + */ +static void +t4_l2e_free(struct l2t_entry *e) +{ + struct llentry *lle = NULL; + struct l2t_data *d; + + mtx_lock(&e->lock); + if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */ + lle = e->lle; + e->lle = NULL; + /* + * Don't need to worry about the arpq, an L2T entry can't be + * released if any packets are waiting for resolution as we + * need to be able to communicate with the device to close a + * connection. 
+ */ + } + mtx_unlock(&e->lock); + + d = container_of(e, struct l2t_data, l2tab[e->idx]); + atomic_add_int(&d->nfree, 1); + + if (lle) + LLE_FREE(lle); +} + +void +t4_l2t_release(struct l2t_entry *e) +{ + if (atomic_fetchadd_int(&e->refcnt, -1) == 1) + t4_l2e_free(e); +} + +/* + * Allocate an L2T entry for use by a switching rule. Such need to be + * explicitly freed and while busy they are not on any hash chain, so normal + * address resolution updates do not see them. + */ +struct l2t_entry * +t4_l2t_alloc_switching(struct l2t_data *d) +{ + struct l2t_entry *e; + + rw_rlock(&d->lock); + e = alloc_l2e(d); + if (e) { + mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ + e->state = L2T_STATE_SWITCHING; + atomic_store_rel_int(&e->refcnt, 1); + mtx_unlock(&e->lock); + } + rw_runlock(&d->lock); + return e; +} + +/* + * Sets/updates the contents of a switching L2T entry that has been allocated + * with an earlier call to @t4_l2t_alloc_switching. + */ +int +t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan, + uint8_t port, uint8_t *eth_addr) +{ + e->vlan = vlan; + e->lport = port; + memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN); + return write_l2e(sc, e, 0); +} + +struct l2t_data * +t4_init_l2t(int flags) +{ + int i; + struct l2t_data *d; + + d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags); + if (!d) + return (NULL); + + d->rover = d->l2tab; + atomic_store_rel_int(&d->nfree, L2T_SIZE); + rw_init(&d->lock, "L2T"); + + for (i = 0; i < L2T_SIZE; i++) { + d->l2tab[i].idx = i; + d->l2tab[i].state = L2T_STATE_UNUSED; + mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF); + atomic_store_rel_int(&d->l2tab[i].refcnt, 0); + } + + return (d); +} + +int +t4_free_l2t(struct l2t_data *d) +{ + int i; + + for (i = 0; i < L2T_SIZE; i++) + mtx_destroy(&d->l2tab[i].lock); + rw_destroy(&d->lock); + free(d, M_CXGBE); + + return (0); +} diff --git a/sys/dev/cxgbe/t4_l2t.h b/sys/dev/cxgbe/t4_l2t.h new file mode 100644 index 0000000..c5520c6 --- /dev/null +++ 
b/sys/dev/cxgbe/t4_l2t.h @@ -0,0 +1,71 @@ +/*- + * Copyright (c) 2011 Chelsio Communications, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef __T4_L2T_H +#define __T4_L2T_H + +enum { L2T_SIZE = 4096 }; /* # of L2T entries */ + +/* + * Each L2T entry plays multiple roles. First of all, it keeps state for the + * corresponding entry of the HW L2 table and maintains a queue of offload + * packets awaiting address resolution. Second, it is a node of a hash table + * chain, where the nodes of the chain are linked together through their next + * pointer. Finally, each node is a bucket of a hash table, pointing to the + * first element in its chain through its first pointer. 
+ */ +struct l2t_entry { + uint16_t state; /* entry state */ + uint16_t idx; /* entry index */ + uint32_t addr[4]; /* next hop IP or IPv6 address */ + struct ifnet *ifp; /* outgoing interface */ + uint16_t smt_idx; /* SMT index */ + uint16_t vlan; /* VLAN TCI (id: 0-11, prio: 13-15) */ + int ifindex; /* interface index */ + struct llentry *lle; /* llentry for next hop */ + struct l2t_entry *first; /* start of hash chain */ + struct l2t_entry *next; /* next l2t_entry on chain */ + struct mbuf *arpq_head; /* list of mbufs awaiting resolution */ + struct mbuf *arpq_tail; + struct mtx lock; + volatile uint32_t refcnt; /* entry reference count */ + uint16_t hash; /* hash bucket the entry is on */ + uint8_t v6; /* whether entry is for IPv6 */ + uint8_t lport; /* associated offload logical port */ + uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */ +}; + +struct l2t_data *t4_init_l2t(int); +int t4_free_l2t(struct l2t_data *); +struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *); +int t4_l2t_set_switching(struct adapter *, struct l2t_entry *, uint16_t, + uint8_t, uint8_t *); +void t4_l2t_release(struct l2t_entry *); + +#endif /* __T4_L2T_H */ diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 469af8d..18b813d 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include "common/t4_regs_values.h" #include "common/t4fw_interface.h" #include "t4_ioctl.h" +#include "t4_l2t.h" /* T4 bus driver interface */ static int t4_probe(device_t); @@ -213,12 +214,12 @@ SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &intr_types, 0, "interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively)"); /* - * Force the driver to use interrupt forwarding. + * Force the driver to use the same set of interrupts for all ports. 
*/ -static int intr_fwd = 0; -TUNABLE_INT("hw.cxgbe.interrupt_forwarding", &intr_fwd); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupt_forwarding, CTLFLAG_RDTUN, - &intr_fwd, 0, "always use forwarded interrupts"); +static int intr_shared = 0; +TUNABLE_INT("hw.cxgbe.interrupts_shared", &intr_shared); +SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupts_shared, CTLFLAG_RDTUN, + &intr_shared, 0, "interrupts shared between all ports"); static unsigned int filter_mode = HW_TPL_FR_MT_PR_IV_P_FC; TUNABLE_INT("hw.cxgbe.filter_mode", &filter_mode); @@ -228,7 +229,7 @@ SYSCTL_UINT(_hw_cxgbe, OID_AUTO, filter_mode, CTLFLAG_RDTUN, struct intrs_and_queues { int intr_type; /* INTx, MSI, or MSI-X */ int nirq; /* Number of vectors */ - int intr_fwd; /* Interrupts forwarded */ + int intr_shared; /* Interrupts shared between all ports */ int ntxq10g; /* # of NIC txq's for each 10G port */ int nrxq10g; /* # of NIC rxq's for each 10G port */ int ntxq1g; /* # of NIC txq's for each 1G port */ @@ -240,6 +241,7 @@ struct filter_entry { uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; @@ -269,6 +271,7 @@ static void setup_memwin(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); +static int get_devlog_params(struct adapter *, struct devlog_params *); static int get_capabilities(struct adapter *, struct fw_caps_config_cmd *); static int get_params(struct adapter *, struct fw_caps_config_cmd *); static void t4_set_desc(struct adapter *); @@ -295,19 +298,22 @@ static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); +static 
int sysctl_devlog(SYSCTL_HANDLER_ARGS); static inline void txq_start(struct ifnet *, struct sge_txq *); static uint32_t fconf_to_mode(uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t fspec_to_fconf(struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); +static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); -static void clear_filter(struct adapter *, struct filter_entry *); +static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *); +static int get_sge_context(struct adapter *, struct t4_sge_context *); static int t4_mod_event(module_t, int, void *); struct t4_pciids { @@ -400,6 +406,9 @@ t4_attach(device_t dev) if (rc != 0) goto done; /* error message displayed already */ + /* Read firmware devlog parameters */ + (void) get_devlog_params(sc, &sc->params.devlog); + /* Get device capabilities and select which ones we'll use */ rc = get_capabilities(sc, &caps); if (rc != 0) { @@ -484,6 +493,8 @@ t4_attach(device_t dev) V_RXTSHIFTMAXR2(15) | V_PERSHIFTBACKOFFMAX(8) | V_PERSHIFTMAX(8) | V_KEEPALIVEMAXR1(4) | V_KEEPALIVEMAXR2(9)); t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); + t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 | + F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0); setup_memwin(sc); @@ -514,8 +525,8 @@ t4_attach(device_t dev) device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi, M_CXGBE); - sc->port[i] = NULL; /* indicates init failed */ - continue; + sc->port[i] = NULL; + goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", @@ -582,15 
+593,15 @@ t4_attach(device_t dev) s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ - s->neq += NCHAN; /* control queues, 1 per hw channel */ + s->neq += sc->params.nports; /* control queues, 1 per port */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ - if (iaq.intr_fwd) { - sc->flags |= INTR_FWD; - s->niq += NFIQ(sc); /* forwarded interrupt queues */ - s->fiq = malloc(NFIQ(sc) * sizeof(struct sge_iq), M_CXGBE, - M_ZERO | M_WAITOK); - } - s->ctrlq = malloc(NCHAN * sizeof(struct sge_ctrlq), M_CXGBE, + if (iaq.intr_shared) + sc->flags |= INTR_SHARED; + s->niq += NINTRQ(sc); /* interrupt queues */ + + s->intrq = malloc(NINTRQ(sc) * sizeof(struct sge_iq), M_CXGBE, + M_ZERO | M_WAITOK); + s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_ctrlq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); @@ -604,6 +615,8 @@ t4_attach(device_t dev) sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); + sc->l2t = t4_init_l2t(M_WAITOK); + t4_sysctls(sc); /* @@ -691,11 +704,14 @@ t4_detach(device_t dev) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); + if (sc->l2t) + t4_free_l2t(sc->l2t); + free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); - free(sc->sge.fiq, M_CXGBE); + free(sc->sge.intrq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); @@ -1231,33 +1247,32 @@ cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, nrxq10g = min(nc, max_nrxq_10g); nrxq1g = min(nc, max_nrxq_1g); - /* Extra 2 is for a) error interrupt b) firmware event */ - iaq->nirq = n10g * nrxq10g + n1g * nrxq1g + 2; - if (iaq->nirq <= navail && intr_fwd == 0) { + iaq->nirq = n10g * nrxq10g + n1g * nrxq1g + T4_EXTRA_INTR; + if (iaq->nirq <= navail && 
intr_shared == 0) { if (itype == INTR_MSI && !powerof2(iaq->nirq)) - goto fwd; + goto share; /* One for err, one for fwq, and one for each rxq */ - iaq->intr_fwd = 0; + iaq->intr_shared = 0; iaq->nrxq10g = nrxq10g; iaq->nrxq1g = nrxq1g; } else { -fwd: - iaq->intr_fwd = 1; +share: + iaq->intr_shared = 1; - if (navail > nc) { + if (navail >= nc + T4_EXTRA_INTR) { if (itype == INTR_MSIX) - navail = nc + 1; + navail = nc + T4_EXTRA_INTR; /* navail is and must remain a pow2 for MSI */ if (itype == INTR_MSI) { KASSERT(powerof2(navail), ("%d not power of 2", navail)); - while (navail / 2 > nc) + while (navail / 2 >= nc + T4_EXTRA_INTR) navail /= 2; } } @@ -1290,7 +1305,7 @@ fwd: * the kernel is willing to allocate (it's in navail). */ pci_release_msi(sc->dev); - goto fwd; + goto share; } device_printf(sc->dev, @@ -1414,6 +1429,34 @@ prep_firmware(struct adapter *sc) } static int +get_devlog_params(struct adapter *sc, struct devlog_params *dlog) +{ + struct fw_devlog_cmd devlog_cmd; + uint32_t meminfo; + int rc; + + bzero(&devlog_cmd, sizeof(devlog_cmd)); + devlog_cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) | + F_FW_CMD_REQUEST | F_FW_CMD_READ); + devlog_cmd.retval_len16 = htobe32(FW_LEN16(devlog_cmd)); + rc = -t4_wr_mbox(sc, sc->mbox, &devlog_cmd, sizeof(devlog_cmd), + &devlog_cmd); + if (rc != 0) { + device_printf(sc->dev, + "failed to get devlog parameters: %d.\n", rc); + bzero(dlog, sizeof (*dlog)); + return (rc); + } + + meminfo = be32toh(devlog_cmd.memtype_devlog_memaddr16_devlog); + dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(meminfo); + dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(meminfo) << 4; + dlog->size = be32toh(devlog_cmd.memsize_devlog); + + return (0); +} + +static int get_capabilities(struct adapter *sc, struct fw_caps_config_cmd *caps) { int rc; @@ -1923,16 +1966,18 @@ cxgbe_uninit_synchronized(struct port_info *pi) return (0); } -#define T4_ALLOC_IRQ(sc, irqid, rid, handler, arg, name) do { \ - rc = t4_alloc_irq(sc, &sc->irq[irqid], rid, 
handler, arg, name); \ +#define T4_ALLOC_IRQ(sc, irq, rid, handler, arg, name) do { \ + rc = t4_alloc_irq(sc, irq, rid, handler, arg, name); \ if (rc != 0) \ goto done; \ } while (0) static int first_port_up(struct adapter *sc) { - int rc, i; - char name[8]; + int rc, i, rid, p, q; + char s[8]; + struct irq *irq; + struct sge_iq *intrq; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); @@ -1946,39 +1991,52 @@ first_port_up(struct adapter *sc) /* * Setup interrupts. */ + irq = &sc->irq[0]; + rid = sc->intr_type == INTR_INTX ? 0 : 1; if (sc->intr_count == 1) { - KASSERT(sc->flags & INTR_FWD, - ("%s: single interrupt but not forwarded?", __func__)); - T4_ALLOC_IRQ(sc, 0, 0, t4_intr_all, sc, "all"); + KASSERT(sc->flags & INTR_SHARED, + ("%s: single interrupt but not shared?", __func__)); + + T4_ALLOC_IRQ(sc, irq, rid, t4_intr_all, sc, "all"); } else { /* Multiple interrupts. The first one is always error intr */ - T4_ALLOC_IRQ(sc, 0, 1, t4_intr_err, sc, "err"); - - if (sc->flags & INTR_FWD) { - /* The rest are shared by the fwq and all data intr */ - for (i = 1; i < sc->intr_count; i++) { - snprintf(name, sizeof(name), "mux%d", i - 1); - T4_ALLOC_IRQ(sc, i, i + 1, t4_intr_fwd, - &sc->sge.fiq[i - 1], name); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr_err, sc, "err"); + irq++; + rid++; + + /* Firmware event queue normally has an interrupt of its own */ + if (sc->intr_count > T4_EXTRA_INTR) { + T4_ALLOC_IRQ(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, + "evt"); + irq++; + rid++; + } + + intrq = &sc->sge.intrq[0]; + if (sc->flags & INTR_SHARED) { + + /* All ports share these interrupt queues */ + + for (i = 0; i < NINTRQ(sc); i++) { + snprintf(s, sizeof(s), "*.%d", i); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr, intrq, s); + irq++; + rid++; + intrq++; } } else { - struct port_info *pi; - int p, q; - T4_ALLOC_IRQ(sc, 1, 2, t4_intr_evt, &sc->sge.fwq, - "evt"); + /* Each port has its own set of interrupt queues */ - p = q = 0; - pi = sc->port[p]; - for (i = 2; i < sc->intr_count; i++) { - snprintf(name, 
sizeof(name), "p%dq%d", p, q); - if (++q >= pi->nrxq) { - p++; - q = 0; - pi = sc->port[p]; + for (p = 0; p < sc->params.nports; p++) { + for (q = 0; q < sc->port[p]->nrxq; q++) { + snprintf(s, sizeof(s), "%d.%d", p, q); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr, + intrq, s); + irq++; + rid++; + intrq++; } - T4_ALLOC_IRQ(sc, i, i + 1, t4_intr_data, - &sc->sge.rxq[i - 2], name); } } } @@ -2366,6 +2424,10 @@ t4_sysctls(struct adapter *sc) CTLTYPE_STRING | CTLFLAG_RD, &intr_pktcount, sizeof(intr_pktcount), sysctl_int_array, "A", "interrupt holdoff packet counter values"); + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_devlog, "A", "device log"); + return (0); } @@ -2709,6 +2771,120 @@ sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) return (sysctl_handle_64(oidp, &val, 0, req)); } +const char *devlog_level_strings[] = { + [FW_DEVLOG_LEVEL_EMERG] = "EMERG", + [FW_DEVLOG_LEVEL_CRIT] = "CRIT", + [FW_DEVLOG_LEVEL_ERR] = "ERR", + [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", + [FW_DEVLOG_LEVEL_INFO] = "INFO", + [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" +}; + +const char *devlog_facility_strings[] = { + [FW_DEVLOG_FACILITY_CORE] = "CORE", + [FW_DEVLOG_FACILITY_SCHED] = "SCHED", + [FW_DEVLOG_FACILITY_TIMER] = "TIMER", + [FW_DEVLOG_FACILITY_RES] = "RES", + [FW_DEVLOG_FACILITY_HW] = "HW", + [FW_DEVLOG_FACILITY_FLR] = "FLR", + [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", + [FW_DEVLOG_FACILITY_PHY] = "PHY", + [FW_DEVLOG_FACILITY_MAC] = "MAC", + [FW_DEVLOG_FACILITY_PORT] = "PORT", + [FW_DEVLOG_FACILITY_VI] = "VI", + [FW_DEVLOG_FACILITY_FILTER] = "FILTER", + [FW_DEVLOG_FACILITY_ACL] = "ACL", + [FW_DEVLOG_FACILITY_TM] = "TM", + [FW_DEVLOG_FACILITY_QFC] = "QFC", + [FW_DEVLOG_FACILITY_DCB] = "DCB", + [FW_DEVLOG_FACILITY_ETH] = "ETH", + [FW_DEVLOG_FACILITY_OFLD] = "OFLD", + [FW_DEVLOG_FACILITY_RI] = "RI", + [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", + [FW_DEVLOG_FACILITY_FCOE] = "FCOE", + [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", + [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE" 
+}; + +static int +sysctl_devlog(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct devlog_params *dparams = &sc->params.devlog; + struct fw_devlog_e *buf, *e; + int i, j, rc, nentries, first = 0; + struct sbuf *sb; + uint64_t ftstamp = UINT64_MAX; + + if (dparams->start == 0) + return (ENXIO); + + nentries = dparams->size / sizeof(struct fw_devlog_e); + + buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); + if (buf == NULL) + return (ENOMEM); + + rc = -t4_mem_read(sc, dparams->memtype, dparams->start, dparams->size, + (void *)buf); + if (rc != 0) + goto done; + + for (i = 0; i < nentries; i++) { + e = &buf[i]; + + if (e->timestamp == 0) + break; /* end */ + + e->timestamp = be64toh(e->timestamp); + e->seqno = be32toh(e->seqno); + for (j = 0; j < 8; j++) + e->params[j] = be32toh(e->params[j]); + + if (e->timestamp < ftstamp) { + ftstamp = e->timestamp; + first = i; + } + } + + if (buf[first].timestamp == 0) + goto done; /* nothing in the log */ + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + goto done; + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + sbuf_printf(sb, "\n%10s %15s %8s %8s %s\n", + "Seq#", "Tstamp", "Level", "Facility", "Message"); + + i = first; + do { + e = &buf[i]; + if (e->timestamp == 0) + break; /* end */ + + sbuf_printf(sb, "%10d %15ju %8s %8s ", + e->seqno, e->timestamp, + (e->level < ARRAY_SIZE(devlog_level_strings) ? + devlog_level_strings[e->level] : "UNKNOWN"), + (e->facility < ARRAY_SIZE(devlog_facility_strings) ? 
+ devlog_facility_strings[e->facility] : "UNKNOWN")); + sbuf_printf(sb, e->fmt, e->params[0], e->params[1], + e->params[2], e->params[3], e->params[4], + e->params[5], e->params[6], e->params[7]); + + if (++i == nentries) + i = 0; + } while (i != first); + + rc = sbuf_finish(sb); + sbuf_delete(sb); +done: + free(buf, M_CXGBE); + return (rc); +} + static inline void txq_start(struct ifnet *ifp, struct sge_txq *txq) { @@ -2892,6 +3068,20 @@ done: return (rc); } +static inline uint64_t +get_filter_hits(struct adapter *sc, uint32_t fid) +{ + uint32_t tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); + uint64_t hits; + + t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 0), + tcb_base + (fid + sc->tids.ftid_base) * TCB_SIZE); + t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 0)); + hits = t4_read_reg64(sc, MEMWIN0_BASE + 16); + + return (be64toh(hits)); +} + static int get_filter(struct adapter *sc, struct t4_filter *t) { @@ -2913,8 +3103,13 @@ get_filter(struct adapter *sc, struct t4_filter *t) for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; + t->l2tidx = f->l2t ? f->l2t->idx : 0; + t->smtidx = f->smtidx; + if (f->fs.hitcnts) + t->hits = get_filter_hits(sc, t->idx); + else + t->hits = UINT64_MAX; t->fs = f->fs; - t->hits = 0; /* XXX implement */ return (0); } @@ -3034,11 +3229,12 @@ del_filter(struct adapter *sc, struct t4_filter *t) return (0); } -/* XXX: L2T */ static void -clear_filter(struct adapter *sc, struct filter_entry *f) +clear_filter(struct filter_entry *f) { - (void) sc; + if (f->l2t) + t4_l2t_release(f->l2t); + bzero(f, sizeof (*f)); } @@ -3053,8 +3249,18 @@ set_filter_wr(struct adapter *sc, int fidx) ADAPTER_LOCK_ASSERT_OWNED(sc); - if (f->fs.newdmac || f->fs.newvlan) - return (ENOTSUP); /* XXX: fix after L2T code */ + if (f->fs.newdmac || f->fs.newvlan) { + /* This filter needs an L2T entry; allocate one. 
*/ + f->l2t = t4_l2t_alloc_switching(sc->l2t); + if (f->l2t == NULL) + return (EAGAIN); + if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, + f->fs.dmac)) { + t4_l2t_release(f->l2t); + f->l2t = NULL; + return (ENOMEM); + } + } ftid = sc->tids.ftid_base + fidx; @@ -3089,7 +3295,7 @@ set_filter_wr(struct adapter *sc, int fidx) V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | - V_FW_FILTER_WR_L2TIX(0)); /* XXX: L2T */ + V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = @@ -3101,7 +3307,7 @@ set_filter_wr(struct adapter *sc, int fidx) V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | - V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); + V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.intrq[0].abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | @@ -3136,7 +3342,7 @@ set_filter_wr(struct adapter *sc, int fidx) if (rc != 0) { sc->tids.ftids_in_use--; m_freem(m); - clear_filter(sc, f); + clear_filter(f); } return (rc); } @@ -3161,7 +3367,7 @@ del_filter_wr(struct adapter *sc, int fidx) m->m_len = m->m_pkthdr.len = sizeof(*fwr); bzero(fwr, sizeof (*fwr)); - t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); + t4_mk_filtdelwr(ftid, fwr, sc->sge.intrq[0].abs_id); f->pending = 1; rc = t4_mgmt_tx(sc, m); @@ -3188,12 +3394,12 @@ filter_rpl(struct adapter *sc, const struct cpl_set_tcb_rpl *rpl) * Clear the filter when we get confirmation from the * hardware that the filter has been deleted. 
*/ - clear_filter(sc, f); + clear_filter(f); sc->tids.ftids_in_use--; } else if (rc == FW_FILTER_WR_SMT_TBL_FULL) { device_printf(sc->dev, "filter %u setup failed due to full SMT\n", idx); - clear_filter(sc, f); + clear_filter(f); sc->tids.ftids_in_use--; } else if (rc == FW_FILTER_WR_FLT_ADDED) { f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; @@ -3206,12 +3412,41 @@ filter_rpl(struct adapter *sc, const struct cpl_set_tcb_rpl *rpl) */ device_printf(sc->dev, "filter %u setup failed with error %u\n", idx, rc); - clear_filter(sc, f); + clear_filter(f); sc->tids.ftids_in_use--; } } } +static int +get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) +{ + int rc = EINVAL; + + if (cntxt->cid > M_CTXTQID) + return (rc); + + if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && + cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) + return (rc); + + if (sc->flags & FW_OK) { + ADAPTER_LOCK(sc); /* Avoid parallel t4_wr_mbox */ + rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, + &cntxt->data[0]); + ADAPTER_UNLOCK(sc); + } + + if (rc != 0) { + /* Read via firmware failed or wasn't even attempted */ + + rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, + &cntxt->data[0]); + } + + return (rc); +} + int t4_os_find_pci_capability(struct adapter *sc, int cap) { @@ -3375,6 +3610,9 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, rc = del_filter(sc, (struct t4_filter *)data); ADAPTER_UNLOCK(sc); break; + case CHELSIO_T4_GET_SGE_CONTEXT: + rc = get_sge_context(sc, (struct t4_sge_context *)data); + break; default: rc = EINVAL; } diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index a0ef172..b676799 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -91,6 +91,8 @@ struct sgl { bus_dma_segment_t seg[TX_SGL_SEGS]; }; +static void t4_evt_rx(void *); +static void t4_eth_rx(void *); static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int, int, iq_intr_handler_t *, 
char *); static inline void init_fl(struct sge_fl *, int, char *); @@ -102,8 +104,10 @@ static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *, int, int); static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *); -static int alloc_iq(struct sge_iq *, int); -static int free_iq(struct sge_iq *); +static int alloc_intrq(struct adapter *, int, int, int); +static int free_intrq(struct sge_iq *); +static int alloc_fwq(struct adapter *, int); +static int free_fwq(struct sge_iq *); static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int); static int free_rxq(struct port_info *, struct sge_rxq *); static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int); @@ -139,9 +143,10 @@ static void write_eqflush_wr(struct sge_eq *); static __be64 get_flit(bus_dma_segment_t *, int, int); static int handle_sge_egr_update(struct adapter *, const struct cpl_sge_egr_update *); +static void handle_cpl(struct adapter *, struct sge_iq *); static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *); -static int sysctl_abs_id(SYSCTL_HANDLER_ARGS); +static int sysctl_uint16(SYSCTL_HANDLER_ARGS); extern void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *); @@ -198,6 +203,9 @@ t4_sge_init(struct adapter *sc) FL_BUF_SIZE(i)); } + i = t4_read_reg(sc, A_SGE_CONM_CTRL); + s->fl_starve_threshold = G_EGRTHRESHOLD(i) * 2 + 1; + t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, V_THRESHOLD_0(s->counter_val[0]) | V_THRESHOLD_1(s->counter_val[1]) | @@ -243,8 +251,7 @@ t4_destroy_dma_tag(struct adapter *sc) /* * Allocate and initialize the firmware event queue, control queues, and the - * forwarded interrupt queues (if any). The adapter owns all these queues as - * they are not associated with any particular port. + * interrupt queues. The adapter owns all of these queues. * * Returns errno on failure. 
Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. @@ -252,8 +259,8 @@ t4_destroy_dma_tag(struct adapter *sc) int t4_setup_adapter_queues(struct adapter *sc) { - int i, rc; - struct sge_iq *iq, *fwq; + int i, j, rc, intr_idx, qsize; + struct sge_iq *iq; struct sge_ctrlq *ctrlq; iq_intr_handler_t *handler; char name[16]; @@ -264,47 +271,76 @@ t4_setup_adapter_queues(struct adapter *sc) struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); + sc->oid_fwq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, + "fwq", CTLFLAG_RD, NULL, "firmware event queue"); sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queues"); + sc->oid_intrq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, + "intrq", CTLFLAG_RD, NULL, "interrupt queues"); } - fwq = &sc->sge.fwq; - if (sc->flags & INTR_FWD) { - iq = &sc->sge.fiq[0]; - - /* - * Forwarded interrupt queues - allocate 1 if there's only 1 - * vector available, one less than the number of vectors - * otherwise (the first vector is reserved for the error - * interrupt in that case). - */ - i = sc->intr_count > 1 ? 
1 : 0; - for (; i < sc->intr_count; i++, iq++) { - - snprintf(name, sizeof(name), "%s fiq%d", + /* + * Interrupt queues + */ + intr_idx = sc->intr_count - NINTRQ(sc); + if (sc->flags & INTR_SHARED) { + qsize = max((sc->sge.nrxq + 1) * 2, INTR_IQ_QSIZE); + for (i = 0; i < NINTRQ(sc); i++, intr_idx++) { + snprintf(name, sizeof(name), "%s intrq%d", device_get_nameunit(sc->dev), i); - init_iq(iq, sc, 0, 0, (sc->sge.nrxq + 1) * 2, 16, NULL, - name); - rc = alloc_iq(iq, i); + iq = &sc->sge.intrq[i]; + init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, NULL, name); + rc = alloc_intrq(sc, i % sc->params.nports, i, + intr_idx); + if (rc != 0) { device_printf(sc->dev, - "failed to create fwd intr queue %d: %d\n", - i, rc); + "failed to create %s: %d\n", name, rc); return (rc); } } - - handler = t4_evt_rx; - i = 0; /* forward fwq's interrupt to the first fiq */ } else { - handler = NULL; - i = 1; /* fwq should use vector 1 (0 is used by error) */ + int qidx = 0; + struct port_info *pi; + + for (i = 0; i < sc->params.nports; i++) { + pi = sc->port[i]; + qsize = max((pi->nrxq + 1) * 2, INTR_IQ_QSIZE); + for (j = 0; j < pi->nrxq; j++, qidx++, intr_idx++) { + snprintf(name, sizeof(name), "%s intrq%d", + device_get_nameunit(pi->dev), j); + + iq = &sc->sge.intrq[qidx]; + init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, + NULL, name); + rc = alloc_intrq(sc, i, qidx, intr_idx); + + if (rc != 0) { + device_printf(sc->dev, + "failed to create %s: %d\n", + name, rc); + return (rc); + } + } + } } + /* + * Firmware event queue + */ snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev)); - init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name); - rc = alloc_iq(fwq, i); + if (sc->intr_count > T4_EXTRA_INTR) { + handler = NULL; + intr_idx = 1; + } else { + handler = t4_evt_rx; + intr_idx = 0; + } + + iq = &sc->sge.fwq; + init_iq(iq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name); + rc = alloc_fwq(sc, intr_idx); if (rc != 0) { device_printf(sc->dev, "failed to create firmware 
event queue: %d\n", rc); @@ -313,10 +349,10 @@ t4_setup_adapter_queues(struct adapter *sc) } /* - * Control queues - one per hardware channel. + * Control queues - one per port. */ ctrlq = &sc->sge.ctrlq[0]; - for (i = 0; i < NCHAN; i++, ctrlq++) { + for (i = 0; i < sc->params.nports; i++, ctrlq++) { snprintf(name, sizeof(name), "%s ctrlq%d", device_get_nameunit(sc->dev), i); init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name); @@ -344,21 +380,22 @@ t4_teardown_adapter_queues(struct adapter *sc) ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* Do this before freeing the queues */ - if (sc->oid_ctrlq) { + if (sc->oid_fwq || sc->oid_ctrlq || sc->oid_intrq) { sysctl_ctx_free(&sc->ctx); + sc->oid_fwq = NULL; sc->oid_ctrlq = NULL; + sc->oid_intrq = NULL; } - for (i = 0; i < NCHAN; i++) + for (i = 0; i < sc->params.nports; i++) free_ctrlq(sc, &sc->sge.ctrlq[i]); iq = &sc->sge.fwq; - free_iq(iq); - if (sc->flags & INTR_FWD) { - for (i = 0; i < NFIQ(sc); i++) { - iq = &sc->sge.fiq[i]; - free_iq(iq); - } + free_fwq(iq); + + for (i = 0; i < NINTRQ(sc); i++) { + iq = &sc->sge.intrq[i]; + free_intrq(iq); } return (0); @@ -388,23 +425,19 @@ t4_setup_eth_queues(struct port_info *pi) snprintf(name, sizeof(name), "%s rxq%d-iq", device_get_nameunit(pi->dev), i); init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, - pi->qsize_rxq, RX_IQ_ESIZE, - sc->flags & INTR_FWD ? 
t4_eth_rx : NULL, name); + pi->qsize_rxq, RX_IQ_ESIZE, t4_eth_rx, name); snprintf(name, sizeof(name), "%s rxq%d-fl", device_get_nameunit(pi->dev), i); init_fl(&rxq->fl, pi->qsize_rxq / 8, name); - if (sc->flags & INTR_FWD) - intr_idx = (pi->first_rxq + i) % NFIQ(sc); - else - intr_idx = pi->first_rxq + i + 2; + intr_idx = pi->first_rxq + i; + if (sc->flags & INTR_SHARED) + intr_idx %= NINTRQ(sc); rc = alloc_rxq(pi, rxq, intr_idx, i); if (rc != 0) goto done; - - intr_idx++; } for_each_txq(pi, i, txq) { @@ -452,25 +485,26 @@ t4_teardown_eth_queues(struct port_info *pi) return (0); } -/* Deals with errors and forwarded interrupts */ +/* Deals with errors and the first (and only) interrupt queue */ void t4_intr_all(void *arg) { struct adapter *sc = arg; t4_intr_err(arg); - t4_intr_fwd(&sc->sge.fiq[0]); + t4_intr(&sc->sge.intrq[0]); } -/* Deals with forwarded interrupts on the given ingress queue */ +/* Deals with interrupts, and a few CPLs, on the given interrupt queue */ void -t4_intr_fwd(void *arg) +t4_intr(void *arg) { struct sge_iq *iq = arg, *q; struct adapter *sc = iq->adapter; struct rsp_ctrl *ctrl; + const struct rss_header *rss; int ndesc_pending = 0, ndesc_total = 0; - int qid; + int qid, rsp_type; if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) return; @@ -479,17 +513,23 @@ t4_intr_fwd(void *arg) rmb(); - /* Only interrupt muxing expected on this queue */ - KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_INTR, - ("unexpected event on forwarded interrupt queue: %x", - G_RSPD_TYPE(ctrl->u.type_gen))); + rss = (const void *)iq->cdesc; + rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); + + if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) { + handle_cpl(sc, iq); + goto nextdesc; + } qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start; q = sc->sge.iqmap[qid]; - q->handler(q); + if (atomic_cmpset_32(&q->state, IQS_IDLE, IQS_BUSY)) { + q->handler(q); + atomic_cmpset_32(&q->state, IQS_BUSY, IQS_IDLE); + } - ndesc_total++; +nextdesc: ndesc_total++; if 
(++ndesc_pending >= iq->qsize / 4) { t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) | @@ -514,9 +554,7 @@ t4_intr_err(void *arg) { struct adapter *sc = arg; - if (sc->intr_type == INTR_INTX) - t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); - + t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); t4_slow_intr_handler(sc); } @@ -526,70 +564,32 @@ t4_intr_evt(void *arg) { struct sge_iq *iq = arg; - if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) - return; - - t4_evt_rx(arg); - - atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); -} - -void -t4_intr_data(void *arg) -{ - struct sge_iq *iq = arg; - - if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) - return; - - t4_eth_rx(arg); - - atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); + if (atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) { + t4_evt_rx(arg); + atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); + } } -void +static void t4_evt_rx(void *arg) { struct sge_iq *iq = arg; struct adapter *sc = iq->adapter; struct rsp_ctrl *ctrl; - const struct rss_header *rss; int ndesc_pending = 0, ndesc_total = 0; KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__)); while (is_new_response(iq, &ctrl)) { + int rsp_type; rmb(); - rss = (const void *)iq->cdesc; - - /* Should only get CPL on this queue */ - KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_CPL, - ("%s: unexpected type %d", __func__, - G_RSPD_TYPE(ctrl->u.type_gen))); + rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); + if (__predict_false(rsp_type != X_RSPD_TYPE_CPL)) + panic("%s: unexpected rsp_type %d", __func__, rsp_type); - switch (rss->opcode) { - case CPL_FW4_MSG: - case CPL_FW6_MSG: { - const struct cpl_fw6_msg *cpl; - - cpl = (const void *)(rss + 1); - if (cpl->type == FW6_TYPE_CMD_RPL) - t4_handle_fw_rpl(sc, cpl->data); - - break; - } - case CPL_SGE_EGR_UPDATE: - handle_sge_egr_update(sc, (const void *)(rss + 1)); - break; - case CPL_SET_TCB_RPL: - filter_rpl(sc, (const void *) (rss + 1)); - break; - default: - 
device_printf(sc->dev, - "can't handle CPL opcode %d.", rss->opcode); - } + handle_cpl(sc, iq); ndesc_total++; if (++ndesc_pending >= iq->qsize / 4) { @@ -600,6 +600,7 @@ t4_evt_rx(void *arg) V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndesc_pending = 0; } + iq_next(iq); } @@ -613,7 +614,7 @@ t4_evt_rx(void *arg) #define RX_COPY_THRESHOLD MINCLSIZE #endif -void +static void t4_eth_rx(void *arg) { struct sge_rxq *rxq = arg; @@ -644,17 +645,9 @@ t4_eth_rx(void *arg) rss = (const void *)iq->cdesc; i = G_RSPD_TYPE(ctrl->u.type_gen); - if (__predict_false(i == X_RSPD_TYPE_CPL)) { - - /* Can't be anything except an egress update */ - KASSERT(rss->opcode == CPL_SGE_EGR_UPDATE, - ("%s: unexpected CPL %x", __func__, rss->opcode)); - - handle_sge_egr_update(sc, (const void *)(rss + 1)); - goto nextdesc; - } KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT, - ("%s: unexpected CPL %x rsp %d", __func__, rss->opcode, i)); + ("%s: unexpected type %d CPL opcode 0x%x", + __func__, i, rss->opcode)); sd_next = sd + 1; if (__predict_false(fl->cidx + 1 == fl->cap)) @@ -786,16 +779,15 @@ t4_eth_rx(void *arg) refill_fl(sc, fl, 64, 32); FL_UNLOCK(fl); -nextdesc: ndescs++; - iq_next(iq); - - if (ndescs > 32) { + if (++ndescs > 32) { t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndescs = 0; } + + iq_next(iq); } #ifdef INET @@ -1008,7 +1000,7 @@ t4_update_fl_bufsize(struct ifnet *ifp) /* * A non-NULL handler indicates this iq will not receive direct interrupts, the - * handler will be invoked by a forwarded interrupt queue. + * handler will be invoked by an interrupt queue. 
*/ static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, @@ -1100,7 +1092,7 @@ free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, * * If the ingress queue will take interrupts directly (iq->handler == NULL) then * the intr_idx specifies the vector, starting from 0. Otherwise it specifies - * the index of the queue to which its interrupts will be forwarded. + * the index of the interrupt queue to which its interrupts will be forwarded. */ static int alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, @@ -1112,10 +1104,6 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, struct adapter *sc = iq->adapter; __be32 v = 0; - /* The adapter queues are nominally allocated in port[0]'s name */ - if (pi == NULL) - pi = sc->port[0]; - len = iq->qsize * iq->esize; rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, (void **)&iq->desc); @@ -1135,10 +1123,10 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, v |= F_FW_IQ_CMD_IQASYNCH; if (iq->handler) { - KASSERT(intr_idx < NFIQ(sc), + KASSERT(intr_idx < NINTRQ(sc), ("%s: invalid indirect intr_idx %d", __func__, intr_idx)); v |= F_FW_IQ_CMD_IQANDST; - v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fiq[intr_idx].abs_id); + v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.intrq[intr_idx].abs_id); } else { KASSERT(intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, intr_idx)); @@ -1248,7 +1236,8 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, sc->sge.eqmap[cntxt_id] = (void *)fl; FL_LOCK(fl); - refill_fl(sc, fl, -1, 8); + /* Just enough to make sure it doesn't starve right away. 
*/ + refill_fl(sc, fl, roundup(sc->sge.fl_starve_threshold, 8), 8); FL_UNLOCK(fl); } @@ -1333,13 +1322,67 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) } static int -alloc_iq(struct sge_iq *iq, int intr_idx) +alloc_intrq(struct adapter *sc, int port_idx, int intrq_idx, int intr_idx) +{ + int rc; + struct sysctl_oid *oid; + struct sysctl_oid_list *children; + char name[16]; + struct sge_iq *intrq = &sc->sge.intrq[intrq_idx]; + + rc = alloc_iq_fl(sc->port[port_idx], intrq, NULL, intr_idx, -1); + if (rc != 0) + return (rc); + + children = SYSCTL_CHILDREN(sc->oid_intrq); + + snprintf(name, sizeof(name), "%d", intrq_idx); + oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD, + NULL, "interrupt queue"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &intrq->cidx, 0, sysctl_uint16, "I", + "consumer index"); + + return (rc); +} + +static int +free_intrq(struct sge_iq *iq) +{ + return free_iq_fl(NULL, iq, NULL); + +} + +static int +alloc_fwq(struct adapter *sc, int intr_idx) { - return alloc_iq_fl(NULL, iq, NULL, intr_idx, -1); + int rc; + struct sysctl_oid_list *children; + struct sge_iq *fwq = &sc->sge.fwq; + + rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); + if (rc != 0) + return (rc); + + children = SYSCTL_CHILDREN(sc->oid_fwq); + + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", + CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", + "absolute id of the queue"); + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", + CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", + "SGE context id of the queue"); + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", + "consumer index"); + + return (rc); } static int -free_iq(struct sge_iq *iq) +free_fwq(struct sge_iq *iq) { return free_iq_fl(NULL, iq, NULL); } @@ -1356,6 +1399,10 @@ alloc_rxq(struct 
port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) if (rc != 0) return (rc); + FL_LOCK(&rxq->fl); + refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8, 8); + FL_UNLOCK(&rxq->fl); + #ifdef INET rc = tcp_lro_init(&rxq->lro); if (rc != 0) @@ -1375,8 +1422,14 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", - CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_abs_id, "I", + CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", + CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I", + "SGE context id of the queue"); + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", + "consumer index"); #ifdef INET SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &rxq->lro.lro_queued, 0, NULL); @@ -1389,6 +1442,19 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) CTLFLAG_RD, &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); + children = SYSCTL_CHILDREN(oid); + oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, + NULL, "freelist"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", + CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I", + "SGE context id of the queue"); + SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, + &rxq->fl.cidx, 0, "consumer index"); + SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, + &rxq->fl.pidx, 0, "producer index"); + return (rc); } @@ -1433,7 +1499,10 @@ alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx) eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE; eq->spg = (void *)&eq->desc[eq->cap]; eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ - 
eq->iqid = sc->sge.fwq.cntxt_id; + if (sc->flags & INTR_SHARED) + eq->iqid = sc->sge.intrq[idx % NINTRQ(sc)].cntxt_id; + else + eq->iqid = sc->sge.intrq[sc->port[idx]->first_rxq].cntxt_id; bzero(&c, sizeof(c)); @@ -1446,8 +1515,8 @@ alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx) c.physeqid_pkd = htobe32(0); c.fetchszm_to_iqid = htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | - V_FW_EQ_CTRL_CMD_PCIECHN(idx) | F_FW_EQ_CTRL_CMD_FETCHRO | - V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); + V_FW_EQ_CTRL_CMD_PCIECHN(sc->port[idx]->tx_chan) | + F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | @@ -1479,13 +1548,12 @@ alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx) NULL, "ctrl queue"); children = SYSCTL_CHILDREN(oid); - SYSCTL_ADD_UQUAD(&sc->ctx, children, OID_AUTO, "total_wrs", CTLFLAG_RD, - &ctrlq->total_wrs, "total # of work requests"); + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "pidx", + CTLTYPE_INT | CTLFLAG_RD, &ctrlq->eq.pidx, 0, sysctl_uint16, "I", + "producer index"); SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, &ctrlq->no_desc, 0, "# of times ctrlq ran out of hardware descriptors"); - SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "too_long", CTLFLAG_RD, - &ctrlq->too_long, 0, "# of oversized work requests"); return (rc); } @@ -1526,6 +1594,7 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) char name[16]; struct sysctl_oid *oid; struct sysctl_oid_list *children; + struct sge_iq *intrq; txq->ifp = pi->ifp; TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq); @@ -1544,7 +1613,12 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock); - eq->iqid = sc->sge.rxq[pi->first_rxq].iq.cntxt_id; + + 
intrq = &sc->sge.intrq[0]; + if (sc->flags & INTR_SHARED) + eq->iqid = intrq[(pi->first_txq + idx) % NINTRQ(sc)].cntxt_id; + else + eq->iqid = intrq[pi->first_rxq + (idx % pi->nrxq)].cntxt_id; rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS, @@ -1603,6 +1677,15 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) NULL, "tx queue"); children = SYSCTL_CHILDREN(oid); + SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, + &eq->cntxt_id, 0, "SGE context id of the queue"); + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", + "consumer index"); + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx", + CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", + "producer index"); + SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, &txq->txcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion", @@ -2324,7 +2407,7 @@ write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) wr->equiq_to_len16 = htobe32(ctrl); wr->plen = htobe16(txpkts->plen); wr->npkt = txpkts->npkt; - wr->r3 = wr->r4 = 0; + wr->r3 = wr->type = 0; /* Everything else already written */ @@ -2695,6 +2778,32 @@ handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl) return (0); } +static void +handle_cpl(struct adapter *sc, struct sge_iq *iq) +{ + const struct rss_header *rss = (const void *)iq->cdesc; + const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); + + switch (rss->opcode) { + case CPL_FW4_MSG: + case CPL_FW6_MSG: + if (cpl->type == FW6_TYPE_CMD_RPL) + t4_handle_fw_rpl(sc, cpl->data); + break; + + case CPL_SGE_EGR_UPDATE: + handle_sge_egr_update(sc, (const void *)cpl); + break; + + case CPL_SET_TCB_RPL: + filter_rpl(sc, (const void *)cpl); + break; + + default: + panic("%s: unexpected CPL opcode 0x%x", __func__, rss->opcode); + } +} 
+ /* * m0 is freed on successful transmission. */ @@ -2710,7 +2819,8 @@ ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0) M_ASSERTPKTHDR(m0); if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) { - ctrlq->too_long++; + log(LOG_ERR, "%s: %s work request too long (%d)", + device_get_nameunit(sc->dev), __func__, m0->m_pkthdr.len); return (EMSGSIZE); } ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE); @@ -2738,7 +2848,6 @@ ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0) eq->pidx -= eq->cap; eq->pending += ndesc; - ctrlq->total_wrs++; ring_eq_db(sc, eq); failed: EQ_UNLOCK(eq); @@ -2749,7 +2858,7 @@ failed: } static int -sysctl_abs_id(SYSCTL_HANDLER_ARGS) +sysctl_uint16(SYSCTL_HANDLER_ARGS) { uint16_t *id = arg1; int i = *id; diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 4aa08f6..08735ac 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -36,6 +36,7 @@ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_inet.h" +#include "opt_inet6.h" #include "opt_altq.h" #endif @@ -99,7 +100,7 @@ int igb_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char igb_driver_version[] = "version - 2.2.3"; +char igb_driver_version[] = "version - 2.2.5"; /********************************************************************* @@ -170,13 +171,15 @@ static int igb_detach(device_t); static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); -static void igb_start(struct ifnet *); -static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #if __FreeBSD_version >= 800000 static int igb_mq_start(struct ifnet *, struct mbuf *); static int igb_mq_start_locked(struct ifnet *, struct tx_ring *, struct mbuf *); static void igb_qflush(struct ifnet *); +static void igb_deferred_mq_start(void *, int); +#else +static void 
igb_start(struct ifnet *); +static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #endif static int igb_ioctl(struct ifnet *, u_long, caddr_t); static void igb_init(void *); @@ -263,6 +266,7 @@ static void igb_handle_link(void *context, int pending); static void igb_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); +static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); #ifdef DEVICE_POLLING static poll_handler_t igb_poll; @@ -342,25 +346,6 @@ TUNABLE_INT("hw.igb.hdr_split", &igb_header_split); static int igb_num_queues = 0; TUNABLE_INT("hw.igb.num_queues", &igb_num_queues); -/* How many packets rxeof tries to clean at a time */ -static int igb_rx_process_limit = 100; -TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit); - -/* Flow control setting - default to FULL */ -static int igb_fc_setting = e1000_fc_full; -TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting); - -/* Energy Efficient Ethernet - default to off */ -static int igb_eee_disabled = TRUE; -TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled); - -/* -** DMA Coalescing, only for i350 - default to off, -** this feature is for power savings -*/ -static int igb_dma_coalesce = FALSE; -TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce); - /********************************************************************* * Device identification routine * @@ -431,6 +416,11 @@ igb_attach(device_t dev) INIT_DEBUGOUT("igb_attach: begin"); + if (resource_disabled("igb", device_get_unit(dev))) { + device_printf(dev, "Disabled by device hint\n"); + return (ENXIO); + } + adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); @@ -448,7 +438,7 @@ igb_attach(device_t dev) SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW, + OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 
igb_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); @@ -474,8 +464,8 @@ igb_attach(device_t dev) /* Sysctl for limiting the amount of work done in the taskqueue */ igb_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", &adapter->rx_process_limit, - igb_rx_process_limit); + "max number of rx packets to process", + &adapter->rx_process_limit, 100); /* * Validate number of transmit and receive descriptors. It @@ -550,13 +540,14 @@ igb_attach(device_t dev) /* Some adapter-specific advanced features */ if (adapter->hw.mac.type >= e1000_i350) { - igb_set_sysctl_value(adapter, "dma_coalesce", - "configure dma coalesce", - &adapter->dma_coalesce, igb_dma_coalesce); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, + adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); igb_set_sysctl_value(adapter, "eee_disabled", "enable Energy Efficient Ethernet", &adapter->hw.dev_spec._82575.eee_disable, - igb_eee_disabled); + TRUE); e1000_set_eee_i350(&adapter->hw); } @@ -656,6 +647,7 @@ igb_attach(device_t dev) return (0); err_late: + igb_detach(dev); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); igb_release_hw_control(adapter); @@ -693,6 +685,8 @@ igb_detach(device_t dev) return (EBUSY); } + ether_ifdetach(adapter->ifp); + if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); @@ -724,8 +718,6 @@ igb_detach(device_t dev) if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - ether_ifdetach(adapter->ifp); - callout_drain(&adapter->timer); igb_free_pci_resources(adapter); @@ -734,7 +726,8 @@ igb_detach(device_t dev) igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); - free(adapter->mta, M_DEVBUF); + if (adapter->mta != NULL) + free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); @@ -784,14 +777,27 @@ 
igb_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; +#if __FreeBSD_version >= 800000 + struct tx_ring *txr = adapter->tx_rings; +#endif IGB_CORE_LOCK(adapter); igb_init_locked(adapter); igb_init_manageability(adapter); if ((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) + (ifp->if_drv_flags & IFF_DRV_RUNNING)) { +#if __FreeBSD_version < 800000 igb_start(ifp); +#else + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IGB_TX_LOCK(txr); + if (!drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr, NULL); + IGB_TX_UNLOCK(txr); + } +#endif + } IGB_CORE_UNLOCK(adapter); @@ -799,6 +805,7 @@ igb_resume(device_t dev) } +#if __FreeBSD_version < 800000 /********************************************************************* * Transmit entry point * @@ -875,7 +882,7 @@ igb_start(struct ifnet *ifp) return; } -#if __FreeBSD_version >= 800000 +#else /* __FreeBSD_version >= 800000 */ /* ** Multiqueue Transmit driver ** @@ -900,7 +907,7 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m) IGB_TX_UNLOCK(txr); } else { err = drbr_enqueue(ifp, txr->br, m); - taskqueue_enqueue(que->tq, &que->que_task); + taskqueue_enqueue(que->tq, &txr->txq_task); } return (err); @@ -961,6 +968,22 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) } /* + * Called from a taskqueue to drain queued transmit packets. 
+ */ +static void +igb_deferred_mq_start(void *arg, int pending) +{ + struct tx_ring *txr = arg; + struct adapter *adapter = txr->adapter; + struct ifnet *ifp = adapter->ifp; + + IGB_TX_LOCK(txr); + if (!drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr, NULL); + IGB_TX_UNLOCK(txr); +} + +/* ** Flush all ring buffers */ static void @@ -978,7 +1001,7 @@ igb_qflush(struct ifnet *ifp) } if_qflush(ifp); } -#endif /* __FreeBSD_version >= 800000 */ +#endif /* __FreeBSD_version < 800000 */ /********************************************************************* * Ioctl entry point @@ -993,11 +1016,12 @@ static int igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *)data; -#ifdef INET - struct ifaddr *ifa = (struct ifaddr *)data; + struct ifreq *ifr = (struct ifreq *)data; +#if defined(INET) || defined(INET6) + struct ifaddr *ifa = (struct ifaddr *)data; + bool avoid_reset = FALSE; #endif - int error = 0; + int error = 0; if (adapter->in_detach) return (error); @@ -1005,20 +1029,22 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) switch (command) { case SIOCSIFADDR: #ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) { - /* - * XXX - * Since resetting hardware takes a very long time - * and results in link renegotiation we only - * initialize the hardware only when it is absolutely - * required. - */ + if (ifa->ifa_addr->sa_family == AF_INET) + avoid_reset = TRUE; +#endif +#ifdef INET6 + if (ifa->ifa_addr->sa_family == AF_INET6) + avoid_reset = TRUE; +#endif +#if defined(INET) || defined(INET6) + /* + ** Calling init results in link renegotiation, + ** so we avoid doing it when possible. 
+ */ + if (avoid_reset) { ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - IGB_CORE_LOCK(adapter); - igb_init_locked(adapter); - IGB_CORE_UNLOCK(adapter); - } + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + igb_init(adapter); if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); } else @@ -1143,6 +1169,10 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; reinit = 1; } + if (mask & IFCAP_VLAN_HWTSO) { + ifp->if_capenable ^= IFCAP_VLAN_HWTSO; + reinit = 1; + } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; reinit = 1; @@ -2180,6 +2210,7 @@ igb_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; + struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Turn off all interrupts */ @@ -2198,6 +2229,10 @@ igb_allocate_legacy(struct adapter *adapter) return (ENXIO); } +#if __FreeBSD_version >= 800000 + TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); +#endif + /* * Try allocating a fast interrupt and the associated deferred * processing contexts. @@ -2268,9 +2303,13 @@ igb_allocate_msix(struct adapter *adapter) */ if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, i); +#if __FreeBSD_version >= 800000 + TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, + que->txr); +#endif /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); - que->tq = taskqueue_create_fast("igb_que", M_NOWAIT, + que->tq = taskqueue_create("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); @@ -2477,13 +2516,24 @@ igb_free_pci_resources(struct adapter *adapter) else (adapter->msix != 0) ? 
(rid = 1):(rid = 0); + que = adapter->queues; if (adapter->tag != NULL) { + taskqueue_drain(que->tq, &adapter->link_task); bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); + for (int i = 0; i < adapter->num_queues; i++, que++) { + if (que->tq != NULL) { +#if __FreeBSD_version >= 800000 + taskqueue_drain(que->tq, &que->txr->txq_task); +#endif + taskqueue_drain(que->tq, &que->que_task); + taskqueue_free(que->tq); + } + } mem: if (adapter->msix) pci_release_msi(dev); @@ -2669,6 +2719,12 @@ igb_reset(struct adapter *adapter) fc->pause_time = IGB_FC_PAUSE_TIME; fc->send_xon = TRUE; + if (fc->requested_mode) + fc->current_mode = fc->requested_mode; + else + fc->current_mode = e1000_fc_full; + + adapter->fc = fc->current_mode; /* Issue a global reset */ e1000_reset_hw(hw); @@ -2678,9 +2734,13 @@ igb_reset(struct adapter *adapter) device_printf(dev, "Hardware Initialization Failed\n"); /* Setup DMA Coalescing */ - if ((hw->mac.type == e1000_i350) && - (adapter->dma_coalesce == TRUE)) { - u32 reg; + if (hw->mac.type == e1000_i350) { + u32 reg = ~E1000_DMACR_DMAC_EN; + + if (adapter->dmac == 0) { /* Disabling it */ + E1000_WRITE_REG(hw, E1000_DMACR, reg); + goto reset_out; + } hwm = (pba - 4) << 10; reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT) @@ -2689,8 +2749,8 @@ igb_reset(struct adapter *adapter) /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); - /* timer = +-1000 usec in 32usec intervals */ - reg |= (1000 >> 5); + /* timer = value in adapter->dmac in 32usec intervals */ + reg |= (adapter->dmac >> 5); E1000_WRITE_REG(hw, E1000_DMACR, reg); /* No lower threshold */ @@ -2715,6 +2775,7 @@ igb_reset(struct adapter *adapter) device_printf(dev, "DMA Coalescing enabled\n"); } +reset_out: E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); @@ -2744,10 +2805,11 @@ 
igb_setup_interface(device_t dev, struct adapter *adapter) ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = igb_ioctl; - ifp->if_start = igb_start; #if __FreeBSD_version >= 800000 ifp->if_transmit = igb_mq_start; ifp->if_qflush = igb_qflush; +#else + ifp->if_start = igb_start; #endif IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; @@ -2774,15 +2836,19 @@ igb_setup_interface(device_t dev, struct adapter *adapter) * support full VLAN capability. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; - ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; + ifp->if_capenable |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; /* - ** Dont turn this on by default, if vlans are + ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If - ** using vlans directly on the em driver you can + ** using vlans directly on the igb driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; @@ -5542,19 +5608,18 @@ static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; - struct adapter *adapter; + struct adapter *adapter = (struct adapter *) arg1; - error = sysctl_handle_int(oidp, &igb_fc_setting, 0, req); + error = sysctl_handle_int(oidp, &adapter->fc, 0, req); - if (error) + if ((error) || (req->newptr == NULL)) return (error); - adapter = (struct adapter *) arg1; - switch (igb_fc_setting) { + switch (adapter->fc) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: - adapter->hw.fc.requested_mode = igb_fc_setting; + adapter->hw.fc.requested_mode = adapter->fc; break; case e1000_fc_none: default: @@ -5563,5 +5628,54 @@ igb_set_flowcntl(SYSCTL_HANDLER_ARGS) adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); - return error; + return (error); +} + +/* +** Manage DMA Coalesce: +** Control values: +** 0/1 - off/on +** Legal timer values are: +** 250,500,1000-10000 in thousands +*/ +static int +igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + int error; + + error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); + + if ((error) || (req->newptr == NULL)) + return (error); + + switch (adapter->dmac) { + case 0: + /*Disabling */ + break; + case 1: /* Just enable and use default */ + adapter->dmac = 1000; + break; + case 250: + case 500: + case 1000: + case 2000: + case 3000: + case 4000: + case 5000: + case 6000: + case 7000: + case 8000: + case 9000: + case 10000: + /* Legal values - allow */ + break; + default: + /* Do nothing, illegal value */ + adapter->dmac = 0; + return (error); + } + /* Reinit the interface */ + igb_init(adapter); + return (error); } diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h index 609d650..f0ab685 100644 --- a/sys/dev/e1000/if_igb.h +++ b/sys/dev/e1000/if_igb.h @@ -297,6 +297,7 @@ struct tx_ring { struct buf_ring *br; #endif bus_dma_tag_t txtag; + struct 
task txq_task; u32 bytes; u32 packets; @@ -395,11 +396,12 @@ struct adapter { u32 shadow_vfta[IGB_VFTA_SIZE]; /* Info about the interface */ - u8 link_active; + u16 link_active; + u16 fc; u16 link_speed; u16 link_duplex; u32 smartspeed; - u32 dma_coalesce; + u32 dmac; /* Interface queues */ struct igb_queue *queues; diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index d6225d8..4cfcea8 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -1991,7 +1991,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg) * had already processed the interrupt). We don't * lose the interrupt sample. */ - atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid))); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask); pmc_process_samples(PCPU_GET(cpuid)); break; @@ -4083,7 +4083,7 @@ pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf, done: /* mark CPU as needing processing */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); return (error); } @@ -4193,7 +4193,7 @@ pmc_process_samples(int cpu) break; if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); break; } @@ -4782,7 +4782,7 @@ pmc_cleanup(void) PMCDBG(MOD,INI,0, "%s", "cleanup"); /* switch off sampling */ - pmc_cpumask = 0; + CPU_ZERO(&pmc_cpumask); pmc_intr = NULL; sx_xlock(&pmc_sx); diff --git a/sys/dev/iicbus/ad7417.c b/sys/dev/iicbus/ad7417.c new file mode 100644 index 0000000..6ae16be --- /dev/null +++ b/sys/dev/iicbus/ad7417.c @@ -0,0 +1,621 @@ +/*- + * Copyright (c) 2010 Andreas Tobler + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/systm.h> +#include <sys/module.h> +#include <sys/callout.h> +#include <sys/conf.h> +#include <sys/cpu.h> +#include <sys/ctype.h> +#include <sys/kernel.h> +#include <sys/reboot.h> +#include <sys/rman.h> +#include <sys/sysctl.h> +#include <sys/limits.h> + +#include <machine/bus.h> +#include <machine/md_var.h> + +#include <dev/iicbus/iicbus.h> +#include <dev/iicbus/iiconf.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <powerpc/powermac/powermac_thermal.h> + +/* CPU A/B sensors, temp and adc: AD7417. 
*/ + +#define AD7417_TEMP 0x00 +#define AD7417_CONFIG 0x01 +#define AD7417_ADC 0x04 +#define AD7417_CONFIG2 0x05 +#define AD7417_CONFMASK 0xe0 + +uint8_t adc741x_config; + +struct ad7417_sensor { + struct pmac_therm therm; + device_t dev; + int id; + enum { + ADC7417_TEMP_SENSOR, + ADC7417_ADC_SENSOR + } type; +}; + +struct write_data { + uint8_t reg; + uint8_t val; +}; + +struct read_data { + uint8_t reg; + uint16_t val; +}; + +/* Regular bus attachment functions */ +static int ad7417_probe(device_t); +static int ad7417_attach(device_t); + +/* Utility functions */ +static int ad7417_sensor_sysctl(SYSCTL_HANDLER_ARGS); +static int ad7417_write(device_t dev, uint32_t addr, uint8_t reg, + uint8_t *buf, int len); +static int ad7417_read_1(device_t dev, uint32_t addr, uint8_t reg, + uint8_t *data); +static int ad7417_read_2(device_t dev, uint32_t addr, uint8_t reg, + uint16_t *data); +static int ad7417_write_read(device_t dev, uint32_t addr, + struct write_data out, struct read_data *in); +static int ad7417_diode_read(struct ad7417_sensor *sens); +static int ad7417_adc_read(struct ad7417_sensor *sens); +static int ad7417_sensor_read(struct ad7417_sensor *sens); + +struct ad7417_softc { + device_t sc_dev; + uint32_t sc_addr; + struct ad7417_sensor *sc_sensors; + int sc_nsensors; +}; +static device_method_t ad7417_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ad7417_probe), + DEVMETHOD(device_attach, ad7417_attach), + { 0, 0 }, +}; + +static driver_t ad7417_driver = { + "ad7417", + ad7417_methods, + sizeof(struct ad7417_softc) +}; + +static devclass_t ad7417_devclass; + +DRIVER_MODULE(ad7417, iicbus, ad7417_driver, ad7417_devclass, 0, 0); +MALLOC_DEFINE(M_AD7417, "ad7417", "Supply-Monitor AD7417"); + + +static int +ad7417_write(device_t dev, uint32_t addr, uint8_t reg, uint8_t *buff, int len) +{ + unsigned char buf[4]; + int try = 0; + + struct iic_msg msg[] = { + { addr, IIC_M_WR, 0, buf } + }; + + msg[0].len = len + 1; + buf[0] = reg; + memcpy(buf + 
1, buff, len); + + for (;;) + { + if (iicbus_transfer(dev, msg, 1) == 0) + return (0); + + if (++try > 5) { + device_printf(dev, "iicbus write failed\n"); + return (-1); + } + pause("ad7417_write", hz); + } +} + +static int +ad7417_read_1(device_t dev, uint32_t addr, uint8_t reg, uint8_t *data) +{ + uint8_t buf[4]; + int err, try = 0; + + struct iic_msg msg[2] = { + { addr, IIC_M_WR | IIC_M_NOSTOP, 1, &reg }, + { addr, IIC_M_RD, 1, buf }, + }; + + for (;;) + { + err = iicbus_transfer(dev, msg, 2); + if (err != 0) + goto retry; + + *data = *((uint8_t*)buf); + return (0); + retry: + if (++try > 5) { + device_printf(dev, "iicbus read failed\n"); + return (-1); + } + pause("ad7417_read_1", hz); + } +} + +static int +ad7417_read_2(device_t dev, uint32_t addr, uint8_t reg, uint16_t *data) +{ + uint8_t buf[4]; + int err, try = 0; + + struct iic_msg msg[2] = { + { addr, IIC_M_WR | IIC_M_NOSTOP, 1, &reg }, + { addr, IIC_M_RD, 2, buf }, + }; + + for (;;) + { + err = iicbus_transfer(dev, msg, 2); + if (err != 0) + goto retry; + + *data = *((uint16_t*)buf); + return (0); + retry: + if (++try > 5) { + device_printf(dev, "iicbus read failed\n"); + return (-1); + } + pause("ad7417_read_2", hz); + } +} + +static int +ad7417_write_read(device_t dev, uint32_t addr, struct write_data out, + struct read_data *in) +{ + uint8_t buf[4]; + int err, try = 0; + + /* Do a combined write/read. */ + struct iic_msg msg[3] = { + { addr, IIC_M_WR, 2, buf }, + { addr, IIC_M_WR | IIC_M_NOSTOP, 1, &in->reg }, + { addr, IIC_M_RD, 2, buf }, + }; + + /* Prepare the write msg.
*/ + buf[0] = out.reg; + buf[1] = out.val & 0xff; + + for (;;) + { + err = iicbus_transfer(dev, msg, 3); + if (err != 0) + goto retry; + + in->val = *((uint16_t*)buf); + return (0); + retry: + if (++try > 5) { + device_printf(dev, "iicbus write/read failed\n"); + return (-1); + } + pause("ad7417_write_read", hz); + } +} + +static int +ad7417_init_adc(device_t dev, uint32_t addr) +{ + uint8_t buf; + int err; + + adc741x_config = 0; + /* Clear Config2 */ + buf = 0; + + err = ad7417_write(dev, addr, AD7417_CONFIG2, &buf, 1); + + /* Read & cache Config1 */ + buf = 0; + err = ad7417_write(dev, addr, AD7417_CONFIG, &buf, 1); + err = ad7417_read_1(dev, addr, AD7417_CONFIG, &buf); + adc741x_config = (uint8_t)buf; + + /* Disable shutdown mode */ + adc741x_config &= 0xfe; + buf = adc741x_config; + err = ad7417_write(dev, addr, AD7417_CONFIG, &buf, 1); + if (err < 0) + return (-1); + + return (0); + +} +static int +ad7417_probe(device_t dev) +{ + const char *name, *compatible; + struct ad7417_softc *sc; + + name = ofw_bus_get_name(dev); + compatible = ofw_bus_get_compat(dev); + + if (!name) + return (ENXIO); + + if (strcmp(name, "supply-monitor") != 0 || + strcmp(compatible, "ad7417") != 0) + return (ENXIO); + + sc = device_get_softc(dev); + sc->sc_dev = dev; + sc->sc_addr = iicbus_get_addr(dev); + + device_set_desc(dev, "Supply-Monitor AD7417"); + + return (0); +} + +/* + * This function returns the number of sensors. If we call it the second time + * and we have allocated memory for sc->sc_sensors, we fill in the properties. + */ +static int +ad7417_fill_sensor_prop(device_t dev) +{ + phandle_t child; + struct ad7417_softc *sc; + u_int id[10]; + char location[96]; + char type[32]; + int i = 0, j, len = 0, prop_len, prev_len = 0; + + sc = device_get_softc(dev); + + child = ofw_bus_get_node(dev); + + /* Fill the sensor location property. 
*/ + prop_len = OF_getprop(child, "hwsensor-location", location, + sizeof(location)); + while (len < prop_len) { + if (sc->sc_sensors != NULL) + strcpy(sc->sc_sensors[i].therm.name, location + len); + prev_len = strlen(location + len) + 1; + len += prev_len; + i++; + } + if (sc->sc_sensors == NULL) + return (i); + + /* Fill the sensor type property. */ + len = 0; + i = 0; + prev_len = 0; + prop_len = OF_getprop(child, "hwsensor-type", type, sizeof(type)); + while (len < prop_len) { + if (strcmp(type + len, "temperature") == 0) + sc->sc_sensors[i].type = ADC7417_TEMP_SENSOR; + else + sc->sc_sensors[i].type = ADC7417_ADC_SENSOR; + prev_len = strlen(type + len) + 1; + len += prev_len; + i++; + } + + /* Fill the sensor id property. Taken from OF. */ + prop_len = OF_getprop(child, "hwsensor-id", id, sizeof(id)); + for (j = 0; j < i; j++) + sc->sc_sensors[j].id = id[j]; + + /* Fill the sensor zone property. Taken from OF. */ + prop_len = OF_getprop(child, "hwsensor-zone", id, sizeof(id)); + for (j = 0; j < i; j++) + sc->sc_sensors[j].therm.zone = id[j]; + + /* Finish setting up sensor properties */ + for (j = 0; j < i; j++) { + sc->sc_sensors[j].dev = dev; + + /* HACK: Apple wired a random diode to the ADC line */ + if (strstr(sc->sc_sensors[j].therm.name, "DIODE TEMP") + != NULL) { + sc->sc_sensors[j].type = ADC7417_TEMP_SENSOR; + sc->sc_sensors[j].therm.read = + (int (*)(struct pmac_therm *))(ad7417_diode_read); + } else { + sc->sc_sensors[j].therm.read = + (int (*)(struct pmac_therm *))(ad7417_sensor_read); + } + + if (sc->sc_sensors[j].type != ADC7417_TEMP_SENSOR) + continue; + + /* Make up some ranges */ + sc->sc_sensors[j].therm.target_temp = 500 + ZERO_C_TO_K; + sc->sc_sensors[j].therm.max_temp = 900 + ZERO_C_TO_K; + + pmac_thermal_sensor_register(&sc->sc_sensors[j].therm); + } + + return (i); +} + +static int +ad7417_attach(device_t dev) +{ + struct ad7417_softc *sc; + struct sysctl_oid *oid, *sensroot_oid; + struct sysctl_ctx_list *ctx; + char sysctl_name[32]; + 
int i, j; + const char *unit; + const char *desc; + + sc = device_get_softc(dev); + + sc->sc_nsensors = 0; + + /* Count the actual number of sensors. */ + sc->sc_nsensors = ad7417_fill_sensor_prop(dev); + + device_printf(dev, "%d sensors detected.\n", sc->sc_nsensors); + + if (sc->sc_nsensors == 0) + device_printf(dev, "WARNING: No AD7417 sensors detected!\n"); + + sc->sc_sensors = malloc (sc->sc_nsensors * sizeof(struct ad7417_sensor), + M_AD7417, M_WAITOK | M_ZERO); + + ctx = device_get_sysctl_ctx(dev); + sensroot_oid = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "sensor", + CTLFLAG_RD, 0, "AD7417 Sensor Information"); + + /* Now we can fill the properties into the allocated struct. */ + sc->sc_nsensors = ad7417_fill_sensor_prop(dev); + + /* Add sysctls for the sensors. */ + for (i = 0; i < sc->sc_nsensors; i++) { + for (j = 0; j < strlen(sc->sc_sensors[i].therm.name); j++) { + sysctl_name[j] = + tolower(sc->sc_sensors[i].therm.name[j]); + if (isspace(sysctl_name[j])) + sysctl_name[j] = '_'; + } + sysctl_name[j] = 0; + + oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sensroot_oid), + OID_AUTO, + sysctl_name, CTLFLAG_RD, 0, + "Sensor Information"); + + if (sc->sc_sensors[i].type == ADC7417_TEMP_SENSOR) { + unit = "temp"; + desc = "Sensor temp in C"; + } else { + unit = "volt"; + desc = "Sensor Volt in V"; + } + /* I use i to pass the sensor id. */ + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + unit, CTLTYPE_INT | CTLFLAG_RD, dev, + i, ad7417_sensor_sysctl, + sc->sc_sensors[i].type == ADC7417_TEMP_SENSOR ? + "IK" : "I", desc); + } + /* Dump sensor location, ID & type. 
*/ + if (bootverbose) { + device_printf(dev, "Sensors\n"); + for (i = 0; i < sc->sc_nsensors; i++) { + device_printf(dev, "Location: %s ID: %d type: %d\n", + sc->sc_sensors[i].therm.name, + sc->sc_sensors[i].id, + sc->sc_sensors[i].type); + } + } + + return (0); +} + +static int +ad7417_get_temp(device_t dev, uint32_t addr, int *temp) +{ + uint16_t buf[2]; + uint16_t read; + int err; + + err = ad7417_read_2(dev, addr, AD7417_TEMP, buf); + + if (err < 0) + return (-1); + + read = *((int16_t*)buf); + + /* The ADC is 10 bit, the resolution is 0.25 C. + The temperature is in tenth kelvin. + */ + *temp = (((int16_t)(read & 0xffc0)) >> 6) * 25 / 10; + return (0); +} + +static int +ad7417_get_adc(device_t dev, uint32_t addr, unsigned int *value, + uint8_t chan) +{ + uint8_t tmp; + int err; + struct write_data config; + struct read_data data; + + tmp = chan << 5; + config.reg = AD7417_CONFIG; + data.reg = AD7417_ADC; + data.val = 0; + + err = ad7417_read_1(dev, addr, AD7417_CONFIG, &config.val); + + config.val = (config.val & ~AD7417_CONFMASK) | (tmp & AD7417_CONFMASK); + + err = ad7417_write_read(dev, addr, config, &data); + if (err < 0) + return (-1); + + *value = ((uint32_t)data.val) >> 6; + + return (0); +} + +static int +ad7417_diode_read(struct ad7417_sensor *sens) +{ + static int eeprom_read = 0; + static cell_t eeprom[2][40]; + phandle_t eeprom_node; + int rawval, diode_slope, diode_offset; + int temp; + + if (!eeprom_read) { + eeprom_node = OF_finddevice("/u3/i2c/cpuid@a0"); + OF_getprop(eeprom_node, "cpuid", eeprom[0], sizeof(eeprom[0])); + eeprom_node = OF_finddevice("/u3/i2c/cpuid@a2"); + OF_getprop(eeprom_node, "cpuid", eeprom[1], sizeof(eeprom[1])); + eeprom_read = 1; + } + + rawval = ad7417_adc_read(sens); + if (rawval < 0) + return (-1); + + if (strstr(sens->therm.name, "CPU B") != NULL) { + diode_slope = eeprom[1][0x11] >> 16; + diode_offset = (int16_t)(eeprom[1][0x11] & 0xffff) << 12; + } else { + diode_slope = eeprom[0][0x11] >> 16; + diode_offset = 
(int16_t)(eeprom[0][0x11] & 0xffff) << 12; + } + + temp = (rawval*diode_slope + diode_offset) >> 2; + temp = (10*(temp >> 16)) + ((10*(temp & 0xffff)) >> 16); + + return (temp + ZERO_C_TO_K); +} + +static int +ad7417_adc_read(struct ad7417_sensor *sens) +{ + struct ad7417_softc *sc; + uint8_t chan; + int temp; + + sc = device_get_softc(sens->dev); + + switch (sens->id) { + case 11: + case 16: + chan = 1; + break; + case 12: + case 17: + chan = 2; + break; + case 13: + case 18: + chan = 3; + break; + case 14: + case 19: + chan = 4; + break; + default: + chan = 1; + } + + if (ad7417_get_adc(sc->sc_dev, sc->sc_addr, &temp, chan) < 0) + return (-1); + + return (temp); +} + + +static int +ad7417_sensor_read(struct ad7417_sensor *sens) +{ + struct ad7417_softc *sc; + int temp; + + sc = device_get_softc(sens->dev); + + /* Init the ADC. */ + if (ad7417_init_adc(sc->sc_dev, sc->sc_addr) < 0) + return (-1); + + if (sens->type == ADC7417_TEMP_SENSOR) { + if (ad7417_get_temp(sc->sc_dev, sc->sc_addr, &temp) < 0) + return (-1); + temp += ZERO_C_TO_K; + } else { + temp = ad7417_adc_read(sens); + } + return (temp); +} + +static int +ad7417_sensor_sysctl(SYSCTL_HANDLER_ARGS) +{ + device_t dev; + struct ad7417_softc *sc; + struct ad7417_sensor *sens; + int value = 0; + int error; + + dev = arg1; + sc = device_get_softc(dev); + sens = &sc->sc_sensors[arg2]; + + value = sens->therm.read(&sens->therm); + if (value < 0) + return (ENXIO); + + error = sysctl_handle_int(oidp, &value, 0, req); + + return (error); +} diff --git a/sys/dev/iicbus/ds1775.c b/sys/dev/iicbus/ds1775.c index 0edc074..23ad6f4 100644 --- a/sys/dev/iicbus/ds1775.c +++ b/sys/dev/iicbus/ds1775.c @@ -49,33 +49,29 @@ __FBSDID("$FreeBSD$"); #include <dev/ofw/openfirm.h> #include <dev/ofw/ofw_bus.h> - -#define FCU_ZERO_C_TO_K 2732 +#include <powerpc/powermac/powermac_thermal.h> /* Drivebay sensor: LM75/DS1775. 
*/ #define DS1775_TEMP 0x0 -struct ds1775_sensor { - char location[32]; -}; - /* Regular bus attachment functions */ static int ds1775_probe(device_t); static int ds1775_attach(device_t); +struct ds1775_softc { + struct pmac_therm sc_sensor; + device_t sc_dev; + struct intr_config_hook enum_hook; + uint32_t sc_addr; +}; + /* Utility functions */ +static int ds1775_sensor_read(struct ds1775_softc *sc); static int ds1775_sensor_sysctl(SYSCTL_HANDLER_ARGS); static void ds1775_start(void *xdev); static int ds1775_read_2(device_t dev, uint32_t addr, uint8_t reg, uint16_t *data); -struct ds1775_softc { - device_t sc_dev; - struct intr_config_hook enum_hook; - uint32_t sc_addr; - struct ds1775_sensor *sc_sensors; - -}; static device_method_t ds1775_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ds1775_probe), @@ -92,26 +88,33 @@ static driver_t ds1775_driver = { static devclass_t ds1775_devclass; DRIVER_MODULE(ds1755, iicbus, ds1775_driver, ds1775_devclass, 0, 0); -MALLOC_DEFINE(M_DS1775, "ds1775", "Temp-Monitor DS1775"); static int ds1775_read_2(device_t dev, uint32_t addr, uint8_t reg, uint16_t *data) { uint8_t buf[4]; + int err, try = 0; struct iic_msg msg[2] = { { addr, IIC_M_WR | IIC_M_NOSTOP, 1, ® }, { addr, IIC_M_RD, 2, buf }, }; - if (iicbus_transfer(dev, msg, 2) != 0) { - device_printf(dev, "iicbus read failed\n"); - return (EIO); + for (;;) + { + err = iicbus_transfer(dev, msg, 2); + if (err != 0) + goto retry; + + *data = *((uint16_t*)buf); + return (0); + retry: + if (++try > 5) { + device_printf(dev, "iicbus read failed\n"); + return (-1); + } + pause("ds1775_read_2", hz); } - - *data = *((uint16_t*)buf); - - return (0); } static int @@ -169,7 +172,6 @@ ds1775_start(void *xdev) { phandle_t child; struct ds1775_softc *sc; - struct ds1775_sensor *sens; struct sysctl_oid *sensroot_oid; struct sysctl_ctx_list *ctx; ssize_t plen; @@ -183,30 +185,43 @@ ds1775_start(void *xdev) child = ofw_bus_get_node(dev); - sc->sc_sensors = malloc (sizeof(struct 
ds1775_sensor), - M_DS1775, M_WAITOK | M_ZERO); - - sens = sc->sc_sensors; - ctx = device_get_sysctl_ctx(dev); sensroot_oid = device_get_sysctl_tree(dev); - plen = OF_getprop(child, "hwsensor-location", sens->location, - sizeof(sens->location)); + if (OF_getprop(child, "hwsensor-zone", &sc->sc_sensor.zone, + sizeof(int)) < 0) + sc->sc_sensor.zone = 0; + + plen = OF_getprop(child, "hwsensor-location", sc->sc_sensor.name, + sizeof(sc->sc_sensor.name)); units = "C"; if (plen == -1) { strcpy(sysctl_name, "sensor"); } else { - for (i = 0; i < strlen(sens->location); i++) { - sysctl_name[i] = tolower(sens->location[i]); + for (i = 0; i < strlen(sc->sc_sensor.name); i++) { + sysctl_name[i] = tolower(sc->sc_sensor.name[i]); if (isspace(sysctl_name[i])) sysctl_name[i] = '_'; } sysctl_name[i] = 0; } - sprintf(sysctl_desc,"%s (%s)", sens->location, units); + /* Make up target temperatures. These are low, for the drive bay. */ + if (sc->sc_sensor.zone == 0) { + sc->sc_sensor.target_temp = 500 + ZERO_C_TO_K; + sc->sc_sensor.max_temp = 600 + ZERO_C_TO_K; + } + else { + sc->sc_sensor.target_temp = 300 + ZERO_C_TO_K; + sc->sc_sensor.max_temp = 600 + ZERO_C_TO_K; + } + + sc->sc_sensor.read = + (int (*)(struct pmac_therm *sc))(ds1775_sensor_read); + pmac_thermal_sensor_register(&sc->sc_sensor); + + sprintf(sysctl_desc,"%s (%s)", sc->sc_sensor.name, units); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(sensroot_oid), OID_AUTO, sysctl_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, @@ -216,44 +231,38 @@ ds1775_start(void *xdev) } static int -ds1775_sensor_read(device_t dev, struct ds1775_sensor *sens, int *temp) +ds1775_sensor_read(struct ds1775_softc *sc) { - struct ds1775_softc *sc; uint16_t buf[2]; uint16_t read; + int err; - sc = device_get_softc(dev); - - ds1775_read_2(sc->sc_dev, sc->sc_addr, DS1775_TEMP, buf); + err = ds1775_read_2(sc->sc_dev, sc->sc_addr, DS1775_TEMP, buf); + if (err < 0) + return (-1); read = *((int16_t *)buf); /* The default mode of the ADC is 9 bit, the 
resolution is 0.5 C per bit. The temperature is in tenth kelvin. */ - *temp = ((int16_t)(read) >> 7) * 5; - - return (0); + return (((int16_t)(read) >> 7) * 5 + ZERO_C_TO_K); } + static int ds1775_sensor_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev; struct ds1775_softc *sc; - struct ds1775_sensor *sens; - int value; int error; unsigned int temp; dev = arg1; sc = device_get_softc(dev); - sens = &sc->sc_sensors[arg2]; - error = ds1775_sensor_read(dev, sens, &value); - if (error != 0) - return (error); - - temp = value + FCU_ZERO_C_TO_K; + temp = ds1775_sensor_read(sc); + if (temp < 0) + return (EIO); error = sysctl_handle_int(oidp, &temp, 0, req); diff --git a/sys/dev/iicbus/max6690.c b/sys/dev/iicbus/max6690.c index cbfdc26..83f3b50 100644 --- a/sys/dev/iicbus/max6690.c +++ b/sys/dev/iicbus/max6690.c @@ -49,20 +49,22 @@ __FBSDID("$FreeBSD$"); #include <dev/ofw/openfirm.h> #include <dev/ofw/ofw_bus.h> - -#define FCU_ZERO_C_TO_K 2732 +#include <powerpc/powermac/powermac_thermal.h> /* Inlet, Backside, U3 Heatsink sensor: MAX6690. 
*/ #define MAX6690_INT_TEMP 0x0 #define MAX6690_EXT_TEMP 0x1 +#define MAX6690_RSL_STATUS 0x2 #define MAX6690_EEXT_TEMP 0x10 #define MAX6690_IEXT_TEMP 0x11 #define MAX6690_TEMP_MASK 0xe0 struct max6690_sensor { + struct pmac_therm therm; + device_t dev; + int id; - char location[32]; }; /* Regular bus attachment functions */ @@ -70,10 +72,11 @@ static int max6690_probe(device_t); static int max6690_attach(device_t); /* Utility functions */ +static int max6690_sensor_read(struct max6690_sensor *sens); static int max6690_sensor_sysctl(SYSCTL_HANDLER_ARGS); static void max6690_start(void *xdev); -static int max6690_read_1(device_t dev, uint32_t addr, uint8_t reg, - uint8_t *data); +static int max6690_read(device_t dev, uint32_t addr, uint8_t reg, + uint8_t *data); struct max6690_softc { device_t sc_dev; @@ -101,23 +104,43 @@ DRIVER_MODULE(max6690, iicbus, max6690_driver, max6690_devclass, 0, 0); MALLOC_DEFINE(M_MAX6690, "max6690", "Temp-Monitor MAX6690"); static int -max6690_read_1(device_t dev, uint32_t addr, uint8_t reg, uint8_t *data) +max6690_read(device_t dev, uint32_t addr, uint8_t reg, uint8_t *data) { uint8_t buf[4]; - - struct iic_msg msg[2] = { + uint8_t busy[1], rsl; + int err, try = 0; + + /* Busy register RSL. */ + rsl = MAX6690_RSL_STATUS; + /* first read the status register, 0x2. If busy, retry. */ + struct iic_msg msg[4] = { + { addr, IIC_M_WR | IIC_M_NOSTOP, 1, &rsl }, + { addr, IIC_M_RD, 1, busy }, { addr, IIC_M_WR | IIC_M_NOSTOP, 1, ® }, { addr, IIC_M_RD, 1, buf }, }; - if (iicbus_transfer(dev, msg, 2) != 0) { - device_printf(dev, "iicbus read failed\n"); - return (EIO); + for (;;) + { + err = iicbus_transfer(dev, msg, 4); + if (err != 0) + goto retry; + if (busy[0] & 0x80) + goto retry; + /* Check for invalid value and retry. 
*/ + if (buf[0] == 0xff) + goto retry; + + *data = *((uint8_t*)buf); + return (0); + + retry: + if (++try > 5) { + device_printf(dev, "iicbus read failed\n"); + return (-1); + } + pause("max6690_read", hz); } - - *data = *((uint8_t*)buf); - - return (0); } static int @@ -167,7 +190,7 @@ max6690_fill_sensor_prop(device_t dev) sizeof(location)); while (len < prop_len) { if (sc->sc_sensors != NULL) - strcpy(sc->sc_sensors[i].location, location + len); + strcpy(sc->sc_sensors[i].therm.name, location + len); prev_len = strlen(location + len) + 1; len += prev_len; i++; @@ -180,6 +203,22 @@ max6690_fill_sensor_prop(device_t dev) for (j = 0; j < i; j++) sc->sc_sensors[j].id = (id[j] & 0xf); + /* Fill the sensor zone property. */ + prop_len = OF_getprop(child, "hwsensor-zone", id, sizeof(id)); + for (j = 0; j < i; j++) + sc->sc_sensors[j].therm.zone = id[j]; + + /* Set up remaining sensor properties */ + for (j = 0; j < i; j++) { + sc->sc_sensors[j].dev = dev; + + sc->sc_sensors[j].therm.target_temp = 400 + ZERO_C_TO_K; + sc->sc_sensors[j].therm.max_temp = 800 + ZERO_C_TO_K; + + sc->sc_sensors[j].therm.read = + (int (*)(struct pmac_therm *))(max6690_sensor_read); + } + return (i); } static int @@ -240,10 +279,15 @@ max6690_start(void *xdev) /* Now we can fill the properties into the allocated struct. */ sc->sc_nsensors = max6690_fill_sensor_prop(dev); + /* Register with powermac_thermal */ + for (i = 0; i < sc->sc_nsensors; i++) + pmac_thermal_sensor_register(&sc->sc_sensors[i].therm); + /* Add sysctls for the sensors. 
*/ for (i = 0; i < sc->sc_nsensors; i++) { - for (j = 0; j < strlen(sc->sc_sensors[i].location); j++) { - sysctl_name[j] = tolower(sc->sc_sensors[i].location[j]); + for (j = 0; j < strlen(sc->sc_sensors[i].therm.name); j++) { + sysctl_name[j] = + tolower(sc->sc_sensors[i].therm.name[j]); if (isspace(sysctl_name[j])) sysctl_name[j] = '_'; } @@ -265,7 +309,7 @@ max6690_start(void *xdev) device_printf(dev, "Sensors\n"); for (i = 0; i < sc->sc_nsensors; i++) { device_printf(dev, "Location : %s ID: %d\n", - sc->sc_sensors[i].location, + sc->sc_sensors[i].therm.name, sc->sc_sensors[i].id); } } @@ -274,16 +318,18 @@ max6690_start(void *xdev) } static int -max6690_sensor_read(device_t dev, struct max6690_sensor *sens, int *temp) +max6690_sensor_read(struct max6690_sensor *sens) { uint8_t reg_int = 0, reg_ext = 0; - uint8_t integer; - uint8_t fraction; + uint8_t integer = 0; + uint8_t fraction = 0; + int err, temp; + struct max6690_softc *sc; - sc = device_get_softc(dev); + sc = device_get_softc(sens->dev); - /* The internal sensor id's are even, the external ar odd. */ + /* The internal sensor id's are even, the external are odd. */ if ((sens->id % 2) == 0) { reg_int = MAX6690_INT_TEMP; reg_ext = MAX6690_IEXT_TEMP; @@ -292,18 +338,20 @@ max6690_sensor_read(device_t dev, struct max6690_sensor *sens, int *temp) reg_ext = MAX6690_EEXT_TEMP; } - max6690_read_1(sc->sc_dev, sc->sc_addr, reg_int, &integer); + err = max6690_read(sc->sc_dev, sc->sc_addr, reg_int, &integer); + err = max6690_read(sc->sc_dev, sc->sc_addr, reg_ext, &fraction); - max6690_read_1(sc->sc_dev, sc->sc_addr, reg_ext, &fraction); + if (err < 0) + return (-1); fraction &= MAX6690_TEMP_MASK; /* The temperature is in tenth kelvin, the fractional part resolution is 0.125. 
*/ - *temp = (integer * 10) + (fraction >> 5) * 10 / 8; + temp = (integer * 10) + (fraction >> 5) * 10 / 8; - return (0); + return (temp + ZERO_C_TO_K); } static int @@ -312,7 +360,6 @@ max6690_sensor_sysctl(SYSCTL_HANDLER_ARGS) device_t dev; struct max6690_softc *sc; struct max6690_sensor *sens; - int value = 0; int error; unsigned int temp; @@ -320,11 +367,9 @@ max6690_sensor_sysctl(SYSCTL_HANDLER_ARGS) sc = device_get_softc(dev); sens = &sc->sc_sensors[arg2]; - error = max6690_sensor_read(dev, sens, &value); - if (error != 0) - return (error); - - temp = value + FCU_ZERO_C_TO_K; + temp = max6690_sensor_read(sens); + if (temp < 0) + return (EIO); error = sysctl_handle_int(oidp, &temp, 0, req); diff --git a/sys/dev/ipw/if_ipw.c b/sys/dev/ipw/if_ipw.c index db76bfa..7560430 100644 --- a/sys/dev/ipw/if_ipw.c +++ b/sys/dev/ipw/if_ipw.c @@ -199,6 +199,8 @@ static devclass_t ipw_devclass; DRIVER_MODULE(ipw, pci, ipw_driver, ipw_devclass, 0, 0); +MODULE_VERSION(ipw, 1); + static int ipw_probe(device_t dev) { diff --git a/sys/dev/iwi/if_iwi.c b/sys/dev/iwi/if_iwi.c index dc81309..73b861c 100644 --- a/sys/dev/iwi/if_iwi.c +++ b/sys/dev/iwi/if_iwi.c @@ -232,6 +232,8 @@ static devclass_t iwi_devclass; DRIVER_MODULE(iwi, pci, iwi_driver, iwi_devclass, 0, 0); +MODULE_VERSION(iwi, 1); + static __inline uint8_t MEM_READ_1(struct iwi_softc *sc, uint32_t addr) { diff --git a/sys/dev/iwn/if_iwn.c b/sys/dev/iwn/if_iwn.c index 29e391f..ab3dec7 100644 --- a/sys/dev/iwn/if_iwn.c +++ b/sys/dev/iwn/if_iwn.c @@ -401,6 +401,8 @@ static devclass_t iwn_devclass; DRIVER_MODULE(iwn, pci, iwn_driver, iwn_devclass, 0, 0); +MODULE_VERSION(iwn, 1); + MODULE_DEPEND(iwn, firmware, 1, 1, 1); MODULE_DEPEND(iwn, pci, 1, 1, 1); MODULE_DEPEND(iwn, wlan, 1, 1, 1); @@ -565,6 +567,7 @@ iwn_attach(device_t dev) ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_MONITOR /* monitor mode supported */ + | IEEE80211_C_BGSCAN /* background scanning */ | IEEE80211_C_TXPMGT /* tx power 
management */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WPA @@ -574,8 +577,6 @@ iwn_attach(device_t dev) #endif | IEEE80211_C_WME /* WME */ ; - if (sc->hw_type != IWN_HW_REV_TYPE_4965) - ic->ic_caps |= IEEE80211_C_BGSCAN; /* background scanning */ /* Read MAC address, channels, etc from EEPROM. */ if ((error = iwn_read_eeprom(sc, macaddr)) != 0) { @@ -605,9 +606,9 @@ iwn_attach(device_t dev) ic->ic_htcaps = IEEE80211_HTCAP_SMPS_OFF /* SMPS mode disabled */ | IEEE80211_HTCAP_SHORTGI20 /* short GI in 20MHz */ -#ifdef notyet | IEEE80211_HTCAP_CHWIDTH40 /* 40MHz channel width*/ | IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */ +#ifdef notyet | IEEE80211_HTCAP_GREENFIELD #if IWN_RBUF_SIZE == 8192 | IEEE80211_HTCAP_MAXAMSDU_7935 /* max A-MSDU length */ @@ -2104,6 +2105,7 @@ rate2plcp(int rate) static void iwn_newassoc(struct ieee80211_node *ni, int isnew) { +#define RV(v) ((v) & IEEE80211_RATE_VAL) struct ieee80211com *ic = ni->ni_ic; struct iwn_softc *sc = ic->ic_ifp->if_softc; struct iwn_node *wn = (void *)ni; @@ -2117,7 +2119,7 @@ iwn_newassoc(struct ieee80211_node *ni, int isnew) if (IEEE80211_IS_CHAN_HT(ni->ni_chan)) { ridx = ni->ni_rates.rs_nrates - 1; for (i = ni->ni_htrates.rs_nrates - 1; i >= 0; i--) { - plcp = ni->ni_htrates.rs_rates[i] | IWN_RFLAG_MCS; + plcp = RV(ni->ni_htrates.rs_rates[i]) | IWN_RFLAG_MCS; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { plcp |= IWN_RFLAG_HT40; if (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) @@ -2129,8 +2131,7 @@ iwn_newassoc(struct ieee80211_node *ni, int isnew) else plcp |= IWN_RFLAG_ANT(txant1); if (ridx >= 0) { - rate = ni->ni_rates.rs_rates[ridx]; - rate &= IEEE80211_RATE_VAL; + rate = RV(ni->ni_rates.rs_rates[ridx]); wn->ridx[rate] = plcp; } wn->ridx[IEEE80211_RATE_MCS | i] = plcp; @@ -2138,8 +2139,7 @@ iwn_newassoc(struct ieee80211_node *ni, int isnew) } } else { for (i = 0; i < ni->ni_rates.rs_nrates; i++) { - rate = ni->ni_rates.rs_rates[i] & IEEE80211_RATE_VAL; - + rate = 
RV(ni->ni_rates.rs_rates[i]); plcp = rate2plcp(rate); ridx = ic->ic_rt->rateCodeToIndex[rate]; if (ridx < IWN_RIDX_OFDM6 && @@ -2149,6 +2149,7 @@ iwn_newassoc(struct ieee80211_node *ni, int isnew) wn->ridx[rate] = htole32(plcp); } } +#undef RV } static int @@ -3313,7 +3314,8 @@ iwn_tx_data(struct iwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) } ac = M_WME_GETAC(m); - if (IEEE80211_AMPDU_RUNNING(&ni->ni_tx_ampdu[ac])) { + if (IEEE80211_QOS_HAS_SEQ(wh) && + IEEE80211_AMPDU_RUNNING(&ni->ni_tx_ampdu[ac])) { struct ieee80211_tx_ampdu *tap = &ni->ni_tx_ampdu[ac]; ring = &sc->txq[*(int *)tap->txa_private]; @@ -3991,6 +3993,7 @@ iwn5000_add_node(struct iwn_softc *sc, struct iwn_node_info *node, int async) static int iwn_set_link_quality(struct iwn_softc *sc, struct ieee80211_node *ni) { +#define RV(v) ((v) & IEEE80211_RATE_VAL) struct iwn_node *wn = (void *)ni; struct ieee80211_rateset *rs = &ni->ni_rates; struct iwn_cmd_link_quality linkq; @@ -4017,11 +4020,11 @@ iwn_set_link_quality(struct iwn_softc *sc, struct ieee80211_node *ni) if (IEEE80211_IS_CHAN_HT(ni->ni_chan)) rate = IEEE80211_RATE_MCS | txrate; else - rate = rs->rs_rates[txrate] & IEEE80211_RATE_VAL; + rate = RV(rs->rs_rates[txrate]); linkq.retry[i] = wn->ridx[rate]; if ((le32toh(wn->ridx[rate]) & IWN_RFLAG_MCS) && - (le32toh(wn->ridx[rate]) & 0xff) > 7) + RV(le32toh(wn->ridx[rate])) > 7) linkq.mimo = i + 1; /* Next retry at immediate lower bit-rate. */ @@ -4029,6 +4032,7 @@ iwn_set_link_quality(struct iwn_softc *sc, struct ieee80211_node *ni) txrate--; } return iwn_cmd(sc, IWN_CMD_LINK_QUALITY, &linkq, sizeof linkq, 1); +#undef RV } /* @@ -5159,7 +5163,7 @@ iwn_scan(struct iwn_softc *sc) if (IEEE80211_IS_CHAN_A(ic->ic_curchan) && sc->hw_type == IWN_HW_REV_TYPE_4965) { /* Ant A must be avoided in 5GHz because of an HW bug. */ - rxchain |= IWN_RXCHAIN_FORCE_SEL(IWN_ANT_BC); + rxchain |= IWN_RXCHAIN_FORCE_SEL(IWN_ANT_B); } else /* Use all available RX antennas. 
*/ rxchain |= IWN_RXCHAIN_FORCE_SEL(sc->rxchainmask); hdr->rxchain = htole16(rxchain); @@ -5170,14 +5174,19 @@ iwn_scan(struct iwn_softc *sc) tx->id = sc->broadcast_id; tx->lifetime = htole32(IWN_LIFETIME_INFINITE); - if (IEEE80211_IS_CHAN_A(ic->ic_curchan)) { + if (IEEE80211_IS_CHAN_5GHZ(ic->ic_curchan)) { /* Send probe requests at 6Mbps. */ tx->rate = htole32(0xd); rs = &ic->ic_sup_rates[IEEE80211_MODE_11A]; } else { hdr->flags = htole32(IWN_RXON_24GHZ | IWN_RXON_AUTO); - /* Send probe requests at 1Mbps. */ - tx->rate = htole32(10 | IWN_RFLAG_CCK); + if (sc->hw_type == IWN_HW_REV_TYPE_4965 && + sc->rxon.associd && sc->rxon.chan > 14) + tx->rate = htole32(0xd); + else { + /* Send probe requests at 1Mbps. */ + tx->rate = htole32(10 | IWN_RFLAG_CCK); + } rs = &ic->ic_sup_rates[IEEE80211_MODE_11G]; } /* Use the first valid TX antenna. */ diff --git a/sys/dev/ixgbe/LICENSE b/sys/dev/ixgbe/LICENSE index 0cf44c8..0d4f1db 100644 --- a/sys/dev/ixgbe/LICENSE +++ b/sys/dev/ixgbe/LICENSE @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2010, Intel Corporation + Copyright (c) 2001-2011, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/ixgbe/README b/sys/dev/ixgbe/README index 0b27deb..5e4921f 100644 --- a/sys/dev/ixgbe/README +++ b/sys/dev/ixgbe/README @@ -1,8 +1,8 @@ -FreeBSD Driver for 10 Gigabit PCI Express Server Adapters -============================================= +FreeBSD Driver for Intel(R) Ethernet 10 Gigabit PCI Express Server Adapters +============================================================================ /*$FreeBSD$*/ -May 14, 2008 +November 12, 2010 Contents @@ -11,15 +11,15 @@ Contents - Overview - Supported Adapters - Building and Installation -- Additional Configurations +- Additional Configurations and Tuning - Known Limitations Overview ======== -This file describes the FreeBSD* driver for the 10 Gigabit PCIE Family of -Adapters. Drivers has been developed for use with FreeBSD 7 or later. +This file describes the FreeBSD* driver for the Intel(R) Ethernet 10 Gigabit +Family of Adapters. Driver has been developed for use with FreeBSD 7.2 or later. For questions related to hardware requirements, refer to the documentation supplied with your Intel 10GbE adapter. All hardware requirements listed @@ -29,100 +29,98 @@ apply to use with FreeBSD. Supported Adapters ================== -The following Intel network adapters are compatible with the drivers in this -release: - -Controller Adapter Name Physical Layer ----------- ------------ -------------- -82598EB Intel(R) 10 Gigabit XF SR/AF 10G Base -LR (850 nm optical fiber) - Dual Port Server Adapter 10G Base -SR (1310 nm optical fiber) -82598EB Intel(R) 10 Gigabit XF SR/LR - Server Adapter - Intel(R) 82598EB 10 Gigabit AF - Network Connection - Intel(R) 82598EB 10 Gigabit AT - CX4 Network Connection +The driver in this release is compatible with 82598 and 82599-based Intel +Network Connections. 
+SFP+ Devices with Pluggable Optics +---------------------------------- -Building and Installation -========================= +82599-BASED ADAPTERS -NOTE: You must have kernel sources installed in order to compile the driver - module. - - In the instructions below, x.x.x is the driver version as indicated in - the name of the driver tar. - -1. Move the base driver tar file to the directory of your choice. For - example, use /home/username/ixgbe or /usr/local/src/ixgbe. - -2. Untar/unzip the archive: - tar xfz ixgbe-x.x.x.tar.gz - -3. To install man page: - cd ixgbe-x.x.x - gzip -c ixgbe.4 > /usr/share/man/man4/ixgbee.4.gz - -4. To load the driver onto a running system: - cd ixgbe-x.x.x/src - make load - -5. To assign an IP address to the interface, enter the following: - ifconfig ix<interface_num> <IP_address> - -6. Verify that the interface works. Enter the following, where <IP_address> - is the IP address for another machine on the same subnet as the interface - that is being tested: - ping <IP_address> - -7. If you want the driver to load automatically when the system is booted: - - cd ixgbe-x.x.x/src - make - make install - - Edit /boot/loader.conf, and add the following line: - ixgbe_load="YES" - - OR - - compile the driver into the kernel (see item 8). - - - Edit /etc/rc.conf, and create the appropriate ifconfig_ixgbe<interface_num> - entry: - - ifconfig_ix<interface_num>="<ifconfig_settings>" - - Example usage: - - ifconfig_ix0="inet 192.168.10.1 netmask 255.255.255.0" - - NOTE: For assistance, see the ifconfig man page. - -8. 
If you want to compile the driver into the kernel, enter: - - FreeBSD 7 or later: - - cd ixgbe-x.x.x/src - - cp *.[ch] /usr/src/sys/dev/ixgbe - - cp Makefile.kernel /usr/src/sys/modules/ixgbe/Makefile - - Edit the kernel configuration file (i.e., GENERIC or MYKERNEL) in - /usr/src/sys/i386/conf (replace "i386" with the appropriate system - architecture if necessary), and ensure the following line is present: - - device ixgbe - - Compile and install the kernel. The system must be reboot for the kernel - updates to take affect. For additional information on compiling the kernel, - consult the FreeBSD operating system documentation. +NOTE: If your 82599-based Intel(R) Ethernet Network Adapter came with Intel +optics, or is an Intel(R) Ethernet Server Adapter X520-2, then it only supports +Intel optics and/or the direct attach cables listed below. +When 82599-based SFP+ devices are connected back to back, they should be set to +the same Speed setting via Ethtool. Results may vary if you mix speed settings. + +Supplier Type Part Numbers + +SR Modules +Intel DUAL RATE 1G/10G SFP+ SR (bailed) FTLX8571D3BCV-IT +Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDZ-IN2 +Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDDZ-IN1 +LR Modules +Intel DUAL RATE 1G/10G SFP+ LR (bailed) FTLX1471D3BCV-IT +Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDZ-IN2 +Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDDZ-IN1 + +The following is a list of 3rd party SFP+ modules and direct attach cables that +have received some testing. Not all modules are applicable to all devices. 
+ +Supplier Type Part Numbers + +Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL +Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ +Finisar SFP+ LR bailed, 10g single rate FTLX8571D3BCV-IT + +Finisar DUAL RATE 1G/10G SFP+ SR (No Bail) FTLX8571D3QCV-IT +Avago DUAL RATE 1G/10G SFP+ SR (No Bail) AFBR-703SDZ-IN1 +Finisar DUAL RATE 1G/10G SFP+ LR (No Bail) FTLX1471D3QCV-IT +Avago DUAL RATE 1G/10G SFP+ LR (No Bail) AFCT-701SDZ-IN1 +Finistar 1000BASE-T SFP FCLF8522P2BTL +Avago 1000BASE-T SFP ABCU-5710RZ + +82599-based adapters support all passive and active limiting direct attach +cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. + +Laser turns off for SFP+ when ifconfig down +-------------------------------------------------------- +"ifconfig down" turns off the laser for 82599-based SFP+ fiber adapters. +"ifconfig up" turns on the later. + +82598-BASED ADAPTERS + +NOTES for 82598-Based Adapters: +- Intel(R) Ethernet Network Adapters that support removable optical modules + only support their original module type (i.e., the Intel(R) 10 Gigabit SR + Dual Port Express Module only supports SR optical modules). If you plug + in a different type of module, the driver will not load. +- Hot Swapping/hot plugging optical modules is not supported. +- Only single speed, 10 gigabit modules are supported. +- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module + types are not supported. Please see your system documentation for details. + +The following is a list of 3rd party SFP+ modules and direct attach cables that have +received some testing. Not all modules are applicable to all devices. + +Supplier Type Part Numbers + +Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL +Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ +Finisar SFP+ LR bailed, 10g single rate FTLX1471D3BCL + +82598-based adapters support all passive direct attach cables that comply +with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. 
Active direct attach +cables are not supported. + +Third party optic modules and cables referred to above are listed only for the +purpose of highlighting third party specifications and potential compatibility, +and are not recommendations or endorsements or sponsorship of any third party's +product by Intel. Intel is not endorsing or promoting products made by any +third party and the third party reference is provided only to share information +regarding certain optic modules and cables with the above specifications. There +may be other manufacturers or suppliers, producing or supplying optic modules +and cables with similar or matching descriptions. Customers must use their own +discretion and diligence to purchase optic modules and cables from any third +party of their choice. Customer are solely responsible for assessing the +suitability of the product and/or devices and for the selection of the vendor +for purchasing any product. INTEL ASSUMES NO LIABILITY WHATSOEVER, AND INTEL +DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY, RELATING TO SALE AND/OR USE OF +SUCH THIRD PARTY PRODUCTS OR SELECTION OF VENDOR BY CUSTOMERS. Configuration and Tuning -========================= +======================== The driver supports Transmit/Receive Checksum Offload and Jumbo Frames on all 10 Gigabit adapters. @@ -143,7 +141,7 @@ all 10 Gigabit adapters. The Jumbo Frames MTU range for Intel Adapters is 1500 to 16114. The default MTU range is 1500. To modify the setting, enter the following: - ifconfig ix <interface_num> <hostname or IP address> mtu 9000 + ifconfig ix<interface_num> <hostname or IP address> mtu 9000 To confirm an interface's MTU value, use the ifconfig command. To confirm the MTU used between two specific devices, use: @@ -200,6 +198,8 @@ all 10 Gigabit adapters. TSO --- + TSO is enabled by default. + To disable: ifconfig <interface_num> -tso @@ -209,23 +209,21 @@ all 10 Gigabit adapters. 
ifconfig <interface_num> tso LRO - ___ + --- - Large Receive Offload is available in version 1.4.4, it is on - by default. It can be toggled off and on by using: - sysctl dev.ix.X.enable_lro=[0,1] + Large Receive Offload is available in the driver; it is on by default. + It can be disabled by using: + ifconfig <interface_num> -lro + To enable: + ifconfig <interface_num> lro - NOTE: when changing this feature you MUST be sure the interface - is reinitialized, it is easy to do this with ifconfig down/up. - The LRO code will ultimately move into the kernel stack code, - but for this first release it was included with the driver. Important system configuration changes: --------------------------------------- - When there is a choice run on a 64bit OS rather than 32, it makes - a significant difference in improvement. - + When there is a choice run on a 64bit OS rather than 32, it makes a + significant difference in improvement. + The default scheduler SCHED_4BSD is not smart about SMP locality issues. Significant improvement can be achieved by switching to the ULE scheduler. @@ -233,34 +231,79 @@ all 10 Gigabit adapters. SCHED_ULE. Note that this is only advisable on FreeBSD 7, on 6.X there have been stability problems with ULE. - Change the file /etc/sysctl.conf, add the line: + The interface can generate high number of interrupts. To avoid running + into the limit set by the kernel, adjust hw.intr_storm_threshold + setting using sysctl: - hw.intr_storm_threshold: 8000 (the default is 1000) + sysctl hw.intr_storm_threshold=9000 (the default is 1000) + + For this change to take effect on boot, edit /etc/sysctl.conf and add the + line: + hw.intr_storm_threshold=9000 + + If you still see Interrupt Storm detected messages, increase the limit to a + higher number. Best throughput results are seen with a large MTU; use 9000 if possible. - The default number of descriptors is 256, increasing this to 1024 or even - 2048 may improve performance. 
+ The default number of descriptors is 1024, increasing this to 2K or even + 4K may improve performance in some workloads, but change carefully. Known Limitations ================= + +For known hardware and troubleshooting issues, refer to the following website. + + http://support.intel.com/support/go/network/adapter/home.htm + +Either select the link for your adapter or perform a search for the adapter +number. The adapter's page lists many issues. For a complete list of hardware +issues download your adapter's user guide and read the Release Notes. + + UDP stress test with 10GbE driver + --------------------------------- Under small packets UDP stress test with 10GbE driver, the FreeBSD system will drop UDP packets due to the fullness of socket buffers. You may want to change the driver's Flow Control variables to the minimum value for controlling packet reception. + Attempting to configure larger MTUs with a large number of processors may + generate the error message "ix0:could not setup receive structures" + -------------------------------------------------------------------------- + When using the ixgbe driver with RSS autoconfigured based on the number of + cores (the default setting) and that number is larger than 4, increase the + memory resources allocated for the mbuf pool as follows: + + Add to the sysctl.conf file for the system: + + kern.ipc.nmbclusters=262144 + kern.ipc.nmbjumbop=262144 + + Lower than expected performance on dual port 10GbE devices + ---------------------------------------------------------- + Some PCI-E x8 slots are actually configured as x4 slots. These slots have + insufficient bandwidth for full 10GbE line rate with dual port 10GbE devices. + The driver can detect this situation and will write the following message in + the system log: "PCI-Express bandwidth available for this card is not + sufficient for optimal performance. For optimal performance a x8 PCI-Express + slot is required." 
+ + If this error occurs, moving your adapter to a true x8 slot will resolve the + issue. + + Support ======= For general information and support, go to the Intel support website at: - http://support.intel.com + www.intel.com/support/ If an issue is identified with the released source code on the supported kernel with a supported adapter, email the specific information related to -the issue to freebsd@intel.com. +the issue to freebsd@intel.com diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index f58adbe..a382a53 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -34,6 +34,7 @@ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_inet.h" +#include "opt_inet6.h" #endif #include "ixgbe.h" @@ -46,7 +47,7 @@ int ixgbe_display_debug_stats = 0; /********************************************************************* * Driver version *********************************************************************/ -char ixgbe_driver_version[] = "2.3.10"; +char ixgbe_driver_version[] = "2.3.11"; /********************************************************************* * PCI Device ID Table @@ -318,7 +319,7 @@ static int fdir_pballoc = 1; * ixgbe_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
* - * return 0 on success, positive on failure + * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int @@ -357,7 +358,7 @@ ixgbe_probe(device_t dev) ixgbe_driver_version); device_set_desc_copy(dev, adapter_name); ++ixgbe_total_ports; - return (0); + return (BUS_PROBE_DEFAULT); } ent++; } @@ -385,6 +386,11 @@ ixgbe_attach(device_t dev) INIT_DEBUGOUT("ixgbe_attach: begin"); + if (resource_disabled("ixgbe", device_get_unit(dev))) { + device_printf(dev, "Disabled by device hint\n"); + return (ENXIO); + } + /* Allocate, clear, and link in our adapter structure */ adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; @@ -862,8 +868,9 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; -#ifdef INET +#if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; + bool avoid_reset = FALSE; #endif int error = 0; @@ -871,26 +878,28 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) case SIOCSIFADDR: #ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) { - /* - * Since resetting hardware takes a very long time - * and results in link renegotiation we only - * initialize the hardware only when it is absolutely - * required. - */ + if (ifa->ifa_addr->sa_family == AF_INET) + avoid_reset = TRUE; +#endif +#ifdef INET6 + if (ifa->ifa_addr->sa_family == AF_INET6) + avoid_reset = TRUE; +#endif +#if defined(INET) || defined(INET6) + /* + ** Calling init results in link renegotiation, + ** so we avoid doing it when possible. 
+ */ + if (avoid_reset) { ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - IXGBE_CORE_LOCK(adapter); - ixgbe_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - } + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + ixgbe_init(adapter); if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); } else -#endif error = ether_ioctl(ifp, command, data); break; - +#endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) { @@ -951,6 +960,8 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; + if (mask & IFCAP_VLAN_HWTSO) + ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); @@ -1338,7 +1349,7 @@ ixgbe_legacy_irq(void *arg) /********************************************************************* * - * MSI Queue Interrupt Service routine + * MSIX Queue Interrupt Service routine * **********************************************************************/ void @@ -1357,6 +1368,17 @@ ixgbe_msix_que(void *arg) IXGBE_TX_LOCK(txr); more_tx = ixgbe_txeof(txr); + /* + ** Make certain that if the stack + ** has anything queued the task gets + ** scheduled to handle it. + */ +#if __FreeBSD_version < 800000 + if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd)) +#else + if (!drbr_empty(adapter->ifp, txr->br)) +#endif + more_tx = 1; IXGBE_TX_UNLOCK(txr); /* Do AIM now? 
*/ @@ -1570,7 +1592,7 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) struct mbuf *m_head; bus_dma_segment_t segs[adapter->num_segs]; bus_dmamap_t map; - struct ixgbe_tx_buf *txbuf, *txbuf_mapped; + struct ixgbe_tx_buf *txbuf; union ixgbe_adv_tx_desc *txd = NULL; m_head = *m_headp; @@ -1589,7 +1611,6 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; - txbuf_mapped = txbuf; map = txbuf->map; /* @@ -1708,6 +1729,8 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) txr->next_avail_desc = i; txbuf->m_head = m_head; + /* Swap the dma map between the first and last descriptor */ + txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); @@ -2265,7 +2288,9 @@ ixgbe_setup_msix(struct adapter *adapter) msi: msgs = pci_msi_count(dev); if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) - device_printf(adapter->dev,"Using MSI interrupt\n"); + device_printf(adapter->dev,"Using an MSI interrupt\n"); + else + device_printf(adapter->dev,"Using a Legacy interrupt\n"); return (msgs); } @@ -2412,19 +2437,21 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter) ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_JUMBO_MTU; + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; /* Don't enable LRO by default */ ifp->if_capabilities |= IFCAP_LRO; /* - ** Dont turn this on by default, if vlans are + ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. 
If - ** using vlans directly on the em driver you can + ** using vlans directly on the ixgbe driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; @@ -5333,7 +5360,7 @@ ixgbe_add_rx_process_limit(struct adapter *adapter, const char *name, static int ixgbe_set_advertise(SYSCTL_HANDLER_ARGS) { - int error; + int error = 0; struct adapter *adapter; struct ixgbe_hw *hw; ixgbe_link_speed speed, last; diff --git a/sys/dev/ixgbe/ixv.c b/sys/dev/ixgbe/ixv.c index f9f910a..e52a527 100644 --- a/sys/dev/ixgbe/ixv.c +++ b/sys/dev/ixgbe/ixv.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2010, Intel Corporation + Copyright (c) 2001-2011, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,7 +33,8 @@ /*$FreeBSD$*/ #ifdef HAVE_KERNEL_OPTION_HEADERS -#include "opt_device_polling.h" +#include "opt_inet.h" +#include "opt_inet6.h" #endif #include "ixv.h" @@ -41,7 +42,7 @@ /********************************************************************* * Driver version *********************************************************************/ -char ixv_driver_version[] = "1.0.0"; +char ixv_driver_version[] = "1.0.1"; /********************************************************************* * PCI Device ID Table @@ -234,7 +235,7 @@ static u32 ixv_shadow_vfta[VFTA_SIZE]; * ixv_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
* - * return 0 on success, positive on failure + * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int @@ -271,7 +272,7 @@ ixv_probe(device_t dev) ixv_strings[ent->index], ixv_driver_version); device_set_desc_copy(dev, adapter_name); - return (0); + return (BUS_PROBE_DEFAULT); } ent++; } @@ -297,6 +298,11 @@ ixv_attach(device_t dev) INIT_DEBUGOUT("ixv_attach: begin"); + if (resource_disabled("ixgbe", device_get_unit(dev))) { + device_printf(dev, "Disabled by device hint\n"); + return (ENXIO); + } + /* Allocate, clear, and link in our adapter structure */ adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; @@ -690,10 +696,38 @@ ixv_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; +#if defined(INET) || defined(INET6) + struct ifaddr *ifa = (struct ifaddr *) data; + bool avoid_reset = FALSE; +#endif int error = 0; switch (command) { + case SIOCSIFADDR: +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) + avoid_reset = TRUE; +#endif +#ifdef INET6 + if (ifa->ifa_addr->sa_family == AF_INET6) + avoid_reset = TRUE; +#endif +#if defined(INET) || defined(INET6) + /* + ** Calling init results in link renegotiation, + ** so we avoid doing it when possible. 
+ */ + if (avoid_reset) { + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + ixv_init(adapter); + if (!(ifp->if_flags & IFF_NOARP)) + arp_ifinit(ifp, ifa); + } else + error = ether_ioctl(ifp, command, data); + break; +#endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXV_MAX_FRAME_SIZE - ETHER_HDR_LEN) { @@ -1161,7 +1195,7 @@ ixv_xmit(struct tx_ring *txr, struct mbuf **m_headp) struct mbuf *m_head; bus_dma_segment_t segs[32]; bus_dmamap_t map; - struct ixv_tx_buf *txbuf, *txbuf_mapped; + struct ixv_tx_buf *txbuf; union ixgbe_adv_tx_desc *txd = NULL; m_head = *m_headp; @@ -1180,7 +1214,6 @@ ixv_xmit(struct tx_ring *txr, struct mbuf **m_headp) */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; - txbuf_mapped = txbuf; map = txbuf->map; /* @@ -1283,6 +1316,7 @@ ixv_xmit(struct tx_ring *txr, struct mbuf **m_headp) txr->next_avail_desc = i; txbuf->m_head = m_head; + txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); @@ -1820,11 +1854,15 @@ ixv_setup_interface(device_t dev, struct adapter *adapter) ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; - ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_LRO; - + ifp->if_capabilities |= IFCAP_JUMBO_MTU; + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; + /* Don't enable LRO by default */ + ifp->if_capabilities |= IFCAP_LRO; + /* * Specify the media types supported by this adapter and register * callbacks to update media and link information diff --git a/sys/dev/mfi/mfi.c b/sys/dev/mfi/mfi.c index eb18ffe..1962648 100644 --- a/sys/dev/mfi/mfi.c +++ b/sys/dev/mfi/mfi.c @@ -788,7 +788,7 @@ mfi_aen_setup(struct mfi_softc *sc, uint32_t seq_start) 
class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; - class_locale.members.class = mfi_event_class; + class_locale.members.evt_class = mfi_event_class; if (seq_start == 0) { error = mfi_get_log_state(sc, &log_state); @@ -1082,8 +1082,8 @@ mfi_decode_evt(struct mfi_softc *sc, struct mfi_evt_detail *detail) { device_printf(sc->mfi_dev, "%d (%s/0x%04x/%s) - %s\n", detail->seq, - format_timestamp(detail->time), detail->class.members.locale, - format_class(detail->class.members.class), detail->description); + format_timestamp(detail->time), detail->evt_class.members.locale, + format_class(detail->evt_class.members.evt_class), detail->description); } static int @@ -1099,16 +1099,16 @@ mfi_aen_register(struct mfi_softc *sc, int seq, int locale) if (sc->mfi_aen_cm != NULL) { prior_aen.word = ((uint32_t *)&sc->mfi_aen_cm->cm_frame->dcmd.mbox)[1]; - if (prior_aen.members.class <= current_aen.members.class && + if (prior_aen.members.evt_class <= current_aen.members.evt_class && !((prior_aen.members.locale & current_aen.members.locale) ^current_aen.members.locale)) { return (0); } else { prior_aen.members.locale |= current_aen.members.locale; - if (prior_aen.members.class - < current_aen.members.class) - current_aen.members.class = - prior_aen.members.class; + if (prior_aen.members.evt_class + < current_aen.members.evt_class) + current_aen.members.evt_class = + prior_aen.members.evt_class; mfi_abort(sc, sc->mfi_aen_cm); } } @@ -1199,7 +1199,7 @@ mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; - class_locale.members.class = mfi_event_class; + class_locale.members.evt_class = mfi_event_class; size = sizeof(struct mfi_evt_list) + sizeof(struct mfi_evt_detail) * (MAX_EVENTS - 1); diff --git a/sys/dev/mfi/mfireg.h b/sys/dev/mfi/mfireg.h index e08a16d..efee827 100644 --- a/sys/dev/mfi/mfireg.h +++ b/sys/dev/mfi/mfireg.h @@ -719,7 +719,7 @@ union 
mfi_evt { struct { uint16_t locale; uint8_t reserved; - int8_t class; + int8_t evt_class; } members; uint32_t word; } __packed; @@ -755,7 +755,7 @@ struct mfi_evt_detail { uint32_t seq; uint32_t time; uint32_t code; - union mfi_evt class; + union mfi_evt evt_class; uint8_t arg_type; uint8_t reserved1[15]; diff --git a/sys/dev/mmc/mmc.c b/sys/dev/mmc/mmc.c index 45ddd56..6ff47c3 100644 --- a/sys/dev/mmc/mmc.c +++ b/sys/dev/mmc/mmc.c @@ -1445,37 +1445,37 @@ mmc_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) default: return (EINVAL); case MMC_IVAR_DSR_IMP: - *(int *)result = ivar->csd.dsr_imp; + *result = ivar->csd.dsr_imp; break; case MMC_IVAR_MEDIA_SIZE: - *(off_t *)result = ivar->sec_count; + *result = ivar->sec_count; break; case MMC_IVAR_RCA: - *(int *)result = ivar->rca; + *result = ivar->rca; break; case MMC_IVAR_SECTOR_SIZE: - *(int *)result = MMC_SECTOR_SIZE; + *result = MMC_SECTOR_SIZE; break; case MMC_IVAR_TRAN_SPEED: - *(int *)result = mmcbr_get_clock(bus); + *result = mmcbr_get_clock(bus); break; case MMC_IVAR_READ_ONLY: - *(int *)result = ivar->read_only; + *result = ivar->read_only; break; case MMC_IVAR_HIGH_CAP: - *(int *)result = ivar->high_cap; + *result = ivar->high_cap; break; case MMC_IVAR_CARD_TYPE: - *(int *)result = ivar->mode; + *result = ivar->mode; break; case MMC_IVAR_BUS_WIDTH: - *(int *)result = ivar->bus_width; + *result = ivar->bus_width; break; case MMC_IVAR_ERASE_SECTOR: - *(int *)result = ivar->erase_sector; + *result = ivar->erase_sector; break; case MMC_IVAR_MAX_DATA: - *(int *)result = mmcbr_get_max_data(bus); + *result = mmcbr_get_max_data(bus); break; } return (0); diff --git a/sys/dev/mmc/mmcvar.h b/sys/dev/mmc/mmcvar.h index 9126439..a28d3ac 100644 --- a/sys/dev/mmc/mmcvar.h +++ b/sys/dev/mmc/mmcvar.h @@ -79,7 +79,7 @@ enum mmc_device_ivars { __BUS_ACCESSOR(mmc, var, MMC, ivar, type) MMC_ACCESSOR(dsr_imp, DSR_IMP, int) -MMC_ACCESSOR(media_size, MEDIA_SIZE, off_t) +MMC_ACCESSOR(media_size, MEDIA_SIZE, 
long) MMC_ACCESSOR(rca, RCA, int) MMC_ACCESSOR(sector_size, SECTOR_SIZE, int) MMC_ACCESSOR(tran_speed, TRAN_SPEED, int) diff --git a/sys/dev/msk/if_msk.c b/sys/dev/msk/if_msk.c index 2adbf1c..be4ac5d 100644 --- a/sys/dev/msk/if_msk.c +++ b/sys/dev/msk/if_msk.c @@ -566,7 +566,7 @@ msk_miibus_statchg(device_t dev) msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, 0); /* Disable Rx/Tx MAC. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); - if ((GM_GPCR_RX_ENA | GM_GPCR_TX_ENA) != 0) { + if ((gmac & (GM_GPCR_RX_ENA | GM_GPCR_TX_ENA)) != 0) { gmac &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, gmac); /* Read again to ensure writing. */ @@ -1018,7 +1018,7 @@ msk_ioctl(struct ifnet *ifp, u_long command, caddr_t data) if (ifr->ifr_mtu > MSK_JUMBO_MTU || ifr->ifr_mtu < ETHERMIN) error = EINVAL; else if (ifp->if_mtu != ifr->ifr_mtu) { - if (ifr->ifr_mtu > ETHERMTU) { + if (ifr->ifr_mtu > ETHERMTU) { if ((sc_if->msk_flags & MSK_FLAG_JUMBO) == 0) { error = EINVAL; MSK_IF_UNLOCK(sc_if); @@ -1636,7 +1636,7 @@ msk_attach(device_t dev) * this workaround does not work so disable checksum offload * for VLAN interface. */ - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO; + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO; /* * Enable Rx checksum offloading for VLAN tagged frames * if controller support new descriptor format. 
@@ -1921,7 +1921,8 @@ mskc_attach(device_t dev) error = ENXIO; goto fail; } - mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | M_ZERO); + mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | + M_ZERO); if (mmd == NULL) { device_printf(dev, "failed to allocate memory for " "ivars of PORT_B\n"); @@ -1930,9 +1931,9 @@ mskc_attach(device_t dev) } mmd->port = MSK_PORT_B; mmd->pmd = sc->msk_pmd; - if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') + if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') mmd->mii_flags |= MIIF_HAVEFIBER; - if (sc->msk_pmd == 'P') + if (sc->msk_pmd == 'P') mmd->mii_flags |= MIIF_HAVEFIBER | MIIF_MACPRIV0; device_set_ivars(sc->msk_devs[MSK_PORT_B], mmd); } @@ -3741,10 +3742,10 @@ msk_init_locked(struct msk_if_softc *sc_if) ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); } - /* GMAC Control reset. */ - CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_SET); - CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_CLR); - CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_F_LOOPB_OFF); + /* GMAC Control reset. */ + CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_SET); + CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_CLR); + CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_F_LOOPB_OFF); if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), @@ -3854,13 +3855,13 @@ msk_init_locked(struct msk_if_softc *sc_if) msk_set_tx_stfwd(sc_if); } - if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && - sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { - /* Disable dynamic watermark - from Linux. */ - reg = CSR_READ_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA)); - reg &= ~0x03; - CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA), reg); - } + if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && + sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { + /* Disable dynamic watermark - from Linux. 
*/ + reg = CSR_READ_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA)); + reg &= ~0x03; + CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA), reg); + } /* * Disable Force Sync bit and Alloc bit in Tx RAM interface diff --git a/sys/dev/mvs/mvs.c b/sys/dev/mvs/mvs.c index 5dbe30c..54808c5 100644 --- a/sys/dev/mvs/mvs.c +++ b/sys/dev/mvs/mvs.c @@ -1738,13 +1738,6 @@ mvs_end_transaction(struct mvs_slot *slot, enum mvs_err_type et) ch->numhslots++; } else xpt_done(ccb); - /* Unfreeze frozen command. */ - if (ch->frozen && !mvs_check_collision(dev, ch->frozen)) { - union ccb *fccb = ch->frozen; - ch->frozen = NULL; - mvs_begin_transaction(dev, fccb); - xpt_release_simq(ch->sim, TRUE); - } /* If we have no other active commands, ... */ if (ch->rslots == 0) { /* if there was fatal error - reset port. */ @@ -1764,6 +1757,13 @@ mvs_end_transaction(struct mvs_slot *slot, enum mvs_err_type et) } else if ((ch->rslots & ~ch->toslots) == 0 && et != MVS_ERR_TIMEOUT) mvs_rearm_timeout(dev); + /* Unfreeze frozen command. */ + if (ch->frozen && !mvs_check_collision(dev, ch->frozen)) { + union ccb *fccb = ch->frozen; + ch->frozen = NULL; + mvs_begin_transaction(dev, fccb); + xpt_release_simq(ch->sim, TRUE); + } /* Start PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3 && (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { @@ -2080,7 +2080,8 @@ mvs_softreset(device_t dev, union ccb *ccb) { struct mvs_channel *ch = device_get_softc(dev); int port = ccb->ccb_h.target_id & 0x0f; - int i; + int i, stuck; + uint8_t status; mvs_set_edma_mode(dev, MVS_EDMA_OFF); ATA_OUTB(ch->r_mem, SATA_SATAICTL, port << SATA_SATAICTL_PMPTX_SHIFT); @@ -2089,12 +2090,35 @@ mvs_softreset(device_t dev, union ccb *ccb) ATA_OUTB(ch->r_mem, ATA_CONTROL, 0); ccb->ccb_h.status &= ~CAM_STATUS_MASK; /* Wait for clearing busy status. 
*/ - if ((i = mvs_wait(dev, 0, ATA_S_BUSY | ATA_S_DRQ, ccb->ccb_h.timeout)) < 0) { + if ((i = mvs_wait(dev, 0, ATA_S_BUSY, ccb->ccb_h.timeout)) < 0) { ccb->ccb_h.status |= CAM_CMD_TIMEOUT; + stuck = 1; } else { - ccb->ccb_h.status |= CAM_REQ_CMP; + status = mvs_getstatus(dev, 0); + if (status & ATA_S_ERROR) + ccb->ccb_h.status |= CAM_ATA_STATUS_ERROR; + else + ccb->ccb_h.status |= CAM_REQ_CMP; + if (status & ATA_S_DRQ) + stuck = 1; + else + stuck = 0; } mvs_tfd_read(dev, ccb); + + /* + * XXX: If some device on PMP failed to soft-reset, + * try to recover by sending dummy soft-reset to PMP. + */ + if (stuck && ch->pm_present && port != 15) { + ATA_OUTB(ch->r_mem, SATA_SATAICTL, + 15 << SATA_SATAICTL_PMPTX_SHIFT); + ATA_OUTB(ch->r_mem, ATA_CONTROL, ATA_A_RESET); + DELAY(10000); + ATA_OUTB(ch->r_mem, ATA_CONTROL, 0); + mvs_wait(dev, 0, ATA_S_BUSY | ATA_S_DRQ, ccb->ccb_h.timeout); + } + xpt_done(ccb); } diff --git a/sys/dev/nfe/if_nfe.c b/sys/dev/nfe/if_nfe.c index 6cdfa34..28a3c01 100644 --- a/sys/dev/nfe/if_nfe.c +++ b/sys/dev/nfe/if_nfe.c @@ -1889,7 +1889,7 @@ nfe_int_task(void *arg, int pending) if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { NFE_UNLOCK(sc); - nfe_enable_intr(sc); + nfe_disable_intr(sc); return; } diff --git a/sys/dev/pccard/pccard.c b/sys/dev/pccard/pccard.c index 00cd1dc..1de571c 100644 --- a/sys/dev/pccard/pccard.c +++ b/sys/dev/pccard/pccard.c @@ -1405,8 +1405,8 @@ pccard_ccr_read_impl(device_t brdev, device_t child, uint32_t offset, struct pccard_ivar *devi = PCCARD_IVAR(child); *val = pccard_ccr_read(devi->pf, offset); - device_printf(child, "ccr_read of %#x (%#x) is %#x\n", offset, - devi->pf->pf_ccr_offset, *val); + DEVPRINTF((child, "ccr_read of %#x (%#x) is %#x\n", offset, + devi->pf->pf_ccr_offset, *val)); return 0; } @@ -1421,8 +1421,8 @@ pccard_ccr_write_impl(device_t brdev, device_t child, uint32_t offset, * Can't use pccard_ccr_write since client drivers may access * registers not contained in the 'mask' if they are non-standard. 
*/ - device_printf(child, "ccr_write of %#x to %#x (%#x)\n", val, offset, - devi->pf->pf_ccr_offset); + DEVPRINTF((child, "ccr_write of %#x to %#x (%#x)\n", val, offset, + devi->pf->pf_ccr_offset)); bus_space_write_1(pf->pf_ccrt, pf->pf_ccrh, pf->pf_ccr_offset + offset, val); return 0; diff --git a/sys/dev/pccbb/pccbb.c b/sys/dev/pccbb/pccbb.c index bbb9eae..3c60f37 100644 --- a/sys/dev/pccbb/pccbb.c +++ b/sys/dev/pccbb/pccbb.c @@ -800,24 +800,36 @@ cbb_power(device_t brdev, int volts) * We have a shortish timeout of 500ms here. Some bridges do * not generate a POWER_CYCLE event for 16-bit cards. In * those cases, we have to cope the best we can, and having - * only a short delay is better than the alternatives. + * only a short delay is better than the alternatives. Others + * raise the power cycle a smidge before it is really ready. + * We deal with those below. */ sane = 10; while (!(cbb_get(sc, CBB_SOCKET_STATE) & CBB_STATE_POWER_CYCLE) && cnt == sc->powerintr && sane-- > 0) msleep(&sc->powerintr, &sc->mtx, 0, "-", hz / 20); mtx_unlock(&sc->mtx); + + /* + * Relax for 100ms. Some bridges appear to assert this signal + * right away, but before the card has stabilized. Other + * cards need need more time to cope up reliabily. + * Experiments with troublesome setups show this to be a + * "cheap" way to enhance reliabilty. We need not do this for + * "off" since we don't touch the card after we turn it off. + */ + pause("cbbPwr", min(hz / 10, 1)); + /* * The TOPIC95B requires a little bit extra time to get its * act together, so delay for an additional 100ms. Also as * documented below, it doesn't seem to set the POWER_CYCLE * bit, so don't whine if it never came on. 
*/ - if (sc->chipset == CB_TOPIC95) { + if (sc->chipset == CB_TOPIC95) pause("cbb95B", hz / 10); - } else if (sane <= 0) { + else if (sane <= 0) device_printf(sc->dev, "power timeout, doom?\n"); - } } /* diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 22046c1..9cd5a1c 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -2576,6 +2576,17 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl, uint16_t cmd; struct resource *res; + /* + * The BAR may already exist if the device is a CardBus card + * whose CIS is stored in this BAR. + */ + pm = pci_find_bar(dev, reg); + if (pm != NULL) { + maprange = pci_maprange(pm->pm_value); + barlen = maprange == 64 ? 2 : 1; + return (barlen); + } + pci_read_bar(dev, reg, &map, &testval); if (PCI_BAR_MEM(map)) { type = SYS_RES_MEMORY; diff --git a/sys/dev/pci/pci_pci.c b/sys/dev/pci/pci_pci.c index f68973b..da8465c 100644 --- a/sys/dev/pci/pci_pci.c +++ b/sys/dev/pci/pci_pci.c @@ -916,7 +916,8 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type, /* Move end_free down until it is properly aligned. */ end_free &= ~(align - 1); - front = end_free - count; + end_free--; + front = end_free - (count - 1); /* * The resource would now be allocated at (front, @@ -944,7 +945,7 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type, /* Move start_free up until it is properly aligned. 
*/ start_free = roundup2(start_free, align); - back = start_free + count; + back = start_free + count - 1; /* * The resource would now be allocated at (start_free, @@ -957,7 +958,7 @@ pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type, if (bootverbose) printf("\tback candidate range: %#lx-%#lx\n", start_free, back); - back = roundup2(back, w->step) - 1; + back = roundup2(back + 1, w->step) - 1; back -= rman_get_end(w->res); } else back = 0; diff --git a/sys/dev/puc/puc.c b/sys/dev/puc/puc.c index b6fa3c5..9bb3ceb 100644 --- a/sys/dev/puc/puc.c +++ b/sys/dev/puc/puc.c @@ -726,3 +726,41 @@ puc_bus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) } return (0); } + +int +puc_bus_print_child(device_t dev, device_t child) +{ + struct puc_port *port; + int retval; + + port = device_get_ivars(child); + retval = 0; + + retval += bus_print_child_header(dev, child); + retval += printf(" at port %d", port->p_nr); + retval += bus_print_child_footer(dev, child); + + return (retval); +} + +int +puc_bus_child_location_str(device_t dev, device_t child, char *buf, + size_t buflen) +{ + struct puc_port *port; + + port = device_get_ivars(child); + snprintf(buf, buflen, "port=%d", port->p_nr); + return (0); +} + +int +puc_bus_child_pnpinfo_str(device_t dev, device_t child, char *buf, + size_t buflen) +{ + struct puc_port *port; + + port = device_get_ivars(child); + snprintf(buf, buflen, "type=%d", port->p_type); + return (0); +} diff --git a/sys/dev/puc/puc_bfe.h b/sys/dev/puc/puc_bfe.h index f6d69c4..c67fab5 100644 --- a/sys/dev/puc/puc_bfe.h +++ b/sys/dev/puc/puc_bfe.h @@ -82,9 +82,12 @@ int puc_bfe_attach(device_t); int puc_bfe_detach(device_t); int puc_bfe_probe(device_t, const struct puc_cfg *); +int puc_bus_child_location_str(device_t, device_t, char *, size_t); +int puc_bus_child_pnpinfo_str(device_t, device_t, char *, size_t); struct resource *puc_bus_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); int 
puc_bus_get_resource(device_t, device_t, int, int, u_long *, u_long *); +int puc_bus_print_child(device_t, device_t); int puc_bus_read_ivar(device_t, device_t, int, uintptr_t *); int puc_bus_release_resource(device_t, device_t, int, int, struct resource *); int puc_bus_setup_intr(device_t, device_t, struct resource *, int, diff --git a/sys/dev/puc/puc_pccard.c b/sys/dev/puc/puc_pccard.c index 2cb9513..63d5787 100644 --- a/sys/dev/puc/puc_pccard.c +++ b/sys/dev/puc/puc_pccard.c @@ -82,7 +82,9 @@ static device_method_t puc_pccard_methods[] = { DEVMETHOD(bus_read_ivar, puc_bus_read_ivar), DEVMETHOD(bus_setup_intr, puc_bus_setup_intr), DEVMETHOD(bus_teardown_intr, puc_bus_teardown_intr), - DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_print_child, puc_bus_print_child), + DEVMETHOD(bus_child_pnpinfo_str, puc_bus_child_pnpinfo_str), + DEVMETHOD(bus_child_location_str, puc_bus_child_location_str), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; diff --git a/sys/dev/puc/puc_pci.c b/sys/dev/puc/puc_pci.c index 9a05b66..8c14717 100644 --- a/sys/dev/puc/puc_pci.c +++ b/sys/dev/puc/puc_pci.c @@ -132,7 +132,9 @@ static device_method_t puc_pci_methods[] = { DEVMETHOD(bus_read_ivar, puc_bus_read_ivar), DEVMETHOD(bus_setup_intr, puc_bus_setup_intr), DEVMETHOD(bus_teardown_intr, puc_bus_teardown_intr), - DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_print_child, puc_bus_print_child), + DEVMETHOD(bus_child_pnpinfo_str, puc_bus_child_pnpinfo_str), + DEVMETHOD(bus_child_location_str, puc_bus_child_location_str), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c index 83b02ea..2b38d9b 100644 --- a/sys/dev/puc/pucdata.c +++ b/sys/dev/puc/pucdata.c @@ -48,15 +48,15 @@ __FBSDID("$FreeBSD$"); #include <dev/puc/puc_bfe.h> static puc_config_f puc_config_amc; -static puc_config_f puc_config_cronyx; static puc_config_f puc_config_diva; +static 
puc_config_f puc_config_exar; static puc_config_f puc_config_icbook; +static puc_config_f puc_config_oxford_pcie; static puc_config_f puc_config_quatech; static puc_config_f puc_config_syba; static puc_config_f puc_config_siig; static puc_config_f puc_config_timedia; static puc_config_f puc_config_titan; -static puc_config_f puc_config_oxford_pcie; const struct puc_cfg puc_pci_devices[] = { @@ -548,11 +548,25 @@ const struct puc_cfg puc_pci_devices[] = { PUC_PORT_8S, 0x18, 0, 8, }, + { 0x13a8, 0x0152, 0xffff, 0, + "Exar XR17C/D152", + DEFAULT_RCLK * 8, + PUC_PORT_2S, 0x10, 0, -1, + .config_function = puc_config_exar + }, + + { 0x13a8, 0x0154, 0xffff, 0, + "Exar XR17C154", + DEFAULT_RCLK * 8, + PUC_PORT_4S, 0x10, 0, -1, + .config_function = puc_config_exar + }, + { 0x13a8, 0x0158, 0xffff, 0, - "Cronyx Omega2-PCI", + "Exar XR17C158", DEFAULT_RCLK * 8, PUC_PORT_8S, 0x10, 0, -1, - .config_function = puc_config_cronyx + .config_function = puc_config_exar }, { 0x13a8, 0x0258, 0xffff, 0, @@ -1014,28 +1028,28 @@ puc_config_amc(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, } static int -puc_config_cronyx(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, +puc_config_diva(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { + const struct puc_cfg *cfg = sc->sc_cfg; + if (cmd == PUC_CFG_GET_OFS) { - *res = port * 0x200; + if (cfg->subdevice == 0x1282) /* Everest SP */ + port <<= 1; + else if (cfg->subdevice == 0x104b) /* Maestro SP2 */ + port = (port == 3) ? 4 : port; + *res = port * 8 + ((port > 2) ? 0x18 : 0); return (0); } return (ENXIO); } static int -puc_config_diva(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, +puc_config_exar(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { - const struct puc_cfg *cfg = sc->sc_cfg; - if (cmd == PUC_CFG_GET_OFS) { - if (cfg->subdevice == 0x1282) /* Everest SP */ - port <<= 1; - else if (cfg->subdevice == 0x104b) /* Maestro SP2 */ - port = (port == 3) ? 
4 : port; - *res = port * 8 + ((port > 2) ? 0x18 : 0); + *res = port * 0x200; return (0); } return (ENXIO); @@ -1292,6 +1306,12 @@ puc_config_timedia(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, uint16_t subdev; switch (cmd) { + case PUC_CFG_GET_CLOCK: + if (port < 2) + *res = DEFAULT_RCLK * 8; + else + *res = DEFAULT_RCLK; + return (0); case PUC_CFG_GET_DESC: snprintf(desc, sizeof(desc), "Timedia technology %d Port Serial", (int)sc->sc_cfg_data); @@ -1346,14 +1366,12 @@ puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, bar = puc_get_bar(sc, cfg->rid); if (bar == NULL) return (ENXIO); - for (idx = 0; idx < sc->sc_nports; idx++) { - value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) - + 0x92); + value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) + + 0x92); bus_write_1(bar->b_res, 0x1000 + (idx << 9) + 0x92, - value | 0x10); + value | 0x10); } - return (0); case PUC_CFG_GET_LEN: *res = 0x200; diff --git a/sys/dev/safe/safe.c b/sys/dev/safe/safe.c index ac97098..18ef5e5 100644 --- a/sys/dev/safe/safe.c +++ b/sys/dev/safe/safe.c @@ -1580,9 +1580,12 @@ safe_callback(struct safe_softc *sc, struct safe_ringentry *re) * SHA-1 ICV's are byte-swapped; fix 'em up * before copy them to their destination. 
*/ - bswap32(re->re_sastate.sa_saved_indigest[0]); - bswap32(re->re_sastate.sa_saved_indigest[1]); - bswap32(re->re_sastate.sa_saved_indigest[2]); + re->re_sastate.sa_saved_indigest[0] = + bswap32(re->re_sastate.sa_saved_indigest[0]); + re->re_sastate.sa_saved_indigest[1] = + bswap32(re->re_sastate.sa_saved_indigest[1]); + re->re_sastate.sa_saved_indigest[2] = + bswap32(re->re_sastate.sa_saved_indigest[2]); } crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject, diff --git a/sys/dev/sdhci/sdhci.c b/sys/dev/sdhci/sdhci.c index 6bbc25f..24cba57 100644 --- a/sys/dev/sdhci/sdhci.c +++ b/sys/dev/sdhci/sdhci.c @@ -1443,46 +1443,46 @@ sdhci_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) default: return (EINVAL); case MMCBR_IVAR_BUS_MODE: - *(int *)result = slot->host.ios.bus_mode; + *result = slot->host.ios.bus_mode; break; case MMCBR_IVAR_BUS_WIDTH: - *(int *)result = slot->host.ios.bus_width; + *result = slot->host.ios.bus_width; break; case MMCBR_IVAR_CHIP_SELECT: - *(int *)result = slot->host.ios.chip_select; + *result = slot->host.ios.chip_select; break; case MMCBR_IVAR_CLOCK: - *(int *)result = slot->host.ios.clock; + *result = slot->host.ios.clock; break; case MMCBR_IVAR_F_MIN: - *(int *)result = slot->host.f_min; + *result = slot->host.f_min; break; case MMCBR_IVAR_F_MAX: - *(int *)result = slot->host.f_max; + *result = slot->host.f_max; break; case MMCBR_IVAR_HOST_OCR: - *(int *)result = slot->host.host_ocr; + *result = slot->host.host_ocr; break; case MMCBR_IVAR_MODE: - *(int *)result = slot->host.mode; + *result = slot->host.mode; break; case MMCBR_IVAR_OCR: - *(int *)result = slot->host.ocr; + *result = slot->host.ocr; break; case MMCBR_IVAR_POWER_MODE: - *(int *)result = slot->host.ios.power_mode; + *result = slot->host.ios.power_mode; break; case MMCBR_IVAR_VDD: - *(int *)result = slot->host.ios.vdd; + *result = slot->host.ios.vdd; break; case MMCBR_IVAR_CAPS: - *(int *)result = slot->host.caps; + *result = slot->host.caps; 
break; case MMCBR_IVAR_TIMING: - *(int *)result = slot->host.ios.timing; + *result = slot->host.ios.timing; break; case MMCBR_IVAR_MAX_DATA: - *(int *)result = 65535; + *result = 65535; break; } return (0); diff --git a/sys/dev/siis/siis.c b/sys/dev/siis/siis.c index 01edae3..a7b018a 100644 --- a/sys/dev/siis/siis.c +++ b/sys/dev/siis/siis.c @@ -1178,11 +1178,22 @@ siis_timeout(struct siis_slot *slot) { device_t dev = slot->dev; struct siis_channel *ch = device_get_softc(dev); + union ccb *ccb = slot->ccb; mtx_assert(&ch->mtx, MA_OWNED); /* Check for stale timeout. */ if (slot->state < SIIS_SLOT_RUNNING) return; + + /* Handle soft-reset timeouts without doing hard-reset. */ + if ((ccb->ccb_h.func_code == XPT_ATA_IO) && + (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && + (ccb->ataio.cmd.control & ATA_A_RESET)) { + xpt_freeze_simq(ch->sim, ch->numrslots); + siis_end_transaction(slot, SIIS_ERR_TFE); + return; + } + device_printf(dev, "Timeout on slot %d\n", slot->slot); device_printf(dev, "%s is %08x ss %08x rs %08x es %08x sts %08x serr %08x\n", __func__, ATA_INL(ch->r_mem, SIIS_P_IS), @@ -1331,13 +1342,6 @@ siis_end_transaction(struct siis_slot *slot, enum siis_err_type et) ch->numhslots++; } else xpt_done(ccb); - /* Unfreeze frozen command. */ - if (ch->frozen && !siis_check_collision(dev, ch->frozen)) { - union ccb *fccb = ch->frozen; - ch->frozen = NULL; - siis_begin_transaction(dev, fccb); - xpt_release_simq(ch->sim, TRUE); - } /* If we have no other active commands, ... */ if (ch->rslots == 0) { /* if there were timeouts or fatal error - reset port. */ @@ -1355,6 +1359,13 @@ siis_end_transaction(struct siis_slot *slot, enum siis_err_type et) } else if ((ch->rslots & ~ch->toslots) == 0 && et != SIIS_ERR_TIMEOUT) siis_rearm_timeout(dev); + /* Unfreeze frozen command. 
*/ + if (ch->frozen && !siis_check_collision(dev, ch->frozen)) { + union ccb *fccb = ch->frozen; + ch->frozen = NULL; + siis_begin_transaction(dev, fccb); + xpt_release_simq(ch->sim, TRUE); + } } static void diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c index 7af5303..bb0f385 100644 --- a/sys/dev/sound/pci/hda/hdac.c +++ b/sys/dev/sound/pci/hda/hdac.c @@ -754,7 +754,17 @@ static const struct { #define HDA_CODEC_CX20561 HDA_CODEC_CONSTRUCT(CONEXANT, 0x5051) #define HDA_CODEC_CX20582 HDA_CODEC_CONSTRUCT(CONEXANT, 0x5066) #define HDA_CODEC_CX20583 HDA_CODEC_CONSTRUCT(CONEXANT, 0x5067) +#define HDA_CODEC_CX20584 HDA_CODEC_CONSTRUCT(CONEXANT, 0x5068) #define HDA_CODEC_CX20585 HDA_CODEC_CONSTRUCT(CONEXANT, 0x5069) +#define HDA_CODEC_CX20590 HDA_CODEC_CONSTRUCT(CONEXANT, 0x506e) +#define HDA_CODEC_CX20631 HDA_CODEC_CONSTRUCT(CONEXANT, 0x5097) +#define HDA_CODEC_CX20632 HDA_CODEC_CONSTRUCT(CONEXANT, 0x5098) +#define HDA_CODEC_CX20641 HDA_CODEC_CONSTRUCT(CONEXANT, 0x50a1) +#define HDA_CODEC_CX20642 HDA_CODEC_CONSTRUCT(CONEXANT, 0x50a2) +#define HDA_CODEC_CX20651 HDA_CODEC_CONSTRUCT(CONEXANT, 0x50ab) +#define HDA_CODEC_CX20652 HDA_CODEC_CONSTRUCT(CONEXANT, 0x50ac) +#define HDA_CODEC_CX20664 HDA_CODEC_CONSTRUCT(CONEXANT, 0x50b8) +#define HDA_CODEC_CX20665 HDA_CODEC_CONSTRUCT(CONEXANT, 0x50b9) #define HDA_CODEC_CXXXXX HDA_CODEC_CONSTRUCT(CONEXANT, 0xffff) /* VIA */ @@ -826,12 +836,13 @@ static const struct { #define HDA_CODEC_NVIDIAXXXX HDA_CODEC_CONSTRUCT(NVIDIA, 0xffff) /* INTEL */ -#define HDA_CODEC_INTELG45_1 HDA_CODEC_CONSTRUCT(INTEL, 0x2801) -#define HDA_CODEC_INTELG45_2 HDA_CODEC_CONSTRUCT(INTEL, 0x2802) -#define HDA_CODEC_INTELG45_3 HDA_CODEC_CONSTRUCT(INTEL, 0x2803) -#define HDA_CODEC_INTELG45_4 HDA_CODEC_CONSTRUCT(INTEL, 0x2804) -#define HDA_CODEC_INTELG45_5 HDA_CODEC_CONSTRUCT(INTEL, 0x29fb) -#define HDA_CODEC_INTELQ57 HDA_CODEC_CONSTRUCT(INTEL, 0x0054) +#define HDA_CODEC_INTELIP HDA_CODEC_CONSTRUCT(INTEL, 0x0054) +#define 
HDA_CODEC_INTELBL HDA_CODEC_CONSTRUCT(INTEL, 0x2801) +#define HDA_CODEC_INTELCA HDA_CODEC_CONSTRUCT(INTEL, 0x2802) +#define HDA_CODEC_INTELEL HDA_CODEC_CONSTRUCT(INTEL, 0x2803) +#define HDA_CODEC_INTELIP2 HDA_CODEC_CONSTRUCT(INTEL, 0x2804) +#define HDA_CODEC_INTELCPT HDA_CODEC_CONSTRUCT(INTEL, 0x2805) +#define HDA_CODEC_INTELCL HDA_CODEC_CONSTRUCT(INTEL, 0x29fb) #define HDA_CODEC_INTELXXXX HDA_CODEC_CONSTRUCT(INTEL, 0xffff) /* Codecs */ @@ -938,7 +949,17 @@ static const struct { { HDA_CODEC_CX20561, "Conexant CX20561 (Hermosa)" }, { HDA_CODEC_CX20582, "Conexant CX20582 (Pebble)" }, { HDA_CODEC_CX20583, "Conexant CX20583 (Pebble HSF)" }, + { HDA_CODEC_CX20584, "Conexant CX20584" }, { HDA_CODEC_CX20585, "Conexant CX20585" }, + { HDA_CODEC_CX20590, "Conexant CX20590" }, + { HDA_CODEC_CX20631, "Conexant CX20631" }, + { HDA_CODEC_CX20632, "Conexant CX20632" }, + { HDA_CODEC_CX20641, "Conexant CX20641" }, + { HDA_CODEC_CX20642, "Conexant CX20642" }, + { HDA_CODEC_CX20651, "Conexant CX20651" }, + { HDA_CODEC_CX20652, "Conexant CX20652" }, + { HDA_CODEC_CX20664, "Conexant CX20664" }, + { HDA_CODEC_CX20665, "Conexant CX20665" }, { HDA_CODEC_VT1708_8, "VIA VT1708_8" }, { HDA_CODEC_VT1708_9, "VIA VT1708_9" }, { HDA_CODEC_VT1708_A, "VIA VT1708_A" }, @@ -998,12 +1019,13 @@ static const struct { { HDA_CODEC_NVIDIAGT21X, "NVidia GT21x HDMI" }, { HDA_CODEC_NVIDIAMCP89, "NVidia MCP89 HDMI" }, { HDA_CODEC_NVIDIAGT240, "NVidia GT240 HDMI" }, - { HDA_CODEC_INTELG45_1, "Intel G45 HDMI" }, - { HDA_CODEC_INTELG45_2, "Intel G45 HDMI" }, - { HDA_CODEC_INTELG45_3, "Intel G45 HDMI" }, - { HDA_CODEC_INTELG45_4, "Intel G45 HDMI" }, - { HDA_CODEC_INTELG45_5, "Intel G45 HDMI" }, - { HDA_CODEC_INTELQ57, "Intel Q57 HDMI" }, + { HDA_CODEC_INTELIP, "Intel Ibex Peak HDMI" }, + { HDA_CODEC_INTELBL, "Intel Bearlake HDMI" }, + { HDA_CODEC_INTELCA, "Intel Cantiga HDMI" }, + { HDA_CODEC_INTELEL, "Intel Eaglelake HDMI" }, + { HDA_CODEC_INTELIP2, "Intel Ibex Peak HDMI" }, + { HDA_CODEC_INTELCPT, "Intel 
Cougar Point HDMI" }, + { HDA_CODEC_INTELCL, "Intel Crestline HDMI" }, { HDA_CODEC_SII1390, "Silicon Image SiI1390 HDMI" }, { HDA_CODEC_SII1392, "Silicon Image SiI1392 HDMI" }, /* Unknown codec */ @@ -4124,7 +4146,10 @@ hdac_attach(device_t dev) uint16_t vendor; uint8_t v; - device_printf(dev, "HDA Driver Revision: %s\n", HDA_DRV_TEST_REV); + HDA_BOOTVERBOSE( + device_printf(dev, "HDA Driver Revision: %s\n", + HDA_DRV_TEST_REV); + ); model = (uint32_t)pci_get_device(dev) << 16; model |= (uint32_t)pci_get_vendor(dev) & 0x0000ffff; @@ -4919,6 +4944,25 @@ hdac_vendor_patch_parse(struct hdac_devinfo *devinfo) if (w != NULL) w->connsenable[0] = 0; break; + case HDA_CODEC_CX20582: + case HDA_CODEC_CX20583: + case HDA_CODEC_CX20584: + case HDA_CODEC_CX20585: + case HDA_CODEC_CX20590: + /* + * These codecs have extra connectivity on record side + * too reach for the present parser. + */ + w = hdac_widget_get(devinfo, 20); + if (w != NULL) + w->connsenable[1] = 0; + w = hdac_widget_get(devinfo, 21); + if (w != NULL) + w->connsenable[1] = 0; + w = hdac_widget_get(devinfo, 22); + if (w != NULL) + w->connsenable[0] = 0; + break; } } diff --git a/sys/dev/sound/pcm/sound.c b/sys/dev/sound/pcm/sound.c index caa7841..958065f 100644 --- a/sys/dev/sound/pcm/sound.c +++ b/sys/dev/sound/pcm/sound.c @@ -51,7 +51,7 @@ int pcm_veto_load = 1; int snd_unit = -1; TUNABLE_INT("hw.snd.default_unit", &snd_unit); -static int snd_unit_auto = 0; +static int snd_unit_auto = -1; TUNABLE_INT("hw.snd.default_auto", &snd_unit_auto); SYSCTL_INT(_hw_snd, OID_AUTO, default_auto, CTLFLAG_RW, &snd_unit_auto, 0, "assign default unit to a newly attached device"); @@ -443,6 +443,7 @@ sysctl_hw_snd_default_unit(SYSCTL_HANDLER_ARGS) if (!PCM_REGISTERED(d) || CHN_EMPTY(d, channels.pcm)) return EINVAL; snd_unit = unit; + snd_unit_auto = 0; } return (error); } @@ -737,6 +738,32 @@ pcm_killchan(device_t dev) return (pcm_chn_destroy(ch)); } +static int +pcm_best_unit(int old) +{ + struct snddev_info *d; + int i, 
best, bestprio, prio; + + best = -1; + bestprio = -100; + for (i = 0; pcm_devclass != NULL && + i < devclass_get_maxunit(pcm_devclass); i++) { + d = devclass_get_softc(pcm_devclass, i); + if (!PCM_REGISTERED(d)) + continue; + prio = 0; + if (d->playcount == 0) + prio -= 10; + if (d->reccount == 0) + prio -= 2; + if (prio > bestprio || (prio == bestprio && i == old)) { + best = i; + bestprio = prio; + } + } + return (best); +} + int pcm_setstatus(device_t dev, char *str) { @@ -770,8 +797,12 @@ pcm_setstatus(device_t dev, char *str) PCM_UNLOCK(d); - if (snd_unit < 0 || snd_unit_auto != 0) + if (snd_unit_auto < 0) + snd_unit_auto = (snd_unit < 0) ? 1 : 0; + if (snd_unit < 0 || snd_unit_auto > 1) snd_unit = device_get_unit(dev); + else if (snd_unit_auto == 1) + snd_unit = pcm_best_unit(snd_unit); return (0); } @@ -1113,7 +1144,6 @@ pcm_unregister(device_t dev) struct snddev_info *d; struct pcm_channel *ch; struct thread *td; - int i; td = curthread; d = device_get_softc(dev); @@ -1216,21 +1246,9 @@ pcm_unregister(device_t dev) sndstat_release(td); if (snd_unit == device_get_unit(dev)) { - /* - * Reassign default unit to the next available dev, but - * first, reset snd_unit to something ridiculous. - */ - snd_unit = -1; - for (i = 0; pcm_devclass != NULL && - i < devclass_get_maxunit(pcm_devclass); i++) { - if (device_get_unit(dev) == i) - continue; - d = devclass_get_softc(pcm_devclass, i); - if (PCM_REGISTERED(d)) { - snd_unit = i; - break; - } - } + snd_unit = pcm_best_unit(-1); + if (snd_unit_auto == 0) + snd_unit_auto = 1; } return (0); diff --git a/sys/dev/uart/uart_dev_ns8250.c b/sys/dev/uart/uart_dev_ns8250.c index 3cdd5ad..489be29 100644 --- a/sys/dev/uart/uart_dev_ns8250.c +++ b/sys/dev/uart/uart_dev_ns8250.c @@ -242,8 +242,14 @@ ns8250_probe(struct uart_bas *bas) val = uart_getreg(bas, REG_IIR); if (val & 0x30) return (ENXIO); + /* + * Bit 6 of the MCR (= 0x40) appears to be 1 for the Sun1699 + * chip, but otherwise doesn't seem to have a function. 
In + * other words, uart(4) works regardless. Ignore that bit so + * the probe succeeds. + */ val = uart_getreg(bas, REG_MCR); - if (val & 0xe0) + if (val & 0xa0) return (ENXIO); return (0); diff --git a/sys/dev/usb/net/if_axe.c b/sys/dev/usb/net/if_axe.c index fbe63de..00d1c0b 100644 --- a/sys/dev/usb/net/if_axe.c +++ b/sys/dev/usb/net/if_axe.c @@ -514,7 +514,7 @@ static void axe_ax88178_init(struct axe_softc *sc) { struct usb_ether *ue; - int gpio0, phymode; + int gpio0, ledmode, phymode; uint16_t eeprom, val; ue = &sc->sc_ue; @@ -528,9 +528,11 @@ axe_ax88178_init(struct axe_softc *sc) if (eeprom == 0xffff) { phymode = AXE_PHY_MODE_MARVELL; gpio0 = 1; + ledmode = 0; } else { phymode = eeprom & 0x7f; gpio0 = (eeprom & 0x80) ? 0 : 1; + ledmode = eeprom >> 8; } if (bootverbose) @@ -548,9 +550,22 @@ axe_ax88178_init(struct axe_softc *sc) AXE_GPIO_WRITE(AXE_GPIO0_EN | AXE_GPIO2_EN, hz / 4); AXE_GPIO_WRITE(AXE_GPIO0_EN | AXE_GPIO2 | AXE_GPIO2_EN, hz / 32); - } else + } else { AXE_GPIO_WRITE(AXE_GPIO_RELOAD_EEPROM | AXE_GPIO1 | - AXE_GPIO1_EN, hz / 32); + AXE_GPIO1_EN, hz / 3); + if (ledmode == 1) { + AXE_GPIO_WRITE(AXE_GPIO1_EN, hz / 3); + AXE_GPIO_WRITE(AXE_GPIO1 | AXE_GPIO1_EN, + hz / 3); + } else { + AXE_GPIO_WRITE(AXE_GPIO1 | AXE_GPIO1_EN | + AXE_GPIO2 | AXE_GPIO2_EN, hz / 32); + AXE_GPIO_WRITE(AXE_GPIO1 | AXE_GPIO1_EN | + AXE_GPIO2_EN, hz / 4); + AXE_GPIO_WRITE(AXE_GPIO1 | AXE_GPIO1_EN | + AXE_GPIO2 | AXE_GPIO2_EN, hz / 32); + } + } break; case AXE_PHY_MODE_CICADA: case AXE_PHY_MODE_CICADA_V2: diff --git a/sys/dev/usb/net/if_udav.c b/sys/dev/usb/net/if_udav.c index a1a0a8d..a6598ef 100644 --- a/sys/dev/usb/net/if_udav.c +++ b/sys/dev/usb/net/if_udav.c @@ -210,6 +210,7 @@ static const struct usb_device_id udav_devs[] = { {USB_VPI(USB_VENDOR_SHANTOU, USB_PRODUCT_SHANTOU_ADM8515, 0)}, /* Kontron AG USB Ethernet */ {USB_VPI(USB_VENDOR_KONTRON, USB_PRODUCT_KONTRON_DM9601, 0)}, + {USB_VPI(USB_VENDOR_KONTRON, USB_PRODUCT_KONTRON_JP1082, 0)}, }; static void diff --git 
a/sys/dev/usb/serial/umcs.c b/sys/dev/usb/serial/umcs.c new file mode 100644 index 0000000..c74044e --- /dev/null +++ b/sys/dev/usb/serial/umcs.c @@ -0,0 +1,1075 @@ +/*- + * Copyright (c) 2010 Lev Serebryakov <lev@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This driver supports several multiport USB-to-RS232 serial adapters driven + * by MosChip mos7820 and mos7840, bridge chips. + * The adapters are sold under many different brand names. + * + * Datasheets are available at MosChip www site at + * http://www.moschip.com. The datasheets don't contain full + * programming information for the chip. 
+ * + * It is normal to have only two enabled ports in devices, based on + * quad-port mos7840. + * + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/stdint.h> +#include <sys/stddef.h> +#include <sys/param.h> +#include <sys/queue.h> +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/linker_set.h> +#include <sys/module.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/condvar.h> +#include <sys/sysctl.h> +#include <sys/sx.h> +#include <sys/unistd.h> +#include <sys/callout.h> +#include <sys/malloc.h> +#include <sys/priv.h> + +#include <dev/usb/usb.h> +#include <dev/usb/usbdi.h> +#include <dev/usb/usbdi_util.h> +#include <dev/usb/usb_cdc.h> +#include "usbdevs.h" + +#define USB_DEBUG_VAR umcs_debug +#include <dev/usb/usb_debug.h> +#include <dev/usb/usb_process.h> + +#include <dev/usb/serial/usb_serial.h> + +#include <dev/usb/serial/umcs.h> + +#define UMCS7840_MODVER 1 + +#ifdef USB_DEBUG +static int umcs_debug = 0; + +SYSCTL_NODE(_hw_usb, OID_AUTO, umcs, CTLFLAG_RW, 0, "USB umcs quadport serial adapter"); +SYSCTL_INT(_hw_usb_umcs, OID_AUTO, debug, CTLFLAG_RW, &umcs_debug, 0, "Debug level"); +#endif /* USB_DEBUG */ + + +/* + * Two-port devices (both with 7820 chip and 7840 chip configured as two-port) + * have ports 0 and 2, with ports 1 and 3 omitted. + * So, PHYSICAL port numbers (indexes) on two-port device will be 0 and 2. + * This driver tries to use physical numbers as much as possible. + */ + +/* + * Indexed by PHYSICAL port number. + * Pack non-regular registers into an array for easier if-less access. + */ +struct umcs7840_port_registers { + uint8_t reg_sp; /* SP register. */ + uint8_t reg_control; /* CONTROL register. */ + uint8_t reg_dcr; /* DCR0 register. 
DCR1 & DCR2 can be + * calculated */ +}; + +static const struct umcs7840_port_registers umcs7840_port_registers[UMCS7840_MAX_PORTS] = { + {.reg_sp = MCS7840_DEV_REG_SP1,.reg_control = MCS7840_DEV_REG_CONTROL1,.reg_dcr = MCS7840_DEV_REG_DCR0_1}, + {.reg_sp = MCS7840_DEV_REG_SP2,.reg_control = MCS7840_DEV_REG_CONTROL2,.reg_dcr = MCS7840_DEV_REG_DCR0_2}, + {.reg_sp = MCS7840_DEV_REG_SP3,.reg_control = MCS7840_DEV_REG_CONTROL3,.reg_dcr = MCS7840_DEV_REG_DCR0_3}, + {.reg_sp = MCS7840_DEV_REG_SP4,.reg_control = MCS7840_DEV_REG_CONTROL4,.reg_dcr = MCS7840_DEV_REG_DCR0_4}, +}; + +enum { + UMCS7840_BULK_RD_EP, + UMCS7840_BULK_WR_EP, + UMCS7840_N_TRANSFERS +}; + +struct umcs7840_softc_oneport { + struct usb_xfer *sc_xfer[UMCS7840_N_TRANSFERS]; /* Control structures + * for two transfers */ + + uint8_t sc_lcr; /* local line control register */ + uint8_t sc_mcr; /* local modem control register */ + uint8_t sc_lsr; /* local line status register */ + uint8_t sc_msr; /* local modem status register */ +}; + +struct umcs7840_softc { + struct ucom_super_softc sc_super_ucom; + struct ucom_softc sc_ucom[UMCS7840_MAX_PORTS]; /* Need to be continuous + * array, so indexed by + * LOGICAL port + * (subunit) number */ + + struct usb_xfer *sc_intr_xfer; /* Interrupt endpoint */ + + device_t sc_dev; /* Device for error prints */ + struct usb_device *sc_udev; /* USB Device for all operations */ + struct mtx sc_mtx; /* ucom requires this */ + + uint8_t sc_driver_done; /* Flag when enumeration is finished */ + + uint8_t sc_numports; /* Number of ports (subunits) */ + struct umcs7840_softc_oneport sc_ports[UMCS7840_MAX_PORTS]; /* Indexed by PHYSICAL + * port number. 
*/ +}; + +/* prototypes */ +static usb_error_t umcs7840_get_reg_sync(struct umcs7840_softc *, uint8_t, uint8_t *); +static usb_error_t umcs7840_set_reg_sync(struct umcs7840_softc *, uint8_t, uint8_t); +static usb_error_t umcs7840_get_UART_reg_sync(struct umcs7840_softc *, uint8_t, uint8_t, uint8_t *); +static usb_error_t umcs7840_set_UART_reg_sync(struct umcs7840_softc *, uint8_t, uint8_t, uint8_t); + +static usb_error_t umcs7840_set_baudrate(struct umcs7840_softc *, uint8_t, uint32_t); +static usb_error_t umcs7840_calc_baudrate(uint32_t rate, uint16_t *, uint8_t *); + +static void umcs7840_cfg_get_status(struct ucom_softc *, uint8_t *, uint8_t *); +static void umcs7840_cfg_set_dtr(struct ucom_softc *, uint8_t); +static void umcs7840_cfg_set_rts(struct ucom_softc *, uint8_t); +static void umcs7840_cfg_set_break(struct ucom_softc *, uint8_t); +static void umcs7840_cfg_param(struct ucom_softc *, struct termios *); +static void umcs7840_cfg_open(struct ucom_softc *); +static void umcs7840_cfg_close(struct ucom_softc *); + +static int umcs7840_pre_param(struct ucom_softc *, struct termios *); + +static void umcs7840_start_read(struct ucom_softc *); +static void umcs7840_stop_read(struct ucom_softc *); + +static void umcs7840_start_write(struct ucom_softc *); +static void umcs7840_stop_write(struct ucom_softc *); + +static void umcs7840_poll(struct ucom_softc *ucom); + +static device_probe_t umcs7840_probe; +static device_attach_t umcs7840_attach; +static device_detach_t umcs7840_detach; + +static usb_callback_t umcs7840_intr_callback; +static usb_callback_t umcs7840_read_callback1; +static usb_callback_t umcs7840_read_callback2; +static usb_callback_t umcs7840_read_callback3; +static usb_callback_t umcs7840_read_callback4; +static usb_callback_t umcs7840_write_callback1; +static usb_callback_t umcs7840_write_callback2; +static usb_callback_t umcs7840_write_callback3; +static usb_callback_t umcs7840_write_callback4; + +static void umcs7840_read_callbackN(struct usb_xfer 
*, usb_error_t, uint8_t); +static void umcs7840_write_callbackN(struct usb_xfer *, usb_error_t, uint8_t); + +/* Indexed by LOGICAL port number (subunit), so two-port device uses 0 & 1 */ +static usb_callback_t *umcs7840_rw_callbacks[UMCS7840_MAX_PORTS][UMCS7840_N_TRANSFERS] = { + {&umcs7840_read_callback1, &umcs7840_write_callback1}, + {&umcs7840_read_callback2, &umcs7840_write_callback2}, + {&umcs7840_read_callback3, &umcs7840_write_callback3}, + {&umcs7840_read_callback4, &umcs7840_write_callback4}, +}; + +static const struct usb_config umcs7840_bulk_config_data[UMCS7840_N_TRANSFERS] = { + [UMCS7840_BULK_RD_EP] = { + .type = UE_BULK, + .endpoint = 0x01, + .direction = UE_DIR_IN, + .flags = {.pipe_bof = 1,.short_xfer_ok = 1,}, + .bufsize = 0, /* use wMaxPacketSize */ + .callback = &umcs7840_read_callback1, + .if_index = 0, + }, + + [UMCS7840_BULK_WR_EP] = { + .type = UE_BULK, + .endpoint = 0x02, + .direction = UE_DIR_OUT, + .flags = {.pipe_bof = 1,.short_xfer_ok = 1,}, + .bufsize = 0, /* use wMaxPacketSize */ + .callback = &umcs7840_write_callback1, + .if_index = 0, + }, +}; + +static const struct usb_config umcs7840_intr_config_data[1] = { + [0] = { + .type = UE_INTERRUPT, + .endpoint = 0x09, + .direction = UE_DIR_IN, + .flags = {.pipe_bof = 1,.short_xfer_ok = 1,}, + .bufsize = 0, /* use wMaxPacketSize */ + .callback = &umcs7840_intr_callback, + .if_index = 0, + }, +}; + +static struct ucom_callback umcs7840_callback = { + .ucom_cfg_get_status = &umcs7840_cfg_get_status, + + .ucom_cfg_set_dtr = &umcs7840_cfg_set_dtr, + .ucom_cfg_set_rts = &umcs7840_cfg_set_rts, + .ucom_cfg_set_break = &umcs7840_cfg_set_break, + + .ucom_cfg_param = &umcs7840_cfg_param, + .ucom_cfg_open = &umcs7840_cfg_open, + .ucom_cfg_close = &umcs7840_cfg_close, + + .ucom_pre_param = &umcs7840_pre_param, + + .ucom_start_read = &umcs7840_start_read, + .ucom_stop_read = &umcs7840_stop_read, + + .ucom_start_write = &umcs7840_start_write, + .ucom_stop_write = &umcs7840_stop_write, + + .ucom_poll = 
&umcs7840_poll,
+};
+
+/* USB vendor/product pairs this driver binds to. */
+static const struct usb_device_id umcs7840_devs[] = {
+	{USB_VPI(USB_VENDOR_MOSCHIP, USB_PRODUCT_MOSCHIP_MCS7820, 0)},
+	{USB_VPI(USB_VENDOR_MOSCHIP, USB_PRODUCT_MOSCHIP_MCS7840, 0)},
+};
+
+static device_method_t umcs7840_methods[] = {
+	DEVMETHOD(device_probe, umcs7840_probe),
+	DEVMETHOD(device_attach, umcs7840_attach),
+	DEVMETHOD(device_detach, umcs7840_detach),
+	{0, 0}
+};
+
+static devclass_t umcs7840_devclass;
+
+static driver_t umcs7840_driver = {
+	.name = "umcs7840",
+	.methods = umcs7840_methods,
+	.size = sizeof(struct umcs7840_softc),
+};
+
+DRIVER_MODULE(umcs7840, uhub, umcs7840_driver, umcs7840_devclass, 0, 0);
+MODULE_DEPEND(umcs7840, ucom, 1, 1, 1);
+MODULE_DEPEND(umcs7840, usb, 1, 1, 1);
+MODULE_VERSION(umcs7840, UMCS7840_MODVER);
+
+/*
+ * Probe: accept only host-mode attachments on the expected configuration
+ * and interface index, then match the device against umcs7840_devs.
+ * Returns ENXIO on any mismatch, otherwise the lookup result.
+ */
+static int
+umcs7840_probe(device_t dev)
+{
+	struct usb_attach_arg *uaa = device_get_ivars(dev);
+
+	if (uaa->usb_mode != USB_MODE_HOST)
+		return (ENXIO);
+	if (uaa->info.bConfigIndex != MCS7840_CONFIG_INDEX)
+		return (ENXIO);
+	if (uaa->info.bIfaceIndex != MCS7840_IFACE_INDEX)
+		return (ENXIO);
+	return (usbd_lookup_id_by_uaa(umcs7840_devs, sizeof(umcs7840_devs), uaa));
+}
+
+/*
+ * Attach: determine the number of ports, set up the bulk transfers of
+ * every port plus the shared interrupt transfer, and register the ports
+ * with ucom.
+ *
+ * Returns 0 on success.  On any failure the partially initialized state
+ * is torn down through umcs7840_detach() and ENXIO is returned.
+ */
+static int
+umcs7840_attach(device_t dev)
+{
+	struct usb_config umcs7840_config_tmp[UMCS7840_N_TRANSFERS];
+	struct usb_attach_arg *uaa = device_get_ivars(dev);
+	struct umcs7840_softc *sc = device_get_softc(dev);
+
+	uint8_t iface_index = MCS7840_IFACE_INDEX;
+	int error;
+	int subunit;
+	int n;
+	uint8_t data;
+
+	/* Work on a private copy; endpoint and callback are patched per port. */
+	for (n = 0; n < UMCS7840_N_TRANSFERS; ++n)
+		umcs7840_config_tmp[n] = umcs7840_bulk_config_data[n];
+
+	device_set_usb_desc(dev);
+	mtx_init(&sc->sc_mtx, "umcs7840", NULL, MTX_DEF);
+
+	sc->sc_dev = dev;
+	sc->sc_udev = uaa->device;
+	/* No ports known yet; keeps the detach loop empty on early failure. */
+	sc->sc_numports = 0;
+
+	/*
+	 * Get number of ports
+	 * Documentation (full datasheet) says, that number of ports is
+	 * set as MCS7840_DEV_MODE_SELECT24S bit in MODE R/Only
+	 * register. But vendor driver uses this undocumented
+	 * register & bit.
+	 *
+	 * Experiments show, that MODE register can have `0'
+	 * (4 ports) bit on 2-port device, so use vendor driver's way.
+	 *
+	 * Also, see notes in header file for these constants.
+	 */
+	if (umcs7840_get_reg_sync(sc, MCS7840_DEV_REG_GPIO, &data)) {
+		/*
+		 * Without a successful read "data" is uninitialized and
+		 * the port count would be arbitrary, so give up instead.
+		 */
+		device_printf(dev, "reading GPIO register failed\n");
+		goto detach;
+	}
+	if (data & MCS7840_DEV_GPIO_4PORTS) {
+		sc->sc_numports = 4;
+		/* Store physical port numbers in sc_portno */
+		sc->sc_ucom[0].sc_portno = 0;
+		sc->sc_ucom[1].sc_portno = 1;
+		sc->sc_ucom[2].sc_portno = 2;
+		sc->sc_ucom[3].sc_portno = 3;
+	} else {
+		sc->sc_numports = 2;
+		/* Store physical port numbers in sc_portno */
+		sc->sc_ucom[0].sc_portno = 0;
+		sc->sc_ucom[1].sc_portno = 2;	/* '1' is skipped */
+	}
+	device_printf(dev, "Chip mcs%04x, found %d active ports\n", uaa->info.idProduct, sc->sc_numports);
+	/* The MODE register is only reported, never acted upon. */
+	if (!umcs7840_get_reg_sync(sc, MCS7840_DEV_REG_MODE, &data)) {
+		device_printf(dev, "On-die confguration: RST: active %s, HRD: %s, PLL: %s, POR: %s, Ports: %s, EEPROM write %s, IrDA is %savailable\n",
+		    (data & MCS7840_DEV_MODE_RESET) ? "low" : "high",
+		    (data & MCS7840_DEV_MODE_SER_PRSNT) ? "yes" : "no",
+		    (data & MCS7840_DEV_MODE_PLLBYPASS) ? "bypassed" : "avail",
+		    (data & MCS7840_DEV_MODE_PORBYPASS) ? "bypassed" : "avail",
+		    (data & MCS7840_DEV_MODE_SELECT24S) ? "2" : "4",
+		    (data & MCS7840_DEV_MODE_EEPROMWR) ? "enabled" : "disabled",
+		    (data & MCS7840_DEV_MODE_IRDA) ? "" : "not ");
+	}
+	/* Setup all transfers */
+	for (subunit = 0; subunit < sc->sc_numports; ++subunit) {
+		for (n = 0; n < UMCS7840_N_TRANSFERS; ++n) {
+			/*
+			 * Set endpoint address: base endpoint of the
+			 * template plus two per PHYSICAL port number.
+			 */
+			umcs7840_config_tmp[n].endpoint = umcs7840_bulk_config_data[n].endpoint + 2 * sc->sc_ucom[subunit].sc_portno;
+			/* Callbacks are selected by LOGICAL port (subunit). */
+			umcs7840_config_tmp[n].callback = umcs7840_rw_callbacks[subunit][n];
+		}
+		error = usbd_transfer_setup(uaa->device,
+		    &iface_index, sc->sc_ports[sc->sc_ucom[subunit].sc_portno].sc_xfer, umcs7840_config_tmp,
+		    UMCS7840_N_TRANSFERS, sc, &sc->sc_mtx);
+		if (error) {
+			device_printf(dev, "allocating USB transfers failed for subunit %d of %d\n",
+			    subunit + 1, sc->sc_numports);
+			goto detach;
+		}
+	}
+	error = usbd_transfer_setup(uaa->device,
+	    &iface_index, &sc->sc_intr_xfer, umcs7840_intr_config_data,
+	    1, sc, &sc->sc_mtx);
+	if (error) {
+		device_printf(dev, "allocating USB transfers failed for interrupt\n");
+		goto detach;
+	}
+	/* clear stall at first run */
+	mtx_lock(&sc->sc_mtx);
+	for (subunit = 0; subunit < sc->sc_numports; ++subunit) {
+		usbd_xfer_set_stall(sc->sc_ports[sc->sc_ucom[subunit].sc_portno].sc_xfer[UMCS7840_BULK_RD_EP]);
+		usbd_xfer_set_stall(sc->sc_ports[sc->sc_ucom[subunit].sc_portno].sc_xfer[UMCS7840_BULK_WR_EP]);
+	}
+	mtx_unlock(&sc->sc_mtx);
+
+	error = ucom_attach(&sc->sc_super_ucom, sc->sc_ucom, sc->sc_numports, sc,
+	    &umcs7840_callback, &sc->sc_mtx);
+	if (error)
+		goto detach;
+
+	ucom_set_pnpinfo_usb(&sc->sc_super_ucom, dev);
+
+	return (0);
+
+detach:
+	umcs7840_detach(dev);
+	return (ENXIO);
+}
+
+/*
+ * Detach: unregister from ucom, release the bulk transfers of every
+ * port and the interrupt transfer, then destroy the softc mutex.
+ * Also used by umcs7840_attach() as its error path.  Always returns 0.
+ */
+static int
+umcs7840_detach(device_t dev)
+{
+	struct umcs7840_softc *sc = device_get_softc(dev);
+	int subunit;
+
+	ucom_detach(&sc->sc_super_ucom, sc->sc_ucom);
+
+	for (subunit = 0; subunit < sc->sc_numports; ++subunit)
+		usbd_transfer_unsetup(sc->sc_ports[sc->sc_ucom[subunit].sc_portno].sc_xfer, UMCS7840_N_TRANSFERS);
+	usbd_transfer_unsetup(&sc->sc_intr_xfer, 1);
+
+	mtx_destroy(&sc->sc_mtx);
+	return (0);
+}
+
+static void
+umcs7840_cfg_open(struct ucom_softc *ucom) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint16_t pn = ucom->sc_portno; + uint8_t data; + + /* If it very first open, finish global configuration */ + if (!sc->sc_driver_done) { + /* + * USB enumeration is finished, pass internal memory to FIFOs + * If it is done in the end of "attach", kernel panics. + */ + if (umcs7840_get_reg_sync(sc, MCS7840_DEV_REG_CONTROL1, &data)) + return; + data |= MCS7840_DEV_CONTROL1_DRIVER_DONE; + if (umcs7840_set_reg_sync(sc, MCS7840_DEV_REG_CONTROL1, data)) + return; + sc->sc_driver_done = 1; + } + /* Toggle reset bit on-off */ + if (umcs7840_get_reg_sync(sc, umcs7840_port_registers[pn].reg_sp, &data)) + return; + data |= MCS7840_DEV_SPx_UART_RESET; + if (umcs7840_set_reg_sync(sc, umcs7840_port_registers[pn].reg_sp, data)) + return; + data &= ~MCS7840_DEV_SPx_UART_RESET; + if (umcs7840_set_reg_sync(sc, umcs7840_port_registers[pn].reg_sp, data)) + return; + + /* Set RS-232 mode */ + if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_SCRATCHPAD, MCS7840_UART_SCRATCHPAD_RS232)) + return; + + /* Disable RX on time of initialization */ + if (umcs7840_get_reg_sync(sc, umcs7840_port_registers[pn].reg_control, &data)) + return; + data |= MCS7840_DEV_CONTROLx_RX_DISABLE; + if (umcs7840_set_reg_sync(sc, umcs7840_port_registers[pn].reg_control, data)) + return; + + /* Disable all interrupts */ + if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_IER, 0)) + return; + + /* Reset FIFO -- documented */ + if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_FCR, 0)) + return; + if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_FCR, + MCS7840_UART_FCR_ENABLE | MCS7840_UART_FCR_FLUSHRHR | + MCS7840_UART_FCR_FLUSHTHR | MCS7840_UART_FCR_RTL_1_14)) + return; + + /* Set 8 bit, no parity, 1 stop bit -- documented */ + sc->sc_ports[pn].sc_lcr = MCS7840_UART_LCR_DATALEN8 | MCS7840_UART_LCR_STOPB1; + if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_LCR, sc->sc_ports[pn].sc_lcr)) + 
return; + + /* + * Enable DTR/RTS on modem control, enable modem interrupts -- + * documented + */ + sc->sc_ports[pn].sc_mcr = MCS7840_UART_MCR_DTR | MCS7840_UART_MCR_RTS | MCS7840_UART_MCR_IE; + if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_MCR, sc->sc_ports[pn].sc_mcr)) + return; + + /* Clearing Bulkin and Bulkout FIFO */ + if (umcs7840_get_reg_sync(sc, umcs7840_port_registers[pn].reg_sp, &data)) + return; + data |= MCS7840_DEV_SPx_RESET_OUT_FIFO | MCS7840_DEV_SPx_RESET_IN_FIFO; + if (umcs7840_set_reg_sync(sc, umcs7840_port_registers[pn].reg_sp, data)) + return; + data &= ~(MCS7840_DEV_SPx_RESET_OUT_FIFO | MCS7840_DEV_SPx_RESET_IN_FIFO); + if (umcs7840_set_reg_sync(sc, umcs7840_port_registers[pn].reg_sp, data)) + return; + + /* Set speed 9600 */ + if (umcs7840_set_baudrate(sc, pn, 9600)) + return; + + + /* Finally enable all interrupts -- documented */ + /* + * Copied from vendor driver, I don't know why we should read LCR + * here + */ + if (umcs7840_get_UART_reg_sync(sc, pn, MCS7840_UART_REG_LCR, &sc->sc_ports[pn].sc_lcr)) + return; + if (umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_IER, + MCS7840_UART_IER_RXSTAT | MCS7840_UART_IER_MODEM)) + return; + + /* Enable RX */ + if (umcs7840_get_reg_sync(sc, umcs7840_port_registers[pn].reg_control, &data)) + return; + data &= ~MCS7840_DEV_CONTROLx_RX_DISABLE; + if (umcs7840_set_reg_sync(sc, umcs7840_port_registers[pn].reg_control, data)) + return; + + /* Read LSR & MSR */ + if (umcs7840_get_UART_reg_sync(sc, pn, MCS7840_UART_REG_LSR, &sc->sc_ports[pn].sc_lsr)) + return; + if (umcs7840_get_UART_reg_sync(sc, pn, MCS7840_UART_REG_MSR, &sc->sc_ports[pn].sc_msr)) + return; + DPRINTF("Port %d has been opened, LSR=%02x MSR=%02x\n", pn, sc->sc_ports[pn].sc_lsr, sc->sc_ports[pn].sc_msr); +} + +static void +umcs7840_cfg_close(struct ucom_softc *ucom) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint16_t pn = ucom->sc_portno; + uint8_t data; + + umcs7840_stop_read(ucom); + umcs7840_stop_write(ucom); + + 
umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_MCR, 0); + umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_IER, 0); + + /* Disable RX */ + if (umcs7840_get_reg_sync(sc, umcs7840_port_registers[pn].reg_control, &data)) + return; + data |= MCS7840_DEV_CONTROLx_RX_DISABLE; + if (umcs7840_set_reg_sync(sc, umcs7840_port_registers[pn].reg_control, data)) + return; + DPRINTF("Port %d has been closed\n", pn); +} + +static void +umcs7840_cfg_set_dtr(struct ucom_softc *ucom, uint8_t onoff) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + + if (onoff) + sc->sc_ports[pn].sc_mcr |= MCS7840_UART_MCR_DTR; + else + sc->sc_ports[pn].sc_mcr &= ~MCS7840_UART_MCR_DTR; + + umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_MCR, sc->sc_ports[pn].sc_mcr); + DPRINTF("Port %d DTR set to: %s\n", pn, onoff ? "on" : "off"); +} + +static void +umcs7840_cfg_set_rts(struct ucom_softc *ucom, uint8_t onoff) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + + if (onoff) + sc->sc_ports[pn].sc_mcr |= MCS7840_UART_MCR_RTS; + else + sc->sc_ports[pn].sc_mcr &= ~MCS7840_UART_MCR_RTS; + + umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_MCR, sc->sc_ports[pn].sc_mcr); + DPRINTF("Port %d RTS set to: %s\n", pn, onoff ? "on" : "off"); +} + +static void +umcs7840_cfg_set_break(struct ucom_softc *ucom, uint8_t onoff) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + + if (onoff) + sc->sc_ports[pn].sc_lcr |= MCS7840_UART_LCR_BREAK; + else + sc->sc_ports[pn].sc_lcr &= ~MCS7840_UART_LCR_BREAK; + + umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_LCR, sc->sc_ports[pn].sc_lcr); + DPRINTF("Port %d BREAK set to: %s\n", pn, onoff ? 
"on" : "off"); +} + + +/* + * Apply the termios settings in "t" to the port: derive stop bits, parity, + * character size and RTS/CTS, DTR/DSR flow control from c_cflag, store the + * results in the cached LCR/MCR values, write both registers to the UART and + * finally program the baud rate from c_ospeed. Errors from the register + * writes and from umcs7840_set_baudrate() are not checked here. + */ +static void +umcs7840_cfg_param(struct ucom_softc *ucom, struct termios *t) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + uint8_t lcr = sc->sc_ports[pn].sc_lcr; + uint8_t mcr = sc->sc_ports[pn].sc_mcr; + + DPRINTF("Port %d config:\n", pn); + /* + * NOTE(review): no stop-bit bits are cleared before these ORs; if + * STOPB1 is 0, a STOPB2 cached from an earlier call would stick. + * Confirm against the LCR bit definitions. + */ + if (t->c_cflag & CSTOPB) { + DPRINTF(" 2 stop bits\n"); + lcr |= MCS7840_UART_LCR_STOPB2; + } else { + lcr |= MCS7840_UART_LCR_STOPB1; + DPRINTF(" 1 stop bit\n"); + } + + lcr &= ~MCS7840_UART_LCR_PARITYMASK; + if (t->c_cflag & PARENB) { + lcr |= MCS7840_UART_LCR_PARITYON; + /* + * OR the parity mode in: a plain assignment here would throw + * away the stop-bit, BREAK and parity-enable bits already + * accumulated in lcr. + */ + if (t->c_cflag & PARODD) { + lcr |= MCS7840_UART_LCR_PARITYODD; + DPRINTF(" parity on - odd\n"); + } else { + lcr |= MCS7840_UART_LCR_PARITYEVEN; + DPRINTF(" parity on - even\n"); + } + } else { + lcr &= ~MCS7840_UART_LCR_PARITYON; + DPRINTF(" parity off\n"); + } + + lcr &= ~MCS7840_UART_LCR_DATALENMASK; + switch (t->c_cflag & CSIZE) { + case CS5: + lcr |= MCS7840_UART_LCR_DATALEN5; + DPRINTF(" 5 bit\n"); + break; + case CS6: + lcr |= MCS7840_UART_LCR_DATALEN6; + DPRINTF(" 6 bit\n"); + break; + case CS7: + lcr |= MCS7840_UART_LCR_DATALEN7; + DPRINTF(" 7 bit\n"); + break; + case CS8: + lcr |= MCS7840_UART_LCR_DATALEN8; + DPRINTF(" 8 bit\n"); + break; + } + + if (t->c_cflag & CRTSCTS) { + mcr |= MCS7840_UART_MCR_CTSRTS; + DPRINTF(" CTS/RTS\n"); + } else + mcr &= ~MCS7840_UART_MCR_CTSRTS; + + if (t->c_cflag & (CDTR_IFLOW | CDSR_OFLOW)) { + mcr |= MCS7840_UART_MCR_DTRDSR; + DPRINTF(" DTR/DSR\n"); + } else + mcr &= ~MCS7840_UART_MCR_DTRDSR; + + /* Cache the new values first so later DTR/RTS/BREAK updates build on them */ + sc->sc_ports[pn].sc_lcr = lcr; + umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_LCR, sc->sc_ports[pn].sc_lcr); + DPRINTF("Port %d LCR=%02x\n", pn, sc->sc_ports[pn].sc_lcr); + + sc->sc_ports[pn].sc_mcr = mcr; + umcs7840_set_UART_reg_sync(sc, pn, MCS7840_UART_REG_MCR, sc->sc_ports[pn].sc_mcr); + DPRINTF("Port %d MCR=%02x\n", pn, sc->sc_ports[pn].sc_mcr); + + umcs7840_set_baudrate(sc, pn, t->c_ospeed); +} + + +static int +umcs7840_pre_param(struct ucom_softc 
*ucom, struct termios *t) +{ + uint8_t clk; + uint16_t divisor; + + if (umcs7840_calc_baudrate(t->c_ospeed, &divisor, &clk) || !divisor) + return (EINVAL); + return (0); +} + +static void +umcs7840_start_read(struct ucom_softc *ucom) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + + /* Start interrupt transfer */ + usbd_transfer_start(sc->sc_intr_xfer); + + /* Start read transfer */ + usbd_transfer_start(sc->sc_ports[pn].sc_xfer[UMCS7840_BULK_RD_EP]); +} + +static void +umcs7840_stop_read(struct ucom_softc *ucom) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + + /* Stop read transfer */ + usbd_transfer_stop(sc->sc_ports[pn].sc_xfer[UMCS7840_BULK_RD_EP]); +} + +static void +umcs7840_start_write(struct ucom_softc *ucom) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + + /* Start interrupt transfer */ + usbd_transfer_start(sc->sc_intr_xfer); + + /* Start write transfer */ + usbd_transfer_start(sc->sc_ports[pn].sc_xfer[UMCS7840_BULK_WR_EP]); +} + +static void +umcs7840_stop_write(struct ucom_softc *ucom) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + uint8_t pn = ucom->sc_portno; + + /* Stop write transfer */ + usbd_transfer_stop(sc->sc_ports[pn].sc_xfer[UMCS7840_BULK_WR_EP]); +} + +static void +umcs7840_cfg_get_status(struct ucom_softc *ucom, uint8_t *lsr, uint8_t *msr) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + + *lsr = sc->sc_ports[ucom->sc_portno].sc_lsr; + *msr = sc->sc_ports[ucom->sc_portno].sc_msr; + DPRINTF("Port %d status: LSR=%02x MSR=%02x\n", ucom->sc_portno, *lsr, *msr); +} + +static void +umcs7840_intr_callback(struct usb_xfer *xfer, usb_error_t error) +{ + struct umcs7840_softc *sc = usbd_xfer_softc(xfer); + struct usb_page_cache *pc; + uint8_t buf[13]; + int actlen; + int subunit; + + usbd_xfer_status(xfer, &actlen, NULL, NULL, NULL); + + switch (USB_GET_STATE(xfer)) { + case USB_ST_TRANSFERRED: + if (actlen == 5 || actlen == 13) { + 
pc = usbd_xfer_get_frame(xfer, 0); + usbd_copy_out(pc, 0, buf, actlen); + /* Check status of all ports */ + for (subunit = 0; subunit < sc->sc_numports; ++subunit) { + uint8_t pn = sc->sc_ucom[subunit].sc_portno; + + if (buf[pn] & MCS7840_UART_ISR_NOPENDING) + continue; + DPRINTF("Port %d has pending interrupt: %02x (FIFO: %02x)\n", pn, buf[pn] & MCS7840_UART_ISR_INTMASK, buf[pn] & (~MCS7840_UART_ISR_INTMASK)); + switch (buf[pn] & MCS7840_UART_ISR_INTMASK) { + case MCS7840_UART_ISR_RXERR: + case MCS7840_UART_ISR_RXHASDATA: + case MCS7840_UART_ISR_RXTIMEOUT: + /* Read new LSR */ + if (umcs7840_get_UART_reg_sync(sc, pn, MCS7840_UART_REG_LSR, &sc->sc_ports[pn].sc_lsr)) + break; /* Inner switch */ + ucom_status_change(&sc->sc_ucom[subunit]); + /* Inner switch */ + break; + case MCS7840_UART_ISR_TXEMPTY: + /* Do nothing */ + break; /* Inner switch */ + case MCS7840_UART_ISR_MSCHANGE: + /* Read new MSR */ + if (umcs7840_get_UART_reg_sync(sc, pn, MCS7840_UART_REG_MSR, &sc->sc_ports[pn].sc_msr)) + break; /* Inner switch */ + DPRINTF("Port %d: new MSR %02x\n", pn, sc->sc_ports[pn].sc_msr); + ucom_status_change(&sc->sc_ucom[subunit]); + break; + } + } + } else + device_printf(sc->sc_dev, "Invalid interrupt data length %d", actlen); + /* FALLTHROUGH */ + case USB_ST_SETUP: +tr_setup: + usbd_xfer_set_frame_len(xfer, 0, usbd_xfer_max_len(xfer)); + usbd_transfer_submit(xfer); + return; + + default: /* Error */ + if (error != USB_ERR_CANCELLED) { + /* try to clear stall first */ + usbd_xfer_set_stall(xfer); + goto tr_setup; + } + return; + } +} + +static void +umcs7840_read_callback1(struct usb_xfer *xfer, usb_error_t error) +{ + umcs7840_read_callbackN(xfer, error, 0); +} + +static void +umcs7840_read_callback2(struct usb_xfer *xfer, usb_error_t error) +{ + umcs7840_read_callbackN(xfer, error, 1); +} +static void +umcs7840_read_callback3(struct usb_xfer *xfer, usb_error_t error) +{ + umcs7840_read_callbackN(xfer, error, 2); +} + +static void +umcs7840_read_callback4(struct 
usb_xfer *xfer, usb_error_t error) +{ + umcs7840_read_callbackN(xfer, error, 3); +} + +static void +umcs7840_read_callbackN(struct usb_xfer *xfer, usb_error_t error, uint8_t subunit) +{ + struct umcs7840_softc *sc = usbd_xfer_softc(xfer); + struct ucom_softc *ucom = &sc->sc_ucom[subunit]; + struct usb_page_cache *pc; + int actlen; + + usbd_xfer_status(xfer, &actlen, NULL, NULL, NULL); + + DPRINTF("Port %d read, state = %d, data length = %d\n", ucom->sc_portno, USB_GET_STATE(xfer), actlen); + + switch (USB_GET_STATE(xfer)) { + case USB_ST_TRANSFERRED: + pc = usbd_xfer_get_frame(xfer, 0); + ucom_put_data(ucom, pc, 0, actlen); + /* FALLTHROUGH */ + case USB_ST_SETUP: +tr_setup: + usbd_xfer_set_frame_len(xfer, 0, usbd_xfer_max_len(xfer)); + usbd_transfer_submit(xfer); + return; + + default: /* Error */ + if (error != USB_ERR_CANCELLED) { + /* try to clear stall first */ + usbd_xfer_set_stall(xfer); + goto tr_setup; + } + return; + } +} + +static void +umcs7840_write_callback1(struct usb_xfer *xfer, usb_error_t error) +{ + umcs7840_write_callbackN(xfer, error, 0); +} + +static void +umcs7840_write_callback2(struct usb_xfer *xfer, usb_error_t error) +{ + umcs7840_write_callbackN(xfer, error, 1); +} + +static void +umcs7840_write_callback3(struct usb_xfer *xfer, usb_error_t error) +{ + umcs7840_write_callbackN(xfer, error, 2); +} + +static void +umcs7840_write_callback4(struct usb_xfer *xfer, usb_error_t error) +{ + umcs7840_write_callbackN(xfer, error, 3); +} + +static void +umcs7840_write_callbackN(struct usb_xfer *xfer, usb_error_t error, uint8_t subunit) +{ + struct umcs7840_softc *sc = usbd_xfer_softc(xfer); + struct ucom_softc *ucom = &sc->sc_ucom[subunit]; + struct usb_page_cache *pc; + uint32_t actlen; + + DPRINTF("Port %d write, state = %d\n", ucom->sc_portno, USB_GET_STATE(xfer)); + + switch (USB_GET_STATE(xfer)) { + case USB_ST_SETUP: + case USB_ST_TRANSFERRED: +tr_setup: + pc = usbd_xfer_get_frame(xfer, 0); + if (ucom_get_data(ucom, pc, 0, 
usbd_xfer_max_len(xfer), &actlen)) { + DPRINTF("Port %d write, has %d bytes\n", ucom->sc_portno, actlen); + usbd_xfer_set_frame_len(xfer, 0, actlen); + usbd_transfer_submit(xfer); + } + return; + + default: /* Error */ + if (error != USB_ERR_CANCELLED) { + /* try to clear stall first */ + usbd_xfer_set_stall(xfer); + goto tr_setup; + } + return; + } +} + +static void +umcs7840_poll(struct ucom_softc *ucom) +{ + struct umcs7840_softc *sc = ucom->sc_parent; + + DPRINTF("Port %d poll\n", ucom->sc_portno); + usbd_transfer_poll(sc->sc_ports[ucom->sc_portno].sc_xfer, UMCS7840_N_TRANSFERS); + usbd_transfer_poll(&sc->sc_intr_xfer, 1); +} + +static usb_error_t +umcs7840_get_reg_sync(struct umcs7840_softc *sc, uint8_t reg, uint8_t *data) +{ + struct usb_device_request req; + usb_error_t err; + uint16_t len; + + req.bmRequestType = UT_READ_VENDOR_DEVICE; + req.bRequest = MCS7840_RDREQ; + USETW(req.wValue, 0); + USETW(req.wIndex, reg); + USETW(req.wLength, UMCS7840_READ_LENGTH); + + err = usbd_do_request_proc(sc->sc_udev, &sc->sc_super_ucom.sc_tq, &req, (void *)data, 0, &len, UMCS7840_CTRL_TIMEOUT); + if (err == USB_ERR_NORMAL_COMPLETION && len != 1) { + device_printf(sc->sc_dev, "Reading register %d failed: invalid length %d\n", reg, len); + return (USB_ERR_INVAL); + } else if (err) + device_printf(sc->sc_dev, "Reading register %d failed: %s\n", reg, usbd_errstr(err)); + return (err); +} + +static usb_error_t +umcs7840_set_reg_sync(struct umcs7840_softc *sc, uint8_t reg, uint8_t data) +{ + struct usb_device_request req; + usb_error_t err; + + req.bmRequestType = UT_WRITE_VENDOR_DEVICE; + req.bRequest = MCS7840_WRREQ; + USETW(req.wValue, data); + USETW(req.wIndex, reg); + USETW(req.wLength, 0); + + err = usbd_do_request_proc(sc->sc_udev, &sc->sc_super_ucom.sc_tq, &req, NULL, 0, NULL, UMCS7840_CTRL_TIMEOUT); + if (err) + device_printf(sc->sc_dev, "Writing register %d failed: %s\n", reg, usbd_errstr(err)); + + return (err); +} + +static usb_error_t 
+umcs7840_get_UART_reg_sync(struct umcs7840_softc *sc, uint8_t portno, uint8_t reg, uint8_t *data) +{ + struct usb_device_request req; + uint16_t wVal; + usb_error_t err; + uint16_t len; + + /* portno is port number */ + wVal = ((uint16_t)(portno + 1)) << 8; + + req.bmRequestType = UT_READ_VENDOR_DEVICE; + req.bRequest = MCS7840_RDREQ; + USETW(req.wValue, wVal); + USETW(req.wIndex, reg); + USETW(req.wLength, UMCS7840_READ_LENGTH); + + err = usbd_do_request_proc(sc->sc_udev, &sc->sc_super_ucom.sc_tq, &req, (void *)data, 0, &len, UMCS7840_CTRL_TIMEOUT); + if (err == USB_ERR_NORMAL_COMPLETION && len != 1) { + device_printf(sc->sc_dev, "Reading UART%d register %d failed: invalid length %d\n", portno, reg, len); + return (USB_ERR_INVAL); + } else if (err) + device_printf(sc->sc_dev, "Reading UART%d register %d failed: %s\n", portno, reg, usbd_errstr(err)); + return (err); +} + +static usb_error_t +umcs7840_set_UART_reg_sync(struct umcs7840_softc *sc, uint8_t portno, uint8_t reg, uint8_t data) +{ + struct usb_device_request req; + usb_error_t err; + uint16_t wVal; + + /* portno is port number */ + wVal = ((uint16_t)(portno + 1)) << 8 | data; + + req.bmRequestType = UT_WRITE_VENDOR_DEVICE; + req.bRequest = MCS7840_WRREQ; + USETW(req.wValue, wVal); + USETW(req.wIndex, reg); + USETW(req.wLength, 0); + + err = usbd_do_request_proc(sc->sc_udev, &sc->sc_super_ucom.sc_tq, &req, NULL, 0, NULL, UMCS7840_CTRL_TIMEOUT); + if (err) + device_printf(sc->sc_dev, "Writing UART%d register %d failed: %s\n", portno, reg, usbd_errstr(err)); + return (err); +} + +static usb_error_t +umcs7840_set_baudrate(struct umcs7840_softc *sc, uint8_t portno, uint32_t rate) +{ + usb_error_t err; + uint16_t divisor; + uint8_t clk; + uint8_t data; + + if (umcs7840_calc_baudrate(rate, &divisor, &clk)) { + DPRINTF("Port %d bad speed: %d\n", portno, rate); + return (-1); + } + if (divisor == 0 || (clk & MCS7840_DEV_SPx_CLOCK_MASK) != clk) { + DPRINTF("Port %d bad speed calculation: %d\n", portno, rate); + 
return (-1); + } + DPRINTF("Port %d set speed: %d (%02x / %d)\n", portno, rate, clk, divisor); + + /* Set clock source for standard BAUD frequences */ + err = umcs7840_get_reg_sync(sc, umcs7840_port_registers[portno].reg_sp, &data); + if (err) + return (err); + /* + * Replace only the clock-select field: clear the old clock bits, + * leave every other SPx bit as read, then insert the new selection. + */ + data &= ~MCS7840_DEV_SPx_CLOCK_MASK; + data |= clk; + err = umcs7840_set_reg_sync(sc, umcs7840_port_registers[portno].reg_sp, data); + if (err) + return (err); + + /* Set divider */ + sc->sc_ports[portno].sc_lcr |= MCS7840_UART_LCR_DIVISORS; + err = umcs7840_set_UART_reg_sync(sc, portno, MCS7840_UART_REG_LCR, sc->sc_ports[portno].sc_lcr); + if (err) + return (err); + + err = umcs7840_set_UART_reg_sync(sc, portno, MCS7840_UART_REG_DLL, (uint8_t)(divisor & 0xff)); + if (err) + return (err); + err = umcs7840_set_UART_reg_sync(sc, portno, MCS7840_UART_REG_DLM, (uint8_t)((divisor >> 8) & 0xff)); + if (err) + return (err); + + /* Turn off access to DLL/DLM registers of UART */ + sc->sc_ports[portno].sc_lcr &= ~MCS7840_UART_LCR_DIVISORS; + err = umcs7840_set_UART_reg_sync(sc, portno, MCS7840_UART_REG_LCR, sc->sc_ports[portno].sc_lcr); + if (err) + return (err); + return (0); +} + +/* Maximum speeds for standard frequences, when PLL is not used */ +static const uint32_t umcs7840_baudrate_divisors[] = {0, 115200, 230400, 403200, 460800, 806400, 921600, 1572864, 3145728,}; +static const uint8_t umcs7840_baudrate_divisors_len = sizeof(umcs7840_baudrate_divisors) / sizeof(umcs7840_baudrate_divisors[0]); + +/* + * Choose the clock source and divisor for "rate": find the table slot i with + * umcs7840_baudrate_divisors[i] < rate <= umcs7840_baudrate_divisors[i + 1], + * store the quotient of the upper bound and rate in *divisor and the slot + * index, shifted into the clock-select field, in *clk. + * Returns 0 on success, -1 when rate is 0 or above the largest table entry. + */ +static usb_error_t +umcs7840_calc_baudrate(uint32_t rate, uint16_t *divisor, uint8_t *clk) +{ + uint8_t i = 0; + + /* + * A zero rate matches no table slot (the search would run off the + * end of the table) and would be the divisor of the division below. + */ + if (rate == 0 || + rate > umcs7840_baudrate_divisors[umcs7840_baudrate_divisors_len - 1]) + return (-1); + + for (i = 0; i < umcs7840_baudrate_divisors_len - 1 && + !(rate > umcs7840_baudrate_divisors[i] && rate <= umcs7840_baudrate_divisors[i + 1]); ++i); + *divisor = umcs7840_baudrate_divisors[i + 1] / rate; + /* 0x00 .. 
0x70 */ + *clk = i << MCS7840_DEV_SPx_CLOCK_SHIFT; + return (0); +} diff --git a/sys/dev/usb/serial/umcs.h b/sys/dev/usb/serial/umcs.h new file mode 100644 index 0000000..310b4af --- /dev/null +++ b/sys/dev/usb/serial/umcs.h @@ -0,0 +1,644 @@ +/* $FreeBSD$ */ +/*- + * Copyright (c) 2010 Lev Serebryakov <lev@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#ifndef _UMCS7840_H_ +#define _UMCS7840_H_ + +#define UMCS7840_MAX_PORTS 4 + +#define UMCS7840_READ_LENGTH 1 /* bytes */ +#define UMCS7840_CTRL_TIMEOUT 500 /* ms */ + +/* Read/Wrtire registers vendor commands */ +#define MCS7840_RDREQ 0x0d +#define MCS7840_WRREQ 0x0e + +/* Read/Wrtie EEPROM values */ +#define MCS7840_EEPROM_RW_WVALUE 0x0900 + +/* + * All these registers are documented only in full datasheet, + * which can be requested from MosChip tech support. + */ +#define MCS7840_DEV_REG_SP1 0x00 /* Options for for UART 1, R/W */ +#define MCS7840_DEV_REG_CONTROL1 0x01 /* Control bits for UART 1, + * R/W */ +#define MCS7840_DEV_REG_PINPONGHIGH 0x02 /* High bits of ping-pong + * register, R/W */ +#define MCS7840_DEV_REG_PINPONGLOW 0x03 /* Low bits of ping-pong + * register, R/W */ +/* DCRx_1 Registers goes here (see below, they are documented) */ +#define MCS7840_DEV_REG_GPIO 0x07 /* GPIO_0 and GPIO_1 bits, + * undocumented, see notes + * below R/W */ +#define MCS7840_DEV_REG_SP2 0x08 /* Options for for UART 2, R/W */ +#define MCS7840_DEV_REG_CONTROL2 0x09 /* Control bits for UART 2, + * R/W */ +#define MCS7840_DEV_REG_SP3 0x0a /* Options for for UART 3, R/W */ +#define MCS7840_DEV_REG_CONTROL3 0x0b /* Control bits for UART 3, + * R/W */ +#define MCS7840_DEV_REG_SP4 0x0c /* Options for for UART 4, R/W */ +#define MCS7840_DEV_REG_CONTROL4 0x0d /* Control bits for UART 4, + * R/W */ +#define MCS7840_DEV_REG_PLL_DIV_M 0x0e /* Pre-diviedr for PLL, R/W */ +#define MCS7840_DEV_REG_UNKNOWN1 0x0f /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_PLL_DIV_N 0x10 /* Loop divider for PLL, R/W */ +#define MCS7840_DEV_REG_CLOCK_MUX 0x12 /* PLL input clock & Interrupt + * endpoint control, R/W */ +#define MCS7840_DEV_REG_UNKNOWN2 0x11 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_CLOCK_SELECT12 0x13 /* Clock source for ports 1 & + * 2, R/W */ +#define MCS7840_DEV_REG_CLOCK_SELECT34 0x14 /* Clock source for ports 3 & + * 4, R/W */ +#define 
MCS7840_DEV_REG_UNKNOWN3 0x15 /* NOT MENTIONED AND NOT USED */ +/* DCRx_2-DCRx_4 Registers goes here (see below, they are documented) */ +#define MCS7840_DEV_REG_UNKNOWN4 0x1f /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWN5 0x20 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWN6 0x21 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWN7 0x22 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWN8 0x23 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWN9 0x24 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWNA 0x25 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWNB 0x26 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWNC 0x27 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWND 0x28 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWNE 0x29 /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_UNKNOWNF 0x2a /* NOT MENTIONED AND NOT USED */ +#define MCS7840_DEV_REG_MODE 0x2b /* Hardware configuration, + * R/Only */ +#define MCS7840_DEV_REG_SP1_ICG 0x2c /* Inter character gap + * configuration for Port 1, + * R/W */ +#define MCS7840_DEV_REG_SP2_ICG 0x2d /* Inter character gap + * configuration for Port 2, + * R/W */ +#define MCS7840_DEV_REG_SP3_ICG 0x2e /* Inter character gap + * configuration for Port 3, + * R/W */ +#define MCS7840_DEV_REG_SP4_ICG 0x2f /* Inter character gap + * configuration for Port 4, + * R/W */ +#define MCS7840_DEV_REG_RX_SAMPLING12 0x30 /* RX sampling for ports 1 & + * 2, R/W */ +#define MCS7840_DEV_REG_RX_SAMPLING34 0x31 /* RX sampling for ports 3 & + * 4, R/W */ +#define MCS7840_DEV_REG_BI_FIFO_STAT1 0x32 /* Bulk-In FIFO Stat for Port + * 1, contains number of + * availiable bytes, R/Only */ +#define MCS7840_DEV_REG_BO_FIFO_STAT1 0x33 /* Bulk-out FIFO Stat for Port + * 1, contains number of + * availiable bytes, R/Only */ +#define MCS7840_DEV_REG_BI_FIFO_STAT2 0x34 /* Bulk-In FIFO Stat for Port + 
* 2, contains number of + * availiable bytes, R/Only */ +#define MCS7840_DEV_REG_BO_FIFO_STAT2 0x35 /* Bulk-out FIFO Stat for Port + * 2, contains number of + * available bytes, R/Only */ +#define MCS7840_DEV_REG_BI_FIFO_STAT3 0x36 /* Bulk-In FIFO Stat for Port + * 3, contains number of + * available bytes, R/Only */ +#define MCS7840_DEV_REG_BO_FIFO_STAT3 0x37 /* Bulk-out FIFO Stat for Port + * 3, contains number of + * available bytes, R/Only */ +#define MCS7840_DEV_REG_BI_FIFO_STAT4 0x38 /* Bulk-In FIFO Stat for Port + * 4, contains number of + * available bytes, R/Only */ +#define MCS7840_DEV_REG_BO_FIFO_STAT4 0x39 /* Bulk-out FIFO Stat for Port + * 4, contains number of + * available bytes, R/Only */ +#define MCS7840_DEV_REG_ZERO_PERIOD1 0x3a /* Period between zero out + * frames for Port 1, R/W */ +#define MCS7840_DEV_REG_ZERO_PERIOD2 0x3b /* Period between zero out + * frames for Port 2, R/W */ +#define MCS7840_DEV_REG_ZERO_PERIOD3 0x3c /* Period between zero out + * frames for Port 3, R/W */ +#define MCS7840_DEV_REG_ZERO_PERIOD4 0x3d /* Period between zero out + * frames for Port 4, R/W */ +#define MCS7840_DEV_REG_ZERO_ENABLE 0x3e /* Enable/disable of zero out + * frames, R/W */ +#define MCS7840_DEV_REG_THR_VAL_LOW1 0x3f /* Low 8 bits of threshold + * value for Bulk-Out for Port + * 1, R/W */ +#define MCS7840_DEV_REG_THR_VAL_HIGH1 0x40 /* High 1 bit of threshold + * value for Bulk-Out and + * enable flag for Port 1, R/W */ +#define MCS7840_DEV_REG_THR_VAL_LOW2 0x41 /* Low 8 bits of threshold + * value for Bulk-Out for Port + * 2, R/W */ +#define MCS7840_DEV_REG_THR_VAL_HIGH2 0x42 /* High 1 bit of threshold + * value for Bulk-Out and + * enable flag for Port 2, R/W */ +#define MCS7840_DEV_REG_THR_VAL_LOW3 0x43 /* Low 8 bits of threshold + * value for Bulk-Out for Port + * 3, R/W */ +#define MCS7840_DEV_REG_THR_VAL_HIGH3 0x44 /* High 1 bit of threshold + * value for Bulk-Out and + * enable flag for Port 3, R/W */ +#define 
MCS7840_DEV_REG_THR_VAL_LOW4 0x45 /* Low 8 bits of threshhold + * value for Bulk-Out for Port + * 4, R/W */ +#define MCS7840_DEV_REG_THR_VAL_HIGH4 0x46 /* High 1 bit of threshhold + * value for Bulk-Out and + * enable flag for Port 4, R/W */ + +/* Bits for SPx registers */ +#define MCS7840_DEV_SPx_LOOP_PIPES 0x01 /* Loop Bulk-Out FIFO to the + * Bulk-In FIFO, default = 0 */ +#define MCS7840_DEV_SPx_SKIP_ERR_DATA 0x02 /* Drop data bytes from UART, + * which were recevied with + * errors, default = 0 */ +#define MCS7840_DEV_SPx_RESET_OUT_FIFO 0x04 /* Reset Bulk-Out FIFO */ +#define MCS7840_DEV_SPx_RESET_IN_FIFO 0x08 /* Reset Bulk-In FIFO */ +#define MCS7840_DEV_SPx_CLOCK_MASK 0x70 /* Mask to extract Baud CLK + * source */ +#define MCS7840_DEV_SPx_CLOCK_X1 0x00 /* CLK = 1.8432Mhz, max speed + * = 115200 bps, default */ +#define MCS7840_DEV_SPx_CLOCK_X2 0x10 /* CLK = 3.6864Mhz, max speed + * = 230400 bps */ +#define MCS7840_DEV_SPx_CLOCK_X35 0x20 /* CLK = 6.4512Mhz, max speed + * = 403200 bps */ +#define MCS7840_DEV_SPx_CLOCK_X4 0x30 /* CLK = 7.3728Mhz, max speed + * = 460800 bps */ +#define MCS7840_DEV_SPx_CLOCK_X7 0x40 /* CLK = 12.9024Mhz, max speed + * = 806400 bps */ +#define MCS7840_DEV_SPx_CLOCK_X8 0x50 /* CLK = 14.7456Mhz, max speed + * = 921600 bps */ +#define MCS7840_DEV_SPx_CLOCK_24MHZ 0x60 /* CLK = 24.0000Mhz, max speed + * = 1.5 Mbps */ +#define MCS7840_DEV_SPx_CLOCK_48MHZ 0x70 /* CLK = 48.0000Mhz, max speed + * = 3.0 Mbps */ +#define MCS7840_DEV_SPx_CLOCK_SHIFT 4 /* Value 0..7 can be shifted + * to get clock value */ +#define MCS7840_DEV_SPx_UART_RESET 0x80 /* Reset UART */ + +/* Bits for CONTROLx registers */ +#define MCS7840_DEV_CONTROLx_HWFC 0x01 /* Enable hardware flow + * control (when power + * down? 
It is unclear + * in documents), + * default = 0 */ +#define MCS7840_DEV_CONTROLx_UNUNSED1 0x02 /* Reserved */ +#define MCS7840_DEV_CONTROLx_CTS_ENABLE 0x04 /* CTS changes are + * translated to MSR, + * default = 0 */ +#define MCS7840_DEV_CONTROLx_UNUSED2 0x08 /* Reserved for ports + * 2,3,4 */ +#define MCS7840_DEV_CONTROL1_DRIVER_DONE 0x08 /* USB enumerating is + * finished, USB + * enumeration memory + * can be used as FIFOs */ +#define MCS7840_DEV_CONTROLx_RX_NEGATE 0x10 /* Negate RX input, + * works for IrDA mode + * only, default = 0 */ +#define MCS7840_DEV_CONTROLx_RX_DISABLE 0x20 /* Disable RX logic, + * works only for + * RS-232/RS-485 mode, + * default = 0 */ +#define MCS7840_DEV_CONTROLx_FSM_CONTROL 0x40 /* Disable RX FSM when + * TX is in progress, + * works for IrDA mode + * only, default = 0 */ +#define MCS7840_DEV_CONTROLx_UNUSED3 0x80 /* Reserved */ + +/* + * Bits for PINPONGx registers + * These registers control how often two input buffers + * for Bulk-In FIFOs are swapped. One of buffers is used + * for USB trnasfer, other for receiving data from UART. + * Exact meaning of 15 bit value in these registers is unknown + */ +#define MCS7840_DEV_PINPONGHIGH_MULT 128 /* Only 7 bits in PINPONGLOW + * register */ +#define MCS7840_DEV_PINPONGLOW_BITS 7 /* Only 7 bits in PINPONGLOW + * register */ + +/* + * THIS ONE IS UNDOCUMENTED IN FULL DATASHEET, but e-mail from tech support + * confirms, that it is register for GPIO_0 and GPIO_1 data input/output. + * Chips has 2 GPIO, but first one (lower bit) MUST be used by device + * authors as "number of port" indicator, grounded (0) for two-port + * devices and pulled-up to 1 for 4-port devices. + */ +#define MCS7840_DEV_GPIO_4PORTS 0x01 /* Device has 4 ports + * configured */ +#define MCS7840_DEV_GPIO_GPIO_0 0x01 /* The same as above */ +#define MCS7840_DEV_GPIO_GPIO_1 0x02 /* GPIO_1 data */ + +/* + * Constants for PLL dividers + * Ouptut frequency of PLL is: + * Fout = (N/M) * Fin. 
+ * Default PLL input frequency Fin is 12Mhz (on-chip). + */ +#define MCS7840_DEV_PLL_DIV_M_BITS 6 /* Number of useful bits for M + * divider */ +#define MCS7840_DEV_PLL_DIV_M_MASK 0x3f /* Mask for M divider */ +#define MCS7840_DEV_PLL_DIV_M_MIN 1 /* Minimum value for M, 0 is + * forbidden */ +#define MCS7840_DEV_PLL_DIV_M_DEF 1 /* Default value for M */ +#define MCS7840_DEV_PLL_DIV_M_MAX 63 /* Maximum value for M */ +#define MCS7840_DEV_PLL_DIV_N_BITS 6 /* Number of useful bits for N + * divider */ +#define MCS7840_DEV_PLL_DIV_N_MASK 0x3f /* Mask for N divider */ +#define MCS7840_DEV_PLL_DIV_N_MIN 1 /* Minimum value for N, 0 is + * forbidden */ +#define MCS7840_DEV_PLL_DIV_N_DEF 8 /* Default value for N */ +#define MCS7840_DEV_PLL_DIV_N_MAX 63 /* Maximum value for N */ + +/* Bits for CLOCK_MUX register */ +#define MCS7840_DEV_CLOCK_MUX_INPUTMASK 0x03 /* Mask to extract PLL clock + * input */ +#define MCS7840_DEV_CLOCK_MUX_IN12MHZ 0x00 /* 12Mhz PLL input, default */ +#define MCS7840_DEV_CLOCK_MUX_INEXTRN 0x01 /* External (device-depended) + * PLL input */ +#define MCS7840_DEV_CLOCK_MUX_INRSV1 0x02 /* Reserved */ +#define MCS7840_DEV_CLOCK_MUX_INRSV2 0x03 /* Reserved */ +#define MCS7840_DEV_CLOCK_MUX_PLLHIGH 0x04 /* 0 = PLL Output is + * 20MHz-100MHz (default), 1 = + * 100MHz-300MHz range */ +#define MCS7840_DEV_CLOCK_MUX_INTRFIFOS 0x08 /* Enable additional 8 bytes + * fro Interrupt USB pipe with + * USB FIFOs statuses, default + * = 0 */ +#define MCS7840_DEV_CLOCK_MUX_RESERVED1 0x10 /* Unused */ +#define MCS7840_DEV_CLOCK_MUX_RESERVED2 0x20 /* Unused */ +#define MCS7840_DEV_CLOCK_MUX_RESERVED3 0x40 /* Unused */ +#define MCS7840_DEV_CLOCK_MUX_RESERVED4 0x80 /* Unused */ + +/* Bits for CLOCK_SELECTxx registers */ +#define MCS7840_DEV_CLOCK_SELECT1_MASK 0x07 /* Bits for port 1 in + * CLOCK_SELECT12 */ +#define MCS7840_DEV_CLOCK_SELECT1_SHIFT 0 /* Shift for port 1in + * CLOCK_SELECT12 */ +#define MCS7840_DEV_CLOCK_SELECT2_MASK 0x38 /* Bits for port 2 in + * 
CLOCK_SELECT12 */ +#define MCS7840_DEV_CLOCK_SELECT2_SHIFT 3 /* Shift for port 2 in + * CLOCK_SELECT12 */ +#define MCS7840_DEV_CLOCK_SELECT3_MASK 0x07 /* Bits for port 3 in + * CLOCK_SELECT34 */ +#define MCS7840_DEV_CLOCK_SELECT3_SHIFT 0 /* Shift for port 3 in + * CLOCK_SELECT34 */ +#define MCS7840_DEV_CLOCK_SELECT4_MASK 0x38 /* Bits for port 4 in + * CLOCK_SELECT34 */ +#define MCS7840_DEV_CLOCK_SELECT4_SHIFT 3 /* Shift for port 4 in + * CLOCK_SELECT34 */ +#define MCS7840_DEV_CLOCK_SELECT_STD 0x00 /* STANDARD baudrate derived + * from 96Mhz, default for all + * ports */ +#define MCS7840_DEV_CLOCK_SELECT_30MHZ 0x01 /* 30Mhz */ +#define MCS7840_DEV_CLOCK_SELECT_96MHZ 0x02 /* 96Mhz direct */ +#define MCS7840_DEV_CLOCK_SELECT_120MHZ 0x03 /* 120Mhz */ +#define MCS7840_DEV_CLOCK_SELECT_PLL 0x04 /* PLL output (see for M and N + * dividers) */ +#define MCS7840_DEV_CLOCK_SELECT_EXT 0x05 /* External clock input + * (device-dependent) */ +#define MCS7840_DEV_CLOCK_SELECT_RES1 0x06 /* Unused */ +#define MCS7840_DEV_CLOCK_SELECT_RES2 0x07 /* Unused */ + +/* Bits for MODE register */ +#define MCS7840_DEV_MODE_RESERVED1 0x01 /* Unused */ +#define MCS7840_DEV_MODE_RESET 0x02 /* 0: RESET = Active High + * (default), 1: Reserved (?) */ +#define MCS7840_DEV_MODE_SER_PRSNT 0x04 /* 0: Reserved, 1: Do not use + * hardcoded values (default) + * (?) 
*/ +#define MCS7840_DEV_MODE_PLLBYPASS 0x08 /* 1: PLL output is bypassed, + * default = 0 */ +#define MCS7840_DEV_MODE_PORBYPASS 0x10 /* 1: Power-On Reset is + * bypassed, default = 0 */ +#define MCS7840_DEV_MODE_SELECT24S 0x20 /* 0: 4 Serial Ports / IrDA + * active, 1: 2 Serial Ports / + * IrDA active */ +#define MCS7840_DEV_MODE_EEPROMWR 0x40 /* EEPROM write is enabled, + * default */ +#define MCS7840_DEV_MODE_IRDA 0x80 /* IrDA mode is activated + * (could be turned on), + * default */ + +/* Bits for SPx ICG */ +#define MCS7840_DEV_SPx_ICG_DEF 0x24 /* All 8 bits are used as + * number of BAUD clocks of + * pause */ + +/* + * Bits for RX_SAMPLINGxx registers + * These registers control when bit value will be sampled within + * the baud period. + * 0 is very beginning of period, 15 is very end, 7 is the middle. + */ +#define MCS7840_DEV_RX_SAMPLING1_MASK 0x0f /* Bits for port 1 in + * RX_SAMPLING12 */ +#define MCS7840_DEV_RX_SAMPLING1_SHIFT 0 /* Shift for port 1 in + * RX_SAMPLING12 */ +#define MCS7840_DEV_RX_SAMPLING2_MASK 0xf0 /* Bits for port 2 in + * RX_SAMPLING12 */ +#define MCS7840_DEV_RX_SAMPLING2_SHIFT 4 /* Shift for port 2 in + * RX_SAMPLING12 */ +#define MCS7840_DEV_RX_SAMPLING3_MASK 0x0f /* Bits for port 3 in + * RX_SAMPLING34 */ +#define MCS7840_DEV_RX_SAMPLING3_SHIFT 0 /* Shift for port 3 in + * RX_SAMPLING34 */ +#define MCS7840_DEV_RX_SAMPLING4_MASK 0xf0 /* Bits for port 4 in + * RX_SAMPLING34 */ +#define MCS7840_DEV_RX_SAMPLING4_SHIFT 4 /* Shift for port 4 in + * RX_SAMPLING34 */ +#define MCS7840_DEV_RX_SAMPLINGx_MIN 0 /* Min for any RX Sampling */ +#define MCS7840_DEV_RX_SAMPLINGx_DEF 7 /* Default for any RX + * Sampling, center of period */ +#define MCS7840_DEV_RX_SAMPLINGx_MAX 15 /* Max for any RX Sampling */ + +/* Bits for ZERO_PERIODx */ +#define MCS7840_DEV_ZERO_PERIODx_DEF 20 /* Number of Bulk-in requests + * before sending zero-sized + * reply */ + +/* Bits for ZERO_ENABLE */ +#define MCS7840_DEV_ZERO_ENABLE_PORT1 0x01 /* Enable of sending + * 
zero-sized replies for port + * 1, default */ +#define MCS7840_DEV_ZERO_ENABLE_PORT2 0x02 /* Enable of sending + * zero-sized replies for port + * 2, default */ +#define MCS7840_DEV_ZERO_ENABLE_PORT3 0x04 /* Enable of sending + * zero-sized replies for port + * 3, default */ +#define MCS7840_DEV_ZERO_ENABLE_PORT4 0x08 /* Enable of sending + * zero-sized replies for port + * 4, default */ + +/* Bits for THR_VAL_HIGHx */ +#define MCS7840_DEV_THR_VAL_HIGH_MASK 0x01 /* Only one bit is used */ +#define MCS7840_DEV_THR_VAL_HIGH_MUL 256 /* This one bit is means "256" */ +#define MCS7840_DEV_THR_VAL_HIGH_SHIFT 8 /* This one bit is means "256" */ +#define MCS7840_DEV_THR_VAL_HIGH_ENABLE 0x80 /* Enable threshold */ + +/* These are documented in "public" datasheet */ +#define MCS7840_DEV_REG_DCR0_1 0x04 /* Device contol register 0 for Port + * 1, R/W */ +#define MCS7840_DEV_REG_DCR1_1 0x05 /* Device contol register 1 for Port + * 1, R/W */ +#define MCS7840_DEV_REG_DCR2_1 0x06 /* Device contol register 2 for Port + * 1, R/W */ +#define MCS7840_DEV_REG_DCR0_2 0x16 /* Device contol register 0 for Port + * 2, R/W */ +#define MCS7840_DEV_REG_DCR1_2 0x17 /* Device contol register 1 for Port + * 2, R/W */ +#define MCS7840_DEV_REG_DCR2_2 0x18 /* Device contol register 2 for Port + * 2, R/W */ +#define MCS7840_DEV_REG_DCR0_3 0x19 /* Device contol register 0 for Port + * 3, R/W */ +#define MCS7840_DEV_REG_DCR1_3 0x1a /* Device contol register 1 for Port + * 3, R/W */ +#define MCS7840_DEV_REG_DCR2_3 0x1b /* Device contol register 2 for Port + * 3, R/W */ +#define MCS7840_DEV_REG_DCR0_4 0x1c /* Device contol register 0 for Port + * 4, R/W */ +#define MCS7840_DEV_REG_DCR1_4 0x1d /* Device contol register 1 for Port + * 4, R/W */ +#define MCS7840_DEV_REG_DCR2_4 0x1e /* Device contol register 2 for Port + * 4, R/W */ + +/* Bits of DCR0 registers, documented in datasheet */ +#define MCS7840_DEV_DCR0_PWRSAVE 0x01 /* Shutdown transiver + * when USB Suspend is + * engaged, default = 1 */ 
+#define MCS7840_DEV_DCR0_RESERVED1 0x02 /* Unused */ +#define MCS7840_DEV_DCR0_GPIO_MODE_MASK 0x0c /* GPIO Mode bits, WORKS + * ONLY FOR PORT 1 */ +#define MCS7840_DEV_DCR0_GPIO_MODE_IN 0x00 /* GPIO Mode - Input + * (0b00), WORKS ONLY + * FOR PORT 1 */ +#define MCS7840_DEV_DCR0_GPIO_MODE_OUT 0x08 /* GPIO Mode - Input + * (0b10), WORKS ONLY + * FOR PORT 1 */ +#define MCS7840_DEV_DCR0_RTS_ACTIVE_HIGH 0x10 /* RTS Active is HIGH, + * default = 0 (low) */ +#define MCS7840_DEV_DCR0_RTS_AUTO 0x20 /* RTS is controlled by + * state of TX buffer, + * default = 0 + * (controlled by MCR) */ +#define MCS7840_DEV_DCR0_IRDA 0x40 /* IrDA mode */ +#define MCS7840_DEV_DCR0_RESERVED2 0x80 /* Unused */ + +/* Bits of DCR1 registers, documented in datasheet */ +#define MCS7840_DEV_DCR1_GPIO_CURRENT_MASK 0x03 /* Mask to extract GPIO + * current value, WORKS + * ONLY FOR PORT 1 */ +#define MCS7840_DEV_DCR1_GPIO_CURRENT_6MA 0x00 /* GPIO output current + * 6mA, WORKS ONLY FOR + * PORT 1 */ +#define MCS7840_DEV_DCR1_GPIO_CURRENT_8MA 0x01 /* GPIO output current + * 8mA, defauilt, WORKS + * ONLY FOR PORT 1 */ +#define MCS7840_DEV_DCR1_GPIO_CURRENT_10MA 0x02 /* GPIO output current + * 10mA, WORKS ONLY FOR + * PORT 1 */ +#define MCS7840_DEV_DCR1_GPIO_CURRENT_12MA 0x03 /* GPIO output current + * 12mA, WORKS ONLY FOR + * PORT 1 */ +#define MCS7840_DEV_DCR1_UART_CURRENT_MASK 0x0c /* Mask to extract UART + * signals current value */ +#define MCS7840_DEV_DCR1_UART_CURRENT_6MA 0x00 /* UART output current + * 6mA */ +#define MCS7840_DEV_DCR1_UART_CURRENT_8MA 0x04 /* UART output current + * 8mA, defauilt */ +#define MCS7840_DEV_DCR1_UART_CURRENT_10MA 0x08 /* UART output current + * 10mA */ +#define MCS7840_DEV_DCR1_UART_CURRENT_12MA 0x0c /* UART output current + * 12mA */ +#define MCS7840_DEV_DCR1_WAKEUP_DISABLE 0x10 /* Disable Remote USB + * Wakeup */ +#define MCS7840_DEV_DCR1_PLLPWRDOWN_DISABLE 0x20 /* Disable PLL power + * down when not needed, + * WORKS ONLY FOR PORT 1 */ +#define 
MCS7840_DEV_DCR1_LONG_INTERRUPT 0x40 /* Enable 13 bytes of + * interrupt data, with + * FIFO statistics, + * WORKS ONLY FOR PORT 1 */ +#define MCS7840_DEV_DCR1_RESERVED1 0x80 /* Unused */ + +/* + * Bits of DCR2 registers, documented in datasheet + * Wakeup will work only if DCR0_IRDA = 0 (RS-xxx mode) and + * DCR1_WAKEUP_DISABLE = 0 (wakeup enabled). + */ +#define MCS7840_DEV_DCR2_WAKEUP_CTS 0x01 /* Wakeup on CTS change, + * default = 0 */ +#define MCS7840_DEV_DCR2_WAKEUP_DCD 0x02 /* Wakeup on DCD change, + * default = 0 */ +#define MCS7840_DEV_DCR2_WAKEUP_RI 0x04 /* Wakeup on RI change, + * default = 1 */ +#define MCS7840_DEV_DCR2_WAKEUP_DSR 0x08 /* Wakeup on DSR change, + * default = 0 */ +#define MCS7840_DEV_DCR2_WAKEUP_RXD 0x10 /* Wakeup on RX Data change, + * default = 0 */ +#define MCS7840_DEV_DCR2_WAKEUP_RESUME 0x20 /* Wakeup issues RESUME + * signal, DISCONNECT + * otherwise, default = 1 */ +#define MCS7840_DEV_DCR2_RESERVED1 0x40 /* Unused */ +#define MCS7840_DEV_DCR2_SHDN_POLARITY 0x80 /* 0: Pin 12 Active Low, 1: + * Pin 12 Active High, default + * = 0 */ + +/* Interrupt endpoint bytes & bits */ +#define MCS7840_IEP_FIFO_STATUS_INDEX 5 +/* + * Thesse can be calculated as "1 << portnumber" for Bulk-out and + * "1 << (portnumber+1)" for Bulk-in + */ +#define MCS7840_IEP_BO_PORT1_HASDATA 0x01 +#define MCS7840_IEP_BI_PORT1_HASDATA 0x02 +#define MCS7840_IEP_BO_PORT2_HASDATA 0x04 +#define MCS7840_IEP_BI_PORT2_HASDATA 0x08 +#define MCS7840_IEP_BO_PORT3_HASDATA 0x10 +#define MCS7840_IEP_BI_PORT3_HASDATA 0x20 +#define MCS7840_IEP_BO_PORT4_HASDATA 0x40 +#define MCS7840_IEP_BI_PORT4_HASDATA 0x80 + +/* Documented UART registers (fully compatible with 16550 UART) */ +#define MCS7840_UART_REG_THR 0x00 /* Transmitter Holding + * Register W/Only */ +#define MCS7840_UART_REG_RHR 0x00 /* Receiver Holding Register + * R/Only */ +#define MCS7840_UART_REG_IER 0x01 /* Interrupt enable register - + * R/W */ +#define MCS7840_UART_REG_FCR 0x02 /* FIFO Control register - + * 
W/Only */ +#define MCS7840_UART_REG_ISR 0x02 /* Interrupt Status Registter + * R/Only */ +#define MCS7840_UART_REG_LCR 0x03 /* Line control register R/W */ +#define MCS7840_UART_REG_MCR 0x04 /* Modem control register R/W */ +#define MCS7840_UART_REG_LSR 0x05 /* Line status register R/Only */ +#define MCS7840_UART_REG_MSR 0x06 /* Modem status register + * R/Only */ +#define MCS7840_UART_REG_SCRATCHPAD 0x07 /* Scratch pad register */ + +#define MCS7840_UART_REG_DLL 0x00 /* Low bits of BAUD divider */ +#define MCS7840_UART_REG_DLM 0x01 /* High bits of BAUD divider */ + +/* IER bits */ +#define MCS7840_UART_IER_RXREADY 0x01 /* RX Ready interrumpt mask */ +#define MCS7840_UART_IER_TXREADY 0x02 /* TX Ready interrumpt mask */ +#define MCS7840_UART_IER_RXSTAT 0x04 /* RX Status interrumpt mask */ +#define MCS7840_UART_IER_MODEM 0x08 /* Modem status change + * interrumpt mask */ +#define MCS7840_UART_IER_SLEEP 0x10 /* SLEEP enable */ + +/* FCR bits */ +#define MCS7840_UART_FCR_ENABLE 0x01 /* Enable FIFO */ +#define MCS7840_UART_FCR_FLUSHRHR 0x02 /* Flush RHR and FIFO */ +#define MCS7840_UART_FCR_FLUSHTHR 0x04 /* Flush THR and FIFO */ +#define MCS7840_UART_FCR_RTLMASK 0xa0 /* Mask to select RHR + * Interrupt Trigger level */ +#define MCS7840_UART_FCR_RTL_1_1 0x00 /* L1 = 1, L2 = 1 */ +#define MCS7840_UART_FCR_RTL_1_4 0x40 /* L1 = 1, L2 = 4 */ +#define MCS7840_UART_FCR_RTL_1_8 0x80 /* L1 = 1, L2 = 8 */ +#define MCS7840_UART_FCR_RTL_1_14 0xa0 /* L1 = 1, L2 = 14 */ + +/* ISR bits */ +#define MCS7840_UART_ISR_NOPENDING 0x01 /* No interrupt pending */ +#define MCS7840_UART_ISR_INTMASK 0x3f /* Mask to select interrupt + * source */ +#define MCS7840_UART_ISR_RXERR 0x06 /* Recevir error */ +#define MCS7840_UART_ISR_RXHASDATA 0x04 /* Recevier has data */ +#define MCS7840_UART_ISR_RXTIMEOUT 0x0c /* Recevier timeout */ +#define MCS7840_UART_ISR_TXEMPTY 0x02 /* Transmitter empty */ +#define MCS7840_UART_ISR_MSCHANGE 0x00 /* Modem status change */ + +/* LCR bits */ +#define 
MCS7840_UART_LCR_DATALENMASK 0x03 /* Mask for data length */ +#define MCS7840_UART_LCR_DATALEN5 0x00 /* 5 data bits */ +#define MCS7840_UART_LCR_DATALEN6 0x01 /* 6 data bits */ +#define MCS7840_UART_LCR_DATALEN7 0x02 /* 7 data bits */ +#define MCS7840_UART_LCR_DATALEN8 0x03 /* 8 data bits */ + +#define MCS7840_UART_LCR_STOPBMASK 0x04 /* Mask for stop bits */ +#define MCS7840_UART_LCR_STOPB1 0x00 /* 1 stop bit in any case */ +#define MCS7840_UART_LCR_STOPB2 0x04 /* 1.5-2 stop bits depends on + * data length */ + +#define MCS7840_UART_LCR_PARITYMASK 0x38 /* Mask for all parity data */ +#define MCS7840_UART_LCR_PARITYON 0x08 /* Parity ON/OFF - ON */ +#define MCS7840_UART_LCR_PARITYODD 0x00 /* Parity Odd */ +#define MCS7840_UART_LCR_PARITYEVEN 0x10 /* Parity Even */ +#define MCS7840_UART_LCR_PARITYODD 0x00 /* Parity Odd */ +#define MCS7840_UART_LCR_PARITYFORCE 0x20 /* Force parity odd/even */ + +#define MCS7840_UART_LCR_BREAK 0x40 /* Send BREAK */ +#define MCS7840_UART_LCR_DIVISORS 0x80 /* Map DLL/DLM instead of + * xHR/IER */ + +/* LSR bits */ +#define MCS7840_UART_LSR_RHRAVAIL 0x01 /* Data available for read */ +#define MCS7840_UART_LSR_RHROVERRUN 0x02 /* Data FIFO/register overflow */ +#define MCS7840_UART_LSR_PARITYERR 0x04 /* Parity error */ +#define MCS7840_UART_LSR_FRAMEERR 0x10 /* Framing error */ +#define MCS7840_UART_LSR_BREAKERR 0x20 /* BREAK sigmal received */ +#define MCS7840_UART_LSR_THREMPTY 0x40 /* THR register is empty, + * ready for transmit */ +#define MCS7840_UART_LSR_HASERR 0x80 /* Has error in receiver FIFO */ + +/* MCR bits */ +#define MCS7840_UART_MCR_DTR 0x01 /* Force DTR to be active + * (low) */ +#define MCS7840_UART_MCR_RTS 0x02 /* Force RTS to be active + * (low) */ +#define MCS7840_UART_MCR_IE 0x04 /* Enable interrupts (from + * code, not documented) */ +#define MCS7840_UART_MCR_LOOPBACK 0x10 /* Enable local loopback test + * mode */ +#define MCS7840_UART_MCR_CTSRTS 0x20 /* Enable CTS/RTS flow control + * in 550 (FIFO) mode */ +#define 
MCS7840_UART_MCR_DTRDSR 0x40 /* Enable DTR/DSR flow control + * in 550 (FIFO) mode */ +#define MCS7840_UART_MCR_DCD 0x80 /* Enable DCD flow control in + * 550 (FIFO) mode */ + +/* MSR bits */ +#define MCS7840_UART_MSR_DELTACTS 0x01 /* CTS was changed since last + * read */ +#define MCS7840_UART_MSR_DELTADSR 0x02 /* DSR was changed since last + * read */ +#define MCS7840_UART_MSR_DELTARI 0x04 /* RI was changed from low to + * high since last read */ +#define MCS7840_UART_MSR_DELTADCD 0x08 /* DCD was changed since last + * read */ +#define MCS7840_UART_MSR_NEGCTS 0x10 /* Negated CTS signal */ +#define MCS7840_UART_MSR_NEGDSR 0x20 /* Negated DSR signal */ +#define MCS7840_UART_MSR_NEGRI 0x40 /* Negated RI signal */ +#define MCS7840_UART_MSR_NEGDCD 0x80 /* Negated DCD signal */ + +/* SCRATCHPAD bits */ +#define MCS7840_UART_SCRATCHPAD_RS232 0x00 /* RS-485 disabled */ +#define MCS7840_UART_SCRATCHPAD_RS485_DTRRX 0x80 /* RS-485 mode, DTR High + * = RX */ +#define MCS7840_UART_SCRATCHPAD_RS485_DTRTX 0xc0 /* RS-485 mode, DTR High + * = TX */ + +#define MCS7840_CONFIG_INDEX 0 +#define MCS7840_IFACE_INDEX 0 + +#endif diff --git a/sys/dev/usb/usb_device.h b/sys/dev/usb/usb_device.h index c8bc5eb..bf41221 100644 --- a/sys/dev/usb/usb_device.h +++ b/sys/dev/usb/usb_device.h @@ -187,6 +187,8 @@ struct usb_device { struct usb_host_endpoint *linux_endpoint_end; uint16_t devnum; #endif + + uint32_t clear_stall_errors; /* number of clear-stall failures */ }; /* globals */ diff --git a/sys/dev/usb/usb_freebsd.h b/sys/dev/usb/usb_freebsd.h index a44e530..ae69cdb 100644 --- a/sys/dev/usb/usb_freebsd.h +++ b/sys/dev/usb/usb_freebsd.h @@ -66,6 +66,7 @@ #define USB_HUB_MAX_DEPTH 5 #define USB_EP0_BUFSIZE 1024 /* bytes */ +#define USB_CS_RESET_LIMIT 20 /* failures = 20 * 50 ms = 1sec */ typedef uint32_t usb_timeout_t; /* milliseconds */ typedef uint32_t usb_frlength_t; /* bytes */ diff --git a/sys/dev/usb/usb_generic.c b/sys/dev/usb/usb_generic.c index 714ee6f..d62f8f9 100644 --- 
a/sys/dev/usb/usb_generic.c +++ b/sys/dev/usb/usb_generic.c @@ -966,10 +966,8 @@ ugen_re_enumerate(struct usb_fifo *f) /* ignore any errors */ DPRINTFN(6, "no FIFOs\n"); } - if (udev->re_enumerate_wait == 0) { - udev->re_enumerate_wait = 1; - usb_needs_explore(udev->bus, 0); - } + /* start re-enumeration of device */ + usbd_start_re_enumerate(udev); return (0); } diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c index ce8a4a5..351b134 100644 --- a/sys/dev/usb/usb_hub.c +++ b/sys/dev/usb/usb_hub.c @@ -242,9 +242,14 @@ uhub_explore_sub(struct uhub_softc *sc, struct usb_port *up) if (child->flags.usb_mode == USB_MODE_HOST) { usbd_enum_lock(child); if (child->re_enumerate_wait) { - err = usbd_set_config_index(child, USB_UNCONFIG_INDEX); - if (err == 0) - err = usbd_req_re_enumerate(child, NULL); + err = usbd_set_config_index(child, + USB_UNCONFIG_INDEX); + if (err != 0) { + DPRINTF("Unconfigure failed: " + "%s: Ignored.\n", + usbd_errstr(err)); + } + err = usbd_req_re_enumerate(child, NULL); if (err == 0) err = usbd_set_config_index(child, 0); if (err == 0) { @@ -2471,3 +2476,19 @@ usbd_filter_power_mode(struct usb_device *udev, uint8_t power_mode) /* use fixed power mode given by hardware driver */ return (temp); } + +/*------------------------------------------------------------------------* + * usbd_start_re_enumerate + * + * This function starts re-enumeration of the given USB device. This + * function does not need to be called BUS-locked. This function does + * not wait until the re-enumeration is completed. 
+ *------------------------------------------------------------------------*/ +void +usbd_start_re_enumerate(struct usb_device *udev) +{ + if (udev->re_enumerate_wait == 0) { + udev->re_enumerate_wait = 1; + usb_needs_explore(udev->bus, 0); + } +} diff --git a/sys/dev/usb/usb_process.c b/sys/dev/usb/usb_process.c index 0509ec2..051ded9 100644 --- a/sys/dev/usb/usb_process.c +++ b/sys/dev/usb/usb_process.c @@ -360,7 +360,12 @@ usb_proc_is_gone(struct usb_process *up) if (up->up_gone) return (1); - mtx_assert(up->up_mtx, MA_OWNED); + /* + * Allow calls when up_mtx is NULL, before the USB process + * structure is initialised. + */ + if (up->up_mtx != NULL) + mtx_assert(up->up_mtx, MA_OWNED); return (0); } diff --git a/sys/dev/usb/usb_request.c b/sys/dev/usb/usb_request.c index c099e71..4358ef4 100644 --- a/sys/dev/usb/usb_request.c +++ b/sys/dev/usb/usb_request.c @@ -238,6 +238,10 @@ usb_do_clear_stall_callback(struct usb_xfer *xfer, usb_error_t error) switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: + + /* reset error counter */ + udev->clear_stall_errors = 0; + if (ep == NULL) goto tr_setup; /* device was unconfigured */ if (ep->edesc && @@ -289,8 +293,23 @@ tr_setup: goto tr_setup; default: - if (xfer->error == USB_ERR_CANCELLED) { + if (error == USB_ERR_CANCELLED) break; + + DPRINTF("Clear stall failed.\n"); + if (udev->clear_stall_errors == USB_CS_RESET_LIMIT) + goto tr_setup; + + if (error == USB_ERR_TIMEOUT) { + udev->clear_stall_errors = USB_CS_RESET_LIMIT; + DPRINTF("Trying to re-enumerate.\n"); + usbd_start_re_enumerate(udev); + } else { + udev->clear_stall_errors++; + if (udev->clear_stall_errors == USB_CS_RESET_LIMIT) { + DPRINTF("Trying to re-enumerate.\n"); + usbd_start_re_enumerate(udev); + } } goto tr_setup; } @@ -1936,6 +1955,23 @@ usbd_req_re_enumerate(struct usb_device *udev, struct mtx *mtx) return (USB_ERR_INVAL); } retry: + /* + * Try to reset the High Speed parent HUB of a LOW- or FULL- + * speed device, if any. 
+ */ + if (udev->parent_hs_hub != NULL && + udev->speed != USB_SPEED_HIGH) { + DPRINTF("Trying to reset parent High Speed TT.\n"); + err = usbd_req_reset_tt(udev->parent_hs_hub, NULL, + udev->hs_port_no); + if (err) { + DPRINTF("Resetting parent High " + "Speed TT failed (%s).\n", + usbd_errstr(err)); + } + } + + /* Try to reset the parent HUB port. */ err = usbd_req_reset_port(parent_hub, mtx, udev->port_no); if (err) { DPRINTFN(0, "addr=%d, port reset failed, %s\n", @@ -2033,3 +2069,65 @@ usbd_req_set_device_feature(struct usb_device *udev, struct mtx *mtx, USETW(req.wLength, 0); return (usbd_do_request(udev, mtx, &req, 0)); } + +/*------------------------------------------------------------------------* + * usbd_req_reset_tt + * + * Returns: + * 0: Success + * Else: Failure + *------------------------------------------------------------------------*/ +usb_error_t +usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx, + uint8_t port) +{ + struct usb_device_request req; + + /* For single TT HUBs the port should be 1 */ + + if (udev->ddesc.bDeviceClass == UDCLASS_HUB && + udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT) + port = 1; + + req.bmRequestType = UT_WRITE_CLASS_OTHER; + req.bRequest = UR_RESET_TT; + USETW(req.wValue, 0); + req.wIndex[0] = port; + req.wIndex[1] = 0; + USETW(req.wLength, 0); + return (usbd_do_request(udev, mtx, &req, 0)); +} + +/*------------------------------------------------------------------------* + * usbd_req_clear_tt_buffer + * + * For single TT HUBs the port should be 1. 
+ * + * Returns: + * 0: Success + * Else: Failure + *------------------------------------------------------------------------*/ +usb_error_t +usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx, + uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint) +{ + struct usb_device_request req; + uint16_t wValue; + + /* For single TT HUBs the port should be 1 */ + + if (udev->ddesc.bDeviceClass == UDCLASS_HUB && + udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT) + port = 1; + + wValue = (endpoint & 0xF) | ((addr & 0x7F) << 4) | + ((endpoint & 0x80) << 8) | ((type & 3) << 12); + + req.bmRequestType = UT_WRITE_CLASS_OTHER; + req.bRequest = UR_CLEAR_TT_BUFFER; + USETW(req.wValue, wValue); + req.wIndex[0] = port; + req.wIndex[1] = 0; + USETW(req.wLength, 0); + return (usbd_do_request(udev, mtx, &req, 0)); +} diff --git a/sys/dev/usb/usb_request.h b/sys/dev/usb/usb_request.h index 12f373d..ac7a7c1 100644 --- a/sys/dev/usb/usb_request.h +++ b/sys/dev/usb/usb_request.h @@ -85,5 +85,9 @@ usb_error_t usbd_req_set_hub_u2_timeout(struct usb_device *udev, struct mtx *mtx, uint8_t port, uint8_t timeout); usb_error_t usbd_req_set_hub_depth(struct usb_device *udev, struct mtx *mtx, uint16_t depth); +usb_error_t usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx, + uint8_t port); +usb_error_t usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx, + uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint); #endif /* _USB_REQUEST_H_ */ diff --git a/sys/dev/usb/usb_transfer.c b/sys/dev/usb/usb_transfer.c index 5fd4f5a..d4c2408 100644 --- a/sys/dev/usb/usb_transfer.c +++ b/sys/dev/usb/usb_transfer.c @@ -2928,6 +2928,11 @@ repeat: usbd_transfer_unsetup(udev->ctrl_xfer, USB_CTRL_XFER_MAX); /* + * Reset clear stall error counter. 
+ */ + udev->clear_stall_errors = 0; + + /* * Try to setup a new USB transfer for the * default control endpoint: */ diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs index eb49eb2..a5f4cbb 100644 --- a/sys/dev/usb/usbdevs +++ b/sys/dev/usb/usbdevs @@ -2014,6 +2014,7 @@ product KODAK DC280 0x0130 Digital Science DC280 /* Kontron AG products */ product KONTRON DM9601 0x8101 USB Ethernet +product KONTRON JP1082 0x9700 USB Ethernet /* Konica Corp. Products */ product KONICA CAMERA 0x0720 Digital Color Camera @@ -2273,7 +2274,9 @@ product MOBILITY EASIDOCK 0x0304 EasiDock Ethernet /* MosChip products */ product MOSCHIP MCS7703 0x7703 MCS7703 Serial Port Adapter product MOSCHIP MCS7730 0x7730 MCS7730 Ethernet +product MOSCHIP MCS7820 0x7820 MCS7820 Serial Port Adapter product MOSCHIP MCS7830 0x7830 MCS7830 Ethernet +product MOSCHIP MCS7840 0x7840 MCS7840 Serial Port Adapter /* Motorola products */ product MOTOROLA MC141555 0x1555 MC141555 hub controller diff --git a/sys/dev/usb/usbdi.h b/sys/dev/usb/usbdi.h index 8f6da7c..91cd3fa 100644 --- a/sys/dev/usb/usbdi.h +++ b/sys/dev/usb/usbdi.h @@ -542,6 +542,7 @@ void usbd_m_copy_in(struct usb_page_cache *cache, usb_frlength_t dst_offset, struct mbuf *m, usb_size_t src_offset, usb_frlength_t src_len); void usbd_frame_zero(struct usb_page_cache *cache, usb_frlength_t offset, usb_frlength_t len); +void usbd_start_re_enumerate(struct usb_device *udev); int usb_fifo_attach(struct usb_device *udev, void *priv_sc, struct mtx *priv_mtx, struct usb_fifo_methods *pm, diff --git a/sys/dev/wpi/if_wpi.c b/sys/dev/wpi/if_wpi.c index e1fffe1..38ebb7e 100644 --- a/sys/dev/wpi/if_wpi.c +++ b/sys/dev/wpi/if_wpi.c @@ -273,6 +273,8 @@ static devclass_t wpi_devclass; DRIVER_MODULE(wpi, pci, wpi_driver, wpi_devclass, 0, 0); +MODULE_VERSION(wpi, 1); + static const uint8_t wpi_ridx_to_plcp[] = { /* OFDM: IEEE Std 802.11a-1999, pp. 
14 Table 80 */ /* R1-R4 (ral/ural is R4-R1) */ diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c index 458149d..e52c342 100644 --- a/sys/dev/xen/blkback/blkback.c +++ b/sys/dev/xen/blkback/blkback.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2010 Spectra Logic Corporation + * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,6 +61,8 @@ __FBSDID("$FreeBSD$"); #include <sys/types.h> #include <sys/vnode.h> #include <sys/mount.h> +#include <sys/sysctl.h> +#include <sys/bitstring.h> #include <geom/geom.h> @@ -153,9 +155,19 @@ MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data"); #define XBB_MAX_RING_PAGES \ BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \ * XBB_MAX_REQUESTS) +/** + * The maximum number of ring pages that we can allow per request list. + * We limit this to the maximum number of segments per request, because + * that is already a reasonable number of segments to aggregate. This + * number should never be smaller than XBB_MAX_SEGMENTS_PER_REQUEST, + * because that would leave situations where we can't dispatch even one + * large request. + */ +#define XBB_MAX_SEGMENTS_PER_REQLIST XBB_MAX_SEGMENTS_PER_REQUEST /*--------------------------- Forward Declarations ---------------------------*/ struct xbb_softc; +struct xbb_xen_req; static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); @@ -163,16 +175,15 @@ static int xbb_shutdown(struct xbb_softc *xbb); static int xbb_detach(device_t dev); /*------------------------------ Data Structures -----------------------------*/ -/** - * \brief Object tracking an in-flight I/O from a Xen VBD consumer. - */ -struct xbb_xen_req { - /** - * Linked list links used to aggregate idle request in the - * request free pool (xbb->request_free_slist). 
- */ - SLIST_ENTRY(xbb_xen_req) links; +STAILQ_HEAD(xbb_xen_req_list, xbb_xen_req); + +typedef enum { + XBB_REQLIST_NONE = 0x00, + XBB_REQLIST_MAPPED = 0x01 +} xbb_reqlist_flags; + +struct xbb_xen_reqlist { /** * Back reference to the parent block back instance for this * request. Used during bio_done handling. @@ -180,17 +191,71 @@ struct xbb_xen_req { struct xbb_softc *xbb; /** - * The remote domain's identifier for this I/O request. + * BLKIF_OP code for this request. + */ + int operation; + + /** + * Set to BLKIF_RSP_* to indicate request status. + * + * This field allows an error status to be recorded even if the + * delivery of this status must be deferred. Deferred reporting + * is necessary, for example, when an error is detected during + * completion processing of one bio when other bios for this + * request are still outstanding. + */ + int status; + + /** + * Number of 512 byte sectors not transferred. */ - uint64_t id; + int residual_512b_sectors; + + /** + * Starting sector number of the first request in the list. + */ + off_t starting_sector_number; + + /** + * If we're going to coalesce, the next contiguous sector would be + * this one. + */ + off_t next_contig_sector; + + /** + * Number of child requests in the list. + */ + int num_children; + + /** + * Number of I/O requests dispatched to the backend. + */ + int pendcnt; + + /** + * Total number of segments for requests in the list. + */ + int nr_segments; + + /** + * Flags for this particular request list. + */ + xbb_reqlist_flags flags; /** * Kernel virtual address space reserved for this request - * structure and used to map the remote domain's pages for + * list structure and used to map the remote domain's pages for * this I/O, into our domain's address space. */ uint8_t *kva; + /** + * Base, psuedo-physical address, corresponding to the start + * of this request's kva region. 
+ */ + uint64_t gnt_base; + + #ifdef XBB_USE_BOUNCE_BUFFERS /** * Pre-allocated domain local memory used to proxy remote @@ -200,53 +265,91 @@ struct xbb_xen_req { #endif /** - * Base, psuedo-physical address, corresponding to the start - * of this request's kva region. + * Array of grant handles (one per page) used to map this request. */ - uint64_t gnt_base; + grant_handle_t *gnt_handles; + + /** + * Device statistics request ordering type (ordered or simple). + */ + devstat_tag_type ds_tag_type; + + /** + * Device statistics request type (read, write, no_data). + */ + devstat_trans_flags ds_trans_type; + + /** + * The start time for this request. + */ + struct bintime ds_t0; + + /** + * Linked list of contiguous requests with the same operation type. + */ + struct xbb_xen_req_list contig_req_list; + + /** + * Linked list links used to aggregate idle requests in the + * request list free pool (xbb->reqlist_free_stailq) and pending + * requests waiting for execution (xbb->reqlist_pending_stailq). + */ + STAILQ_ENTRY(xbb_xen_reqlist) links; +}; + +STAILQ_HEAD(xbb_xen_reqlist_list, xbb_xen_reqlist); + +/** + * \brief Object tracking an in-flight I/O from a Xen VBD consumer. + */ +struct xbb_xen_req { + /** + * Linked list links used to aggregate requests into a reqlist + * and to store them in the request free pool. + */ + STAILQ_ENTRY(xbb_xen_req) links; + + /** + * The remote domain's identifier for this I/O request. + */ + uint64_t id; /** * The number of pages currently mapped for this request. */ - int nr_pages; + int nr_pages; /** * The number of 512 byte sectors comprising this requests. */ - int nr_512b_sectors; + int nr_512b_sectors; /** * The number of struct bio requests still outstanding for this * request on the backend device. This field is only used for * device (rather than file) backed I/O. */ - int pendcnt; + int pendcnt; /** * BLKIF_OP code for this request. */ - int operation; + int operation; /** - * BLKIF_RSP status code for this request. 
- * - * This field allows an error status to be recorded even if the - * delivery of this status must be deferred. Deferred reporting - * is necessary, for example, when an error is detected during - * completion processing of one bio when other bios for this - * request are still outstanding. + * Storage used for non-native ring requests. */ - int status; + blkif_request_t ring_req_storage; /** - * Device statistics request ordering type (ordered or simple). + * Pointer to the Xen request in the ring. */ - devstat_tag_type ds_tag_type; + blkif_request_t *ring_req; /** - * Device statistics request type (read, write, no_data). + * Consumer index for this request. */ - devstat_trans_flags ds_trans_type; + RING_IDX req_ring_idx; /** * The start time for this request. @@ -254,9 +357,9 @@ struct xbb_xen_req { struct bintime ds_t0; /** - * Array of grant handles (one per page) used to map this request. + * Pointer back to our parent request list. */ - grant_handle_t *gnt_handles; + struct xbb_xen_reqlist *reqlist; }; SLIST_HEAD(xbb_xen_req_slist, xbb_xen_req); @@ -321,7 +424,10 @@ typedef enum XBBF_RESOURCE_SHORTAGE = 0x04, /** Connection teardown in progress. */ - XBBF_SHUTDOWN = 0x08 + XBBF_SHUTDOWN = 0x08, + + /** A thread is already performing shutdown processing. */ + XBBF_IN_SHUTDOWN = 0x10 } xbb_flag_t; /** Backend device type. */ @@ -399,7 +505,7 @@ struct xbb_file_data { * Only a single file based request is outstanding per-xbb instance, * so we only need one of these. */ - struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; #ifdef XBB_USE_BOUNCE_BUFFERS /** @@ -411,7 +517,7 @@ struct xbb_file_data { * bounce-out the read data. This array serves as the temporary * storage for this saved data. 
*/ - struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; /** * \brief Array of memoized bounce buffer kva offsets used @@ -422,7 +528,7 @@ struct xbb_file_data { * the request sg elements is unavoidable. We memoize the computed * bounce address here to reduce the cost of the second walk. */ - void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQUEST]; + void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQLIST]; #endif /* XBB_USE_BOUNCE_BUFFERS */ }; @@ -437,9 +543,9 @@ union xbb_backend_data { /** * Function signature of backend specific I/O handlers. */ -typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, blkif_request_t *ring_req, - struct xbb_xen_req *req, int nseg, - int operation, int flags); +typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, + struct xbb_xen_reqlist *reqlist, int operation, + int flags); /** * Per-instance configuration data. @@ -467,14 +573,23 @@ struct xbb_softc { xbb_dispatch_t dispatch_io; /** The number of requests outstanding on the backend device/file. */ - u_int active_request_count; + int active_request_count; /** Free pool of request tracking structures. */ - struct xbb_xen_req_slist request_free_slist; + struct xbb_xen_req_list request_free_stailq; /** Array, sized at connection time, of request tracking structures. */ struct xbb_xen_req *requests; + /** Free pool of request list structures. */ + struct xbb_xen_reqlist_list reqlist_free_stailq; + + /** List of pending request lists awaiting execution. */ + struct xbb_xen_reqlist_list reqlist_pending_stailq; + + /** Array, sized at connection time, of request list structures. */ + struct xbb_xen_reqlist *request_lists; + /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. @@ -487,6 +602,15 @@ struct xbb_softc { /** The size of the global kva pool. */ int kva_size; + /** The size of the KVA area used for request lists. 
*/ + int reqlist_kva_size; + + /** The number of pages of KVA used for request lists */ + int reqlist_kva_pages; + + /** Bitmap of free KVA pages */ + bitstr_t *kva_free; + /** * \brief Cached value of the front-end's domain id. * @@ -508,12 +632,12 @@ struct xbb_softc { int abi; /** - * \brief The maximum number of requests allowed to be in - * flight at a time. + * \brief The maximum number of requests and request lists allowed + * to be in flight at a time. * * This value is negotiated via the XenStore. */ - uint32_t max_requests; + u_int max_requests; /** * \brief The maximum number of segments (1 page per segment) @@ -521,7 +645,15 @@ struct xbb_softc { * * This value is negotiated via the XenStore. */ - uint32_t max_request_segments; + u_int max_request_segments; + + /** + * \brief Maximum number of segments per request list. + * + * This value is derived from and will generally be larger than + * max_request_segments. + */ + u_int max_reqlist_segments; /** * The maximum size of any request to this back-end @@ -529,7 +661,13 @@ struct xbb_softc { * * This value is negotiated via the XenStore. */ - uint32_t max_request_size; + u_int max_request_size; + + /** + * The maximum size of any request list. This is derived directly + * from max_reqlist_segments. + */ + u_int max_reqlist_size; /** Various configuration and state bit flags. */ xbb_flag_t flags; @@ -574,6 +712,7 @@ struct xbb_softc { struct vnode *vn; union xbb_backend_data backend; + /** The native sector size of the backend. */ u_int sector_size; @@ -598,7 +737,14 @@ struct xbb_softc { * * Ring processing is serialized so we only need one of these. */ - struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQLIST]; + + /** + * Temporary grant table map used in xbb_dispatch_io(). When + * XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the + * stack could cause a stack overflow. 
+ */ + struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQLIST]; /** Mutex protecting per-instance data. */ struct mtx lock; @@ -614,8 +760,51 @@ struct xbb_softc { int pseudo_phys_res_id; #endif - /** I/O statistics. */ + /** + * I/O statistics from BlockBack dispatch down. These are + * coalesced requests, and we start them right before execution. + */ struct devstat *xbb_stats; + + /** + * I/O statistics coming into BlockBack. These are the requests as + * we get them from BlockFront. They are started as soon as we + * receive a request, and completed when the I/O is complete. + */ + struct devstat *xbb_stats_in; + + /** Disable sending flush to the backend */ + int disable_flush; + + /** Send a real flush for every N flush requests */ + int flush_interval; + + /** Count of flush requests in the interval */ + int flush_count; + + /** Don't coalesce requests if this is set */ + int no_coalesce_reqs; + + /** Number of requests we have received */ + uint64_t reqs_received; + + /** Number of requests we have completed*/ + uint64_t reqs_completed; + + /** How many forced dispatches (i.e. without coalescing) have happend */ + uint64_t forced_dispatch; + + /** How many normal dispatches have happend */ + uint64_t normal_dispatch; + + /** How many total dispatches have happend */ + uint64_t total_dispatch; + + /** How many times we have run out of KVA */ + uint64_t kva_shortages; + + /** How many times we have run out of request structures */ + uint64_t request_shortages; }; /*---------------------------- Request Processing ----------------------------*/ @@ -633,21 +822,14 @@ xbb_get_req(struct xbb_softc *xbb) struct xbb_xen_req *req; req = NULL; - mtx_lock(&xbb->lock); - /* - * Do not allow new requests to be allocated while we - * are shutting down. 
- */ - if ((xbb->flags & XBBF_SHUTDOWN) == 0) { - if ((req = SLIST_FIRST(&xbb->request_free_slist)) != NULL) { - SLIST_REMOVE_HEAD(&xbb->request_free_slist, links); - xbb->active_request_count++; - } else { - xbb->flags |= XBBF_RESOURCE_SHORTAGE; - } + mtx_assert(&xbb->lock, MA_OWNED); + + if ((req = STAILQ_FIRST(&xbb->request_free_stailq)) != NULL) { + STAILQ_REMOVE_HEAD(&xbb->request_free_stailq, links); + xbb->active_request_count++; } - mtx_unlock(&xbb->lock); + return (req); } @@ -660,34 +842,40 @@ xbb_get_req(struct xbb_softc *xbb) static inline void xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req) { - int wake_thread; + mtx_assert(&xbb->lock, MA_OWNED); - mtx_lock(&xbb->lock); - wake_thread = xbb->flags & XBBF_RESOURCE_SHORTAGE; - xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; - SLIST_INSERT_HEAD(&xbb->request_free_slist, req, links); + STAILQ_INSERT_HEAD(&xbb->request_free_stailq, req, links); xbb->active_request_count--; - if ((xbb->flags & XBBF_SHUTDOWN) != 0) { - /* - * Shutdown is in progress. See if we can - * progress further now that one more request - * has completed and been returned to the - * free pool. - */ - xbb_shutdown(xbb); - } - mtx_unlock(&xbb->lock); + KASSERT(xbb->active_request_count >= 0, + ("xbb_release_req: negative active count")); +} - if (wake_thread != 0) - taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); +/** + * Return an xbb_xen_req_list of allocated xbb_xen_reqs to the free pool. + * + * \param xbb Per-instance xbb configuration structure. + * \param req_list The list of requests to free. + * \param nreqs The number of items in the list. 
+ */ +static inline void +xbb_release_reqs(struct xbb_softc *xbb, struct xbb_xen_req_list *req_list, + int nreqs) +{ + mtx_assert(&xbb->lock, MA_OWNED); + + STAILQ_CONCAT(&xbb->request_free_stailq, req_list); + xbb->active_request_count -= nreqs; + + KASSERT(xbb->active_request_count >= 0, + ("xbb_release_reqs: negative active count")); } /** * Given a page index and 512b sector offset within that page, * calculate an offset into a request's kva region. * - * \param req The request structure whose kva region will be accessed. + * \param reqlist The request structure whose kva region will be accessed. * \param pagenr The page index used to compute the kva offset. * \param sector The 512b sector index used to compute the page relative * kva offset. @@ -695,9 +883,9 @@ xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req) * \return The computed global KVA offset. */ static inline uint8_t * -xbb_req_vaddr(struct xbb_xen_req *req, int pagenr, int sector) +xbb_reqlist_vaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { - return (req->kva + (PAGE_SIZE * pagenr) + (sector << 9)); + return (reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9)); } #ifdef XBB_USE_BOUNCE_BUFFERS @@ -705,7 +893,7 @@ xbb_req_vaddr(struct xbb_xen_req *req, int pagenr, int sector) * Given a page index and 512b sector offset within that page, * calculate an offset into a request's local bounce memory region. * - * \param req The request structure whose bounce region will be accessed. + * \param reqlist The request structure whose bounce region will be accessed. * \param pagenr The page index used to compute the bounce offset. * \param sector The 512b sector index used to compute the page relative * bounce offset. @@ -713,9 +901,9 @@ xbb_req_vaddr(struct xbb_xen_req *req, int pagenr, int sector) * \return The computed global bounce buffer address. 
*/ static inline uint8_t * -xbb_req_bounce_addr(struct xbb_xen_req *req, int pagenr, int sector) +xbb_reqlist_bounce_addr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { - return (req->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); + return (reqlist->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); } #endif @@ -724,7 +912,7 @@ xbb_req_bounce_addr(struct xbb_xen_req *req, int pagenr, int sector) * calculate an offset into the request's memory region that the * underlying backend device/file should use for I/O. * - * \param req The request structure whose I/O region will be accessed. + * \param reqlist The request structure whose I/O region will be accessed. * \param pagenr The page index used to compute the I/O offset. * \param sector The 512b sector index used to compute the page relative * I/O offset. @@ -736,12 +924,12 @@ xbb_req_bounce_addr(struct xbb_xen_req *req, int pagenr, int sector) * this request. */ static inline uint8_t * -xbb_req_ioaddr(struct xbb_xen_req *req, int pagenr, int sector) +xbb_reqlist_ioaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { #ifdef XBB_USE_BOUNCE_BUFFERS - return (xbb_req_bounce_addr(req, pagenr, sector)); + return (xbb_reqlist_bounce_addr(reqlist, pagenr, sector)); #else - return (xbb_req_vaddr(req, pagenr, sector)); + return (xbb_reqlist_vaddr(reqlist, pagenr, sector)); #endif } @@ -750,7 +938,7 @@ xbb_req_ioaddr(struct xbb_xen_req *req, int pagenr, int sector) * an offset into the local psuedo-physical address space used to map a * front-end's request data into a request. * - * \param req The request structure whose pseudo-physical region + * \param reqlist The request list structure whose pseudo-physical region * will be accessed. * \param pagenr The page index used to compute the pseudo-physical offset. * \param sector The 512b sector index used to compute the page relative @@ -763,10 +951,126 @@ xbb_req_ioaddr(struct xbb_xen_req *req, int pagenr, int sector) * this request. 
*/ static inline uintptr_t -xbb_req_gntaddr(struct xbb_xen_req *req, int pagenr, int sector) +xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { - return ((uintptr_t)(req->gnt_base - + (PAGE_SIZE * pagenr) + (sector << 9))); + struct xbb_softc *xbb; + + xbb = reqlist->xbb; + + return ((uintptr_t)(xbb->gnt_base_addr + + (uintptr_t)(reqlist->kva - xbb->kva) + + (PAGE_SIZE * pagenr) + (sector << 9))); +} + +/** + * Get Kernel Virtual Address space for mapping requests. + * + * \param xbb Per-instance xbb configuration structure. + * \param nr_pages Number of pages needed. + * \param check_only If set, check for free KVA but don't allocate it. + * \param have_lock If set, xbb lock is already held. + * + * \return On success, a pointer to the allocated KVA region. Otherwise NULL. + * + * Note: This should be unnecessary once we have either chaining or + * scatter/gather support for struct bio. At that point we'll be able to + * put multiple addresses and lengths in one bio/bio chain and won't need + * to map everything into one virtual segment. + */ +static uint8_t * +xbb_get_kva(struct xbb_softc *xbb, int nr_pages) +{ + intptr_t first_clear, num_clear; + uint8_t *free_kva; + int i; + + KASSERT(nr_pages != 0, ("xbb_get_kva of zero length")); + + first_clear = 0; + free_kva = NULL; + + mtx_lock(&xbb->lock); + + /* + * Look for the first available page. If there are none, we're done. + */ + bit_ffc(xbb->kva_free, xbb->reqlist_kva_pages, &first_clear); + + if (first_clear == -1) + goto bailout; + + /* + * Starting at the first available page, look for consecutive free + * pages that will satisfy the user's request. + */ + for (i = first_clear, num_clear = 0; i < xbb->reqlist_kva_pages; i++) { + /* + * If this is true, the page is used, so we have to reset + * the number of clear pages and the first clear page + * (since it pointed to a region with an insufficient number + * of clear pages). 
+ */ + if (bit_test(xbb->kva_free, i)) { + num_clear = 0; + first_clear = -1; + continue; + } + + if (first_clear == -1) + first_clear = i; + + /* + * If this is true, we've found a large enough free region + * to satisfy the request. + */ + if (++num_clear == nr_pages) { + + bit_nset(xbb->kva_free, first_clear, + first_clear + nr_pages - 1); + + free_kva = xbb->kva + + (uint8_t *)(first_clear * PAGE_SIZE); + + KASSERT(free_kva >= (uint8_t *)xbb->kva && + free_kva + (nr_pages * PAGE_SIZE) <= + (uint8_t *)xbb->ring_config.va, + ("Free KVA %p len %d out of range, " + "kva = %#jx, ring VA = %#jx\n", free_kva, + nr_pages * PAGE_SIZE, (uintmax_t)xbb->kva, + (uintmax_t)xbb->ring_config.va)); + break; + } + } + +bailout: + + if (free_kva == NULL) { + xbb->flags |= XBBF_RESOURCE_SHORTAGE; + xbb->kva_shortages++; + } + + mtx_unlock(&xbb->lock); + + return (free_kva); +} + +/** + * Free allocated KVA. + * + * \param xbb Per-instance xbb configuration structure. + * \param kva_ptr Pointer to allocated KVA region. + * \param nr_pages Number of pages in the KVA region. + */ +static void +xbb_free_kva(struct xbb_softc *xbb, uint8_t *kva_ptr, int nr_pages) +{ + intptr_t start_page; + + mtx_assert(&xbb->lock, MA_OWNED); + + start_page = (intptr_t)(kva_ptr - xbb->kva) >> PAGE_SHIFT; + bit_nclear(xbb->kva_free, start_page, start_page + nr_pages - 1); + } /** @@ -775,23 +1079,23 @@ xbb_req_gntaddr(struct xbb_xen_req *req, int pagenr, int sector) * \param req The request structure to unmap. 
*/ static void -xbb_unmap_req(struct xbb_xen_req *req) +xbb_unmap_reqlist(struct xbb_xen_reqlist *reqlist) { - struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST]; u_int i; u_int invcount; int error; invcount = 0; - for (i = 0; i < req->nr_pages; i++) { + for (i = 0; i < reqlist->nr_segments; i++) { - if (req->gnt_handles[i] == GRANT_REF_INVALID) + if (reqlist->gnt_handles[i] == GRANT_REF_INVALID) continue; - unmap[invcount].host_addr = xbb_req_gntaddr(req, i, 0); + unmap[invcount].host_addr = xbb_get_gntaddr(reqlist, i, 0); unmap[invcount].dev_bus_addr = 0; - unmap[invcount].handle = req->gnt_handles[i]; - req->gnt_handles[i] = GRANT_REF_INVALID; + unmap[invcount].handle = reqlist->gnt_handles[i]; + reqlist->gnt_handles[i] = GRANT_REF_INVALID; invcount++; } @@ -801,6 +1105,175 @@ xbb_unmap_req(struct xbb_xen_req *req) } /** + * Allocate an internal transaction tracking structure from the free pool. + * + * \param xbb Per-instance xbb configuration structure. + * + * \return On success, a pointer to the allocated xbb_xen_reqlist structure. + * Otherwise NULL. + */ +static inline struct xbb_xen_reqlist * +xbb_get_reqlist(struct xbb_softc *xbb) +{ + struct xbb_xen_reqlist *reqlist; + + reqlist = NULL; + + mtx_assert(&xbb->lock, MA_OWNED); + + if ((reqlist = STAILQ_FIRST(&xbb->reqlist_free_stailq)) != NULL) { + + STAILQ_REMOVE_HEAD(&xbb->reqlist_free_stailq, links); + reqlist->flags = XBB_REQLIST_NONE; + reqlist->kva = NULL; + reqlist->status = BLKIF_RSP_OKAY; + reqlist->residual_512b_sectors = 0; + reqlist->num_children = 0; + reqlist->nr_segments = 0; + STAILQ_INIT(&reqlist->contig_req_list); + } + + return (reqlist); +} + +/** + * Return an allocated transaction tracking structure to the free pool. + * + * \param xbb Per-instance xbb configuration structure. + * \param req The request list structure to free. 
+ * \param wakeup If set, wakeup the work thread if freeing this reqlist + * during a resource shortage condition. + */ +static inline void +xbb_release_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, + int wakeup) +{ + + mtx_lock(&xbb->lock); + + if (wakeup) { + wakeup = xbb->flags & XBBF_RESOURCE_SHORTAGE; + xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; + } + + if (reqlist->kva != NULL) + xbb_free_kva(xbb, reqlist->kva, reqlist->nr_segments); + + xbb_release_reqs(xbb, &reqlist->contig_req_list, reqlist->num_children); + + STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); + + if ((xbb->flags & XBBF_SHUTDOWN) != 0) { + /* + * Shutdown is in progress. See if we can + * progress further now that one more request + * has completed and been returned to the + * free pool. + */ + xbb_shutdown(xbb); + } + + mtx_unlock(&xbb->lock); + + if (wakeup != 0) + taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); +} + +/** + * Request resources and do basic request setup. + * + * \param xbb Per-instance xbb configuration structure. + * \param reqlist Pointer to reqlist pointer. + * \param ring_req Pointer to a block ring request. + * \param ring_index The ring index of this request. + * + * \return 0 for success, non-zero for failure. + */ +static int +xbb_get_resources(struct xbb_softc *xbb, struct xbb_xen_reqlist **reqlist, + blkif_request_t *ring_req, RING_IDX ring_idx) +{ + struct xbb_xen_reqlist *nreqlist; + struct xbb_xen_req *nreq; + + nreqlist = NULL; + nreq = NULL; + + mtx_lock(&xbb->lock); + + /* + * We don't allow new resources to be allocated if we're in the + * process of shutting down. + */ + if ((xbb->flags & XBBF_SHUTDOWN) != 0) { + mtx_unlock(&xbb->lock); + return (1); + } + + /* + * Allocate a reqlist if the caller doesn't have one already. + */ + if (*reqlist == NULL) { + nreqlist = xbb_get_reqlist(xbb); + if (nreqlist == NULL) + goto bailout_error; + } + + /* We always allocate a request. 
*/ + nreq = xbb_get_req(xbb); + if (nreq == NULL) + goto bailout_error; + + mtx_unlock(&xbb->lock); + + if (*reqlist == NULL) { + *reqlist = nreqlist; + nreqlist->operation = ring_req->operation; + nreqlist->starting_sector_number = ring_req->sector_number; + STAILQ_INSERT_TAIL(&xbb->reqlist_pending_stailq, nreqlist, + links); + } + + nreq->reqlist = *reqlist; + nreq->req_ring_idx = ring_idx; + + if (xbb->abi != BLKIF_PROTOCOL_NATIVE) { + bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req)); + nreq->ring_req = &nreq->ring_req_storage; + } else { + nreq->ring_req = ring_req; + } + + binuptime(&nreq->ds_t0); + devstat_start_transaction(xbb->xbb_stats_in, &nreq->ds_t0); + STAILQ_INSERT_TAIL(&(*reqlist)->contig_req_list, nreq, links); + (*reqlist)->num_children++; + (*reqlist)->nr_segments += ring_req->nr_segments; + + return (0); + +bailout_error: + + /* + * We're out of resources, so set the shortage flag. The next time + * a request is released, we'll try waking up the work thread to + * see if we can allocate more resources. + */ + xbb->flags |= XBBF_RESOURCE_SHORTAGE; + xbb->request_shortages++; + + if (nreq != NULL) + xbb_release_req(xbb, nreq); + + mtx_unlock(&xbb->lock); + + if (nreqlist != NULL) + xbb_release_reqlist(xbb, nreqlist, /*wakeup*/ 0); + + return (1); +} + +/** * Create and transmit a response to a blkif request. * * \param xbb Per-instance xbb configuration structure. @@ -862,6 +1335,8 @@ xbb_send_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) more_to_do = 1; } + xbb->reqs_completed++; + mtx_unlock(&xbb->lock); if (more_to_do) @@ -872,6 +1347,70 @@ xbb_send_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) } /** + * Complete a request list. + * + * \param xbb Per-instance xbb configuration structure. + * \param reqlist Allocated internal request list structure. 
+ */ +static void +xbb_complete_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) +{ + struct xbb_xen_req *nreq; + off_t sectors_sent; + + sectors_sent = 0; + + if (reqlist->flags & XBB_REQLIST_MAPPED) + xbb_unmap_reqlist(reqlist); + + /* + * All I/O is done, send the response. A lock should not be + * necessary here because the request list is complete, and + * therefore this is the only context accessing this request + * right now. The functions we call do their own locking if + * necessary. + */ + STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { + off_t cur_sectors_sent; + + xbb_send_response(xbb, nreq, reqlist->status); + + /* We don't report bytes sent if there is an error. */ + if (reqlist->status == BLKIF_RSP_OKAY) + cur_sectors_sent = nreq->nr_512b_sectors; + else + cur_sectors_sent = 0; + + sectors_sent += cur_sectors_sent; + + devstat_end_transaction(xbb->xbb_stats_in, + /*bytes*/cur_sectors_sent << 9, + reqlist->ds_tag_type, + reqlist->ds_trans_type, + /*now*/NULL, + /*then*/&nreq->ds_t0); + } + + /* + * Take out any sectors not sent. If we wind up negative (which + * might happen if an error is reported as well as a residual), just + * report 0 sectors sent. + */ + sectors_sent -= reqlist->residual_512b_sectors; + if (sectors_sent < 0) + sectors_sent = 0; + + devstat_end_transaction(xbb->xbb_stats, + /*bytes*/ sectors_sent << 9, + reqlist->ds_tag_type, + reqlist->ds_trans_type, + /*now*/NULL, + /*then*/&reqlist->ds_t0); + + xbb_release_reqlist(xbb, reqlist, /*wakeup*/ 1); +} + +/** * Completion handler for buffer I/O requests issued by the device * backend driver. 
* @@ -881,18 +1420,34 @@ xbb_send_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) static void xbb_bio_done(struct bio *bio) { - struct xbb_softc *xbb; - struct xbb_xen_req *req; + struct xbb_softc *xbb; + struct xbb_xen_reqlist *reqlist; + + reqlist = bio->bio_caller1; + xbb = reqlist->xbb; - req = bio->bio_caller1; - xbb = req->xbb; + reqlist->residual_512b_sectors += bio->bio_resid >> 9; - /* Only include transferred I/O in stats. */ - req->nr_512b_sectors -= bio->bio_resid >> 9; + /* + * This is a bit imprecise. With aggregated I/O a single + * request list can contain multiple front-end requests and + * a multiple bios may point to a single request. By carefully + * walking the request list, we could map residuals and errors + * back to the original front-end request, but the interface + * isn't sufficiently rich for us to properly report the error. + * So, we just treat the entire request list as having failed if an + * error occurs on any part. And, if an error occurs, we treat + * the amount of data transferred as 0. + * + * For residuals, we report it on the overall aggregated device, + * but not on the individual requests, since we don't currently + * do the work to determine which front-end request to which the + * residual applies. 
+ */ if (bio->bio_error) { DPRINTF("BIO returned error %d for operation on device %s\n", bio->bio_error, xbb->dev_name); - req->status = BLKIF_RSP_ERROR; + reqlist->status = BLKIF_RSP_ERROR; if (bio->bio_error == ENXIO && xenbus_get_state(xbb->dev) == XenbusStateConnected) { @@ -911,23 +1466,18 @@ xbb_bio_done(struct bio *bio) vm_offset_t kva_offset; kva_offset = (vm_offset_t)bio->bio_data - - (vm_offset_t)req->bounce; - memcpy((uint8_t *)req->kva + kva_offset, + - (vm_offset_t)reqlist->bounce; + memcpy((uint8_t *)reqlist->kva + kva_offset, bio->bio_data, bio->bio_bcount); } #endif /* XBB_USE_BOUNCE_BUFFERS */ - if (atomic_fetchadd_int(&req->pendcnt, -1) == 1) { - xbb_unmap_req(req); - xbb_send_response(xbb, req, req->status); - devstat_end_transaction(xbb->xbb_stats, - /*bytes*/req->nr_512b_sectors << 9, - req->ds_tag_type, - req->ds_trans_type, - /*now*/NULL, - /*then*/&req->ds_t0); - xbb_release_req(xbb, req); - } + /* + * Decrement the pending count for the request list. When we're + * done with the requests, send status back for all of them. + */ + if (atomic_fetchadd_int(&reqlist->pendcnt, -1) == 1) + xbb_complete_reqlist(xbb, reqlist); g_destroy_bio(bio); } @@ -936,228 +1486,315 @@ xbb_bio_done(struct bio *bio) * Parse a blkif request into an internal request structure and send * it to the backend for processing. * - * \param xbb Per-instance xbb configuration structure. - * \param ring_req Front-end's I/O request as pulled from the shared - * communication ring. - * \param req Allocated internal request structure. - * \param req_ring_idx The location of ring_req within the shared - * communication ring. + * \param xbb Per-instance xbb configuration structure. + * \param reqlist Allocated internal request list structure. * + * \return On success, 0. For resource shortages, non-zero. 
+ * * This routine performs the backend common aspects of request parsing * including compiling an internal request structure, parsing the S/G * list and any secondary ring requests in which they may reside, and * the mapping of front-end I/O pages into our domain. */ -static void -xbb_dispatch_io(struct xbb_softc *xbb, blkif_request_t *ring_req, - struct xbb_xen_req *req, RING_IDX req_ring_idx) +static int +xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { - struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQUEST]; struct xbb_sg *xbb_sg; struct gnttab_map_grant_ref *map; struct blkif_request_segment *sg; struct blkif_request_segment *last_block_sg; + struct xbb_xen_req *nreq; u_int nseg; u_int seg_idx; u_int block_segs; int nr_sects; + int total_sects; int operation; uint8_t bio_flags; int error; - nseg = ring_req->nr_segments; - nr_sects = 0; - req->xbb = xbb; - req->id = ring_req->id; - req->operation = ring_req->operation; - req->status = BLKIF_RSP_OKAY; - req->ds_tag_type = DEVSTAT_TAG_SIMPLE; - req->nr_pages = nseg; - req->nr_512b_sectors = 0; + reqlist->ds_tag_type = DEVSTAT_TAG_SIMPLE; bio_flags = 0; - sg = NULL; + total_sects = 0; + nr_sects = 0; + + /* + * First determine whether we have enough free KVA to satisfy this + * request list. If not, tell xbb_run_queue() so it can go to + * sleep until we have more KVA. + */ + reqlist->kva = NULL; + if (reqlist->nr_segments != 0) { + reqlist->kva = xbb_get_kva(xbb, reqlist->nr_segments); + if (reqlist->kva == NULL) { + /* + * If we're out of KVA, return ENOMEM. 
+ */ + return (ENOMEM); + } + } - binuptime(&req->ds_t0); - devstat_start_transaction(xbb->xbb_stats, &req->ds_t0); + binuptime(&reqlist->ds_t0); + devstat_start_transaction(xbb->xbb_stats, &reqlist->ds_t0); - switch (req->operation) { + switch (reqlist->operation) { case BLKIF_OP_WRITE_BARRIER: bio_flags |= BIO_ORDERED; - req->ds_tag_type = DEVSTAT_TAG_ORDERED; + reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; /* FALLTHROUGH */ case BLKIF_OP_WRITE: operation = BIO_WRITE; - req->ds_trans_type = DEVSTAT_WRITE; + reqlist->ds_trans_type = DEVSTAT_WRITE; if ((xbb->flags & XBBF_READ_ONLY) != 0) { DPRINTF("Attempt to write to read only device %s\n", xbb->dev_name); - goto fail_send_response; + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; } break; case BLKIF_OP_READ: operation = BIO_READ; - req->ds_trans_type = DEVSTAT_READ; + reqlist->ds_trans_type = DEVSTAT_READ; break; case BLKIF_OP_FLUSH_DISKCACHE: + /* + * If this is true, the user has requested that we disable + * flush support. So we just complete the requests + * successfully. + */ + if (xbb->disable_flush != 0) { + goto send_response; + } + + /* + * The user has requested that we only send a real flush + * for every N flush requests. So keep count, and either + * complete the request immediately or queue it for the + * backend. + */ + if (xbb->flush_interval != 0) { + if (++(xbb->flush_count) < xbb->flush_interval) { + goto send_response; + } else + xbb->flush_count = 0; + } + operation = BIO_FLUSH; - req->ds_tag_type = DEVSTAT_TAG_ORDERED; - req->ds_trans_type = DEVSTAT_NO_DATA; + reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; + reqlist->ds_trans_type = DEVSTAT_NO_DATA; goto do_dispatch; /*NOTREACHED*/ default: DPRINTF("error: unknown block io operation [%d]\n", - req->operation); - goto fail_send_response; + reqlist->operation); + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; } - /* Check that number of segments is sane. 
*/ - if (unlikely(nseg == 0) - || unlikely(nseg > xbb->max_request_segments)) { - DPRINTF("Bad number of segments in request (%d)\n", nseg); - goto fail_send_response; - } - - map = maps; + reqlist->xbb = xbb; xbb_sg = xbb->xbb_sgs; - block_segs = MIN(req->nr_pages, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); - sg = ring_req->seg; - last_block_sg = sg + block_segs; + map = xbb->maps; seg_idx = 0; - while (1) { - while (sg < last_block_sg) { - - xbb_sg->first_sect = sg->first_sect; - xbb_sg->last_sect = sg->last_sect; - xbb_sg->nsect = - (int8_t)(sg->last_sect - sg->first_sect + 1); - - if ((sg->last_sect >= (PAGE_SIZE >> 9)) - || (xbb_sg->nsect <= 0)) - goto fail_send_response; - - nr_sects += xbb_sg->nsect; - map->host_addr = xbb_req_gntaddr(req, seg_idx, - /*sector*/0); - map->flags = GNTMAP_host_map; - map->ref = sg->gref; - map->dom = xbb->otherend_id; - if (operation == BIO_WRITE) - map->flags |= GNTMAP_readonly; - sg++; - map++; - xbb_sg++; - seg_idx++; + STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { + blkif_request_t *ring_req; + RING_IDX req_ring_idx; + u_int req_seg_idx; + + ring_req = nreq->ring_req; + req_ring_idx = nreq->req_ring_idx; + nr_sects = 0; + nseg = ring_req->nr_segments; + nreq->id = ring_req->id; + nreq->nr_pages = nseg; + nreq->nr_512b_sectors = 0; + req_seg_idx = 0; + sg = NULL; + + /* Check that number of segments is sane. */ + if (unlikely(nseg == 0) + || unlikely(nseg > xbb->max_request_segments)) { + DPRINTF("Bad number of segments in request (%d)\n", + nseg); + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; } - block_segs = MIN(nseg - seg_idx, - BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); - if (block_segs == 0) - break; - - /* - * Fetch the next request block full of SG elements. - * For now, only the spacing between entries is different - * in the different ABIs, not the sg entry layout. 
- */ - req_ring_idx++; - switch (xbb->abi) { - case BLKIF_PROTOCOL_NATIVE: - sg = BLKRING_GET_SG_REQUEST(&xbb->rings.native, - req_ring_idx); - break; - case BLKIF_PROTOCOL_X86_32: - { - sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_32, - req_ring_idx); - break; - } - case BLKIF_PROTOCOL_X86_64: - { - sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_64, - req_ring_idx); - break; - } - default: - panic("Unexpected blkif protocol ABI."); - /* NOTREACHED */ - } + block_segs = MIN(nreq->nr_pages, + BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); + sg = ring_req->seg; last_block_sg = sg + block_segs; - } + while (1) { + + while (sg < last_block_sg) { + KASSERT(seg_idx < + XBB_MAX_SEGMENTS_PER_REQLIST, + ("seg_idx %d is too large, max " + "segs %d\n", seg_idx, + XBB_MAX_SEGMENTS_PER_REQLIST)); + + xbb_sg->first_sect = sg->first_sect; + xbb_sg->last_sect = sg->last_sect; + xbb_sg->nsect = + (int8_t)(sg->last_sect - + sg->first_sect + 1); + + if ((sg->last_sect >= (PAGE_SIZE >> 9)) + || (xbb_sg->nsect <= 0)) { + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; + } + + nr_sects += xbb_sg->nsect; + map->host_addr = xbb_get_gntaddr(reqlist, + seg_idx, /*sector*/0); + KASSERT(map->host_addr + PAGE_SIZE <= + xbb->ring_config.gnt_addr, + ("Host address %#jx len %d overlaps " + "ring address %#jx\n", + (uintmax_t)map->host_addr, PAGE_SIZE, + (uintmax_t)xbb->ring_config.gnt_addr)); + + map->flags = GNTMAP_host_map; + map->ref = sg->gref; + map->dom = xbb->otherend_id; + if (operation == BIO_WRITE) + map->flags |= GNTMAP_readonly; + sg++; + map++; + xbb_sg++; + seg_idx++; + req_seg_idx++; + } - /* Convert to the disk's sector size */ - req->nr_512b_sectors = nr_sects; - nr_sects = (nr_sects << 9) >> xbb->sector_size_shift; + block_segs = MIN(nseg - req_seg_idx, + BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); + if (block_segs == 0) + break; - if ((req->nr_512b_sectors & ((xbb->sector_size >> 9) - 1)) != 0) { - device_printf(xbb->dev, "%s: I/O size (%d) is not a multiple " - "of the backing store 
sector size (%d)\n", - __func__, req->nr_512b_sectors << 9, - xbb->sector_size); - goto fail_send_response; + /* + * Fetch the next request block full of SG elements. + * For now, only the spacing between entries is + * different in the different ABIs, not the sg entry + * layout. + */ + req_ring_idx++; + switch (xbb->abi) { + case BLKIF_PROTOCOL_NATIVE: + sg = BLKRING_GET_SG_REQUEST(&xbb->rings.native, + req_ring_idx); + break; + case BLKIF_PROTOCOL_X86_32: + { + sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_32, + req_ring_idx); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_64, + req_ring_idx); + break; + } + default: + panic("Unexpected blkif protocol ABI."); + /* NOTREACHED */ + } + last_block_sg = sg + block_segs; + } + + /* Convert to the disk's sector size */ + nreq->nr_512b_sectors = nr_sects; + nr_sects = (nr_sects << 9) >> xbb->sector_size_shift; + total_sects += nr_sects; + + if ((nreq->nr_512b_sectors & + ((xbb->sector_size >> 9) - 1)) != 0) { + device_printf(xbb->dev, "%s: I/O size (%d) is not " + "a multiple of the backing store sector " + "size (%d)\n", __func__, + nreq->nr_512b_sectors << 9, + xbb->sector_size); + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; + } } error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, - maps, req->nr_pages); + xbb->maps, reqlist->nr_segments); if (error != 0) panic("Grant table operation failed (%d)", error); - for (seg_idx = 0, map = maps; seg_idx < nseg; seg_idx++, map++) { + reqlist->flags |= XBB_REQLIST_MAPPED; + + for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments; + seg_idx++, map++){ if (unlikely(map->status != 0)) { - DPRINTF("invalid buffer -- could not remap it (%d)\n", - map->status); - DPRINTF("Mapping(%d): Host Addr 0x%lx, flags 0x%x " - "ref 0x%x, dom %d\n", seg_idx, + DPRINTF("invalid buffer -- could not remap " + "it (%d)\n", map->status); + DPRINTF("Mapping(%d): Host Addr 0x%lx, flags " + "0x%x ref 0x%x, dom %d\n", seg_idx, 
map->host_addr, map->flags, map->ref, map->dom); - goto fail_unmap_req; + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; } - req->gnt_handles[seg_idx] = map->handle; + reqlist->gnt_handles[seg_idx] = map->handle; } - if (ring_req->sector_number + nr_sects > xbb->media_num_sectors) { + if (reqlist->starting_sector_number + total_sects > + xbb->media_num_sectors) { DPRINTF("%s of [%" PRIu64 ",%" PRIu64 "] " "extends past end of device %s\n", operation == BIO_READ ? "read" : "write", - ring_req->sector_number, - ring_req->sector_number + nr_sects, xbb->dev_name); - goto fail_unmap_req; + reqlist->starting_sector_number, + reqlist->starting_sector_number + total_sects, + xbb->dev_name); + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; } do_dispatch: error = xbb->dispatch_io(xbb, - ring_req, - req, - nseg, + reqlist, operation, bio_flags); if (error != 0) { - if (operation == BIO_FLUSH) - goto fail_send_response; - else - goto fail_unmap_req; + reqlist->status = BLKIF_RSP_ERROR; + goto send_response; } - return; + return (0); +send_response: -fail_unmap_req: - xbb_unmap_req(req); - /* FALLTHROUGH */ + xbb_complete_reqlist(xbb, reqlist); -fail_send_response: - xbb_send_response(xbb, req, BLKIF_RSP_ERROR); - xbb_release_req(xbb, req); - devstat_end_transaction(xbb->xbb_stats, - /*bytes*/0, - req->ds_tag_type, - req->ds_trans_type, - /*now*/NULL, - /*then*/&req->ds_t0); + return (0); +} + +static __inline int +xbb_count_sects(blkif_request_t *ring_req) +{ + int i; + int cur_size = 0; + + for (i = 0; i < ring_req->nr_segments; i++) { + int nsect; + + nsect = (int8_t)(ring_req->seg[i].last_sect - + ring_req->seg[i].first_sect + 1); + if (nsect <= 0) + break; + + cur_size += nsect; + } + + return (cur_size); } /** @@ -1172,95 +1809,210 @@ fail_send_response: static void xbb_run_queue(void *context, int pending) { - struct xbb_softc *xbb; - blkif_back_rings_t *rings; - RING_IDX rp; + struct xbb_softc *xbb; + blkif_back_rings_t *rings; + RING_IDX rp; + 
uint64_t cur_sector; + int cur_operation; + struct xbb_xen_reqlist *reqlist; - xbb = (struct xbb_softc *)context; - rings = &xbb->rings; + xbb = (struct xbb_softc *)context; + rings = &xbb->rings; /* - * Cache req_prod to avoid accessing a cache line shared - * with the frontend. + * Work gather and dispatch loop. Note that we have a bias here + * towards gathering I/O sent by blockfront. We first gather up + * everything in the ring, as long as we have resources. Then we + * dispatch one request, and then attempt to gather up any + * additional requests that have come in while we were dispatching + * the request. + * + * This allows us to get a clearer picture (via devstat) of how + * many requests blockfront is queueing to us at any given time. */ - rp = rings->common.sring->req_prod; + for (;;) { + int retval; + + /* + * Initialize reqlist to the last element in the pending + * queue, if there is one. This allows us to add more + * requests to that request list, if we have room. + */ + reqlist = STAILQ_LAST(&xbb->reqlist_pending_stailq, + xbb_xen_reqlist, links); + if (reqlist != NULL) { + cur_sector = reqlist->next_contig_sector; + cur_operation = reqlist->operation; + } else { + cur_operation = 0; + cur_sector = 0; + } - /* Ensure we see queued requests up to 'rp'. */ - rmb(); + /* + * Cache req_prod to avoid accessing a cache line shared + * with the frontend. + */ + rp = rings->common.sring->req_prod; + + /* Ensure we see queued requests up to 'rp'. */ + rmb(); + + /** + * Run so long as there is work to consume and the generation + * of a response will not overflow the ring. + * + * @note There's a 1 to 1 relationship between requests and + * responses, so an overflow should never occur. This + * test is to protect our domain from digesting bogus + * data. Shouldn't we log this? 
+ */ + while (rings->common.req_cons != rp + && RING_REQUEST_CONS_OVERFLOW(&rings->common, + rings->common.req_cons) == 0){ + blkif_request_t ring_req_storage; + blkif_request_t *ring_req; + int cur_size; + + switch (xbb->abi) { + case BLKIF_PROTOCOL_NATIVE: + ring_req = RING_GET_REQUEST(&xbb->rings.native, + rings->common.req_cons); + break; + case BLKIF_PROTOCOL_X86_32: + { + struct blkif_x86_32_request *ring_req32; + + ring_req32 = RING_GET_REQUEST( + &xbb->rings.x86_32, rings->common.req_cons); + blkif_get_x86_32_req(&ring_req_storage, + ring_req32); + ring_req = &ring_req_storage; + break; + } + case BLKIF_PROTOCOL_X86_64: + { + struct blkif_x86_64_request *ring_req64; + + ring_req64 =RING_GET_REQUEST(&xbb->rings.x86_64, + rings->common.req_cons); + blkif_get_x86_64_req(&ring_req_storage, + ring_req64); + ring_req = &ring_req_storage; + break; + } + default: + panic("Unexpected blkif protocol ABI."); + /* NOTREACHED */ + } - /** - * Run so long as there is work to consume and the generation - * of a response will not overflow the ring. - * - * @note There's a 1 to 1 relationship between requests and responses, - * so an overflow should never occur. This test is to protect - * our domain from digesting bogus data. Shouldn't we log this? - */ - while (rings->common.req_cons != rp - && RING_REQUEST_CONS_OVERFLOW(&rings->common, - rings->common.req_cons) == 0) { - blkif_request_t ring_req_storage; - blkif_request_t *ring_req; - struct xbb_xen_req *req; - RING_IDX req_ring_idx; - - req = xbb_get_req(xbb); - if (req == NULL) { /* - * Resource shortage has been recorded. - * We'll be scheduled to run once a request - * object frees up due to a completion. + * Check for situations that would require closing + * off this I/O for further coalescing: + * - Coalescing is turned off. + * - Current I/O is out of sequence with the previous + * I/O. + * - Coalesced I/O would be too large. 
*/ - break; - } + if ((reqlist != NULL) + && ((xbb->no_coalesce_reqs != 0) + || ((xbb->no_coalesce_reqs == 0) + && ((ring_req->sector_number != cur_sector) + || (ring_req->operation != cur_operation) + || ((ring_req->nr_segments + reqlist->nr_segments) > + xbb->max_reqlist_segments))))) { + reqlist = NULL; + } - switch (xbb->abi) { - case BLKIF_PROTOCOL_NATIVE: - ring_req = RING_GET_REQUEST(&xbb->rings.native, - rings->common.req_cons); - break; - case BLKIF_PROTOCOL_X86_32: - { - struct blkif_x86_32_request *ring_req32; - - ring_req32 = RING_GET_REQUEST(&xbb->rings.x86_32, - rings->common.req_cons); - blkif_get_x86_32_req(&ring_req_storage, ring_req32); - ring_req = &ring_req_storage; - break; + /* + * Grab and check for all resources in one shot. + * If we can't get all of the resources we need, + * the shortage is noted and the thread will get + * woken up when more resources are available. + */ + retval = xbb_get_resources(xbb, &reqlist, ring_req, + xbb->rings.common.req_cons); + + if (retval != 0) { + /* + * Resource shortage has been recorded. + * We'll be scheduled to run once a request + * object frees up due to a completion. + */ + break; + } + + /* + * Signify that we can overwrite this request with + * a response by incrementing our consumer index. + * The response won't be generated until after + * we've already consumed all necessary data out + * of the version of the request in the ring buffer + * (for native mode). We must update the consumer + * index before issueing back-end I/O so there is + * no possibility that it will complete and a + * response be generated before we make room in + * the queue for that response. 
+ */ + xbb->rings.common.req_cons += + BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments); + xbb->reqs_received++; + + cur_size = xbb_count_sects(ring_req); + cur_sector = ring_req->sector_number + cur_size; + reqlist->next_contig_sector = cur_sector; + cur_operation = ring_req->operation; } - case BLKIF_PROTOCOL_X86_64: - { - struct blkif_x86_64_request *ring_req64; - - ring_req64 = RING_GET_REQUEST(&xbb->rings.x86_64, - rings->common.req_cons); - blkif_get_x86_64_req(&ring_req_storage, ring_req64); - ring_req = &ring_req_storage; + + /* Check for I/O to dispatch */ + reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); + if (reqlist == NULL) { + /* + * We're out of work to do, put the task queue to + * sleep. + */ break; } - default: - panic("Unexpected blkif protocol ABI."); - /* NOTREACHED */ - } /* - * Signify that we can overwrite this request with a - * response by incrementing our consumer index. The - * response won't be generated until after we've already - * consumed all necessary data out of the version of the - * request in the ring buffer (for native mode). We - * must update the consumer index before issueing back-end - * I/O so there is no possibility that it will complete - * and a response be generated before we make room in - * the queue for that response. + * Grab the first request off the queue and attempt + * to dispatch it. */ - req_ring_idx = xbb->rings.common.req_cons; - xbb->rings.common.req_cons += - BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments); + STAILQ_REMOVE_HEAD(&xbb->reqlist_pending_stailq, links); - xbb_dispatch_io(xbb, ring_req, req, req_ring_idx); + retval = xbb_dispatch_io(xbb, reqlist); + if (retval != 0) { + /* + * xbb_dispatch_io() returns non-zero only when + * there is a resource shortage. If that's the + * case, re-queue this request on the head of the + * queue, and go to sleep until we have more + * resources. 
+ */ + STAILQ_INSERT_HEAD(&xbb->reqlist_pending_stailq, + reqlist, links); + break; + } else { + /* + * If we still have anything on the queue after + * removing the head entry, that is because we + * met one of the criteria to create a new + * request list (outlined above), and we'll call + * that a forced dispatch for statistical purposes. + * + * Otherwise, if there is only one element on the + * queue, we coalesced everything available on + * the ring and we'll call that a normal dispatch. + */ + reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); + + if (reqlist != NULL) + xbb->forced_dispatch++; + else + xbb->normal_dispatch++; + + xbb->total_dispatch++; + } } } @@ -1285,11 +2037,7 @@ xbb_intr(void *arg) * Backend handler for character device access. * * \param xbb Per-instance xbb configuration structure. - * \param ring_req Front-end's I/O request as pulled from the shared - * communication ring. - * \param req Allocated internal request structure. - * \param nseg The number of valid segments for this request in - * xbb->xbb_sgs. + * \param reqlist Allocated internal request list structure. * \param operation BIO_* I/O operation code. * \param bio_flags Additional bio_flag data to pass to any generated * bios (e.g. BIO_ORDERED).. @@ -1297,28 +2045,30 @@ xbb_intr(void *arg) * \return 0 for success, errno codes for failure. 
*/ static int -xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, - struct xbb_xen_req *req, int nseg, int operation, - int bio_flags) +xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, + int operation, int bio_flags) { struct xbb_dev_data *dev_data; - struct bio *bios[XBB_MAX_SEGMENTS_PER_REQUEST]; + struct bio *bios[XBB_MAX_SEGMENTS_PER_REQLIST]; + struct xbb_xen_req *nreq; off_t bio_offset; struct bio *bio; struct xbb_sg *xbb_sg; u_int nbio; u_int bio_idx; + u_int nseg; u_int seg_idx; int error; dev_data = &xbb->backend.dev; - bio_offset = (off_t)ring_req->sector_number + bio_offset = (off_t)reqlist->starting_sector_number << xbb->sector_size_shift; error = 0; nbio = 0; bio_idx = 0; if (operation == BIO_FLUSH) { + nreq = STAILQ_FIRST(&reqlist->contig_req_list); bio = g_new_bio(); if (unlikely(bio == NULL)) { DPRINTF("Unable to allocate bio for BIO_FLUSH\n"); @@ -1332,19 +2082,21 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, bio->bio_offset = 0; bio->bio_data = 0; bio->bio_done = xbb_bio_done; - bio->bio_caller1 = req; + bio->bio_caller1 = nreq; bio->bio_pblkno = 0; - req->pendcnt = 1; + nreq->pendcnt = 1; - (*dev_data->csw->d_strategy)(bios[bio_idx]); + (*dev_data->csw->d_strategy)(bio); return (0); } - for (seg_idx = 0, bio = NULL, xbb_sg = xbb->xbb_sgs; - seg_idx < nseg; - seg_idx++, xbb_sg++) { + xbb_sg = xbb->xbb_sgs; + bio = NULL; + nseg = reqlist->nr_segments; + + for (seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* * KVA will not be contiguous, so any additional @@ -1353,10 +2105,10 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, if ((bio != NULL) && (xbb_sg->first_sect != 0)) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { - printf("%s: Discontiguous I/O request from " - "domain %d ends on non-sector " - "boundary\n", __func__, - xbb->otherend_id); + printf("%s: Discontiguous I/O request " + "from domain %d ends on " + "non-sector boundary\n", + __func__, 
xbb->otherend_id); error = EINVAL; goto fail_free_bios; } @@ -1365,12 +2117,12 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, if (bio == NULL) { /* - * Make sure that the start of this bio is aligned - * to a device sector. + * Make sure that the start of this bio is + * aligned to a device sector. */ - if ((bio_offset & (xbb->sector_size - 1)) != 0) { - printf("%s: Misaligned I/O request from " - "domain %d\n", __func__, + if ((bio_offset & (xbb->sector_size - 1)) != 0){ + printf("%s: Misaligned I/O request " + "from domain %d\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; @@ -1385,12 +2137,11 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, bio->bio_flags |= bio_flags; bio->bio_dev = dev_data->cdev; bio->bio_offset = bio_offset; - bio->bio_data = xbb_req_ioaddr(req, seg_idx, - xbb_sg->first_sect); + bio->bio_data = xbb_reqlist_ioaddr(reqlist, seg_idx, + xbb_sg->first_sect); bio->bio_done = xbb_bio_done; - bio->bio_caller1 = req; - bio->bio_pblkno = bio_offset - >> xbb->sector_size_shift; + bio->bio_caller1 = reqlist; + bio->bio_pblkno = bio_offset >> xbb->sector_size_shift; } bio->bio_length += xbb_sg->nsect << 9; @@ -1400,10 +2151,10 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { - printf("%s: Discontiguous I/O request from " - "domain %d ends on non-sector " - "boundary\n", __func__, - xbb->otherend_id); + printf("%s: Discontiguous I/O request " + "from domain %d ends on " + "non-sector boundary\n", + __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } @@ -1415,7 +2166,7 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, } } - req->pendcnt = nbio; + reqlist->pendcnt = nbio; for (bio_idx = 0; bio_idx < nbio; bio_idx++) { @@ -1423,10 +2174,10 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, vm_offset_t kva_offset; 
kva_offset = (vm_offset_t)bios[bio_idx]->bio_data - - (vm_offset_t)req->bounce; + - (vm_offset_t)reqlist->bounce; if (operation == BIO_WRITE) { memcpy(bios[bio_idx]->bio_data, - (uint8_t *)req->kva + kva_offset, + (uint8_t *)reqlist->kva + kva_offset, bios[bio_idx]->bio_bcount); } #endif @@ -1438,7 +2189,7 @@ xbb_dispatch_dev(struct xbb_softc *xbb, blkif_request_t *ring_req, fail_free_bios: for (bio_idx = 0; bio_idx < (nbio-1); bio_idx++) g_destroy_bio(bios[bio_idx]); - + return (error); } @@ -1446,24 +2197,21 @@ fail_free_bios: * Backend handler for file access. * * \param xbb Per-instance xbb configuration structure. - * \param ring_req Front-end's I/O request as pulled from the shared - * communication ring. - * \param req Allocated internal request structure. - * \param nseg The number of valid segments for this request in - * xbb->xbb_sgs. + * \param reqlist Allocated internal request list. * \param operation BIO_* I/O operation code. - * \param bio_flags Additional bio_flag data to pass to any generated bios + * \param flags Additional bio_flag data to pass to any generated bios * (e.g. BIO_ORDERED).. * * \return 0 for success, errno codes for failure. 
*/ static int -xbb_dispatch_file(struct xbb_softc *xbb, blkif_request_t *ring_req, - struct xbb_xen_req *req, int nseg, int operation, - int flags) +xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, + int operation, int flags) { struct xbb_file_data *file_data; u_int seg_idx; + u_int nseg; + off_t sectors_sent; struct uio xuio; struct xbb_sg *xbb_sg; struct iovec *xiovec; @@ -1475,11 +2223,10 @@ xbb_dispatch_file(struct xbb_softc *xbb, blkif_request_t *ring_req, int error; file_data = &xbb->backend.file; + sectors_sent = 0; error = 0; bzero(&xuio, sizeof(xuio)); - req->pendcnt = 0; - switch (operation) { case BIO_READ: xuio.uio_rw = UIO_READ; @@ -1509,37 +2256,39 @@ xbb_dispatch_file(struct xbb_softc *xbb, blkif_request_t *ring_req, panic("invalid operation %d", operation); /* NOTREACHED */ } - xuio.uio_offset = (vm_offset_t)ring_req->sector_number + xuio.uio_offset = (vm_offset_t)reqlist->starting_sector_number << xbb->sector_size_shift; - xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = file_data->xiovecs; xuio.uio_iovcnt = 0; + xbb_sg = xbb->xbb_sgs; + nseg = reqlist->nr_segments; - for (seg_idx = 0, xiovec = NULL, xbb_sg = xbb->xbb_sgs; - seg_idx < nseg; seg_idx++, xbb_sg++) { + for (xiovec = NULL, seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* - * If the first sector is not 0, the KVA will not be - * contiguous and we'll need to go on to another segment. + * If the first sector is not 0, the KVA will + * not be contiguous and we'll need to go on + * to another segment. 
*/ if (xbb_sg->first_sect != 0) xiovec = NULL; if (xiovec == NULL) { xiovec = &file_data->xiovecs[xuio.uio_iovcnt]; - xiovec->iov_base = xbb_req_ioaddr(req, seg_idx, - xbb_sg->first_sect); + xiovec->iov_base = xbb_reqlist_ioaddr(reqlist, + seg_idx, xbb_sg->first_sect); #ifdef XBB_USE_BOUNCE_BUFFERS /* - * Store the address of the incoming buffer at this - * particular offset as well, so we can do the copy - * later without having to do more work to - * recalculate this address. + * Store the address of the incoming + * buffer at this particular offset + * as well, so we can do the copy + * later without having to do more + * work to recalculate this address. */ p_vaddr = &file_data->xiovecs_vaddr[xuio.uio_iovcnt]; - *p_vaddr = xbb_req_vaddr(req, seg_idx, - xbb_sg->first_sect); + *p_vaddr = xbb_reqlist_vaddr(reqlist, seg_idx, + xbb_sg->first_sect); #endif /* XBB_USE_BOUNCE_BUFFERS */ xiovec->iov_len = 0; xuio.uio_iovcnt++; @@ -1550,9 +2299,9 @@ xbb_dispatch_file(struct xbb_softc *xbb, blkif_request_t *ring_req, xuio.uio_resid += xbb_sg->nsect << 9; /* - * If the last sector is not the full page size count, - * the next segment will not be contiguous in KVA and we - * need a new iovec. + * If the last sector is not the full page + * size count, the next segment will not be + * contiguous in KVA and we need a new iovec. */ if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) xiovec = NULL; @@ -1676,23 +2425,10 @@ xbb_dispatch_file(struct xbb_softc *xbb, blkif_request_t *ring_req, bailout_send_response: - /* - * All I/O is already done, send the response. A lock is not - * necessary here because we're single threaded, and therefore the - * only context accessing this request right now. If that changes, - * we may need some locking here. - */ - xbb_unmap_req(req); - xbb_send_response(xbb, req, (error == 0) ? BLKIF_RSP_OKAY : - BLKIF_RSP_ERROR); - devstat_end_transaction(xbb->xbb_stats, - /*bytes*/error == 0 ? 
req->nr_512b_sectors << 9 - : 0, - req->ds_tag_type, - req->ds_trans_type, - /*now*/NULL, - /*then*/&req->ds_t0); - xbb_release_req(xbb, req); + if (error != 0) + reqlist->status = BLKIF_RSP_ERROR; + + xbb_complete_reqlist(xbb, reqlist); return (0); } @@ -1913,6 +2649,12 @@ xbb_open_backend(struct xbb_softc *xbb) DPRINTF("opening dev=%s\n", xbb->dev_name); + if (rootvnode == NULL) { + xenbus_dev_fatal(xbb->dev, ENOENT, + "Root file system not mounted"); + return (ENOENT); + } + if ((xbb->flags & XBBF_READ_ONLY) == 0) flags |= FWRITE; @@ -1996,11 +2738,39 @@ xbb_open_backend(struct xbb_softc *xbb) /*------------------------ Inter-Domain Communication ------------------------*/ /** - * Cleanup all inter-domain communication mechanisms. + * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xbb Per-instance xbb configuration structure. */ static void +xbb_free_communication_mem(struct xbb_softc *xbb) +{ + if (xbb->kva != 0) { +#ifndef XENHVM + kmem_free(kernel_map, xbb->kva, xbb->kva_size); +#else + if (xbb->pseudo_phys_res != NULL) { + bus_release_resource(xbb->dev, SYS_RES_MEMORY, + xbb->pseudo_phys_res_id, + xbb->pseudo_phys_res); + xbb->pseudo_phys_res = NULL; + } +#endif + } + xbb->kva = 0; + xbb->gnt_base_addr = 0; + if (xbb->kva_free != NULL) { + free(xbb->kva_free, M_XENBLOCKBACK); + xbb->kva_free = NULL; + } +} + +/** + * Cleanup all inter-domain communication mechanisms. + * + * \param xbb Per-instance xbb configuration structure. 
+ */ +static int xbb_disconnect(struct xbb_softc *xbb) { struct gnttab_unmap_grant_ref ops[XBB_MAX_RING_PAGES]; @@ -2011,13 +2781,24 @@ xbb_disconnect(struct xbb_softc *xbb) DPRINTF("\n"); if ((xbb->flags & XBBF_RING_CONNECTED) == 0) - return; + return (0); if (xbb->irq != 0) { unbind_from_irqhandler(xbb->irq); xbb->irq = 0; } + mtx_unlock(&xbb->lock); + taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); + mtx_lock(&xbb->lock); + + /* + * No new interrupts can generate work, but we must wait + * for all currently active requests to drain. + */ + if (xbb->active_request_count != 0) + return (EAGAIN); + for (ring_idx = 0, op = ops; ring_idx < xbb->ring_config.ring_pages; ring_idx++, op++) { @@ -2033,7 +2814,37 @@ xbb_disconnect(struct xbb_softc *xbb) if (error != 0) panic("Grant table op failed (%d)", error); + xbb_free_communication_mem(xbb); + + if (xbb->requests != NULL) { + free(xbb->requests, M_XENBLOCKBACK); + xbb->requests = NULL; + } + + if (xbb->request_lists != NULL) { + struct xbb_xen_reqlist *reqlist; + int i; + + /* There is one request list for ever allocated request. 
*/ + for (i = 0, reqlist = xbb->request_lists; + i < xbb->max_requests; i++, reqlist++){ +#ifdef XBB_USE_BOUNCE_BUFFERS + if (reqlist->bounce != NULL) { + free(reqlist->bounce, M_XENBLOCKBACK); + reqlist->bounce = NULL; + } +#endif + if (reqlist->gnt_handles != NULL) { + free(reqlist->gnt_handles, M_XENBLOCKBACK); + reqlist->gnt_handles = NULL; + } + } + free(xbb->request_lists, M_XENBLOCKBACK); + xbb->request_lists = NULL; + } + xbb->flags &= ~XBBF_RING_CONNECTED; + return (0); } /** @@ -2135,7 +2946,7 @@ xbb_connect_ring(struct xbb_softc *xbb) INTR_TYPE_BIO | INTR_MPSAFE, &xbb->irq); if (error) { - xbb_disconnect(xbb); + (void)xbb_disconnect(xbb); xenbus_dev_fatal(xbb->dev, error, "binding event channel"); return (error); } @@ -2145,6 +2956,10 @@ xbb_connect_ring(struct xbb_softc *xbb) return 0; } +/* Needed to make bit_alloc() macro work */ +#define calloc(count, size) malloc((count)*(size), M_XENBLOCKBACK, \ + M_NOWAIT|M_ZERO); + /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our @@ -2158,9 +2973,18 @@ xbb_connect_ring(struct xbb_softc *xbb) static int xbb_alloc_communication_mem(struct xbb_softc *xbb) { - xbb->kva_size = (xbb->ring_config.ring_pages - + (xbb->max_requests * xbb->max_request_segments)) - * PAGE_SIZE; + xbb->reqlist_kva_pages = xbb->max_requests * xbb->max_request_segments; + xbb->reqlist_kva_size = xbb->reqlist_kva_pages * PAGE_SIZE; + xbb->kva_size = xbb->reqlist_kva_size + + (xbb->ring_config.ring_pages * PAGE_SIZE); + + xbb->kva_free = bit_alloc(xbb->reqlist_kva_pages); + if (xbb->kva_free == NULL) + return (ENOMEM); + + DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n", + device_get_nameunit(xbb->dev), xbb->kva_size, + xbb->reqlist_kva_size); #ifndef XENHVM xbb->kva = kmem_alloc_nofault(kernel_map, xbb->kva_size); if (xbb->kva == 0) @@ -2185,31 +3009,11 @@ xbb_alloc_communication_mem(struct xbb_softc *xbb) xbb->kva = 
(vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res); xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res); #endif /* XENHVM */ - return (0); -} -/** - * Free dynamically allocated KVA or pseudo-physical address allocations. - * - * \param xbb Per-instance xbb configuration structure. - */ -static void -xbb_free_communication_mem(struct xbb_softc *xbb) -{ - if (xbb->kva != 0) { -#ifndef XENHVM - kmem_free(kernel_map, xbb->kva, xbb->kva_size); -#else - if (xbb->pseudo_phys_res != NULL) { - bus_release_resource(xbb->dev, SYS_RES_MEMORY, - xbb->pseudo_phys_res_id, - xbb->pseudo_phys_res); - xbb->pseudo_phys_res = NULL; - } -#endif - } - xbb->kva = 0; - xbb->gnt_base_addr = 0; + DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n", + device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva, + (uintmax_t)xbb->gnt_base_addr); + return (0); } /** @@ -2228,6 +3032,14 @@ xbb_collect_frontend_info(struct xbb_softc *xbb) otherend_path = xenbus_get_otherend_path(xbb->dev); /* + * Protocol defaults valid even if all negotiation fails. + */ + xbb->ring_config.ring_pages = 1; + xbb->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE); + xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; + xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE; + + /* * Mandatory data (used in all versions of the protocol) first. */ error = xs_gather(XST_NIL, otherend_path, @@ -2255,19 +3067,19 @@ xbb_collect_frontend_info(struct xbb_softc *xbb) * tree. 
*/ (void)xs_scanf(XST_NIL, otherend_path, - "ring-pages", NULL, "%" PRIu32, + "ring-pages", NULL, "%u", &xbb->ring_config.ring_pages); (void)xs_scanf(XST_NIL, otherend_path, - "max-requests", NULL, "%" PRIu32, + "max-requests", NULL, "%u", &xbb->max_requests); (void)xs_scanf(XST_NIL, otherend_path, - "max-request-segments", NULL, "%" PRIu32, + "max-request-segments", NULL, "%u", &xbb->max_request_segments); (void)xs_scanf(XST_NIL, otherend_path, - "max-request-size", NULL, "%" PRIu32, + "max-request-size", NULL, "%u", &xbb->max_request_size); if (xbb->ring_config.ring_pages > XBB_MAX_RING_PAGES) { @@ -2360,8 +3172,6 @@ xbb_alloc_requests(struct xbb_softc *xbb) { struct xbb_xen_req *req; struct xbb_xen_req *last_req; - uint8_t *req_kva; - u_long gnt_base; /* * Allocate request book keeping datastructures. @@ -2374,43 +3184,68 @@ xbb_alloc_requests(struct xbb_softc *xbb) return (ENOMEM); } - req_kva = (uint8_t *)xbb->kva; - gnt_base = xbb->gnt_base_addr; req = xbb->requests; last_req = &xbb->requests[xbb->max_requests - 1]; + STAILQ_INIT(&xbb->request_free_stailq); while (req <= last_req) { + STAILQ_INSERT_TAIL(&xbb->request_free_stailq, req, links); + req++; + } + return (0); +} + +static int +xbb_alloc_request_lists(struct xbb_softc *xbb) +{ + int i; + struct xbb_xen_reqlist *reqlist; + + /* + * If no requests can be merged, we need 1 request list per + * in flight request. 
+ */ + xbb->request_lists = malloc(xbb->max_requests * + sizeof(*xbb->request_lists), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); + if (xbb->request_lists == NULL) { + xenbus_dev_fatal(xbb->dev, ENOMEM, + "Unable to allocate request list structures"); + return (ENOMEM); + } + + STAILQ_INIT(&xbb->reqlist_free_stailq); + STAILQ_INIT(&xbb->reqlist_pending_stailq); + for (i = 0; i < xbb->max_requests; i++) { int seg; - req->xbb = xbb; - req->kva = req_kva; - req->gnt_handles = malloc(xbb->max_request_segments - * sizeof(*req->gnt_handles), - M_XENBLOCKBACK, M_NOWAIT|M_ZERO); - if (req->gnt_handles == NULL) { - xenbus_dev_fatal(xbb->dev, ENOMEM, - "Unable to allocate request " - "grant references"); - return (ENOMEM); - } + reqlist = &xbb->request_lists[i]; + + reqlist->xbb = xbb; + #ifdef XBB_USE_BOUNCE_BUFFERS - req->bounce = malloc(xbb->max_request_size, - M_XENBLOCKBACK, M_NOWAIT); - if (req->bounce == NULL) { + reqlist->bounce = malloc(xbb->max_reqlist_size, + M_XENBLOCKBACK, M_NOWAIT); + if (reqlist->bounce == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "bounce buffers"); return (ENOMEM); } #endif /* XBB_USE_BOUNCE_BUFFERS */ - req->gnt_base = gnt_base; - req_kva += xbb->max_request_segments * PAGE_SIZE; - gnt_base += xbb->max_request_segments * PAGE_SIZE; - SLIST_INSERT_HEAD(&xbb->request_free_slist, req, links); - for (seg = 0; seg < xbb->max_request_segments; seg++) - req->gnt_handles[seg] = GRANT_REF_INVALID; + reqlist->gnt_handles = malloc(xbb->max_reqlist_segments * + sizeof(*reqlist->gnt_handles), + M_XENBLOCKBACK, M_NOWAIT|M_ZERO); + if (reqlist->gnt_handles == NULL) { + xenbus_dev_fatal(xbb->dev, ENOMEM, + "Unable to allocate request " + "grant references"); + return (ENOMEM); + } + + for (seg = 0; seg < xbb->max_reqlist_segments; seg++) + reqlist->gnt_handles[seg] = GRANT_REF_INVALID; - req++; + STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); } return (0); } @@ -2491,6 +3326,22 @@ xbb_connect(struct xbb_softc *xbb) if 
(xbb_collect_frontend_info(xbb) != 0) return; + xbb->flags &= ~XBBF_SHUTDOWN; + + /* + * We limit the maximum number of reqlist segments to the maximum + * number of segments in the ring, or our absolute maximum, + * whichever is smaller. + */ + xbb->max_reqlist_segments = MIN(xbb->max_request_segments * + xbb->max_requests, XBB_MAX_SEGMENTS_PER_REQLIST); + + /* + * The maximum size is simply a function of the number of segments + * we can handle. + */ + xbb->max_reqlist_size = xbb->max_reqlist_segments * PAGE_SIZE; + /* Allocate resources whose size depends on front-end configuration. */ error = xbb_alloc_communication_mem(xbb); if (error != 0) { @@ -2505,6 +3356,12 @@ xbb_connect(struct xbb_softc *xbb) return; } + error = xbb_alloc_request_lists(xbb); + if (error != 0) { + /* Specific errors are reported by xbb_alloc_request_lists(). */ + return; + } + /* * Connect communication channel. */ @@ -2520,7 +3377,7 @@ xbb_connect(struct xbb_softc *xbb) * in this connection, and waiting for a front-end state * change will not help the situation. */ - xbb_disconnect(xbb); + (void)xbb_disconnect(xbb); return; } @@ -2542,7 +3399,7 @@ xbb_connect(struct xbb_softc *xbb) static int xbb_shutdown(struct xbb_softc *xbb) { - static int in_shutdown; + int error; DPRINTF("\n"); @@ -2553,7 +3410,7 @@ xbb_shutdown(struct xbb_softc *xbb) * the same time. Tell the caller that hits this * race to try back later. */ - if (in_shutdown != 0) + if ((xbb->flags & XBBF_IN_SHUTDOWN) != 0) return (EAGAIN); DPRINTF("\n"); @@ -2561,20 +3418,30 @@ xbb_shutdown(struct xbb_softc *xbb) /* Indicate shutdown is in progress. */ xbb->flags |= XBBF_SHUTDOWN; - /* Wait for requests to complete. */ - if (xbb->active_request_count != 0) - return (EAGAIN); - - DPRINTF("\n"); - /* Disconnect from the front-end. */ - xbb_disconnect(xbb); + error = xbb_disconnect(xbb); + if (error != 0) { + /* + * Requests still outstanding. We'll be called again + * once they complete. 
+ */ + KASSERT(error == EAGAIN, + ("%s: Unexpected xbb_disconnect() failure %d", + __func__, error)); + + return (error); + } - in_shutdown = 1; + DPRINTF("\n"); + + xbb->flags |= XBBF_IN_SHUTDOWN; mtx_unlock(&xbb->lock); - xenbus_set_state(xbb->dev, XenbusStateClosed); + + if (xenbus_get_state(xbb->dev) < XenbusStateClosing) + xenbus_set_state(xbb->dev, XenbusStateClosing); + mtx_lock(&xbb->lock); - in_shutdown = 0; + xbb->flags &= ~XBBF_IN_SHUTDOWN; /* Indicate to xbb_detach() that is it safe to proceed. */ wakeup(xbb); @@ -2634,6 +3501,77 @@ xbb_probe(device_t dev) } /** + * Setup sysctl variables to control various Block Back parameters. + * + * \param xbb Xen Block Back softc. + * + */ +static void +xbb_setup_sysctl(struct xbb_softc *xbb) +{ + struct sysctl_ctx_list *sysctl_ctx = NULL; + struct sysctl_oid *sysctl_tree = NULL; + + sysctl_ctx = device_get_sysctl_ctx(xbb->dev); + if (sysctl_ctx == NULL) + return; + + sysctl_tree = device_get_sysctl_tree(xbb->dev); + if (sysctl_tree == NULL) + return; + + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "disable_flush", CTLFLAG_RW, &xbb->disable_flush, 0, + "fake the flush command"); + + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "flush_interval", CTLFLAG_RW, &xbb->flush_interval, 0, + "send a real flush for N flush requests"); + + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "no_coalesce_reqs", CTLFLAG_RW, &xbb->no_coalesce_reqs,0, + "Don't coalesce contiguous requests"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "reqs_received", CTLFLAG_RW, &xbb->reqs_received, + "how many I/O requests we have received"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "reqs_completed", CTLFLAG_RW, &xbb->reqs_completed, + "how many I/O requests have been completed"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "forced_dispatch", CTLFLAG_RW, &xbb->forced_dispatch, + "how 
many I/O dispatches were forced"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "normal_dispatch", CTLFLAG_RW, &xbb->normal_dispatch, + "how many I/O dispatches were normal"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "total_dispatch", CTLFLAG_RW, &xbb->total_dispatch, + "total number of I/O dispatches"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "kva_shortages", CTLFLAG_RW, &xbb->kva_shortages, + "how many times we have run out of KVA"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "request_shortages", CTLFLAG_RW, + &xbb->request_shortages, + "how many times we have run out of requests"); + + SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "max_requests", CTLFLAG_RD, &xbb->max_requests, 0, + "maximum outstanding requests (negotiated)"); + + SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "max_request_segments", CTLFLAG_RD, + &xbb->max_request_segments, 0, + "maximum number of pages per requests (negotiated)"); +} + +/** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Block Back instance. @@ -2643,8 +3581,8 @@ xbb_probe(device_t dev) static int xbb_attach(device_t dev) { - struct xbb_softc *xbb; - int error; + struct xbb_softc *xbb; + int error; DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); @@ -2658,15 +3596,6 @@ xbb_attach(device_t dev) xbb->otherend_id = xenbus_get_otherend_id(dev); TASK_INIT(&xbb->io_task, /*priority*/0, xbb_run_queue, xbb); mtx_init(&xbb->lock, device_get_nameunit(dev), NULL, MTX_DEF); - SLIST_INIT(&xbb->request_free_slist); - - /* - * Protocol defaults valid even if all negotiation fails. 
- */ - xbb->ring_config.ring_pages = 1; - xbb->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE); - xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; - xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE; /* * Publish protocol capabilities for consumption by the @@ -2763,6 +3692,18 @@ xbb_attach(device_t dev) DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); + + xbb->xbb_stats_in = devstat_new_entry("xbbi", device_get_unit(xbb->dev), + xbb->sector_size, + DEVSTAT_ALL_SUPPORTED, + DEVSTAT_TYPE_DIRECT + | DEVSTAT_TYPE_IF_OTHER, + DEVSTAT_PRIORITY_OTHER); + /* + * Setup sysctl variables. + */ + xbb_setup_sysctl(xbb); + /* * Create a taskqueue for doing work that must occur from a * thread context. @@ -2797,7 +3738,7 @@ xbb_attach(device_t dev) } /** - * Detach from a block back device instanced. + * Detach from a block back device instance. * * \param dev NewBus device object representing this Xen Block Back instance. * @@ -2823,7 +3764,6 @@ xbb_detach(device_t dev) "xbb_shutdown", 0); } mtx_unlock(&xbb->lock); - mtx_destroy(&xbb->lock); DPRINTF("\n"); @@ -2833,8 +3773,10 @@ xbb_detach(device_t dev) if (xbb->xbb_stats != NULL) devstat_remove_entry(xbb->xbb_stats); + if (xbb->xbb_stats_in != NULL) + devstat_remove_entry(xbb->xbb_stats_in); + xbb_close_backend(xbb); - xbb_free_communication_mem(xbb); if (xbb->dev_mode != NULL) { free(xbb->dev_mode, M_XENBUS); @@ -2851,29 +3793,7 @@ xbb_detach(device_t dev) xbb->dev_name = NULL; } - if (xbb->requests != NULL) { - struct xbb_xen_req *req; - struct xbb_xen_req *last_req; - - req = xbb->requests; - last_req = &xbb->requests[xbb->max_requests - 1]; - while (req <= last_req) { -#ifdef XBB_USE_BOUNCE_BUFFERS - if (req->bounce != NULL) { - free(req->bounce, M_XENBLOCKBACK); - req->bounce = NULL; - } -#endif - if (req->gnt_handles != NULL) { - free (req->gnt_handles, M_XENBLOCKBACK); - req->gnt_handles = NULL; - } - req++; - } - free(xbb->requests, M_XENBLOCKBACK); - xbb->requests = 
NULL; - } - + mtx_destroy(&xbb->lock); return (0); } @@ -2921,34 +3841,35 @@ xbb_resume(device_t dev) * * \return 0 for success, errno codes for failure. */ -static int +static void xbb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xbb_softc *xbb = device_get_softc(dev); - DPRINTF("state=%s\n", xenbus_strstate(frontend_state)); + DPRINTF("frontend_state=%s, xbb_state=%s\n", + xenbus_strstate(frontend_state), + xenbus_strstate(xenbus_get_state(xbb->dev))); switch (frontend_state) { case XenbusStateInitialising: - case XenbusStateClosing: break; case XenbusStateInitialised: case XenbusStateConnected: xbb_connect(xbb); break; + case XenbusStateClosing: case XenbusStateClosed: - case XenbusStateInitWait: - mtx_lock(&xbb->lock); xbb_shutdown(xbb); mtx_unlock(&xbb->lock); + if (frontend_state == XenbusStateClosed) + xenbus_set_state(xbb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } - return (0); } /*---------------------------- NewBus Registration ---------------------------*/ diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c index 81c0e8b..2868313 100644 --- a/sys/dev/xen/blkfront/blkfront.c +++ b/sys/dev/xen/blkfront/blkfront.c @@ -739,7 +739,7 @@ setup_blkring(struct xb_softc *sc) /** * Callback received when the backend's state changes. 
*/ -static int +static void blkfront_backend_changed(device_t dev, XenbusState backend_state) { struct xb_softc *sc = device_get_softc(dev); @@ -772,8 +772,6 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state) blkfront_closing(dev); break; } - - return (0); } /* diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index c03d536..bc59fa0 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -173,8 +173,6 @@ static struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = { }; struct xctrl_softc { - - /** Must be first */ struct xs_watch xctrl_watch; }; @@ -203,24 +201,29 @@ xctrl_suspend() unsigned long max_pfn, start_info_mfn; #ifdef SMP - cpumask_t map; + struct thread *td; + cpuset_t map; /* * Bind us to CPU 0 and stop any other VCPUs. */ - thread_lock(curthread); - sched_bind(curthread, 0); - thread_unlock(curthread); + td = curthread; + thread_lock(td); + sched_bind(td, 0); + thread_unlock(td); KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0")); - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map) + sched_pin(); + map = PCPU_GET(other_cpus); + sched_unpin(); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) stop_cpus(map); #endif if (DEVICE_SUSPEND(root_bus) != 0) { printf("xen_suspend: device_suspend failed\n"); #ifdef SMP - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif return; @@ -289,7 +292,7 @@ xctrl_suspend() thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif } @@ -445,6 +448,7 @@ xctrl_attach(device_t dev) /* Activate watch */ xctrl->xctrl_watch.node = "control/shutdown"; xctrl->xctrl_watch.callback = xctrl_on_watch_event; + xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl; xs_register_watch(&xctrl->xctrl_watch); #ifndef XENHVM diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index 40ff031..c694514 100644 --- 
a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -650,7 +650,7 @@ netfront_send_fake_arp(device_t dev, struct netfront_info *info) /** * Callback received when the backend's state changes. */ -static int +static void netfront_backend_changed(device_t dev, XenbusState newstate) { struct netfront_info *sc = device_get_softc(dev); @@ -680,7 +680,6 @@ netfront_backend_changed(device_t dev, XenbusState newstate) xenbus_set_state(dev, XenbusStateClosed); break; } - return (0); } static void diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index ba1ac4e..df23e49 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); */ #include "opt_inet6.h" +#include "opt_kdtrace.h" #include "opt_kgssapi.h" #include "opt_nfs.h" @@ -64,6 +65,28 @@ __FBSDID("$FreeBSD$"); #include <fs/nfs/nfsport.h> +#ifdef KDTRACE_HOOKS +#include <sys/dtrace_bsd.h> + +dtrace_nfsclient_nfs23_start_probe_func_t + dtrace_nfscl_nfs234_start_probe; + +dtrace_nfsclient_nfs23_done_probe_func_t + dtrace_nfscl_nfs234_done_probe; + +/* + * Registered probes by RPC type. 
+ */ +uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; + +uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; + +uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; +uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; +#endif + NFSSTATESPINLOCK; NFSREQSPINLOCK; extern struct nfsstats newnfsstats; @@ -300,9 +323,7 @@ newnfs_disconnect(struct nfssockreq *nrp) client = nrp->nr_client; nrp->nr_client = NULL; mtx_unlock(&nrp->nr_mtx); -#ifdef KGSSAPI - rpc_gss_secpurge(client); -#endif + rpc_gss_secpurge_call(client); CLNT_CLOSE(client); CLNT_RELEASE(client); } else { @@ -314,21 +335,18 @@ static AUTH * nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, char *srv_principal, gss_OID mech_oid, struct ucred *cred) { -#ifdef KGSSAPI rpc_gss_service_t svc; AUTH *auth; #ifdef notyet rpc_gss_options_req_t req_options; #endif -#endif switch (secflavour) { -#ifdef KGSSAPI case RPCSEC_GSS_KRB5: case RPCSEC_GSS_KRB5I: case RPCSEC_GSS_KRB5P: if (!mech_oid) { - if (!rpc_gss_mech_to_oid("kerberosv5", &mech_oid)) + if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid)) return (NULL); } if (secflavour == RPCSEC_GSS_KRB5) @@ -344,7 +362,7 @@ nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, req_options.input_channel_bindings = NULL; req_options.enc_type = nfs_keytab_enctype; - auth = rpc_gss_secfind(nrp->nr_client, cred, + auth = rpc_gss_secfind_call(nrp->nr_client, cred, clnt_principal, srv_principal, mech_oid, svc, &req_options); #else @@ -354,7 +372,7 @@ nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, * principals. As such, that case cannot yet be handled. 
*/ if (clnt_principal == NULL) - auth = rpc_gss_secfind(nrp->nr_client, cred, + auth = rpc_gss_secfind_call(nrp->nr_client, cred, srv_principal, mech_oid, svc); else auth = NULL; @@ -362,7 +380,6 @@ nfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal, if (auth != NULL) return (auth); /* fallthrough */ -#endif /* KGSSAPI */ case AUTH_SYS: default: return (authunix_create(cred)); @@ -568,6 +585,29 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND) MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSDREQ, M_WAITOK); +#ifdef KDTRACE_HOOKS + if (dtrace_nfscl_nfs234_start_probe != NULL) { + uint32_t probe_id; + int probe_procnum; + + if (nd->nd_flag & ND_NFSV4) { + probe_id = + nfscl_nfs4_start_probes[nd->nd_procnum]; + probe_procnum = nd->nd_procnum; + } else if (nd->nd_flag & ND_NFSV3) { + probe_id = nfscl_nfs3_start_probes[procnum]; + probe_procnum = procnum; + } else { + probe_id = + nfscl_nfs2_start_probes[nd->nd_procnum]; + probe_procnum = procnum; + } + if (probe_id != 0) + (dtrace_nfscl_nfs234_start_probe) + (probe_id, vp, nd->nd_mreq, cred, + probe_procnum); + } +#endif } trycnt = 0; tryagain: @@ -762,6 +802,27 @@ tryagain: } } +#ifdef KDTRACE_HOOKS + if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { + uint32_t probe_id; + int probe_procnum; + + if (nd->nd_flag & ND_NFSV4) { + probe_id = nfscl_nfs4_done_probes[nd->nd_procnum]; + probe_procnum = nd->nd_procnum; + } else if (nd->nd_flag & ND_NFSV3) { + probe_id = nfscl_nfs3_done_probes[procnum]; + probe_procnum = procnum; + } else { + probe_id = nfscl_nfs2_done_probes[nd->nd_procnum]; + probe_procnum = procnum; + } + if (probe_id != 0) + (dtrace_nfscl_nfs234_done_probe)(probe_id, vp, + nd->nd_mreq, cred, probe_procnum, 0); + } +#endif + m_freem(nd->nd_mreq); AUTH_DESTROY(auth); if (rep != NULL) diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index e725889..03b5786 100644 --- 
a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -1726,11 +1726,13 @@ nfsmout: * Any usecnt must be decremented by calling nfsv4_relref() before * calling nfsv4_lock(). It was done this way, so nfsv4_lock() could * be called in a loop. - * The last argument is set to indicate if the call slept, iff not NULL. + * The isleptp argument is set to indicate if the call slept, iff not NULL + * and the mp argument indicates to check for a forced dismount, iff not + * NULL. */ APPLESTATIC int nfsv4_lock(struct nfsv4lock *lp, int iwantlock, int *isleptp, - void *mutex) + void *mutex, struct mount *mp) { if (isleptp) @@ -1751,6 +1753,10 @@ nfsv4_lock(struct nfsv4lock *lp, int iwantlock, int *isleptp, lp->nfslock_lock |= NFSV4LOCK_LOCKWANTED; } while (lp->nfslock_lock & (NFSV4LOCK_LOCK | NFSV4LOCK_LOCKWANTED)) { + if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + lp->nfslock_lock &= ~NFSV4LOCK_LOCKWANTED; + return (0); + } lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; @@ -1801,9 +1807,12 @@ nfsv4_relref(struct nfsv4lock *lp) * not wait for threads that want the exclusive lock. If priority needs * to be given to threads that need the exclusive lock, a call to nfsv4_lock() * with the 2nd argument == 0 should be done before calling nfsv4_getref(). + * If the mp argument is not NULL, check for MNTK_UNMOUNTF being set and + * return without getting a refcnt for that case. */ APPLESTATIC void -nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex) +nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex, + struct mount *mp) { if (isleptp) @@ -1813,12 +1822,16 @@ nfsv4_getref(struct nfsv4lock *lp, int *isleptp, void *mutex) * Wait for a lock held. 
*/ while (lp->nfslock_lock & NFSV4LOCK_LOCK) { + if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) + return; lp->nfslock_lock |= NFSV4LOCK_WANTED; if (isleptp) *isleptp = 1; (void) nfsmsleep(&lp->nfslock_lock, mutex, PZERO - 1, "nfsv4lck", NULL); } + if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) + return; lp->nfslock_usecnt++; } diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 6182ee8..5f944b5 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -247,10 +247,10 @@ int nfsv4_loadattr(struct nfsrv_descript *, vnode_t, struct nfsv3_pathconf *, struct statfs *, struct nfsstatfs *, struct nfsfsinfo *, NFSACL_T *, int, int *, u_int32_t *, u_int32_t *, NFSPROC_T *, struct ucred *); -int nfsv4_lock(struct nfsv4lock *, int, int *, void *); +int nfsv4_lock(struct nfsv4lock *, int, int *, void *, struct mount *); void nfsv4_unlock(struct nfsv4lock *, int); void nfsv4_relref(struct nfsv4lock *); -void nfsv4_getref(struct nfsv4lock *, int *, void *); +void nfsv4_getref(struct nfsv4lock *, int *, void *, struct mount *); int nfsv4_getref_nonblock(struct nfsv4lock *); int nfsv4_testlock(struct nfsv4lock *); int nfsrv_mtostr(struct nfsrv_descript *, char *, int); @@ -370,7 +370,7 @@ int nfsrpc_readlink(vnode_t, struct uio *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); int nfsrpc_read(vnode_t, struct uio *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); -int nfsrpc_write(vnode_t, struct uio *, int *, u_char *, +int nfsrpc_write(vnode_t, struct uio *, int *, int *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *, int); int nfsrpc_mknod(vnode_t, char *, int, struct vattr *, u_int32_t, enum vtype, struct ucred *, NFSPROC_T *, struct nfsvattr *, @@ -401,10 +401,10 @@ int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *, int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *, NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *); int nfsrpc_advlock(vnode_t, off_t, int, struct 
flock *, int, - struct ucred *, NFSPROC_T *); + struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lockt(struct nfsrv_descript *, vnode_t, struct nfsclclient *, u_int64_t, u_int64_t, struct flock *, - struct ucred *, NFSPROC_T *); + struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t, u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *, int); @@ -439,16 +439,16 @@ struct nfsclclient *nfscl_findcl(struct nfsmount *); void nfscl_clientrelease(struct nfsclclient *); void nfscl_freelock(struct nfscllock *, int); int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short, - struct ucred *, NFSPROC_T *, struct nfsclclient *, int, u_int8_t *, - u_int8_t *, struct nfscllockowner **, int *, int *); + struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int, + u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *); int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t, struct ucred *, NFSPROC_T *, int, struct nfsclclient *, - struct nfscllockowner **, int *); + void *, int, struct nfscllockowner **, int *); int nfscl_checkwritelocked(vnode_t, struct flock *, - struct ucred *, NFSPROC_T *); + struct ucred *, NFSPROC_T *, void *, int); void nfscl_lockrelease(struct nfscllockowner *, int, int); void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t); -void nfscl_filllockowner(NFSPROC_T *, u_int8_t *); +void nfscl_filllockowner(void *, u_int8_t *, int); void nfscl_freeopen(struct nfsclopen *, int); void nfscl_umount(struct nfsmount *, NFSPROC_T *); void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *); @@ -466,9 +466,10 @@ void nfscl_lockexcl(struct nfsv4lock *, void *); void nfscl_lockunlock(struct nfsv4lock *); void nfscl_lockderef(struct nfsv4lock *); void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *); -void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *); +void nfscl_releasealllocks(struct nfsclclient *, 
vnode_t, NFSPROC_T *, void *, + int); int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t, - u_int64_t, struct flock *, NFSPROC_T *); + u_int64_t, struct flock *, NFSPROC_T *, void *, int); int nfscl_mustflush(vnode_t); int nfscl_nodeleg(vnode_t, int); int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *); diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index c21482d..9866ef7 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -267,6 +267,7 @@ * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is * one greater than the last number. */ +#ifndef NFS_V3NPROCS #define NFS_V3NPROCS 22 #define NFSPROC_LOOKUPP 22 @@ -293,6 +294,7 @@ * Must be defined as one higher than the last Proc# above. */ #define NFSV4_NPROCS 41 +#endif /* NFS_V3NPROCS */ /* * Stats structure @@ -358,7 +360,9 @@ struct ext_nfsstats { /* * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. */ +#ifndef NFS_NPROCS #define NFS_NPROCS NFSV4_NPROCS +#endif #include <fs/nfs/nfskpiport.h> #include <fs/nfs/nfsdport.h> diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 6b66c1f..a15f93b 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -226,6 +226,48 @@ #define NFSPROC_COMMIT 21 /* + * The lower numbers -> 21 are used by NFSv2 and v3. These define higher + * numbers used by NFSv4. + * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is + * one greater than the last number. 
+ */ +#ifndef NFS_V3NPROCS +#define NFS_V3NPROCS 22 + +#define NFSPROC_LOOKUPP 22 +#define NFSPROC_SETCLIENTID 23 +#define NFSPROC_SETCLIENTIDCFRM 24 +#define NFSPROC_LOCK 25 +#define NFSPROC_LOCKU 26 +#define NFSPROC_OPEN 27 +#define NFSPROC_CLOSE 28 +#define NFSPROC_OPENCONFIRM 29 +#define NFSPROC_LOCKT 30 +#define NFSPROC_OPENDOWNGRADE 31 +#define NFSPROC_RENEW 32 +#define NFSPROC_PUTROOTFH 33 +#define NFSPROC_RELEASELCKOWN 34 +#define NFSPROC_DELEGRETURN 35 +#define NFSPROC_RETDELEGREMOVE 36 +#define NFSPROC_RETDELEGRENAME1 37 +#define NFSPROC_RETDELEGRENAME2 38 +#define NFSPROC_GETACL 39 +#define NFSPROC_SETACL 40 + +/* + * Must be defined as one higher than the last Proc# above. + */ +#define NFSV4_NPROCS 41 +#endif /* NFS_V3NPROCS */ + +/* + * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code. + */ +#ifndef NFS_NPROCS +#define NFS_NPROCS NFSV4_NPROCS +#endif + +/* * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure * or Operation#. Since the NFS V4 Op #s go higher, use NFSV4OP_NOPS, which * is one greater than the highest Op#. diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index fee8b05..b33c8a5 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -35,6 +35,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_kdtrace.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/bio.h> @@ -55,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include <fs/nfsclient/nfsmount.h> #include <fs/nfsclient/nfs.h> #include <fs/nfsclient/nfsnode.h> +#include <fs/nfsclient/nfs_kdtrace.h> extern int newnfs_directio_allow_mmap; extern struct nfsstats newnfsstats; @@ -302,7 +305,7 @@ ncl_putpages(struct vop_putpages_args *ap) } for (i = 0; i < npages; i++) - rtvals[i] = VM_PAGER_AGAIN; + rtvals[i] = VM_PAGER_ERROR; /* * When putting pages, do not extend file past EOF. 
@@ -345,16 +348,9 @@ ncl_putpages(struct vop_putpages_args *ap) pmap_qremove(kva, npages); relpbuf(bp, &ncl_pbuf_freecnt); - if (!error) { - int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; - for (i = 0; i < nwritten; i++) { - rtvals[i] = VM_PAGER_OK; - vm_page_undirty(pages[i]); - } - if (must_commit) { - ncl_clearcommit(vp->v_mount); - } - } + vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid); + if (must_commit) + ncl_clearcommit(vp->v_mount); return rtvals[0]; } @@ -406,6 +402,7 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred) goto out; } np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); error = VOP_GETATTR(vp, &vattr, cred); if (error) goto out; @@ -922,6 +919,7 @@ ncl_write(struct vop_write_args *ap) #endif flush_and_restart: np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); if (error) return (error); @@ -935,6 +933,7 @@ flush_and_restart: */ if (ioflag & IO_APPEND) { np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); error = VOP_GETATTR(vp, &vattr, cred); if (error) return (error); @@ -1755,6 +1754,7 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td, mtx_lock(&np->n_mtx); np->n_flag |= NWRITEERR; np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); mtx_unlock(&np->n_mtx); } bp->b_dirtyoff = bp->b_dirtyend = 0; diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c index 6866a73..c7fd69b 100644 --- a/sys/fs/nfsclient/nfs_clcomsubs.c +++ b/sys/fs/nfsclient/nfs_clcomsubs.c @@ -482,7 +482,7 @@ nfscl_lockexcl(struct nfsv4lock *lckp, void *mutex) int igotlock; do { - igotlock = nfsv4_lock(lckp, 1, NULL, mutex); + igotlock = nfsv4_lock(lckp, 1, NULL, mutex, NULL); } while (!igotlock); } diff --git a/sys/fs/nfsclient/nfs_clkdtrace.c b/sys/fs/nfsclient/nfs_clkdtrace.c new file mode 100644 index 0000000..c7db3a4 --- /dev/null +++ b/sys/fs/nfsclient/nfs_clkdtrace.c @@ -0,0 
+1,600 @@ +/*- + * Copyright (c) 2009 Robert N. M. Watson + * All rights reserved. + * + * This software was developed at the University of Cambridge Computer + * Laboratory with support from a grant from Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> + +#include <sys/dtrace.h> +#include <sys/dtrace_bsd.h> + +#include <fs/nfs/nfsproto.h> + +#include <fs/nfsclient/nfs_kdtrace.h> + +/* + * dtnfscl is a DTrace provider that tracks the intent to perform RPCs + * in the NFS client, as well as access to and maintenance of the access and + * attribute caches. This is not quite the same as RPCs, because NFS may + * issue multiple RPC transactions in the event that authentication fails, + * there's a jukebox error, or none at all if the access or attribute cache + * hits. However, it cleanly represents the logical layer between RPC + * transmission and vnode/vfs operations, providing access to state linking + * the two. + */ + +static int dtnfsclient_unload(void); +static void dtnfsclient_getargdesc(void *, dtrace_id_t, void *, + dtrace_argdesc_t *); +static void dtnfsclient_provide(void *, dtrace_probedesc_t *); +static void dtnfsclient_destroy(void *, dtrace_id_t, void *); +static void dtnfsclient_enable(void *, dtrace_id_t, void *); +static void dtnfsclient_disable(void *, dtrace_id_t, void *); +static void dtnfsclient_load(void *); + +static dtrace_pattr_t dtnfsclient_attr = { +{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, +}; + +/* + * Description of NFSv4, NFSv3 and (optional) NFSv2 probes for a procedure. + */ +struct dtnfsclient_rpc { + char *nr_v4_name; + char *nr_v3_name; /* Or NULL if none. */ + char *nr_v2_name; /* Or NULL if none. 
*/ + + /* + * IDs for the start and done cases, for NFSv2, NFSv3 and NFSv4. + */ + uint32_t nr_v2_id_start, nr_v2_id_done; + uint32_t nr_v3_id_start, nr_v3_id_done; + uint32_t nr_v4_id_start, nr_v4_id_done; +}; + +/* + * This table is indexed by NFSv3 procedure number, but also used for NFSv2 + * procedure names and NFSv4 operations. + */ +static struct dtnfsclient_rpc dtnfsclient_rpcs[NFS_NPROCS + 1] = { + { "null", "null", "null" }, + { "getattr", "getattr", "getattr" }, + { "setattr", "setattr", "setattr" }, + { "lookup", "lookup", "lookup" }, + { "access", "access", "noop" }, + { "readlink", "readlink", "readlink" }, + { "read", "read", "read" }, + { "write", "write", "write" }, + { "create", "create", "create" }, + { "mkdir", "mkdir", "mkdir" }, + { "symlink", "symlink", "symlink" }, + { "mknod", "mknod" }, + { "remove", "remove", "remove" }, + { "rmdir", "rmdir", "rmdir" }, + { "rename", "rename", "rename" }, + { "link", "link", "link" }, + { "readdir", "readdir", "readdir" }, + { "readdirplus", "readdirplus" }, + { "fsstat", "fsstat", "statfs" }, + { "fsinfo", "fsinfo" }, + { "pathconf", "pathconf" }, + { "commit", "commit" }, + { "lookupp" }, + { "setclientid" }, + { "setclientidcfrm" }, + { "lock" }, + { "locku" }, + { "open" }, + { "close" }, + { "openconfirm" }, + { "lockt" }, + { "opendowngrade" }, + { "renew" }, + { "putrootfh" }, + { "releaselckown" }, + { "delegreturn" }, + { "retdelegremove" }, + { "retdelegrename1" }, + { "retdelegrename2" }, + { "getacl" }, + { "setacl" }, + { "noop", "noop", "noop" } +}; + +/* + * Module name strings. + */ +static char *dtnfsclient_accesscache_str = "accesscache"; +static char *dtnfsclient_attrcache_str = "attrcache"; +static char *dtnfsclient_nfs2_str = "nfs2"; +static char *dtnfsclient_nfs3_str = "nfs3"; +static char *dtnfsclient_nfs4_str = "nfs4"; + +/* + * Function name strings. 
+ */ +static char *dtnfsclient_flush_str = "flush"; +static char *dtnfsclient_load_str = "load"; +static char *dtnfsclient_get_str = "get"; + +/* + * Name strings. + */ +static char *dtnfsclient_done_str = "done"; +static char *dtnfsclient_hit_str = "hit"; +static char *dtnfsclient_miss_str = "miss"; +static char *dtnfsclient_start_str = "start"; + +static dtrace_pops_t dtnfsclient_pops = { + dtnfsclient_provide, + NULL, + dtnfsclient_enable, + dtnfsclient_disable, + NULL, + NULL, + dtnfsclient_getargdesc, + NULL, + NULL, + dtnfsclient_destroy +}; + +static dtrace_provider_id_t dtnfsclient_id; + +/* + * Most probes are generated from the above RPC table, but for access and + * attribute caches, we have specific IDs we recognize and handle specially + * in various spots. + */ +extern uint32_t nfscl_accesscache_flush_done_id; +extern uint32_t nfscl_accesscache_get_hit_id; +extern uint32_t nfscl_accesscache_get_miss_id; +extern uint32_t nfscl_accesscache_load_done_id; + +extern uint32_t nfscl_attrcache_flush_done_id; +extern uint32_t nfscl_attrcache_get_hit_id; +extern uint32_t nfscl_attrcache_get_miss_id; +extern uint32_t nfscl_attrcache_load_done_id; + +/* + * When tracing on a procedure is enabled, the DTrace ID for an RPC event is + * stored in one of these two NFS client-allocated arrays; 0 indicates that + * the event is not being traced so probes should not be called. + * + * For simplicity, we allocate both v2, v3 and v4 arrays as NFS_NPROCS + 1, and + * the v2, v3 arrays are simply sparse. 
+ */ +extern uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; + +extern uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; + +extern uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; + +/* + * Look up a DTrace probe ID to see if it's associated with a "done" event -- + * if so, we will return an argument type of "int" for argument index 4. + */ +static int +dtnfs234_isdoneprobe(dtrace_id_t id) +{ + int i; + + for (i = 0; i < NFS_NPROCS + 1; i++) { + if (dtnfsclient_rpcs[i].nr_v4_id_done == id || + dtnfsclient_rpcs[i].nr_v3_id_done == id || + dtnfsclient_rpcs[i].nr_v2_id_done == id) + return (1); + } + return (0); +} + +static void +dtnfsclient_getargdesc(void *arg, dtrace_id_t id, void *parg, + dtrace_argdesc_t *desc) +{ + const char *p = NULL; + + if (id == nfscl_accesscache_flush_done_id || + id == nfscl_attrcache_flush_done_id || + id == nfscl_attrcache_get_miss_id) { + switch (desc->dtargd_ndx) { + case 0: + p = "struct vnode *"; + break; + default: + desc->dtargd_ndx = DTRACE_ARGNONE; + break; + } + } else if (id == nfscl_accesscache_get_hit_id || + id == nfscl_accesscache_get_miss_id) { + switch (desc->dtargd_ndx) { + case 0: + p = "struct vnode *"; + break; + case 1: + p = "uid_t"; + break; + case 2: + p = "uint32_t"; + break; + default: + desc->dtargd_ndx = DTRACE_ARGNONE; + break; + } + } else if (id == nfscl_accesscache_load_done_id) { + switch (desc->dtargd_ndx) { + case 0: + p = "struct vnode *"; + break; + case 1: + p = "uid_t"; + break; + case 2: + p = "uint32_t"; + break; + case 3: + p = "int"; + break; + default: + desc->dtargd_ndx = DTRACE_ARGNONE; + break; + } + } else if (id == nfscl_attrcache_get_hit_id) { + switch (desc->dtargd_ndx) { + case 0: + p = "struct vnode *"; + break; + case 1: + p = "struct vattr *"; + break; + default: + desc->dtargd_ndx = DTRACE_ARGNONE; + break; + } + } else if 
(id == nfscl_attrcache_load_done_id) { + switch (desc->dtargd_ndx) { + case 0: + p = "struct vnode *"; + break; + case 1: + p = "struct vattr *"; + break; + case 2: + p = "int"; + break; + default: + desc->dtargd_ndx = DTRACE_ARGNONE; + break; + } + } else { + switch (desc->dtargd_ndx) { + case 0: + p = "struct vnode *"; + break; + case 1: + p = "struct mbuf *"; + break; + case 2: + p = "struct ucred *"; + break; + case 3: + p = "int"; + break; + case 4: + if (dtnfs234_isdoneprobe(id)) { + p = "int"; + break; + } + /* FALLTHROUGH */ + default: + desc->dtargd_ndx = DTRACE_ARGNONE; + break; + } + } + if (p != NULL) + strlcpy(desc->dtargd_native, p, sizeof(desc->dtargd_native)); +} + +static void +dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) +{ + int i; + + if (desc != NULL) + return; + + /* + * Register access cache probes. + */ + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_flush_str, dtnfsclient_done_str) == 0) { + nfscl_accesscache_flush_done_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL); + } + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_get_str, dtnfsclient_hit_str) == 0) { + nfscl_accesscache_get_hit_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL); + } + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_get_str, dtnfsclient_miss_str) == 0) { + nfscl_accesscache_get_miss_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL); + } + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_load_str, dtnfsclient_done_str) == 0) { + nfscl_accesscache_load_done_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_accesscache_str, + dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL); + } + 
+ /* + * Register attribute cache probes. + */ + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_flush_str, dtnfsclient_done_str) == 0) { + nfscl_attrcache_flush_done_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL); + } + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_get_str, dtnfsclient_hit_str) == 0) { + nfscl_attrcache_get_hit_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL); + } + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_get_str, dtnfsclient_miss_str) == 0) { + nfscl_attrcache_get_miss_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL); + } + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_load_str, dtnfsclient_done_str) == 0) { + nfscl_attrcache_load_done_id = dtrace_probe_create( + dtnfsclient_id, dtnfsclient_attrcache_str, + dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL); + } + + /* + * Register NFSv2 RPC procedures; note sparseness check for each slot + * in the NFSv3, NFSv4 procnum-indexed array. 
+ */ + for (i = 0; i < NFS_NPROCS + 1; i++) { + if (dtnfsclient_rpcs[i].nr_v2_name != NULL && + dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, + dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str) == + 0) { + dtnfsclient_rpcs[i].nr_v2_id_start = + dtrace_probe_create(dtnfsclient_id, + dtnfsclient_nfs2_str, + dtnfsclient_rpcs[i].nr_v2_name, + dtnfsclient_start_str, 0, + &nfscl_nfs2_start_probes[i]); + } + if (dtnfsclient_rpcs[i].nr_v2_name != NULL && + dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, + dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_done_str) == + 0) { + dtnfsclient_rpcs[i].nr_v2_id_done = + dtrace_probe_create(dtnfsclient_id, + dtnfsclient_nfs2_str, + dtnfsclient_rpcs[i].nr_v2_name, + dtnfsclient_done_str, 0, + &nfscl_nfs2_done_probes[i]); + } + } + + /* + * Register NFSv3 RPC procedures; note sparseness check for each slot + * in the NFSv4 procnum-indexed array. + */ + for (i = 0; i < NFS_NPROCS + 1; i++) { + if (dtnfsclient_rpcs[i].nr_v3_name != NULL && + dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, + dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str) == + 0) { + dtnfsclient_rpcs[i].nr_v3_id_start = + dtrace_probe_create(dtnfsclient_id, + dtnfsclient_nfs3_str, + dtnfsclient_rpcs[i].nr_v3_name, + dtnfsclient_start_str, 0, + &nfscl_nfs3_start_probes[i]); + } + if (dtnfsclient_rpcs[i].nr_v3_name != NULL && + dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, + dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_done_str) == + 0) { + dtnfsclient_rpcs[i].nr_v3_id_done = + dtrace_probe_create(dtnfsclient_id, + dtnfsclient_nfs3_str, + dtnfsclient_rpcs[i].nr_v3_name, + dtnfsclient_done_str, 0, + &nfscl_nfs3_done_probes[i]); + } + } + + /* + * Register NFSv4 RPC procedures. 
+ */ + for (i = 0; i < NFS_NPROCS + 1; i++) { + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, + dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str) == + 0) { + dtnfsclient_rpcs[i].nr_v4_id_start = + dtrace_probe_create(dtnfsclient_id, + dtnfsclient_nfs4_str, + dtnfsclient_rpcs[i].nr_v4_name, + dtnfsclient_start_str, 0, + &nfscl_nfs4_start_probes[i]); + } + if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, + dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_done_str) == + 0) { + dtnfsclient_rpcs[i].nr_v4_id_done = + dtrace_probe_create(dtnfsclient_id, + dtnfsclient_nfs4_str, + dtnfsclient_rpcs[i].nr_v4_name, + dtnfsclient_done_str, 0, + &nfscl_nfs4_done_probes[i]); + } + } +} + +static void +dtnfsclient_destroy(void *arg, dtrace_id_t id, void *parg) +{ +} + +static void +dtnfsclient_enable(void *arg, dtrace_id_t id, void *parg) +{ + uint32_t *p = parg; + void *f = dtrace_probe; + + if (id == nfscl_accesscache_flush_done_id) + dtrace_nfscl_accesscache_flush_done_probe = f; + else if (id == nfscl_accesscache_get_hit_id) + dtrace_nfscl_accesscache_get_hit_probe = f; + else if (id == nfscl_accesscache_get_miss_id) + dtrace_nfscl_accesscache_get_miss_probe = f; + else if (id == nfscl_accesscache_load_done_id) + dtrace_nfscl_accesscache_load_done_probe = f; + else if (id == nfscl_attrcache_flush_done_id) + dtrace_nfscl_attrcache_flush_done_probe = f; + else if (id == nfscl_attrcache_get_hit_id) + dtrace_nfscl_attrcache_get_hit_probe = f; + else if (id == nfscl_attrcache_get_miss_id) + dtrace_nfscl_attrcache_get_miss_probe = f; + else if (id == nfscl_attrcache_load_done_id) + dtrace_nfscl_attrcache_load_done_probe = f; + else + *p = id; +} + +static void +dtnfsclient_disable(void *arg, dtrace_id_t id, void *parg) +{ + uint32_t *p = parg; + + if (id == nfscl_accesscache_flush_done_id) + dtrace_nfscl_accesscache_flush_done_probe = NULL; + else if (id == nfscl_accesscache_get_hit_id) + dtrace_nfscl_accesscache_get_hit_probe = NULL; + else if (id == 
nfscl_accesscache_get_miss_id) + dtrace_nfscl_accesscache_get_miss_probe = NULL; + else if (id == nfscl_accesscache_load_done_id) + dtrace_nfscl_accesscache_load_done_probe = NULL; + else if (id == nfscl_attrcache_flush_done_id) + dtrace_nfscl_attrcache_flush_done_probe = NULL; + else if (id == nfscl_attrcache_get_hit_id) + dtrace_nfscl_attrcache_get_hit_probe = NULL; + else if (id == nfscl_attrcache_get_miss_id) + dtrace_nfscl_attrcache_get_miss_probe = NULL; + else if (id == nfscl_attrcache_load_done_id) + dtrace_nfscl_attrcache_load_done_probe = NULL; + else + *p = 0; +} + +static void +dtnfsclient_load(void *dummy) +{ + + if (dtrace_register("nfscl", &dtnfsclient_attr, + DTRACE_PRIV_USER, NULL, &dtnfsclient_pops, NULL, + &dtnfsclient_id) != 0) + return; + + dtrace_nfscl_nfs234_start_probe = + (dtrace_nfsclient_nfs23_start_probe_func_t)dtrace_probe; + dtrace_nfscl_nfs234_done_probe = + (dtrace_nfsclient_nfs23_done_probe_func_t)dtrace_probe; +} + + +static int +dtnfsclient_unload() +{ + + dtrace_nfscl_nfs234_start_probe = NULL; + dtrace_nfscl_nfs234_done_probe = NULL; + + return (dtrace_unregister(dtnfsclient_id)); +} + +static int +dtnfsclient_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + } + + return (error); +} + +SYSINIT(dtnfsclient_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, + dtnfsclient_load, NULL); +SYSUNINIT(dtnfsclient_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, + dtnfsclient_unload, NULL); + +DEV_MODULE(dtnfscl, dtnfsclient_modevent, NULL); +MODULE_VERSION(dtnfscl, 1); +MODULE_DEPEND(dtnfscl, dtrace, 1, 1, 1); +MODULE_DEPEND(dtnfscl, opensolaris, 1, 1, 1); +MODULE_DEPEND(dtnfscl, nfscl, 1, 1, 1); +MODULE_DEPEND(dtnfscl, nfscommon, 1, 1, 1); diff --git a/sys/fs/nfsclient/nfs_clkrpc.c b/sys/fs/nfsclient/nfs_clkrpc.c index c4f7e94..0cd41b3 100644 --- 
a/sys/fs/nfsclient/nfs_clkrpc.c +++ b/sys/fs/nfsclient/nfs_clkrpc.c @@ -215,12 +215,9 @@ nfscbd_addsock(struct file *fp) int nfscbd_nfsd(struct thread *td, struct nfsd_nfscbd_args *args) { -#ifdef KGSSAPI char principal[128]; int error; -#endif -#ifdef KGSSAPI if (args != NULL) { error = copyinstr(args->principal, principal, sizeof(principal), NULL); @@ -229,7 +226,6 @@ nfscbd_nfsd(struct thread *td, struct nfsd_nfscbd_args *args) } else { principal[0] = '\0'; } -#endif /* * Only the first nfsd actually does any work. The RPC code @@ -244,20 +240,16 @@ nfscbd_nfsd(struct thread *td, struct nfsd_nfscbd_args *args) NFSD_UNLOCK(); -#ifdef KGSSAPI if (principal[0] != '\0') - rpc_gss_set_svc_name(principal, "kerberosv5", + rpc_gss_set_svc_name_call(principal, "kerberosv5", GSS_C_INDEFINITE, NFS_CALLBCKPROG, NFSV4_CBVERS); -#endif nfscbd_pool->sp_minthreads = 4; nfscbd_pool->sp_maxthreads = 4; svc_run(nfscbd_pool); -#ifdef KGSSAPI - rpc_gss_clear_svc_name(NFS_CALLBCKPROG, NFSV4_CBVERS); -#endif + rpc_gss_clear_svc_name_call(NFS_CALLBCKPROG, NFSV4_CBVERS); NFSD_LOCK(); nfs_numnfscbd--; diff --git a/sys/fs/nfsclient/nfs_clnode.c b/sys/fs/nfsclient/nfs_clnode.c index d4d652e..d15de6e 100644 --- a/sys/fs/nfsclient/nfs_clnode.c +++ b/sys/fs/nfsclient/nfs_clnode.c @@ -35,6 +35,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_kdtrace.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/fcntl.h> @@ -53,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <fs/nfsclient/nfsnode.h> #include <fs/nfsclient/nfsmount.h> #include <fs/nfsclient/nfs.h> +#include <fs/nfsclient/nfs_kdtrace.h> #include <nfs/nfs_lock.h> @@ -300,7 +303,9 @@ ncl_invalcaches(struct vnode *vp) mtx_lock(&np->n_mtx); for (i = 0; i < NFS_ACCESSCACHESIZE; i++) np->n_accesscache[i].stamp = 0; + KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); mtx_unlock(&np->n_mtx); } diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 
0c3a4c9..0538382 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -34,6 +34,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_kdtrace.h" + /* * generally, I don't like #includes inside .h files, but it seems to * be the easiest way to handle the port. @@ -43,6 +45,26 @@ __FBSDID("$FreeBSD$"); #include <netinet/if_ether.h> #include <net/if_types.h> +#include <fs/nfsclient/nfs_kdtrace.h> + +#ifdef KDTRACE_HOOKS +dtrace_nfsclient_attrcache_flush_probe_func_t + dtrace_nfscl_attrcache_flush_done_probe; +uint32_t nfscl_attrcache_flush_done_id; + +dtrace_nfsclient_attrcache_get_hit_probe_func_t + dtrace_nfscl_attrcache_get_hit_probe; +uint32_t nfscl_attrcache_get_hit_id; + +dtrace_nfsclient_attrcache_get_miss_probe_func_t + dtrace_nfscl_attrcache_get_miss_probe; +uint32_t nfscl_attrcache_get_miss_id; + +dtrace_nfsclient_attrcache_load_probe_func_t + dtrace_nfscl_attrcache_load_done_probe; +uint32_t nfscl_attrcache_load_done_id; +#endif /* KDTRACE_HOOKS */ + extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1; extern struct vop_vector newnfs_vnodeops; extern struct vop_vector newnfs_fifoops; @@ -407,6 +429,7 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, */ vap->va_size = np->n_size; np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } else if (np->n_flag & NMODIFIED) { /* * We've modified the file: Use the larger @@ -439,9 +462,11 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, * We detect this by for the mtime moving back. We invalidate the * attrcache when this happens.
*/ - if (timespeccmp(&mtime_save, &vap->va_mtime, >)) + if (timespeccmp(&mtime_save, &vap->va_mtime, >)) { /* Size changed or mtime went backwards */ np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); + } if (vaper != NULL) { NFSBCOPY((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { @@ -451,6 +476,10 @@ nfscl_loadattrcache(struct vnode **vpp, struct nfsvattr *nap, void *nvaper, vaper->va_mtime = np->n_mtim; } } +#ifdef KDTRACE_HOOKS + if (np->n_attrstamp != 0) + KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, 0); +#endif NFSUNLOCKNODE(np); return (0); } @@ -500,7 +529,7 @@ nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen) * Fill in a lock owner name. For now, pid + the process's creation time. */ void -nfscl_filllockowner(struct thread *td, u_int8_t *cp) +nfscl_filllockowner(void *id, u_int8_t *cp, int flags) { union { u_int32_t lval; @@ -508,37 +537,35 @@ nfscl_filllockowner(struct thread *td, u_int8_t *cp) } tl; struct proc *p; -if (td == NULL) { - printf("NULL td\n"); - bzero(cp, 12); - return; -} - p = td->td_proc; -if (p == NULL) { - printf("NULL pid\n"); - bzero(cp, 12); - return; -} - tl.lval = p->p_pid; - *cp++ = tl.cval[0]; - *cp++ = tl.cval[1]; - *cp++ = tl.cval[2]; - *cp++ = tl.cval[3]; -if (p->p_stats == NULL) { - printf("pstats null\n"); - bzero(cp, 8); - return; -} - tl.lval = p->p_stats->p_start.tv_sec; - *cp++ = tl.cval[0]; - *cp++ = tl.cval[1]; - *cp++ = tl.cval[2]; - *cp++ = tl.cval[3]; - tl.lval = p->p_stats->p_start.tv_usec; - *cp++ = tl.cval[0]; - *cp++ = tl.cval[1]; - *cp++ = tl.cval[2]; - *cp = tl.cval[3]; + if (id == NULL) { + printf("NULL id\n"); + bzero(cp, NFSV4CL_LOCKNAMELEN); + return; + } + if ((flags & F_POSIX) != 0) { + p = (struct proc *)id; + tl.lval = p->p_pid; + *cp++ = tl.cval[0]; + *cp++ = tl.cval[1]; + *cp++ = tl.cval[2]; + *cp++ = tl.cval[3]; + tl.lval = p->p_stats->p_start.tv_sec; + *cp++ = tl.cval[0]; + *cp++ = tl.cval[1]; + *cp++ = tl.cval[2]; + *cp++ = tl.cval[3]; + tl.lval 
= p->p_stats->p_start.tv_usec; + *cp++ = tl.cval[0]; + *cp++ = tl.cval[1]; + *cp++ = tl.cval[2]; + *cp = tl.cval[3]; + } else if ((flags & F_FLOCK) != 0) { + bcopy(&id, cp, sizeof(id)); + bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id)); + } else { + printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n"); + bzero(cp, NFSV4CL_LOCKNAMELEN); + } } /* @@ -943,6 +970,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad.sin_family = AF_INET; sad.sin_len = sizeof (struct sockaddr_in); sad.sin_addr.s_addr = sin->sin_addr.s_addr; + CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); rt = rtalloc1((struct sockaddr *)&sad, 0, 0UL); if (rt != NULL) { if (rt->rt_ifp != NULL && @@ -956,6 +984,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) } RTFREE_LOCKED(rt); } + CURVNET_RESTORE(); #ifdef INET6 } else if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 sad6, *sin6; @@ -966,6 +995,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad6.sin6_family = AF_INET6; sad6.sin6_len = sizeof (struct sockaddr_in6); sad6.sin6_addr = sin6->sin6_addr; + CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); rt = rtalloc1((struct sockaddr *)&sad6, 0, 0UL); if (rt != NULL) { if (rt->rt_ifp != NULL && @@ -980,6 +1010,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) } RTFREE_LOCKED(rt); } + CURVNET_RESTORE(); #endif } return (retp); diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 7af0852..5d83d0b 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -68,7 +68,7 @@ static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); -static int nfsrpc_writerpc(vnode_t , struct uio *, int *, u_char *, +static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *, struct ucred *, 
nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *, @@ -1369,7 +1369,7 @@ nfsmout: * will then deadlock. */ APPLESTATIC int -nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp, +nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff, int called_from_strategy) { @@ -1382,6 +1382,7 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp, nfsv4stateid_t stateid; void *lckp; + *must_commit = 0; if (nmp->nm_clp != NULL) clidrev = nmp->nm_clp->nfsc_clientidrev; newcred = cred; @@ -1412,7 +1413,7 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp, if (nostateid) error = 0; else - error = nfsrpc_writerpc(vp, uiop, iomode, verfp, + error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); if (error == NFSERR_STALESTATEID) nfscl_initiate_recovery(nmp->nm_clp); @@ -1447,7 +1448,7 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, u_char *verfp, */ static int nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, - u_char *verfp, struct ucred *cred, nfsv4stateid_t *stateidp, + int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; @@ -1585,14 +1586,16 @@ nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, else if (committed == NFSWRITE_DATASYNC && commit == NFSWRITE_UNSTABLE) committed = commit; - if (verfp != NULL) - NFSBCOPY((caddr_t)tl, verfp, NFSX_VERF); NFSLOCKMNT(nmp); if (!NFSHASWRITEVERF(nmp)) { NFSBCOPY((caddr_t)tl, (caddr_t)&nmp->nm_verf[0], NFSX_VERF); NFSSETWRITEVERF(nmp); + } else if (NFSBCMP(tl, nmp->nm_verf, + NFSX_VERF)) { + *must_commit = 1; + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); } NFSUNLOCKMNT(nmp); } @@ -3456,7 +3459,7 @@ nfsmout: */ APPLESTATIC int 
nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, - int reclaim, struct ucred *cred, NFSPROC_T *p) + int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfscllockowner *lp; struct nfsclclient *clp; @@ -3508,11 +3511,11 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, error = nfscl_getcl(vp, cred, p, &clp); if (error) return (error); - error = nfscl_lockt(vp, clp, off, len, fl, p); + error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); if (!error) { clidrev = clp->nfsc_clientidrev; error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred, - p); + p, id, flags); } else if (error == -1) { error = 0; } @@ -3527,7 +3530,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, return (error); do { error = nfscl_relbytelock(vp, off, len, cred, p, callcnt, - clp, &lp, &dorpc); + clp, id, flags, &lp, &dorpc); /* * If it returns a NULL lp, we're done. */ @@ -3535,7 +3538,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, if (callcnt == 0) nfscl_clientrelease(clp); else - nfscl_releasealllocks(clp, vp, p); + nfscl_releasealllocks(clp, vp, p, id, flags); return (error); } if (nmp->nm_clp != NULL) @@ -3569,10 +3572,10 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, } callcnt++; } while (error == 0 && nd->nd_repstat == 0); - nfscl_releasealllocks(clp, vp, p); + nfscl_releasealllocks(clp, vp, p, id, flags); } else if (op == F_SETLK) { error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p, - NULL, 0, NULL, NULL, &lp, &newone, &donelocally); + NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally); if (error || donelocally) { return (error); } @@ -3622,7 +3625,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, APPLESTATIC int nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, - struct ucred *cred, NFSPROC_T *p) + struct ucred *cred, NFSPROC_T *p, void *id, int flags) { 
u_int32_t *tl; int error, type, size; @@ -3640,7 +3643,7 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, tl += 2; *tl++ = clp->nfsc_clientid.lval[0]; *tl = clp->nfsc_clientid.lval[1]; - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); (void) nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN); error = nfscl_request(nd, vp, p, cred, NULL); if (error) diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 8e9aa6a..aa81437 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -226,7 +226,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, * If none found, add the new one or return error, depending upon * "create". */ - nfscl_filllockowner(p, own); + nfscl_filllockowner(p->td_proc, own, F_POSIX); NFSLOCKCLSTATE(); dp = NULL; /* First check the delegation list */ @@ -521,7 +521,7 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, * If p != NULL, we want to search the parentage tree * for a matching OpenOwner and use that. 
*/ - nfscl_filllockowner(p, own); + nfscl_filllockowner(p->td_proc, own, F_POSIX); error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, NULL, p, mode, NULL, &op); if (error == 0) { @@ -596,7 +596,7 @@ nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen, op = NULL; while (op == NULL && (nproc != NULL || rown != NULL)) { if (nproc != NULL) { - nfscl_filllockowner(nproc, own); + nfscl_filllockowner(nproc->td_proc, own, F_POSIX); ownp = own; } else { ownp = rown; @@ -687,11 +687,14 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *clp; struct nfsclclient *newclp = NULL; struct nfscllockowner *lp, *nlp; - struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct mount *mp; + struct nfsmount *nmp; char uuid[HOSTUUIDLEN]; int igotlock = 0, error, trystalecnt, clidinusedelay, i; u_int16_t idlen = 0; + mp = vnode_mount(vp); + nmp = VFSTONFS(mp); if (cred != NULL) { getcredhostuuid(cred, uuid, sizeof uuid); idlen = strlen(uuid); @@ -704,6 +707,17 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, M_WAITOK); } NFSLOCKCLSTATE(); + /* + * If a forced dismount is already in progress, don't + * allocate a new clientid and get out now. For the case where + * clp != NULL, this is a harmless optimization. 
+ */ + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + NFSUNLOCKCLSTATE(); + if (newclp != NULL) + free(newclp, M_NFSCLCLIENT); + return (EBADF); + } clp = nmp->nm_clp; if (clp == NULL) { if (newclp == NULL) { @@ -736,9 +750,21 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, NFSLOCKCLSTATE(); while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock) igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, - NFSCLSTATEMUTEXPTR); + NFSCLSTATEMUTEXPTR, mp); if (!igotlock) - nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR); + nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp); + if (igotlock == 0 && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + /* + * Both nfsv4_lock() and nfsv4_getref() know to check + * for MNTK_UNMOUNTF and return without sleeping to + * wait for the exclusive lock to be released, since it + * might be held by nfscl_umount() and we need to get out + * now for that case and not wait until nfscl_umount() + * releases it. + */ + NFSUNLOCKCLSTATE(); + return (EBADF); + } NFSUNLOCKCLSTATE(); /* @@ -855,7 +881,7 @@ nfscl_clientrelease(struct nfsclclient *clp) APPLESTATIC int nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp, - int recovery, u_int8_t *rownp, u_int8_t *ropenownp, + int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp, struct nfscllockowner **lpp, int *newonep, int *donelocallyp) { struct nfscllockowner *lp; @@ -916,7 +942,7 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, if (recovery) { ownp = rownp; } else { - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); ownp = own; } if (!recovery) { @@ -1053,7 +1079,8 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, APPLESTATIC int nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, __unused struct ucred *cred, NFSPROC_T *p, int callcnt, - struct nfsclclient *clp, struct nfscllockowner **lpp, int *dorpcp) + struct 
nfsclclient *clp, void *id, int flags, + struct nfscllockowner **lpp, int *dorpcp) { struct nfscllockowner *lp; struct nfsclowner *owp; @@ -1090,7 +1117,7 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); *other_lop = *nlop; } - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); dp = NULL; NFSLOCKCLSTATE(); if (callcnt == 0) @@ -1162,7 +1189,8 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, * Release all lockowners marked in progess for this process and file. */ APPLESTATIC void -nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p) +nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p, + void *id, int flags) { struct nfsclowner *owp; struct nfsclopen *op; @@ -1171,7 +1199,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p) u_int8_t own[NFSV4CL_LOCKNAMELEN]; np = VTONFS(vp); - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { @@ -1200,7 +1228,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p) */ APPLESTATIC int nfscl_checkwritelocked(vnode_t vp, struct flock *fl, - struct ucred *cred, NFSPROC_T *p) + struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfscllockowner *lp; @@ -1240,7 +1268,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl, error = nfscl_getcl(vp, cred, p, &clp); if (error) return (1); - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); /* @@ -1615,7 +1643,7 @@ nfscl_cleanup(NFSPROC_T *p) if (!nfscl_inited) return; - nfscl_filllockowner(p, own); + nfscl_filllockowner(p->td_proc, own, F_POSIX); NFSLOCKCLSTATE(); /* @@ -1713,6 +1741,7 @@ nfscl_cleanupkext(struct nfsclclient *clp) } #endif /* APPLEKEXT || __FreeBSD__ */ +static int fake_global; /* Used to force 
visibility of MNTK_UNMOUNTF */ /* * Called from nfs umount to free up the clientid. */ @@ -1723,6 +1752,33 @@ nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) struct ucred *cred; int igotlock; + /* + * For the case that matters, this is the thread that set + * MNTK_UNMOUNTF, so it will see it set. The code that follows is + * done to ensure that any thread executing nfscl_getcl() after + * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the + * mutex for NFSLOCKCLSTATE(), so it is "m" for the following + * explanation, courtesy of Alan Cox. + * What follows is a snippet from Alan Cox's email at: + * http://docs.FreeBSD.org/cgi/ + * mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw + * + * 1. Set MNTK_UNMOUNTF + * 2. Acquire a standard FreeBSD mutex "m". + * 3. Update some data structures. + * 4. Release mutex "m". + * + * Then, other threads that acquire "m" after step 4 has occurred will + * see MNTK_UNMOUNTF as set. But, other threads that beat thread X to + * step 2 may or may not see MNTK_UNMOUNTF as set. + */ + NFSLOCKCLSTATE(); + if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + fake_global++; + NFSUNLOCKCLSTATE(); + NFSLOCKCLSTATE(); + } + clp = nmp->nm_clp; if (clp != NULL) { if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0) @@ -1734,12 +1790,16 @@ nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) */ clp->nfsc_flags |= NFSCLFLAGS_UMOUNT; while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD) - (void) tsleep((caddr_t)clp, PWAIT, "nfsclumnt", hz); + (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, + "nfsclumnt", hz); - NFSLOCKCLSTATE(); + /* + * Now, get the exclusive lock on the client state, so + * that no uses of the state are still in progress. 
+ */ do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, - NFSCLSTATEMUTEXPTR); + NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); @@ -1756,8 +1816,8 @@ nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) nmp->nm_clp = NULL; NFSFREECRED(cred); FREE((caddr_t)clp, M_NFSCLCLIENT); - } - + } else + NFSUNLOCKCLSTATE(); } /* @@ -1790,7 +1850,7 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, - NFSCLSTATEMUTEXPTR); + NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); @@ -2105,7 +2165,7 @@ nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, - NFSCLSTATEMUTEXPTR); + NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT)); if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) { if (igotlock) @@ -2464,7 +2524,7 @@ tryagain: } while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, - &islept, NFSCLSTATEMUTEXPTR); + &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) goto tryagain; } @@ -2556,14 +2616,18 @@ tryagain: } #endif /* APPLEKEXT || __FreeBSD__ */ + NFSLOCKCLSTATE(); if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0) - (void) tsleep((caddr_t)clp, PWAIT, "nfscl", hz); + (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl", + hz); if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) { - NFSFREECRED(cred); clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD; + NFSUNLOCKCLSTATE(); + NFSFREECRED(cred); wakeup((caddr_t)clp); return; } + NFSUNLOCKCLSTATE(); } } @@ -3260,7 +3324,7 @@ nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop, */ APPLESTATIC int nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, - u_int64_t len, struct flock *fl, NFSPROC_T *p) + u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags) { struct nfscllock *lop, nlck; struct 
nfscldeleg *dp; @@ -3278,7 +3342,7 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, return (NFSERR_INVAL); } np = VTONFS(vp); - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, @@ -3553,7 +3617,7 @@ nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p, - clp, 1, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone, + clp, 1, NULL, 0, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone, &donelocally); if (error || donelocally) return (error); @@ -3864,7 +3928,7 @@ nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp) islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, - &islept, NFSCLSTATEMUTEXPTR); + &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } @@ -3963,7 +4027,7 @@ nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp, islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, - &islept, NFSCLSTATEMUTEXPTR); + &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } @@ -4043,7 +4107,7 @@ nfscl_getref(struct nfsmount *nmp) NFSUNLOCKCLSTATE(); return (0); } - nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR); + nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL); NFSUNLOCKCLSTATE(); return (1); } diff --git a/sys/fs/nfsclient/nfs_clsubs.c b/sys/fs/nfsclient/nfs_clsubs.c index 0c05631..214dfb7 100644 --- a/sys/fs/nfsclient/nfs_clsubs.c +++ b/sys/fs/nfsclient/nfs_clsubs.c @@ -35,6 +35,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_kdtrace.h" + /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. 
They do things like create the rpc header and @@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <fs/nfsclient/nfsnode.h> #include <fs/nfsclient/nfsmount.h> #include <fs/nfsclient/nfs.h> +#include <fs/nfsclient/nfs_kdtrace.h> #include <netinet/in.h> @@ -238,6 +241,7 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper) #ifdef NFS_ACDEBUG mtx_unlock(&Giant); /* ncl_printf() */ #endif + KDTRACE_NFS_ATTRCACHE_GET_MISS(vp); return( ENOENT); } newnfsstats.attrcache_hits++; @@ -267,6 +271,7 @@ ncl_getattrcache(struct vnode *vp, struct vattr *vaper) #ifdef NFS_ACDEBUG mtx_unlock(&Giant); /* ncl_printf() */ #endif + KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap); return (0); } diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 84725dd..d962a54 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -1458,10 +1458,20 @@ nfs_sync(struct mount *mp, int waitfor) td = curthread; + MNT_ILOCK(mp); + /* + * If a forced dismount is in progress, return from here so that + * the umount(2) syscall doesn't get stuck in VFS_SYNC() before + * calling VFS_UNMOUNT(). + */ + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + MNT_IUNLOCK(mp); + return (EBADF); + } + /* * Force stale buffer cache information to be flushed. 
*/ - MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 1b08582..0a1d6a2 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); * vnode op calls for Sun NFS version 2, 3 and 4 */ +#include "opt_kdtrace.h" #include "opt_inet.h" #include <sys/param.h> @@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include <fs/nfsclient/nfsnode.h> #include <fs/nfsclient/nfsmount.h> #include <fs/nfsclient/nfs.h> +#include <fs/nfsclient/nfs_kdtrace.h> #include <net/if.h> #include <netinet/in.h> @@ -77,6 +79,24 @@ __FBSDID("$FreeBSD$"); #include <nfs/nfs_lock.h> +#ifdef KDTRACE_HOOKS +#include <sys/dtrace_bsd.h> + +dtrace_nfsclient_accesscache_flush_probe_func_t + dtrace_nfscl_accesscache_flush_done_probe; +uint32_t nfscl_accesscache_flush_done_id; + +dtrace_nfsclient_accesscache_get_probe_func_t + dtrace_nfscl_accesscache_get_hit_probe, + dtrace_nfscl_accesscache_get_miss_probe; +uint32_t nfscl_accesscache_get_hit_id; +uint32_t nfscl_accesscache_get_miss_id; + +dtrace_nfsclient_accesscache_load_probe_func_t + dtrace_nfscl_accesscache_load_done_probe; +uint32_t nfscl_accesscache_load_done_id; +#endif /* KDTRACE_HOOKS */ + /* Defs */ #define TRUE 1 #define FALSE 0 @@ -298,9 +318,15 @@ nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, mtx_unlock(&np->n_mtx); if (retmode != NULL) *retmode = rmode; + KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } +#ifdef KDTRACE_HOOKS + if (error != 0) + KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, + error); +#endif return (error); } @@ -397,6 +423,14 @@ nfs_access(struct vop_access_args *ap) } } mtx_unlock(&np->n_mtx); +#ifdef KDTRACE_HOOKS + if (gotahit != 0) + KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, + ap->a_cred->cr_uid, mode); + else + KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, +
ap->a_cred->cr_uid, mode); +#endif if (gotahit == 0) { /* * Either a no, or a don't know. Go to the wire. @@ -507,6 +541,7 @@ nfs_open(struct vop_open_args *ap) } mtx_lock(&np->n_mtx); np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); @@ -692,8 +727,10 @@ nfs_close(struct vop_close_args *ap) * is the cause of some caching/coherency issue that might * crop up.) */ - if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) + if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); + } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; @@ -949,6 +986,7 @@ nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, np->n_accesscache[i].stamp = 0; np->n_flag |= NDELEGMOD; mtx_unlock(&np->n_mtx); + KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); } error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, NULL); @@ -1030,6 +1068,7 @@ nfs_lookup(struct vop_lookup_args *ap) !(newnp->n_flag & NMODIFIED)) { mtx_lock(&newnp->n_mtx); newnp->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&newnp->n_mtx); } if (nfscl_nodeleg(newvp, 0) == 0 || @@ -1233,6 +1272,7 @@ nfs_lookup(struct vop_lookup_args *ap) */ mtx_lock(&np->n_mtx); np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); mtx_unlock(&np->n_mtx); } } @@ -1332,19 +1372,9 @@ ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, { struct nfsvattr nfsva; int error = 0, attrflag, ret; - u_char verf[NFSX_VERF]; - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - *must_commit = 0; - error = nfsrpc_write(vp, uiop, iomode, verf, cred, + error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); - NFSLOCKMNT(nmp); - if (!error && NFSHASWRITEVERF(nmp) && - NFSBCMP(verf, nmp->nm_verf, NFSX_VERF)) { - *must_commit = 1; - NFSBCOPY(verf, nmp->nm_verf, NFSX_VERF); - } 
- NFSUNLOCKMNT(nmp); if (attrflag) { if (VTONFS(vp)->n_flag & ND_NFSV4) ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, @@ -1422,8 +1452,10 @@ nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, dnp = VTONFS(dvp); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; - if (!dattrflag) + if (!dattrflag) { dnp->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); + } mtx_unlock(&dnp->n_mtx); return (error); } @@ -1576,8 +1608,10 @@ again: } mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; - if (!dattrflag) + if (!dattrflag) { dnp->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); + } mtx_unlock(&dnp->n_mtx); return (error); } @@ -1640,6 +1674,7 @@ nfs_remove(struct vop_remove_args *ap) mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); return (error); } @@ -1688,8 +1723,10 @@ nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); mtx_lock(&dnp->n_mtx); dnp->n_flag |= NMODIFIED; - if (!dattrflag) + if (!dattrflag) { dnp->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); + } mtx_unlock(&dnp->n_mtx); if (error && NFS_ISV4(dvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); @@ -1867,6 +1904,7 @@ nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, } else { fdnp->n_attrstamp = 0; mtx_unlock(&fdnp->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); } mtx_lock(&tdnp->n_mtx); tdnp->n_flag |= NMODIFIED; @@ -1876,6 +1914,7 @@ nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (error && NFS_ISV4(fdvp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); @@ -1918,6 +1957,7 @@ nfs_link(struct vop_link_args *ap) } else { tdnp->n_attrstamp = 0; mtx_unlock(&tdnp->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); } if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, 
NULL, NULL, 0, 1); @@ -1926,6 +1966,7 @@ nfs_link(struct vop_link_args *ap) mtx_lock(&np->n_mtx); np->n_attrstamp = 0; mtx_unlock(&np->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); } /* * If negative lookup caching is enabled, I might as well @@ -2012,6 +2053,7 @@ nfs_symlink(struct vop_symlink_args *ap) } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } return (error); } @@ -2047,6 +2089,7 @@ nfs_mkdir(struct vop_mkdir_args *ap) } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } if (nfhp) { ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread, @@ -2115,6 +2158,7 @@ nfs_rmdir(struct vop_rmdir_args *ap) } else { dnp->n_attrstamp = 0; mtx_unlock(&dnp->n_mtx); + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); } cache_purge(dvp); @@ -2480,10 +2524,12 @@ ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva, &attrflag, NULL); if (!error) { + mtx_lock(&nmp->nm_mtx); if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) { NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF); error = NFSERR_STALEWRITEVERF; } + mtx_unlock(&nmp->nm_mtx); if (!error && attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); @@ -2892,8 +2938,11 @@ nfs_advlock(struct vop_advlock_args *ap) int ret, error = EOPNOTSUPP; u_quad_t size; - if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) { - cred = p->p_ucred; + if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { + if ((ap->a_flags & F_POSIX) != 0) + cred = p->p_ucred; + else + cred = td->td_ucred; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); @@ -2906,7 +2955,8 @@ nfs_advlock(struct vop_advlock_args *ap) * RFC3530 Sec. 9.3.2. 
*/ if (ap->a_op == F_UNLCK && - nfscl_checkwritelocked(vp, ap->a_fl, cred, td)) + nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, + ap->a_flags)) (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0); /* @@ -2915,7 +2965,7 @@ nfs_advlock(struct vop_advlock_args *ap) */ do { ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, - ap->a_fl, 0, cred, td); + ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK) { VOP_UNLOCK(vp, 0); @@ -2950,12 +3000,14 @@ nfs_advlock(struct vop_advlock_args *ap) if (ap->a_op == F_SETLK) { if ((np->n_flag & NMODIFIED) == 0) { np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); } if ((np->n_flag & NMODIFIED) || ret || np->n_change != va.va_filerev) { (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); np->n_attrstamp = 0; + KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); if (!ret) { np->n_mtime = va.va_mtime; @@ -3301,7 +3353,13 @@ nfs_pathconf(struct vop_pathconf_args *ap) struct thread *td = curthread; int attrflag, error; - if (NFS_ISV34(vp)) { + if (NFS_ISV4(vp) || (NFS_ISV3(vp) && (ap->a_name == _PC_LINK_MAX || + ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || + ap->a_name == _PC_NO_TRUNC))) { + /* + * Since only the above 4 a_names are returned by the NFSv3 + * Pathconf RPC, there is no point in doing it for others. + */ error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, &attrflag, NULL); if (attrflag != 0) @@ -3310,7 +3368,10 @@ nfs_pathconf(struct vop_pathconf_args *ap) if (error != 0) return (error); } else { - /* For NFSv2, just fake them. */ + /* + * For NFSv2 (or NFSv3 when not one of the above 4 a_names), + * just fake them. 
+ */ pc.pc_linkmax = LINK_MAX; pc.pc_namemax = NFS_MAXNAMLEN; pc.pc_notrunc = 1; diff --git a/sys/fs/nfsclient/nfs_kdtrace.h b/sys/fs/nfsclient/nfs_kdtrace.h new file mode 100644 index 0000000..f8f143f --- /dev/null +++ b/sys/fs/nfsclient/nfs_kdtrace.h @@ -0,0 +1,120 @@ +/*- + * Copyright (c) 2009 Robert N. M. Watson + * All rights reserved. + * + * This software was developed at the University of Cambridge Computer + * Laboratory with support from a grant from Google, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NFSCL_NFS_KDTRACE_H_ +#define _NFSCL_NFS_KDTRACE_H_ + +#ifdef KDTRACE_HOOKS +#include <sys/dtrace_bsd.h> + +/* + * Definitions for NFS access cache probes. 
+ */ +extern uint32_t nfscl_accesscache_flush_done_id; +extern uint32_t nfscl_accesscache_get_hit_id; +extern uint32_t nfscl_accesscache_get_miss_id; +extern uint32_t nfscl_accesscache_load_done_id; + +#define KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp) do { \ + if (dtrace_nfscl_accesscache_flush_done_probe != NULL) \ + (dtrace_nfscl_accesscache_flush_done_probe)( \ + nfscl_accesscache_flush_done_id, (vp)); \ +} while (0) + +#define KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, uid, mode) do { \ + if (dtrace_nfscl_accesscache_get_hit_probe != NULL) \ + (dtrace_nfscl_accesscache_get_hit_probe)( \ + nfscl_accesscache_get_hit_id, (vp), (uid), \ + (mode)); \ +} while (0) + +#define KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, uid, mode) do { \ + if (dtrace_nfscl_accesscache_get_miss_probe != NULL) \ + (dtrace_nfscl_accesscache_get_miss_probe)( \ + nfscl_accesscache_get_miss_id, (vp), (uid), \ + (mode)); \ +} while (0) + +#define KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, uid, rmode, error) do { \ + if (dtrace_nfscl_accesscache_load_done_probe != NULL) \ + (dtrace_nfscl_accesscache_load_done_probe)( \ + nfscl_accesscache_load_done_id, (vp), (uid), \ + (rmode), (error)); \ +} while (0) + +/* + * Definitions for NFS attribute cache probes. 
+ */ +extern uint32_t nfscl_attrcache_flush_done_id; +extern uint32_t nfscl_attrcache_get_hit_id; +extern uint32_t nfscl_attrcache_get_miss_id; +extern uint32_t nfscl_attrcache_load_done_id; + +#define KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp) do { \ + if (dtrace_nfscl_attrcache_flush_done_probe != NULL) \ + (dtrace_nfscl_attrcache_flush_done_probe)( \ + nfscl_attrcache_flush_done_id, (vp)); \ +} while (0) + +#define KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap) do { \ + if (dtrace_nfscl_attrcache_get_hit_probe != NULL) \ + (dtrace_nfscl_attrcache_get_hit_probe)( \ + nfscl_attrcache_get_hit_id, (vp), (vap)); \ +} while (0) + +#define KDTRACE_NFS_ATTRCACHE_GET_MISS(vp) do { \ + if (dtrace_nfscl_attrcache_get_miss_probe != NULL) \ + (dtrace_nfscl_attrcache_get_miss_probe)( \ + nfscl_attrcache_get_miss_id, (vp)); \ +} while (0) + +#define KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error) do { \ + if (dtrace_nfscl_attrcache_load_done_probe != NULL) \ + (dtrace_nfscl_attrcache_load_done_probe)( \ + nfscl_attrcache_load_done_id, (vp), (vap), \ + (error)); \ +} while (0) + +#else /* !KDTRACE_HOOKS */ + +#define KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp) +#define KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, uid, mode) +#define KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, uid, mode) +#define KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, uid, rmode, error) + +#define KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp) +#define KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap) +#define KDTRACE_NFS_ATTRCACHE_GET_MISS(vp) +#define KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error) + +#endif /* KDTRACE_HOOKS */ + +#endif /* !_NFSCL_NFS_KDTRACE_H_ */ diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c index bcffd6c..fc513a3 100644 --- a/sys/fs/nfsserver/nfs_nfsdcache.c +++ b/sys/fs/nfsserver/nfs_nfsdcache.c @@ -405,6 +405,7 @@ nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so) { struct nfsrvcache *rp; struct nfsrvcache *retrp = NULL; + mbuf_t m; rp = nd->nd_rp; if (!rp) @@ -457,9 +458,9 @@ 
nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so) } if ((nd->nd_flag & ND_NFSV2) && nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) { - NFSUNLOCKCACHE(); rp->rc_status = nd->nd_repstat; rp->rc_flag |= RC_REPSTATUS; + NFSUNLOCKCACHE(); } else { if (!(rp->rc_flag & RC_UDP)) { nfsrc_tcpsavedreplies++; @@ -469,9 +470,11 @@ nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so) nfsrc_tcpsavedreplies; } NFSUNLOCKCACHE(); - rp->rc_reply = m_copym(nd->nd_mreq, 0, M_COPYALL, - M_WAIT); + m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAIT); + NFSLOCKCACHE(); + rp->rc_reply = m; rp->rc_flag |= RC_REPMBUF; + NFSUNLOCKCACHE(); } if (rp->rc_flag & RC_UDP) { rp->rc_timestamp = NFSD_MONOSEC + @@ -518,6 +521,7 @@ nfsrvd_delcache(struct nfsrvcache *rp) APPLESTATIC void nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err) { + tcp_seq tmp_seq; if (!(rp->rc_flag & RC_LOCKED)) panic("nfsrvd_sentcache not locked"); @@ -526,8 +530,12 @@ nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err) so->so_proto->pr_domain->dom_family != AF_INET6) || so->so_proto->pr_protocol != IPPROTO_TCP) panic("nfs sent cache"); - if (nfsrv_getsockseqnum(so, &rp->rc_tcpseq)) + if (nfsrv_getsockseqnum(so, &tmp_seq)) { + NFSLOCKCACHE(); + rp->rc_tcpseq = tmp_seq; rp->rc_flag |= RC_TCPSEQ; + NFSUNLOCKCACHE(); + } } nfsrc_unlock(rp); } @@ -687,8 +695,11 @@ nfsrc_lock(struct nfsrvcache *rp) static void nfsrc_unlock(struct nfsrvcache *rp) { + + NFSLOCKCACHE(); rp->rc_flag &= ~RC_LOCKED; nfsrc_wanted(rp); + NFSUNLOCKCACHE(); } /* diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c index 8ce70fa..2484919 100644 --- a/sys/fs/nfsserver/nfs_nfsdkrpc.c +++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c @@ -386,18 +386,14 @@ nfsrvd_addsock(struct file *fp) int nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args) { -#ifdef KGSSAPI char principal[MAXHOSTNAMELEN + 5]; int error; bool_t ret2, ret3, ret4; -#endif -#ifdef KGSSAPI error = 
copyinstr(args->principal, principal, sizeof (principal), NULL); if (error) return (error); -#endif /* * Only the first nfsd actually does any work. The RPC code @@ -412,38 +408,29 @@ nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args) NFSD_UNLOCK(); -#ifdef KGSSAPI /* An empty string implies AUTH_SYS only. */ if (principal[0] != '\0') { - ret2 = rpc_gss_set_svc_name(principal, "kerberosv5", - GSS_C_INDEFINITE, NFS_PROG, NFS_VER2); - ret3 = rpc_gss_set_svc_name(principal, "kerberosv5", - GSS_C_INDEFINITE, NFS_PROG, NFS_VER3); - ret4 = rpc_gss_set_svc_name(principal, "kerberosv5", - GSS_C_INDEFINITE, NFS_PROG, NFS_VER4); - - if (!ret2 || !ret3 || !ret4) { - NFSD_LOCK(); - newnfs_numnfsd--; - nfsrvd_init(1); - NFSD_UNLOCK(); - return (EAUTH); - } + ret2 = rpc_gss_set_svc_name_call(principal, + "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER2); + ret3 = rpc_gss_set_svc_name_call(principal, + "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER3); + ret4 = rpc_gss_set_svc_name_call(principal, + "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER4); + + if (!ret2 || !ret3 || !ret4) + printf("nfsd: can't register svc name\n"); } -#endif nfsrvd_pool->sp_minthreads = args->minthreads; nfsrvd_pool->sp_maxthreads = args->maxthreads; svc_run(nfsrvd_pool); -#ifdef KGSSAPI if (principal[0] != '\0') { - rpc_gss_clear_svc_name(NFS_PROG, NFS_VER2); - rpc_gss_clear_svc_name(NFS_PROG, NFS_VER3); - rpc_gss_clear_svc_name(NFS_PROG, NFS_VER4); + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2); + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3); + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER4); } -#endif NFSD_LOCK(); newnfs_numnfsd--; diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index d62be99..5b96729 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -2592,6 +2592,36 @@ nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf, int error; error = VOP_PATHCONF(vp, flag, retf); + if (error == EOPNOTSUPP || error 
== EINVAL) { + /* + * Some file systems return EINVAL for name arguments not + * supported and some return EOPNOTSUPP for this case. + * So the NFSv3 Pathconf RPC doesn't fail for these cases, + * just fake them. + */ + switch (flag) { + case _PC_LINK_MAX: + *retf = LINK_MAX; + break; + case _PC_NAME_MAX: + *retf = NAME_MAX; + break; + case _PC_CHOWN_RESTRICTED: + *retf = 1; + break; + case _PC_NO_TRUNC: + *retf = 1; + break; + default: + /* + * Only happens if a _PC_xxx is added to the server, + * but this isn't updated. + */ + *retf = 0; + printf("nfsrvd pathconf flag=%d not supp\n", flag); + }; + error = 0; + } return (error); } diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index ee55031..fc296c0 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -454,7 +454,7 @@ nfsmout: APPLESTATIC int nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram, vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p, - __unused struct nfsexstuff *exp) + struct nfsexstuff *exp) { struct nameidata named; vnode_t vp, dirp = NULL; @@ -508,7 +508,15 @@ nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram, vrele(named.ni_startdir); nfsvno_relpathbuf(&named); vp = named.ni_vp; - nd->nd_repstat = nfsvno_getfh(vp, fhp, p); + if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) && + vp->v_type != VDIR && vp->v_type != VLNK) + /* + * Only allow lookup of VDIR and VLNK for traversal of + * non-exported volumes during NFSv4 mounting. 
+ */ + nd->nd_repstat = ENOENT; + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_getfh(vp, fhp, p); if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1); if (vpp != NULL && nd->nd_repstat == 0) diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c index eeecded..33be284 100644 --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -525,10 +525,10 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, NFSLOCKV4ROOTMUTEX(); if (nfsrv_stablefirst.nsf_flags & NFSNSF_NEEDLOCK) igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); else igotlock = nfsv4_lock(&nfsv4rootfs_lock, 0, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); if (igotlock) { /* @@ -576,7 +576,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); } @@ -786,6 +786,8 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, op != NFSV4OP_LOOKUP && op != NFSV4OP_GETATTR && op != NFSV4OP_GETFH && + op != NFSV4OP_ACCESS && + op != NFSV4OP_READLINK && op != NFSV4OP_SECINFO) nd->nd_repstat = NFSERR_NOFILEHANDLE; else if (nfsvno_testexp(nd, &vpnes) && diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index ab94f0e..fc84d72 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -169,7 +169,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); @@ -419,7 +419,7 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, nfsv4_relref(&nfsv4rootfs_lock); do { 
igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { @@ -548,7 +548,7 @@ nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) NFSLOCKV4ROOTMUTEX(); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); @@ -608,7 +608,7 @@ nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) * exclusive lock cannot be acquired while dumping the clients. */ NFSLOCKV4ROOTMUTEX(); - nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR); + nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); /* @@ -709,7 +709,7 @@ nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt, * exclusive lock on it cannot be acquired while dumping the locks. */ NFSLOCKV4ROOTMUTEX(); - nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR); + nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); if (!ret) @@ -4254,7 +4254,7 @@ nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp, nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; @@ -4422,7 +4422,7 @@ nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, - NFSV4ROOTLOCKMUTEXPTR); + NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; @@ -4616,7 +4616,7 @@ nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) * exclusive lock cannot be acquired by another thread. 
*/ NFSLOCKV4ROOTMUTEX(); - nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR); + nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); /* @@ -5179,7 +5179,7 @@ nfsrv_locklf(struct nfslockfile *lfp) lfp->lf_usecount++; do { gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL, - NFSSTATEMUTEXPTR); + NFSSTATEMUTEXPTR, NULL); } while (gotlock == 0); lfp->lf_usecount--; } diff --git a/sys/fs/nwfs/nwfs_io.c b/sys/fs/nwfs/nwfs_io.c index 141c52e..d764f79 100644 --- a/sys/fs/nwfs/nwfs_io.c +++ b/sys/fs/nwfs/nwfs_io.c @@ -544,7 +544,7 @@ nwfs_putpages(ap) npages = btoc(count); for (i = 0; i < npages; i++) { - rtvals[i] = VM_PAGER_AGAIN; + rtvals[i] = VM_PAGER_ERROR; } bp = getpbuf(&nwfs_pbuf_freecnt); @@ -569,13 +569,8 @@ nwfs_putpages(ap) pmap_qremove(kva, npages); relpbuf(bp, &nwfs_pbuf_freecnt); - if (!error) { - int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; - for (i = 0; i < nwritten; i++) { - rtvals[i] = VM_PAGER_OK; - vm_page_undirty(pages[i]); - } - } + if (!error) + vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid); return rtvals[0]; #endif /* NWFS_RWCACHE */ } diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c index 6c02fc6..4599a6f 100644 --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -609,7 +609,7 @@ smbfs_putpages(ap) npages = btoc(count); for (i = 0; i < npages; i++) { - rtvals[i] = VM_PAGER_AGAIN; + rtvals[i] = VM_PAGER_ERROR; } bp = getpbuf(&smbfs_pbuf_freecnt); @@ -639,13 +639,8 @@ smbfs_putpages(ap) relpbuf(bp, &smbfs_pbuf_freecnt); - if (!error) { - int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; - for (i = 0; i < nwritten; i++) { - rtvals[i] = VM_PAGER_OK; - vm_page_undirty(pages[i]); - } - } + if (!error) + vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid); return rtvals[0]; #endif /* SMBFS_RWGENERIC */ } diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 74c70ff..30497a4 100644 --- a/sys/geom/eli/g_eli.c +++ 
b/sys/geom/eli/g_eli.c @@ -672,7 +672,7 @@ static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP - return ((hlt_cpus_mask & (1 << cpu)) != 0); + return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif diff --git a/sys/geom/geom.h b/sys/geom/geom.h index 1dc6eb1..6256572 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -76,6 +76,7 @@ typedef void g_orphan_t (struct g_consumer *); typedef void g_start_t (struct bio *); typedef void g_spoiled_t (struct g_consumer *); +typedef void g_attrchanged_t (struct g_consumer *, const char *attr); typedef void g_dumpconf_t (struct sbuf *, const char *indent, struct g_geom *, struct g_consumer *, struct g_provider *); @@ -100,6 +101,7 @@ struct g_class { */ g_start_t *start; g_spoiled_t *spoiled; + g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; @@ -128,6 +130,7 @@ struct g_geom { int rank; g_start_t *start; g_spoiled_t *spoiled; + g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; @@ -217,6 +220,7 @@ struct g_classifier_hook { /* geom_dev.c */ struct cdev; void g_dev_print(void); +void g_dev_physpath_changed(void); struct g_provider *g_dev_getprovider(struct cdev *dev); /* geom_dump.c */ @@ -232,6 +236,7 @@ typedef void g_event_t(void *, int flag); int g_post_event(g_event_t *func, void *arg, int flag, ...); int g_waitfor_event(g_event_t *func, void *arg, int flag, ...); void g_cancel_event(void *ref); +int g_attr_changed(struct g_provider *pp, const char *attr, int flag); void g_orphan_provider(struct g_provider *pp, int error); void g_waitidlelock(void); diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index f291b32..210f2ee 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/kernel.h> #include <sys/conf.h> +#include <sys/ctype.h> #include <sys/bio.h> #include <sys/lock.h> #include <sys/mutex.h> @@ -52,6 +53,12 @@ 
__FBSDID("$FreeBSD$"); #include <sys/limits.h> #include <geom/geom.h> #include <geom/geom_int.h> +#include <machine/stdarg.h> + +/* + * Use the consumer private field to reference a physdev alias (if any). + */ +#define cp_alias_dev private static d_open_t g_dev_open; static d_close_t g_dev_close; @@ -72,12 +79,14 @@ static struct cdevsw g_dev_cdevsw = { static g_taste_t g_dev_taste; static g_orphan_t g_dev_orphan; +static g_attrchanged_t g_dev_attrchanged; static struct g_class g_dev_class = { .name = "DEV", .version = G_VERSION, .taste = g_dev_taste, .orphan = g_dev_orphan, + .attrchanged = g_dev_attrchanged }; void @@ -93,6 +102,40 @@ g_dev_print(void) printf("\n"); } +static void +g_dev_attrchanged(struct g_consumer *cp, const char *attr) +{ + + if (strcmp(attr, "GEOM::physpath") != 0) + return; + + if (g_access(cp, 1, 0, 0) == 0) { + char *physpath; + int error, physpath_len; + + physpath_len = MAXPATHLEN; + physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); + error = + g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); + g_access(cp, -1, 0, 0); + if (error == 0 && strlen(physpath) != 0) { + struct cdev *dev; + struct cdev *old_alias_dev; + struct cdev **alias_devp; + + dev = cp->geom->softc; + old_alias_dev = cp->cp_alias_dev; + alias_devp = (struct cdev **)&cp->cp_alias_dev; + make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp, + dev, old_alias_dev, physpath); + } else if (cp->cp_alias_dev) { + destroy_dev((struct cdev *)cp->cp_alias_dev); + cp->cp_alias_dev = NULL; + } + g_free(physpath); + } +} + struct g_provider * g_dev_getprovider(struct cdev *dev) { @@ -107,7 +150,6 @@ g_dev_getprovider(struct cdev *dev) return (cp->provider); } - static struct g_geom * g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) { @@ -167,6 +209,9 @@ g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) adev->si_drv1 = gp; adev->si_drv2 = cp; } + + g_dev_attrchanged(cp, "GEOM::physpath"); + return (gp); } @@ -365,6 
+410,11 @@ g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread case DIOCGSTRIPEOFFSET: *(off_t *)data = cp->provider->stripeoffset; break; + case DIOCGPHYSPATH: + error = g_io_getattr("GEOM::physpath", cp, &i, data); + if (error == 0 && *(char *)data == '\0') + error = ENOENT; + break; default: if (cp->provider->geom->ioctl != NULL) { error = cp->provider->geom->ioctl(cp->provider, cmd, data, fflag, td); diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c index a051772..17cae68 100644 --- a/sys/geom/geom_disk.c +++ b/sys/geom/geom_disk.c @@ -154,6 +154,12 @@ g_disk_access(struct g_provider *pp, int r, int w, int e) } pp->mediasize = dp->d_mediasize; pp->sectorsize = dp->d_sectorsize; + if (dp->d_flags & DISKFLAG_CANDELETE) + pp->flags |= G_PF_CANDELETE; + else + pp->flags &= ~G_PF_CANDELETE; + pp->stripeoffset = dp->d_stripeoffset; + pp->stripesize = dp->d_stripesize; dp->d_flags |= DISKFLAG_OPEN; if (dp->d_maxsize == 0) { printf("WARNING: Disk drive %s%d has no d_maxsize\n", @@ -341,6 +347,15 @@ g_disk_start(struct bio *bp) } while (bp2 != NULL); break; case BIO_GETATTR: + /* Give the driver a chance to override */ + if (dp->d_getattr != NULL) { + if (bp->bio_disk == NULL) + bp->bio_disk = dp; + error = dp->d_getattr(bp); + if (error != -1) + break; + error = EJUSTRETURN; + } if (g_handleattr_int(bp, "GEOM::candelete", (dp->d_flags & DISKFLAG_CANDELETE) != 0)) break; @@ -576,6 +591,18 @@ disk_gone(struct disk *dp) g_wither_provider(pp, ENXIO); } +void +disk_attr_changed(struct disk *dp, const char *attr, int flag) +{ + struct g_geom *gp; + struct g_provider *pp; + + gp = dp->d_geom; + if (gp != NULL) + LIST_FOREACH(pp, &gp->provider, provider) + (void)g_attr_changed(pp, attr, flag); +} + static void g_kern_disks(void *p, int flag __unused) { diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h index 2d5f15d..e92f4aa 100644 --- a/sys/geom/geom_disk.h +++ b/sys/geom/geom_disk.h @@ -49,6 +49,7 @@ struct disk; typedef int 
disk_open_t(struct disk *); typedef int disk_close_t(struct disk *); typedef void disk_strategy_t(struct bio *bp); +typedef int disk_getattr_t(struct bio *bp); typedef int disk_ioctl_t(struct disk *, u_long cmd, void *data, int fflag, struct thread *td); /* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */ @@ -75,6 +76,7 @@ struct disk { disk_strategy_t *d_strategy; disk_ioctl_t *d_ioctl; dumper_t *d_dump; + disk_getattr_t *d_getattr; /* Info fields from driver to geom_disk.c. Valid when open */ u_int d_sectorsize; @@ -104,6 +106,7 @@ struct disk *disk_alloc(void); void disk_create(struct disk *disk, int version); void disk_destroy(struct disk *disk); void disk_gone(struct disk *disk); +void disk_attr_changed(struct disk *dp, const char *attr, int flag); #define DISK_VERSION_00 0x58561059 #define DISK_VERSION_01 0x5856105a diff --git a/sys/geom/geom_dump.c b/sys/geom/geom_dump.c index d1e56d6..6f678cf 100644 --- a/sys/geom/geom_dump.c +++ b/sys/geom/geom_dump.c @@ -207,10 +207,8 @@ g_conf_provider(struct sbuf *sb, struct g_provider *pp) sbuf_printf(sb, "\t <mediasize>%jd</mediasize>\n", (intmax_t)pp->mediasize); sbuf_printf(sb, "\t <sectorsize>%u</sectorsize>\n", pp->sectorsize); - if (pp->stripesize > 0) { - sbuf_printf(sb, "\t <stripesize>%u</stripesize>\n", pp->stripesize); - sbuf_printf(sb, "\t <stripeoffset>%u</stripeoffset>\n", pp->stripeoffset); - } + sbuf_printf(sb, "\t <stripesize>%u</stripesize>\n", pp->stripesize); + sbuf_printf(sb, "\t <stripeoffset>%u</stripeoffset>\n", pp->stripeoffset); if (pp->geom->flags & G_GEOM_WITHER) ; else if (pp->geom->dumpconf != NULL) { diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c index d6e5498..1e2fc8d 100644 --- a/sys/geom/geom_event.c +++ b/sys/geom/geom_event.c @@ -110,6 +110,53 @@ g_waitidlelock(void) } #endif +struct g_attrchanged_args { + struct g_provider *pp; + const char *attr; +}; + +static void +g_attr_changed_event(void *arg, int flag) +{ + struct g_attrchanged_args *args; + struct g_provider 
*pp; + struct g_consumer *cp; + struct g_consumer *next_cp; + + args = arg; + pp = args->pp; + + g_topology_assert(); + if (flag != EV_CANCEL && g_shutdown == 0) { + + /* + * Tell all consumers of the change. + */ + LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, next_cp) { + if (cp->geom->attrchanged != NULL) + cp->geom->attrchanged(cp, args->attr); + } + } + g_free(args); +} + +int +g_attr_changed(struct g_provider *pp, const char *attr, int flag) +{ + struct g_attrchanged_args *args; + int error; + + args = g_malloc(sizeof *args, flag); + if (args == NULL) + return (ENOMEM); + args->pp = pp; + args->attr = attr; + error = g_post_event(g_attr_changed_event, args, flag, pp, NULL); + if (error != 0) + g_free(args); + return (error); +} + void g_orphan_provider(struct g_provider *pp, int error) { diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index a0958f3..6e2589b 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -350,6 +350,7 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...) 
/* Fill in defaults from class */ gp->start = mp->start; gp->spoiled = mp->spoiled; + gp->attrchanged = mp->attrchanged; gp->dumpconf = mp->dumpconf; gp->access = mp->access; gp->orphan = mp->orphan; diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index a3f8575..f24e7b5 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -248,6 +248,7 @@ g_part_check_integrity(struct g_part_table *table, struct g_consumer *cp) { struct g_part_entry *e1, *e2; struct g_provider *pp; + off_t offset; int failed; failed = 0; @@ -294,6 +295,16 @@ g_part_check_integrity(struct g_part_table *table, struct g_consumer *cp) (intmax_t)table->gpt_last); failed++; } + if (pp->stripesize > 0) { + offset = e1->gpe_start * pp->sectorsize; + if (e1->gpe_offset > offset) + offset = e1->gpe_offset; + if ((offset + pp->stripeoffset) % pp->stripesize) { + DPRINTF("partition %d is not aligned on %u " + "bytes\n", e1->gpe_index, pp->stripesize); + /* Don't treat this as a critical failure */ + } + } e2 = e1; while ((e2 = LIST_NEXT(e2, gpe_entry)) != NULL) { if (e2->gpe_deleted || e2->gpe_internal) @@ -723,7 +734,11 @@ g_part_ctl_add(struct gctl_req *req, struct g_part_parms *gpp) if (gpp->gpp_parms & G_PART_PARM_OUTPUT) { sb = sbuf_new_auto(); G_PART_FULLNAME(table, entry, sb, gp->name); - sbuf_cat(sb, " added\n"); + if (pp->stripesize > 0 && entry->gpe_pp->stripeoffset != 0) + sbuf_printf(sb, " added, but partition is not " + "aligned on %u bytes\n", pp->stripesize); + else + sbuf_cat(sb, " added\n"); sbuf_finish(sb); gctl_set_param(req, "output", sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); diff --git a/sys/geom/part/g_part_bsd.c b/sys/geom/part/g_part_bsd.c index 269b943..be46775 100644 --- a/sys/geom/part/g_part_bsd.c +++ b/sys/geom/part/g_part_bsd.c @@ -46,6 +46,11 @@ __FBSDID("$FreeBSD$"); #include "g_part_if.h" +#define BOOT1_SIZE 512 +#define LABEL_SIZE 512 +#define BOOT2_OFF (BOOT1_SIZE + LABEL_SIZE) +#define BOOT2_SIZE (BBSIZE - BOOT2_OFF) + 
FEATURE(geom_part_bsd, "GEOM partitioning class for BSD disklabels"); struct g_part_bsd_table { @@ -170,22 +175,16 @@ g_part_bsd_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_bsd_table *table; const u_char *codeptr; - size_t hdsz, tlsz; - size_t codesz, tlofs; - hdsz = 512; - tlofs = hdsz + 148 + basetable->gpt_entries * 16; - tlsz = BBSIZE - tlofs; + if (gpp->gpp_codesize != BOOT1_SIZE && gpp->gpp_codesize != BBSIZE) + return (ENODEV); + table = (struct g_part_bsd_table *)basetable; - bzero(table->bbarea, hdsz); - bzero(table->bbarea + tlofs, tlsz); codeptr = gpp->gpp_codeptr; - codesz = MIN(hdsz, gpp->gpp_codesize); - if (codesz > 0) - bcopy(codeptr, table->bbarea, codesz); - codesz = MIN(tlsz, gpp->gpp_codesize - tlofs); - if (codesz > 0) - bcopy(codeptr + tlofs, table->bbarea + tlofs, codesz); + bcopy(codeptr, table->bbarea, BOOT1_SIZE); + if (gpp->gpp_codesize == BBSIZE) + bcopy(codeptr + BOOT2_OFF, table->bbarea + BOOT2_OFF, + BOOT2_SIZE); return (0); } diff --git a/sys/geom/part/g_part_ebr.c b/sys/geom/part/g_part_ebr.c index f6278cc..8ea9b47 100644 --- a/sys/geom/part/g_part_ebr.c +++ b/sys/geom/part/g_part_ebr.c @@ -289,7 +289,6 @@ g_part_ebr_create(struct g_part_table *basetable, struct g_part_parms *gpp) return (ENXIO); msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); - msize -= msize % basetable->gpt_sectors; basetable->gpt_first = 0; basetable->gpt_last = msize - 1; basetable->gpt_entries = msize / basetable->gpt_sectors; @@ -523,7 +522,7 @@ g_part_ebr_read(struct g_part_table *basetable, struct g_consumer *cp) basetable->gpt_entries = msize / basetable->gpt_sectors; basetable->gpt_first = 0; - basetable->gpt_last = msize - (msize % basetable->gpt_sectors) - 1; + basetable->gpt_last = msize - 1; return (0); } diff --git a/sys/geom/part/g_part_mbr.c b/sys/geom/part/g_part_mbr.c index 63dcac4..825a109 100644 --- a/sys/geom/part/g_part_mbr.c +++ b/sys/geom/part/g_part_mbr.c @@ -253,15 +253,14 @@ 
g_part_mbr_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_mbr_table *table; - uint32_t msize; pp = gpp->gpp_provider; if (pp->sectorsize < MBRSIZE) return (ENOSPC); - msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); basetable->gpt_first = basetable->gpt_sectors; - basetable->gpt_last = msize - (msize % basetable->gpt_sectors) - 1; + basetable->gpt_last = MIN(pp->mediasize / pp->sectorsize, + UINT32_MAX) - 1; table = (struct g_part_mbr_table *)basetable; le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC); @@ -424,12 +423,13 @@ g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp) struct g_part_mbr_table *table; struct g_part_mbr_entry *entry; u_char *buf, *p; - off_t chs, msize; + off_t chs, msize, first; u_int sectors, heads; int error, index; pp = cp->provider; table = (struct g_part_mbr_table *)basetable; + first = basetable->gpt_sectors; msize = MIN(pp->mediasize / pp->sectorsize, UINT32_MAX); buf = g_read_data(cp, 0L, pp->sectorsize, &error); @@ -462,7 +462,8 @@ g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp) basetable->gpt_heads = heads; } } - + if (ent.dp_start < first) + first = ent.dp_start; entry = (struct g_part_mbr_entry *)g_part_new_entry(basetable, index + 1, ent.dp_start, ent.dp_start + ent.dp_size - 1); entry->ent = ent; @@ -470,7 +471,10 @@ g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp) basetable->gpt_entries = NDOSPART; basetable->gpt_first = basetable->gpt_sectors; - basetable->gpt_last = msize - (msize % basetable->gpt_sectors) - 1; + basetable->gpt_last = msize - 1; + + if (first < basetable->gpt_first) + basetable->gpt_first = 1; g_free(buf); return (0); diff --git a/sys/geom/part/g_part_pc98.c b/sys/geom/part/g_part_pc98.c index 0bdc0a6..0d81a0e 100644 --- a/sys/geom/part/g_part_pc98.c +++ b/sys/geom/part/g_part_pc98.c @@ -248,7 +248,6 @@ g_part_pc98_create(struct g_part_table *basetable, struct g_part_parms *gpp) { 
struct g_provider *pp; struct g_part_pc98_table *table; - uint32_t cyl, msize; pp = gpp->gpp_provider; if (pp->sectorsize < SECSIZE || pp->mediasize < BOOTSIZE) @@ -256,11 +255,8 @@ g_part_pc98_create(struct g_part_table *basetable, struct g_part_parms *gpp) if (pp->sectorsize > SECSIZE) return (ENXIO); - cyl = basetable->gpt_heads * basetable->gpt_sectors; - - msize = MIN(pp->mediasize / SECSIZE, UINT32_MAX); - basetable->gpt_first = cyl; - basetable->gpt_last = msize - (msize % cyl) - 1; + basetable->gpt_first = basetable->gpt_heads * basetable->gpt_sectors; + basetable->gpt_last = MIN(pp->mediasize / SECSIZE, UINT32_MAX) - 1; table = (struct g_part_pc98_table *)basetable; le16enc(table->boot + DOSMAGICOFFSET, DOSMAGIC); @@ -488,7 +484,7 @@ g_part_pc98_read(struct g_part_table *basetable, struct g_consumer *cp) basetable->gpt_entries = NDOSPART; basetable->gpt_first = cyl; - basetable->gpt_last = msize - (msize % cyl) - 1; + basetable->gpt_last = msize - 1; g_free(buf); return (0); diff --git a/sys/geom/vinum/geom_vinum_drive.c b/sys/geom/vinum/geom_vinum_drive.c index 5ab68f3..f782fd0 100644 --- a/sys/geom/vinum/geom_vinum_drive.c +++ b/sys/geom/vinum/geom_vinum_drive.c @@ -126,6 +126,10 @@ gv_read_header(struct g_consumer *cp, struct gv_hdr *m_hdr) pp = cp->provider; KASSERT(pp != NULL, ("gv_read_header: null pp")); + if ((GV_HDR_OFFSET % pp->sectorsize) != 0 || + (GV_HDR_LEN % pp->sectorsize) != 0) + return (ENODEV); + d_hdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL); if (d_hdr == NULL) return (-1); diff --git a/sys/geom/vinum/geom_vinum_events.c b/sys/geom/vinum/geom_vinum_events.c index fcd45f1..db4e543 100644 --- a/sys/geom/vinum/geom_vinum_events.c +++ b/sys/geom/vinum/geom_vinum_events.c @@ -109,6 +109,12 @@ gv_drive_tasted(struct gv_softc *sc, struct g_provider *pp) buf = NULL; G_VINUM_DEBUG(2, "tasted drive on '%s'", pp->name); + if ((GV_CFG_OFFSET % pp->sectorsize) != 0 || + (GV_CFG_LEN % pp->sectorsize) != 0) { + G_VINUM_DEBUG(0, "provider 
%s has unsupported sectorsize.", + pp->name); + return; + } gp = sc->geom; g_topology_lock(); diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index ec5b113..c4548c6 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -308,6 +308,7 @@ options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) +device xhci # XHCI PCI->USB interface (USB 3.0) device usb # USB Bus (required) #device udbp # USB Double Bulk Pipe devices (needs netgraph) device uhid # "Human Interface Devices" @@ -350,3 +351,11 @@ device fwe # Ethernet over FireWire (non-standard!) device fwip # IP over FireWire (RFC 2734,3146) device dcons # Dumb console driver device dcons_crom # Configuration ROM for dcons + +# Sound support +device sound # Generic sound driver (required) +device snd_es137x # Ensoniq AudioPCI ES137x +device snd_hda # Intel High Definition Audio +device snd_ich # Intel, NVidia and other ICH AC'97 Audio +device snd_uaudio # USB Audio +device snd_via8233 # VIA VT8233x Audio diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c index 77b8004..56529f7 100644 --- a/sys/i386/i386/intr_machdep.c +++ b/sys/i386/i386/intr_machdep.c @@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. 
*/ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -432,7 +431,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused) return; #endif + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/i386/i386/legacy.c b/sys/i386/i386/legacy.c index 2136d80..6fe5700 100644 --- a/sys/i386/i386/legacy.c +++ b/sys/i386/i386/legacy.c @@ -86,6 +86,7 @@ static device_method_t legacy_methods[] = { DEVMETHOD(bus_read_ivar, legacy_read_ivar), DEVMETHOD(bus_write_ivar, legacy_write_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index fbf444a..91050c4 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" +#include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_xbox.h" @@ -118,6 +119,7 @@ __FBSDID("$FreeBSD$"); #include <x86/mca.h> #include <machine/md_var.h> #include <machine/metadata.h> +#include <machine/mp_watchdog.h> #include <machine/pc/bios.h> #include <machine/pcb.h> #include <machine/pcb_ext.h> @@ -1357,9 +1359,8 @@ cpu_idle(int busy) CTR2(KTR_SPARE2, 
"cpu_idle(%d) at %d", busy, curcpu); -#if defined(SMP) && !defined(XEN) - if (mp_grab_cpu_hlt()) - return; +#if defined(MP_WATCHDOG) && !defined(XEN) + ap_watchdog(PCPU_GET(cpuid)); #endif #ifndef XEN /* If we are busy - try to use fast methods. */ diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index a4db401..78c90c0 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" -#include "opt_mp_watchdog.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" @@ -51,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/cons.h> /* cngetc() */ +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -77,7 +77,6 @@ __FBSDID("$FreeBSD$"); #include <machine/cputypes.h> #include <x86/mca.h> #include <machine/md_var.h> -#include <machine/mp_watchdog.h> #include <machine/pcb.h> #include <machine/psl.h> #include <machine/smp.h> @@ -173,7 +172,7 @@ static u_long *ipi_hardclock_counts[MAXCPU]; * Local data and functions. */ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -208,11 +207,8 @@ static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); -static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; -static struct sysctl_ctx_list logical_cpu_clist; static void mem_range_AP_init(void) @@ -289,8 +285,11 @@ topo_probe_0x4(void) * logical processors that belong to the same core * as BSP thus deducing number of threads per core. 
*/ - cpuid_count(0x04, 0, p); - max_cores = ((p[0] >> 26) & 0x3f) + 1; + if (cpu_high >= 0x4) { + cpuid_count(0x04, 0, p); + max_cores = ((p[0] >> 26) & 0x3f) + 1; + } else + max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; @@ -382,7 +381,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -524,7 +523,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -659,6 +658,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; vm_offset_t addr; int gsel_tss; @@ -783,19 +783,17 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); - - /* Determine if we are a hyperthread. */ - if (hyperthreading_cpus > 1 && - PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -874,7 +872,7 @@ assign_cpu_ids(void) if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; -#if defined(SCHED_ULE) + /* * Don't use HT CPU if it has been disabled by a * tunable. 
@@ -883,7 +881,6 @@ assign_cpu_ids(void) cpu_info[i].cpu_disabled = 1; continue; } -#endif } /* Don't use this CPU if it has been disabled by a tunable. */ @@ -893,6 +890,11 @@ assign_cpu_ids(void) } } + if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { + hyperthreading_cpus = 0; + cpu_logical = 1; + } + /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. @@ -932,6 +934,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus; #ifndef PC98 u_char mpbiosreason; #endif @@ -991,11 +994,13 @@ start_all_aps(void) } CHECK_PRINT("trace"); /* show checkpoints */ - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1192,6 +1197,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, #endif /* COUNT_XINVLTLB_HITS */ /* + * Send an IPI to specified CPU handling the bitmap logic. 
+ */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + +/* * Flush the TLB on all other CPU's */ static void @@ -1215,28 +1244,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1245,39 +1265,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: 
%x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1324,7 +1330,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1336,7 +1342,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1348,7 +1354,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1401,7 +1407,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1411,12 +1417,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. 
*/ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1434,7 +1440,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1447,8 +1453,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1458,7 +1466,9 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } @@ -1466,7 +1476,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1474,11 +1484,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1490,23 +1502,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1530,158 +1544,6 @@ release_aps(void *dummy __unused) } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); -static int -sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) -{ - cpumask_t mask; - int error; - - mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); - if (error || !req->newptr) - return (error); - - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if (! hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; - - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); - hlt_cpus_mask = mask; - return (error); -} -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", - "Bitmap of CPUs to halt. 
101 (binary) will halt CPUs 0 and 2."); - -static int -sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) -{ - int disable, error; - - disable = hlt_logical_cpus; - error = sysctl_handle_int(oidp, &disable, 0, req); - if (error || !req->newptr) - return (error); - - if (disable) - hlt_cpus_mask |= logical_cpus_mask; - else - hlt_cpus_mask &= ~logical_cpus_mask; - - if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hlt_logical_cpus = disable; - return (error); -} - -static int -sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) -{ - int allowed, error; - - allowed = hyperthreading_allowed; - error = sysctl_handle_int(oidp, &allowed, 0, req); - if (error || !req->newptr) - return (error); - -#ifdef SCHED_ULE - /* - * SCHED_ULE doesn't allow enabling/disabling HT cores at - * run-time. - */ - if (allowed != hyperthreading_allowed) - return (ENOTSUP); - return (error); -#endif - - if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; - else - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hyperthreading_allowed = allowed; - return (error); -} - -static void -cpu_hlt_setup(void *dummy __unused) -{ - - if (logical_cpus_mask != 0) { - TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", - &hlt_logical_cpus); - sysctl_ctx_init(&logical_cpu_clist); - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, - sysctl_hlt_logical_cpus, "IU", ""); - SYSCTL_ADD_UINT(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, - &logical_cpus_mask, 0, ""); - - if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; - - /* - * If 
necessary for security purposes, force - * hyperthreading off, regardless of the value - * of hlt_logical_cpus. - */ - if (hyperthreading_cpus_mask) { - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hyperthreading_allowed, "IU", ""); - if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - } - } -} -SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); - -int -mp_grab_cpu_hlt(void) -{ - cpumask_t mask; -#ifdef MP_WATCHDOG - u_int cpuid; -#endif - int retval; - - mask = PCPU_GET(cpumask); -#ifdef MP_WATCHDOG - cpuid = PCPU_GET(cpuid); - ap_watchdog(cpuid); -#endif - - retval = 0; - while (mask & hlt_cpus_mask) { - retval = 1; - __asm __volatile("sti; hlt" : : : "memory"); - } - return (retval); -} - #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index d10bbe5..3f9248d 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> +#else +#include <sys/cpuset.h> #endif #include <vm/vm.h> @@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); @@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = 
PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); 
} @@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg) pd_entry_t *pde; pmap_t pmap; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); + /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being @@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg) pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } + } else + sched_unpin(); } static void @@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1058,21 +1075,23 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - 
smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap == kernel_pmap ? pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); @@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1095,7 +1114,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1113,7 +1132,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap) #endif } - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof 
pmap->pm_stats); @@ -1886,7 +1905,7 @@ retry: * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. This can be a bit hairy. */ -static cpumask_t *lazymask; +static cpuset_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; @@ -1895,36 +1914,42 @@ void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { - cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); atomic_store_rel_int(&lazywait, 1); } static void -pmap_lazyfix_self(cpumask_t mymask) +pmap_lazyfix_self(cpuset_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_NAND_ATOMIC(lazymask, &mymask); } static void pmap_lazyfix(pmap_t pmap) { - cpumask_t mymask, mask; + cpuset_t mymask, mask; u_int spins; + int lsb; - while ((mask = pmap->pm_active) != 0) { + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ + + /* Find least significant set bit. 
*/ + lsb = cpusetobj_ffs(&mask); + MPASS(lsb != 0); + lsb--; + CPU_SETOF(lsb, &mask); mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); @@ -1932,7 +1957,7 @@ pmap_lazyfix(pmap_t pmap) lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); - if (mask == mymask) { + if (!CPU_CMP(&mask, &mymask)) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { @@ -1949,6 +1974,7 @@ pmap_lazyfix(pmap_t pmap) mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; } } @@ -1968,7 +1994,7 @@ pmap_lazyfix(pmap_t pmap) cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); } } #endif /* SMP */ @@ -5078,11 +5104,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 232e1a1..a084e09 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -573,11 +573,13 @@ kvtop(void *addr) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<<cpu_reset_proxyid)); + CPU_SETOF(cpu_reset_proxyid, &tcrp); + stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); @@ -596,25 +598,29 @@ cpu_reset() 
#endif #ifdef SMP - cpumask_t map; + cpuset_t map; u_int cnt; if (smp_active) { - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map != 0) { + sched_pin(); + map = PCPU_GET(other_cpus); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); + sched_unpin(); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ /* XXX: restart_cpus(1 << 0); */ - atomic_store_rel_int(&started_cpus, (1 << 0)); + CPU_SETOF(0, &started_cpus); + wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) @@ -626,7 +632,8 @@ cpu_reset() while (1); /* NOTREACHED */ - } + } else + sched_unpin(); DELAY(1000000); } @@ -795,7 +802,7 @@ sf_buf_alloc(struct vm_page *m, int flags) struct sf_head *hash_list; struct sf_buf *sf; #ifdef SMP - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; #endif int error; @@ -867,22 +874,23 @@ sf_buf_alloc(struct vm_page *m, int flags) */ #ifdef SMP if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - sf->cpumask = 0; + CPU_ZERO(&sf->cpumask); shootdown: sched_pin(); cpumask = PCPU_GET(cpumask); - if ((sf->cpumask & cpumask) == 0) { - sf->cpumask |= cpumask; + if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) { + CPU_OR(&sf->cpumask, &cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { - other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask; - if (other_cpus != 0) { - sf->cpumask |= other_cpus; + other_cpus = PCPU_GET(other_cpus); + CPU_NAND(&other_cpus, &sf->cpumask); + if (!CPU_EMPTY(&other_cpus)) { + CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva); } } - sched_unpin(); + sched_unpin(); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h index 7a969fe..3194fd6 100644 --- a/sys/i386/include/_types.h +++ 
b/sys/i386/include/_types.h @@ -69,7 +69,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef unsigned long __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef long double __double_t; typedef long double __float_t; diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index eeada2e..3012a00 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -155,6 +155,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -433,7 +434,7 @@ struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #ifdef PAE diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h index 7bc1095..415dcbb 100644 --- a/sys/i386/include/sf_buf.h +++ b/sys/i386/include/sf_buf.h @@ -29,6 +29,7 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ +#include <sys/_cpuset.h> #include <sys/queue.h> struct vm_page; @@ -40,7 +41,7 @@ struct sf_buf { vm_offset_t kva; /* va of mapping */ int ref_count; /* usage of this mapping */ #ifdef SMP - cpumask_t cpumask; /* cpus on which mapping is valid */ + cpuset_t cpumask; /* cpus on which mapping is valid */ #endif }; diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index d364cd9..04d67c9 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -66,17 +66,16 @@ void ipi_bitmap_handler(struct trapframe frame); #endif void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); -int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); 
-void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #ifdef XEN void ipi_to_irq_init(void); diff --git a/sys/i386/pci/pci_cfgreg.c b/sys/i386/pci/pci_cfgreg.c index ae56990..ac641a8 100644 --- a/sys/i386/pci/pci_cfgreg.c +++ b/sys/i386/pci/pci_cfgreg.c @@ -553,7 +553,7 @@ pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus) (uintmax_t)base); #ifdef SMP - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) #endif { diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 2919570..2d05596 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/cons.h> /* cngetc() */ +#include <sys/cpuset.h> #ifdef GPROF #include <sys/gmon.h> #endif @@ -116,7 +117,7 @@ volatile int smp_tlb_wait; typedef void call_data_func_t(uintptr_t , uintptr_t); static u_int logical_cpus; -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -149,7 +150,7 @@ static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); @@ -239,7 +240,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. 
*/ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -293,7 +294,8 @@ cpu_mp_start(void) start_all_aps(); /* Setup the initial logical CPUs info. */ - logical_cpus = logical_cpus_mask = 0; + logical_cpus = 0; + CPU_ZERO(&logical_cpus_mask); if (cpu_feature & CPUID_HTT) logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; @@ -521,6 +523,7 @@ xen_smp_intr_init_cpus(void *unused) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; vm_offset_t addr; int gsel_tss; @@ -600,18 +603,21 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. 
*/ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); #if 0 if (bootverbose) lapic_dump("AP"); @@ -725,6 +731,7 @@ assign_cpu_ids(void) int start_all_aps(void) { + cpuset_t tallcpus; int x,apic_id, cpu; struct pcpu *pc; @@ -778,12 +785,14 @@ start_all_aps(void) panic("bye-bye"); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); @@ -1012,29 +1021,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; struct _call_data data; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + critical_enter(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + critical_exit(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1046,10 +1046,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu 
= othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; @@ -1092,7 +1102,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1101,7 +1111,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1110,7 +1120,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1122,7 +1132,7 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1132,11 +1142,11 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } @@ -1155,7 +1165,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. 
*/ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1167,23 +1177,27 @@ ipi_cpu(int cpu, u_int ipi) void ipi_all_but_self(u_int ipi) { + cpuset_t other_cpus; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ + sched_pin(); + other_cpus = PCPU_GET(other_cpus); + sched_unpin(); if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - ipi_selected(PCPU_GET(other_cpus), ipi); + ipi_selected(other_cpus, ipi); } int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1191,11 +1205,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1207,20 +1223,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - int cpu = PCPU_GET(cpuid); - int cpumask = PCPU_GET(cpumask); + cpuset_t cpumask; + int cpu; + + sched_pin(); + cpumask = PCPU_GET(cpumask); + cpu = PCPU_GET(cpuid); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index eb3c803..3efa4f1 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); @@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); 
smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x", pmap, sva, eva); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if 
(CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); PT_UPDATES_FLUSH(); } @@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); PT_UPDATES_FLUSH(); @@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap) CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap) #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap) } xen_flush_queue(); vm_page_unlock_queues(); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1686,7 +1688,7 @@ retry: * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. This can be a bit hairy. 
*/ -static cpumask_t *lazymask; +static cpuset_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; @@ -1695,36 +1697,42 @@ void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { - cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); atomic_store_rel_int(&lazywait, 1); } static void -pmap_lazyfix_self(cpumask_t mymask) +pmap_lazyfix_self(cpuset_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_NAND_ATOMIC(lazymask, &mymask); } static void pmap_lazyfix(pmap_t pmap) { - cpumask_t mymask, mask; + cpuset_t mymask, mask; u_int spins; + int lsb; - while ((mask = pmap->pm_active) != 0) { + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ + + /* Find least significant set bit. 
*/ + lsb = cpusetobj_ffs(&mask); + MPASS(lsb != 0); + lsb--; + CPU_SETOF(lsb, &mask); mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); @@ -1732,7 +1740,7 @@ pmap_lazyfix(pmap_t pmap) lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); - if (mask == mymask) { + if (!CPU_CMP(&mask, &mymask)) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { @@ -1749,6 +1757,7 @@ pmap_lazyfix(pmap_t pmap) mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; } } @@ -1768,7 +1777,7 @@ pmap_lazyfix(pmap_t pmap) cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); } } #endif /* SMP */ @@ -4123,11 +4132,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/ia64/acpica/acpi_machdep.c b/sys/ia64/acpica/acpi_machdep.c index b7b612f..1466cfe 100644 --- a/sys/ia64/acpica/acpi_machdep.c +++ b/sys/ia64/acpica/acpi_machdep.c @@ -56,7 +56,14 @@ acpi_machdep_quirks(int *quirks) void acpi_cpu_c1() { +#ifdef INVARIANTS + register_t ie; + + ie = intr_disable(); + KASSERT(ie == 0, ("%s called with interrupts enabled\n", __func__)); +#endif ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); + ia64_enable_intr(); } void * diff --git a/sys/ia64/ia64/busdma_machdep.c b/sys/ia64/ia64/busdma_machdep.c index 7554d31..d7152df 100644 --- 
a/sys/ia64/ia64/busdma_machdep.c +++ b/sys/ia64/ia64/busdma_machdep.c @@ -51,21 +51,21 @@ __FBSDID("$FreeBSD$"); #define MAX_BPAGES 1024 struct bus_dma_tag { - bus_dma_tag_t parent; - bus_size_t alignment; - bus_size_t boundary; - bus_addr_t lowaddr; - bus_addr_t highaddr; + bus_dma_tag_t parent; + bus_size_t alignment; + bus_size_t boundary; + bus_addr_t lowaddr; + bus_addr_t highaddr; bus_dma_filter_t *filter; - void *filterarg; - bus_size_t maxsize; - u_int nsegments; - bus_size_t maxsegsz; - int flags; - int ref_count; - int map_count; - bus_dma_lock_t *lockfunc; - void *lockfuncarg; + void *filterarg; + bus_size_t maxsize; + u_int nsegments; + bus_size_t maxsegsz; + int flags; + int ref_count; + int map_count; + bus_dma_lock_t *lockfunc; + void *lockfuncarg; bus_dma_segment_t *segments; }; @@ -90,27 +90,27 @@ static int total_deferred; SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, free_bpages, CTLFLAG_RD, &free_bpages, 0, - "Free bounce pages"); + "Free bounce pages"); SYSCTL_INT(_hw_busdma, OID_AUTO, reserved_bpages, CTLFLAG_RD, &reserved_bpages, - 0, "Reserved bounce pages"); + 0, "Reserved bounce pages"); SYSCTL_INT(_hw_busdma, OID_AUTO, active_bpages, CTLFLAG_RD, &active_bpages, 0, - "Active bounce pages"); + "Active bounce pages"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, - "Total bounce pages"); + "Total bounce pages"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bounced, CTLFLAG_RD, &total_bounced, 0, - "Total bounce requests"); -SYSCTL_INT(_hw_busdma, OID_AUTO, total_deferred, CTLFLAG_RD, &total_deferred, 0, - "Total bounce requests that were deferred"); + "Total bounce requests"); +SYSCTL_INT(_hw_busdma, OID_AUTO, total_deferred, CTLFLAG_RD, &total_deferred, + 0, "Total bounce requests that were deferred"); struct bus_dmamap { - struct bp_list bpages; - int pagesneeded; - int pagesreserved; - bus_dma_tag_t dmat; - void *buf; /* unmapped buffer pointer */ - 
bus_size_t buflen; /* unmapped buffer length */ + struct bp_list bpages; + int pagesneeded; + int pagesreserved; + bus_dma_tag_t dmat; + void *buf; /* unmapped buffer pointer */ + bus_size_t buflen; /* unmapped buffer length */ bus_dmamap_callback_t *callback; - void *callback_arg; + void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; }; @@ -121,12 +121,12 @@ static struct bus_dmamap nobounce_dmamap; static void init_bounce_pages(void *dummy); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, - int commit); + int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, - vm_offset_t vaddr, bus_size_t size); + vm_offset_t vaddr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr, - bus_size_t len); + bus_size_t len); /* * Return true if a match is made. @@ -144,16 +144,14 @@ run_filter(bus_dma_tag_t dmat, bus_addr_t paddr, bus_size_t len) retval = 0; bndy = dmat->boundary; - do { - if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) - || ((paddr & (dmat->alignment - 1)) != 0) - || ((paddr & bndy) != ((paddr + len) & bndy))) - && (dmat->filter == NULL - || (*dmat->filter)(dmat->filterarg, paddr) != 0)) + if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || + (paddr & (dmat->alignment - 1)) != 0 || + (paddr & bndy) != ((paddr + len) & bndy)) && + (dmat->filter == NULL || + (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; - - dmat = dmat->parent; + dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } @@ -195,16 +193,16 @@ dflt_lock(void *arg, bus_dma_lock_op_t op) } #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 + /* * Allocate a device specific dma_tag. 
*/ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, - bus_size_t boundary, bus_addr_t lowaddr, - bus_addr_t highaddr, bus_dma_filter_t *filter, - void *filterarg, bus_size_t maxsize, int nsegments, - bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, - void *lockfuncarg, bus_dma_tag_t *dmat) + bus_size_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, + bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, + int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; @@ -250,7 +248,7 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, - newtag->boundary); + newtag->boundary); if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly @@ -279,7 +277,7 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } - + if (error != 0) { free(newtag, M_DEVBUF); } else { @@ -347,7 +345,7 @@ bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) int maxpages; *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, - M_NOWAIT | M_ZERO); + M_NOWAIT | M_ZERO); if (*mapp == NULL) return (ENOMEM); @@ -408,7 +406,7 @@ bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, - bus_dmamap_t *mapp) + bus_dmamap_t *mapp) { int mflags; @@ -430,7 +428,7 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; - /* + /* * XXX: * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact * alignment guarantees of malloc need to be nailed down, and the @@ -489,15 +487,9 @@ bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) * first indicates if this is the first invocation of this 
function. */ static int -_bus_dmamap_load_buffer(bus_dma_tag_t dmat, - bus_dmamap_t map, - void *buf, bus_size_t buflen, - struct thread *td, - int flags, - bus_addr_t *lastaddrp, - bus_dma_segment_t *segs, - int *segp, - int first) +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, struct thread *td, int flags, bus_addr_t *lastaddrp, + bus_dma_segment_t *segs, int *segp, int first) { bus_size_t sgsize; bus_addr_t curaddr, lastaddr, baddr, bmask; @@ -607,7 +599,7 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, if (curaddr == lastaddr && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) @@ -636,11 +628,11 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, */ int bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, - bus_size_t buflen, bus_dmamap_callback_t *callback, - void *callback_arg, int flags) + bus_size_t buflen, bus_dmamap_callback_t *callback, void *callback_arg, + int flags) { - bus_addr_t lastaddr = 0; - int error, nsegs = 0; + bus_addr_t lastaddr = 0; + int error, nsegs = 0; if (map != NULL) { flags |= BUS_DMA_WAITOK; @@ -666,10 +658,8 @@ bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, * Like _bus_dmamap_load(), but for mbufs. 
*/ int -bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, - struct mbuf *m0, - bus_dmamap_callback2_t *callback, void *callback_arg, - int flags) +bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0, + bus_dmamap_callback2_t *callback, void *callback_arg, int flags) { int nsegs, error; @@ -686,9 +676,8 @@ bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, for (m = m0; m != NULL && error == 0; m = m->m_next) { if (m->m_len > 0) { error = _bus_dmamap_load_buffer(dmat, map, - m->m_data, m->m_len, - NULL, flags, &lastaddr, - dmat->segments, &nsegs, first); + m->m_data, m->m_len, NULL, flags, + &lastaddr, dmat->segments, &nsegs, first); first = 0; } } @@ -707,9 +696,8 @@ bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, } int -bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, - struct mbuf *m0, bus_dma_segment_t *segs, - int *nsegs, int flags) +bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0, + bus_dma_segment_t *segs, int *nsegs, int flags) { int error; @@ -726,9 +714,8 @@ bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, for (m = m0; m != NULL && error == 0; m = m->m_next) { if (m->m_len > 0) { error = _bus_dmamap_load_buffer(dmat, map, - m->m_data, m->m_len, - NULL, flags, &lastaddr, - segs, nsegs, first); + m->m_data, m->m_len, NULL, flags, + &lastaddr, segs, nsegs, first); first = 0; } } @@ -744,10 +731,8 @@ bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, * Like _bus_dmamap_load(), but for uios. 
*/ int -bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, - struct uio *uio, - bus_dmamap_callback2_t *callback, void *callback_arg, - int flags) +bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio, + bus_dmamap_callback2_t *callback, void *callback_arg, int flags) { bus_addr_t lastaddr; int nsegs, error, first, i; @@ -826,8 +811,7 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) if (op & BUS_DMASYNC_PREWRITE) { while (bpage != NULL) { bcopy((void *)bpage->datavaddr, - (void *)bpage->vaddr, - bpage->datacount); + (void *)bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } total_bounced++; @@ -836,8 +820,7 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) if (op & BUS_DMASYNC_POSTREAD) { while (bpage != NULL) { bcopy((void *)bpage->vaddr, - (void *)bpage->datavaddr, - bpage->datacount); + (void *)bpage->datavaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } total_bounced++; @@ -870,15 +853,11 @@ alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, - M_NOWAIT | M_ZERO); - + M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, - M_NOWAIT, 0ul, - dmat->lowaddr, - PAGE_SIZE, - dmat->boundary); + M_NOWAIT, 0ul, dmat->lowaddr, PAGE_SIZE, dmat->boundary); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; @@ -914,7 +893,7 @@ reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, - bus_size_t size) + bus_size_t size) { struct bounce_page *bpage; @@ -974,8 +953,8 @@ free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); - 
STAILQ_INSERT_TAIL(&bounce_map_callbacklist, - map, links); + STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, + links); busdma_swi_pending = 1; total_deferred++; swi_sched(vm_ih, 0); @@ -997,7 +976,7 @@ busdma_swi(void) dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load(map->dmat, map, map->buf, map->buflen, - map->callback, map->callback_arg, /*flags*/0); + map->callback, map->callback_arg, /*flags*/0); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c index 41d2211..f3105fc 100644 --- a/sys/ia64/ia64/machdep.c +++ b/sys/ia64/ia64/machdep.c @@ -232,6 +232,9 @@ identifycpu(void) case 0x00: model_name = "Montecito"; break; + case 0x01: + model_name = "Montvale"; + break; } break; } @@ -316,7 +319,7 @@ cpu_startup(void *dummy) /* * Create sysctl tree for per-CPU information. */ - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { snprintf(nodename, sizeof(nodename), "%u", pc->pc_cpuid); sysctl_ctx_init(&pc->pc_md.sysctl_ctx); pc->pc_md.sysctl_tree = SYSCTL_ADD_NODE(&pc->pc_md.sysctl_ctx, @@ -411,12 +414,34 @@ cpu_halt() void cpu_idle(int busy) { - struct ia64_pal_result res; + register_t ie; - if (cpu_idle_hook != NULL) +#if 0 + if (!busy) { + critical_enter(); + cpu_idleclock(); + } +#endif + + ie = intr_disable(); + KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__)); + + if (sched_runnable()) + ia64_enable_intr(); + else if (cpu_idle_hook != NULL) { (*cpu_idle_hook)(); - else - res = ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); + /* The hook must enable interrupts! 
*/ + } else { + ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); + ia64_enable_intr(); + } + +#if 0 + if (!busy) { + cpu_activeclock(); + critical_exit(); + } +#endif } int @@ -644,9 +669,12 @@ calculate_frequencies(void) { struct ia64_sal_result sal; struct ia64_pal_result pal; + register_t ie; + ie = intr_disable(); sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0); pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0); + intr_restore(ie); if (sal.sal_status == 0 && pal.pal_status == 0) { if (bootverbose) { @@ -761,6 +789,8 @@ ia64_init(void) ia64_sal_init(); calculate_frequencies(); + set_cputicker(ia64_get_itc, (u_long)itc_freq * 1000000, 0); + /* * Setup the PCPU data for the bootstrap processor. It is needed * by printf(). Also, since printf() has critical sections, we diff --git a/sys/ia64/ia64/mp_machdep.c b/sys/ia64/ia64/mp_machdep.c index 5804f8c..15afea0 100644 --- a/sys/ia64/ia64/mp_machdep.c +++ b/sys/ia64/ia64/mp_machdep.c @@ -139,18 +139,18 @@ ia64_ih_rndzvs(struct thread *td, u_int xiv, struct trapframe *tf) static u_int ia64_ih_stop(struct thread *td, u_int xiv, struct trapframe *tf) { - cpumask_t mybit; + cpuset_t mybit; PCPU_INC(md.stats.pcs_nstops); mybit = PCPU_GET(cpumask); savectx(PCPU_PTR(md.pcb)); - atomic_set_int(&stopped_cpus, mybit); - while ((started_cpus & mybit) == 0) + CPU_OR_ATOMIC(&stopped_cpus, &mybit); + while (!CPU_OVERLAP(&started_cpus, &mybit)) cpu_spinwait(); - atomic_clear_int(&started_cpus, mybit); - atomic_clear_int(&stopped_cpus, mybit); + CPU_NAND_ATOMIC(&started_cpus, &mybit); + CPU_NAND_ATOMIC(&stopped_cpus, &mybit); return (0); } @@ -286,7 +286,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid) cpuid = (IA64_LID_GET_SAPIC_ID(ia64_get_lid()) == sapic_id) ? 
0 : smp_cpus++; - KASSERT((all_cpus & (1UL << cpuid)) == 0, + KASSERT(!CPU_ISSET(cpuid, &all_cpus), ("%s: cpu%d already in CPU map", __func__, acpi_id)); if (cpuid != 0) { @@ -300,7 +300,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid) pc->pc_acpi_id = acpi_id; pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id); - all_cpus |= (1UL << pc->pc_cpuid); + CPU_SET(pc->pc_cpuid, &all_cpus); } void @@ -357,9 +357,10 @@ cpu_mp_start() /* Keep 'em spinning until we unleash them... */ ia64_ap_state.as_spin = 1; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { pc->pc_md.current_pmap = kernel_pmap; - pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask; + pc->pc_other_cpus = all_cpus; + CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask); /* The BSP is obviously running already. */ if (pc->pc_cpuid == 0) { pc->pc_md.awake = 1; @@ -424,7 +425,7 @@ cpu_mp_unleash(void *dummy) cpus = 0; smp_cpus = 0; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { cpus++; if (pc->pc_md.awake) { kproc_create(ia64_store_mca_state, pc, NULL, 0, 0, @@ -458,12 +459,12 @@ cpu_mp_unleash(void *dummy) * send an IPI to a set of cpus. 
*/ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { - if (cpus & pc->pc_cpumask) + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) ipi_send(pc, ipi); } } @@ -486,7 +487,7 @@ ipi_all_but_self(int ipi) { struct pcpu *pc; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) ipi_send(pc, ipi); } diff --git a/sys/ia64/ia64/pal.S b/sys/ia64/ia64/pal.S index 2f0d0da..2e3f4cd 100644 --- a/sys/ia64/ia64/pal.S +++ b/sys/ia64/ia64/pal.S @@ -38,43 +38,40 @@ ia64_pal_entry: .quad 0 * u_int64_t arg1, u_int64_t arg2, u_int64_t arg3) */ ENTRY(ia64_call_pal_static, 4) - - .regstk 4,5,0,0 + + .regstk 4,4,0,0 palret = loc0 entry = loc1 rpsave = loc2 pfssave = loc3 -psrsave = loc4 - alloc pfssave=ar.pfs,4,5,0,0 + alloc pfssave=ar.pfs,4,4,0,0 ;; mov rpsave=rp - movl entry=@gprel(ia64_pal_entry) + 1: mov palret=ip // for return address ;; add entry=entry,gp - mov psrsave=psr + add palret=2f-1b,palret // calculate return address mov r28=in0 // procedure number - ;; - ld8 entry=[entry] // read entry point mov r29=in1 // copy arguments mov r30=in2 mov r31=in3 ;; - mov b6=entry - add palret=2f-1b,palret // calculate return address - ;; + ld8 entry=[entry] // read entry point mov b0=palret - rsm psr.i // disable interrupts + ;; + mov b6=entry ;; br.cond.sptk b6 // call into firmware -2: mov psr.l=psrsave + ;; +2: mov rp=rpsave mov ar.pfs=pfssave ;; - srlz.d br.ret.sptk rp + ;; END(ia64_call_pal_static) /* diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 5f10ad6..625d0af7 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -535,7 +535,7 @@ pmap_invalidate_page(vm_offset_t va) critical_enter(); vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt); tag = ia64_ttag(va); - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { pte = (struct ia64_lpte *)(pc->pc_md.vhpt 
+ vhpt_ofs); atomic_cmpset_64(&pte->tag, tag, 1UL << 63); } @@ -559,10 +559,14 @@ pmap_invalidate_page(vm_offset_t va) } while (sem != tag); ia64_ptc_ga(va, PAGE_SHIFT << 2); + ia64_mf(); + ia64_srlz_i(); /* PTC.G leave exclusive */ atomic_store_rel_long(&pmap_ptc_g_sem, 0); + ia64_invala(); + intr_restore(is); critical_exit(); } diff --git a/sys/ia64/include/_types.h b/sys/ia64/include/_types.h index 8fc1be2..0c2f5cc 100644 --- a/sys/ia64/include/_types.h +++ b/sys/ia64/include/_types.h @@ -59,7 +59,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/ia64/include/ia64_cpu.h b/sys/ia64/include/ia64_cpu.h index bb8284d..74e649b 100644 --- a/sys/ia64/include/ia64_cpu.h +++ b/sys/ia64/include/ia64_cpu.h @@ -266,7 +266,7 @@ ia64_ptc_e(uint64_t v) static __inline void ia64_ptc_g(uint64_t va, uint64_t log2size) { - __asm __volatile("ptc.g %0,%1;; srlz.i;;" :: "r"(va), "r"(log2size)); + __asm __volatile("ptc.g %0,%1;;" :: "r"(va), "r"(log2size)); } /* @@ -275,7 +275,7 @@ ia64_ptc_g(uint64_t va, uint64_t log2size) static __inline void ia64_ptc_ga(uint64_t va, uint64_t log2size) { - __asm __volatile("ptc.ga %0,%1;; srlz.i;;" :: "r"(va), "r"(log2size)); + __asm __volatile("ptc.ga %0,%1;;" :: "r"(va), "r"(log2size)); } /* @@ -288,6 +288,15 @@ ia64_ptc_l(uint64_t va, uint64_t log2size) } /* + * Invalidate the ALAT on the local processor. + */ +static __inline void +ia64_invala(void) +{ + __asm __volatile("invala;;"); +} + +/* * Unordered memory load. 
*/ diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h index 26557a7..d2aff76 100644 --- a/sys/ia64/include/smp.h +++ b/sys/ia64/include/smp.h @@ -14,6 +14,8 @@ #ifndef LOCORE +#include <sys/_cpuset.h> + struct pcpu; struct ia64_ap_state { @@ -44,7 +46,7 @@ extern int ia64_ipi_wakeup; void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); void ipi_send(struct pcpu *, int ipi); #endif /* !LOCORE */ diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 2f889ca..3908da7 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -103,7 +103,7 @@ struct sysentvec aout_sysvec = { #elif defined(__amd64__) -#define AOUT32_USRSTACK 0xbfc0000 +#define AOUT32_USRSTACK 0xbfc00000 #define AOUT32_PS_STRINGS \ (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings)) @@ -152,7 +152,7 @@ aout_fixup(register_t **stack_base, struct image_params *imgp) { *(char **)stack_base -= sizeof(uint32_t); - return (suword(*stack_base, imgp->args->argc)); + return (suword32(*stack_base, imgp->args->argc)); } static int diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index 59b876c..a4d90c7 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c @@ -963,6 +963,68 @@ make_dev_alias_p(int flags, struct cdev **cdev, struct cdev *pdev, return (res); } +int +make_dev_physpath_alias(int flags, struct cdev **cdev, struct cdev *pdev, + struct cdev *old_alias, const char *physpath) +{ + char *devfspath; + int physpath_len; + int max_parentpath_len; + int parentpath_len; + int devfspathbuf_len; + int mflags; + int ret; + + *cdev = NULL; + devfspath = NULL; + physpath_len = strlen(physpath); + ret = EINVAL; + if (physpath_len == 0) + goto out; + + if (strncmp("id1,", physpath, 4) == 0) { + physpath += 4; + physpath_len -= 4; + if (physpath_len == 0) + goto out; + } + + max_parentpath_len = SPECNAMELEN - physpath_len - /*/*/1; + parentpath_len = strlen(pdev->si_name); 
+ if (max_parentpath_len < parentpath_len) { + printf("make_dev_physpath_alias: WARNING - Unable to alias %s " + "to %s/%s - path too long\n", + pdev->si_name, physpath, pdev->si_name); + ret = ENAMETOOLONG; + goto out; + } + + mflags = (flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK; + devfspathbuf_len = physpath_len + /*/*/1 + parentpath_len + /*NUL*/1; + devfspath = malloc(devfspathbuf_len, M_DEVBUF, mflags); + if (devfspath == NULL) { + ret = ENOMEM; + goto out; + } + + sprintf(devfspath, "%s/%s", physpath, pdev->si_name); + if (old_alias != NULL + && strcmp(old_alias->si_name, devfspath) == 0) { + /* Retain the existing alias. */ + *cdev = old_alias; + old_alias = NULL; + ret = 0; + } else { + ret = make_dev_alias_p(flags, cdev, pdev, devfspath); + } +out: + if (old_alias != NULL) + destroy_dev(old_alias); + if (devfspath != NULL) + free(devfspath, M_DEVBUF); + return (ret); +} + static void destroy_devl(struct cdev *dev) { diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index 6489ffb..e1f2801 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <sys/cpuset.h> #include <sys/sx.h> #include <sys/queue.h> +#include <sys/libkern.h> #include <sys/limits.h> #include <sys/bus.h> #include <sys/interrupt.h> @@ -617,6 +618,86 @@ out: } /* + * Calculate the ffs() of the cpuset. + */ +int +cpusetobj_ffs(const cpuset_t *set) +{ + size_t i; + int cbit; + + cbit = 0; + for (i = 0; i < _NCPUWORDS; i++) { + if (set->__bits[i] != 0) { + cbit = ffsl(set->__bits[i]); + cbit += i * _NCPUBITS; + break; + } + } + return (cbit); +} + +/* + * Return a string representing a valid layout for a cpuset_t object. + * It expects an incoming buffer at least sized as CPUSETBUFSIZ. 
+ */ +char * +cpusetobj_strprint(char *buf, const cpuset_t *set) +{ + char *tbuf; + size_t i, bytesp, bufsiz; + + tbuf = buf; + bytesp = 0; + bufsiz = CPUSETBUFSIZ; + + for (i = _NCPUWORDS - 1; i > 0; i--) { + bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]); + bufsiz -= bytesp; + tbuf += bytesp; + } + snprintf(tbuf, bufsiz, "%lx", set->__bits[0]); + return (buf); +} + +/* + * Build a valid cpuset_t object from a string representation. + * It expects an incoming buffer at least sized as CPUSETBUFSIZ. + */ +int +cpusetobj_strscan(cpuset_t *set, const char *buf) +{ + u_int nwords; + int i, ret; + + if (strlen(buf) > CPUSETBUFSIZ - 1) + return (-1); + + /* Allow to pass a shorter version of the mask when necessary. */ + nwords = 1; + for (i = 0; buf[i] != '\0'; i++) + if (buf[i] == ',') + nwords++; + if (nwords > _NCPUWORDS) + return (-1); + + CPU_ZERO(set); + for (i = nwords - 1; i > 0; i--) { + ret = sscanf(buf, "%lx, ", &set->__bits[i]); + if (ret == 0 || ret == -1) + return (-1); + buf = strstr(buf, " "); + if (buf == NULL) + return (-1); + buf++; + } + ret = sscanf(buf, "%lx", &set->__bits[0]); + if (ret == 0 || ret == -1) + return (-1); + return (0); +} + +/* * Apply an anonymous mask to a single thread. 
*/ int @@ -754,12 +835,7 @@ cpuset_init(void *arg) { cpuset_t mask; - CPU_ZERO(&mask); -#ifdef SMP - mask.__bits[0] = all_cpus; -#else - mask.__bits[0] = 1; -#endif + mask = all_cpus; if (cpuset_modify(cpuset_zero, &mask)) panic("Can't set initial cpuset mask.\n"); cpuset_zero->cs_flags |= CPU_SET_RDONLY; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 6510e13..bb25d17 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -701,8 +701,9 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options, */ if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) { PROC_LOCK(p); - p->p_oppid = 0; proc_reparent(p, t); + p->p_pptr->p_dbg_child--; + p->p_oppid = 0; PROC_UNLOCK(p); pksignal(t, SIGCHLD, p->p_ksi); wakeup(t); @@ -794,7 +795,8 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options, pid = -q->p_pgid; PROC_UNLOCK(q); } - if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WNOWAIT|WLINUXCLONE)) + /* If we don't know the option, just return. */ + if (options & ~(WUNTRACED|WNOHANG|WCONTINUED|WNOWAIT|WLINUXCLONE)) return (EINVAL); loop: if (q->p_flag & P_STATCHILD) { @@ -873,7 +875,10 @@ loop: } if (nfound == 0) { sx_xunlock(&proctree_lock); - return (ECHILD); + if (td->td_proc->p_dbg_child) + return (0); + else + return (ECHILD); } if (options & WNOHANG) { sx_xunlock(&proctree_lock); diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c index af12d7d..f412d17 100644 --- a/sys/kern/kern_idle.c +++ b/sys/kern/kern_idle.c @@ -60,7 +60,7 @@ idle_setup(void *dummy) p = NULL; /* start with no idle process */ #ifdef SMP - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { #endif #ifdef SMP error = kproc_kthread_add(sched_idletd, NULL, &p, &td, diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c index 2e5e06f..eff3d5b 100644 --- a/sys/kern/kern_ktr.c +++ b/sys/kern/kern_ktr.c @@ -40,8 +40,10 @@ __FBSDID("$FreeBSD$"); #include "opt_alq.h" #include <sys/param.h> +#include <sys/queue.h> #include 
<sys/alq.h> #include <sys/cons.h> +#include <sys/cpuset.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/libkern.h> @@ -68,10 +70,6 @@ __FBSDID("$FreeBSD$"); #define KTR_MASK (0) #endif -#ifndef KTR_CPUMASK -#define KTR_CPUMASK (~0) -#endif - #ifndef KTR_TIME #define KTR_TIME get_cyclecount() #endif @@ -84,11 +82,6 @@ FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); -int ktr_cpumask = KTR_CPUMASK; -TUNABLE_INT("debug.ktr.cpumask", &ktr_cpumask); -SYSCTL_INT(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RW, - &ktr_cpumask, 0, "Bitmask of CPUs on which KTR logging is enabled"); - int ktr_mask = KTR_MASK; TUNABLE_INT("debug.ktr.mask", &ktr_mask); SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW, @@ -106,6 +99,54 @@ int ktr_version = KTR_VERSION; SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD, &ktr_version, 0, "Version of the KTR interface"); +cpuset_t ktr_cpumask; +static char ktr_cpumask_str[CPUSETBUFSIZ]; +TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str)); + +static void +ktr_cpumask_initializer(void *dummy __unused) +{ + + CPU_FILL(&ktr_cpumask); +#ifdef KTR_CPUMASK + if (cpusetobj_strscan(&ktr_cpumask, KTR_CPUMASK) == -1) + CPU_FILL(&ktr_cpumask); +#endif + + /* + * TUNABLE_STR() runs with SI_ORDER_MIDDLE priority, thus it must be + * already set, if necessary. 
+ */ + if (ktr_cpumask_str[0] != '\0' && + cpusetobj_strscan(&ktr_cpumask, ktr_cpumask_str) == -1) + CPU_FILL(&ktr_cpumask); +} +SYSINIT(ktr_cpumask_initializer, SI_SUB_TUNABLES, SI_ORDER_ANY, + ktr_cpumask_initializer, NULL); + +static int +sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS) +{ + char lktr_cpumask_str[CPUSETBUFSIZ]; + cpuset_t imask; + int error; + + cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask); + error = sysctl_handle_string(oidp, lktr_cpumask_str, + sizeof(lktr_cpumask_str), req); + if (error != 0 || req->newptr == NULL) + return (error); + if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1) + return (EINVAL); + CPU_COPY(&imask, &ktr_cpumask); + + return (error); +} +SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask, + CTLFLAG_RW | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0, + sysctl_debug_ktr_cpumask, "S", + "Bitmask of CPUs on which KTR logging is enabled"); + volatile int ktr_idx = 0; struct ktr_entry ktr_buf[KTR_ENTRIES]; @@ -213,7 +254,7 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format, if ((ktr_mask & mask) == 0) return; cpu = KTR_CPU; - if (((1 << cpu) & ktr_cpumask) == 0) + if (!CPU_ISSET(cpu, &ktr_cpumask)) return; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td = curthread; diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c index 7532378..8d9c7c0 100644 --- a/sys/kern/kern_pmc.c +++ b/sys/kern/kern_pmc.c @@ -55,7 +55,7 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL; int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL; /* Bitmask of CPUs requiring servicing at hardclock time */ -volatile cpumask_t pmc_cpumask; +volatile cpuset_t pmc_cpumask; /* * A global count of SS mode PMCs. 
When non-zero, this means that @@ -112,7 +112,7 @@ pmc_cpu_is_active(int cpu) { #ifdef SMP return (pmc_cpu_is_present(cpu) && - (hlt_cpus_mask & (1 << cpu)) == 0); + !CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (1); #endif @@ -139,7 +139,7 @@ int pmc_cpu_is_primary(int cpu) { #ifdef SMP - return ((logical_cpus_mask & (1 << cpu)) == 0); + return (!CPU_ISSET(cpu, &logical_cpus_mask)); #else return (1); #endif diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c index 98bd9c5..01f7777 100644 --- a/sys/kern/kern_racct.c +++ b/sys/kern/kern_racct.c @@ -104,8 +104,6 @@ SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *", int racct_types[] = { [RACCT_CPU] = RACCT_IN_THOUSANDS, - [RACCT_FSIZE] = - RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_DATA] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_STACK] = @@ -120,8 +118,6 @@ int racct_types[] = { RACCT_RECLAIMABLE | RACCT_DENIABLE, [RACCT_NOFILE] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, - [RACCT_SBSIZE] = - RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_VMEM] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_NPTS] = @@ -627,7 +623,6 @@ racct_proc_exit(struct proc *p) /* * XXX: Free this some other way. 
*/ - racct_set(p, RACCT_FSIZE, 0); racct_set(p, RACCT_NPTS, 0); racct_set(p, RACCT_NTHR, 0); racct_set(p, RACCT_RSS, 0); diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c index 2d43bdc..3d0a478 100644 --- a/sys/kern/kern_rctl.c +++ b/sys/kern/kern_rctl.c @@ -100,7 +100,6 @@ static struct dict subjectnames[] = { static struct dict resourcenames[] = { { "cpu", RACCT_CPU }, - { "fsize", RACCT_FSIZE }, { "data", RACCT_DATA }, { "stack", RACCT_STACK }, { "core", RACCT_CORE }, @@ -108,7 +107,6 @@ static struct dict resourcenames[] = { { "memlock", RACCT_MEMLOCK }, { "nproc", RACCT_NPROC }, { "nofile", RACCT_NOFILE }, - { "sbsize", RACCT_SBSIZE }, { "vmem", RACCT_VMEM }, { "npts", RACCT_NPTS }, { "swap", RACCT_SWAP }, diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index 7f2b4e7..3214e1b 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -263,7 +263,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) pc = pcpu_find(curcpu); /* Check if we just need to do a proper critical_exit. */ - if (!(pc->pc_cpumask & rm->rm_writecpus)) { + if (!CPU_OVERLAP(&pc->pc_cpumask, &rm->rm_writecpus)) { critical_exit(); return (1); } @@ -325,7 +325,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) critical_enter(); pc = pcpu_find(curcpu); - rm->rm_writecpus &= ~pc->pc_cpumask; + CPU_NAND(&rm->rm_writecpus, &pc->pc_cpumask); rm_tracker_add(pc, tracker); sched_pin(); critical_exit(); @@ -366,7 +366,8 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) * Fast path to combine two common conditions into a single * conditional jump. */ - if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask))) + if (0 == (td->td_owepreempt | + CPU_OVERLAP(&rm->rm_writecpus, &pc->pc_cpumask))) return (1); /* We do not have a read token and need to acquire one. 
*/ @@ -429,17 +430,17 @@ _rm_wlock(struct rmlock *rm) { struct rm_priotracker *prio; struct turnstile *ts; - cpumask_t readcpus; + cpuset_t readcpus; if (rm->lock_object.lo_flags & RM_SLEEPABLE) sx_xlock(&rm->rm_lock_sx); else mtx_lock(&rm->rm_lock_mtx); - if (rm->rm_writecpus != all_cpus) { + if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { /* Get all read tokens back */ - - readcpus = all_cpus & (all_cpus & ~rm->rm_writecpus); + readcpus = all_cpus; + CPU_NAND(&readcpus, &rm->rm_writecpus); rm->rm_writecpus = all_cpus; /* diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 001da3d..60e854f 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -233,30 +233,32 @@ print_uptime(void) printf("%lds\n", (long)ts.tv_sec); } -static void -doadump(void) +int +doadump(boolean_t textdump) { + boolean_t coredump; - /* - * Sometimes people have to call this from the kernel debugger. - * (if 'panic' can not dump) - * Give them a clue as to why they can't dump. - */ - if (dumper.dumper == NULL) { - printf("Cannot dump. Device not defined or unavailable.\n"); - return; - } + if (dumping) + return (EBUSY); + if (dumper.dumper == NULL) + return (ENXIO); savectx(&dumppcb); dumptid = curthread->td_tid; dumping++; + + coredump = TRUE; #ifdef DDB - if (textdump_pending) + if (textdump && textdump_pending) { + coredump = FALSE; textdump_dumpsys(&dumper); - else + } #endif + if (coredump) dumpsys(&dumper); + dumping--; + return (0); } static int @@ -425,7 +427,7 @@ kern_reboot(int howto) EVENTHANDLER_INVOKE(shutdown_post_sync, howto); if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) - doadump(); + doadump(TRUE); /* Now that we're going to really halt the system... */ EVENTHANDLER_INVOKE(shutdown_final, howto); @@ -553,11 +555,12 @@ panic(const char *fmt, ...) 
; /* nothing */ #endif - bootopt = RB_AUTOBOOT | RB_DUMP; + bootopt = RB_AUTOBOOT; newpanic = 0; if (panicstr) bootopt |= RB_NOSYNC; else { + bootopt |= RB_DUMP; panicstr = fmt; newpanic = 1; } diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index d5b49da..e1861eb 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/posix4.h> #include <sys/pioctl.h> +#include <sys/racct.h> #include <sys/resourcevar.h> #include <sys/sdt.h> #include <sys/sbuf.h> @@ -3173,14 +3174,15 @@ coredump(struct thread *td) * if it is larger than the limit. */ limit = (off_t)lim_cur(p, RLIMIT_CORE); - PROC_UNLOCK(p); - if (limit == 0) { + if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { + PROC_UNLOCK(p); #ifdef AUDIT audit_proc_coredump(td, name, EFBIG); #endif free(name, M_TEMP); return (EFBIG); } + PROC_UNLOCK(p); restart: NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, name, td); diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c index 7ee56d5..799b60d 100644 --- a/sys/kern/ksched.c +++ b/sys/kern/ksched.c @@ -206,7 +206,7 @@ ksched_setscheduler(struct ksched *ksched, if (param->sched_priority >= 0 && param->sched_priority <= (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) { rtp.type = RTP_PRIO_NORMAL; - rtp.prio = p4prio_to_rtpprio(param->sched_priority); + rtp.prio = p4prio_to_tsprio(param->sched_priority); rtp_to_pri(&rtp, td); } else e = EINVAL; diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index 38bf37f..2f9a1f6 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -950,11 +950,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, ef->ddbstrcnt = strcnt; ef->ddbstrtab = ef->strbase; +nosyms: error = link_elf_link_common_finish(lf); if (error != 0) goto out; -nosyms: *result = lf; out: diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index fef9e25..592bb80 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -156,7 +156,7 @@ static 
struct runq runq; static struct runq runq_pcpu[MAXCPU]; long runq_length[MAXCPU]; -static cpumask_t idle_cpus_mask; +static cpuset_t idle_cpus_mask; #endif struct pcpuidlestat { @@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (td->td_flags & TDF_IDLETD) { TD_SET_CAN_RUN(td); #ifdef SMP - idle_cpus_mask &= ~PCPU_GET(cpumask); + /* Spinlock held here, assume no migration. */ + CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif } else { if (TD_IS_RUNNING(td)) { @@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) #ifdef SMP if (td->td_flags & TDF_IDLETD) - idle_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); @@ -1054,7 +1055,8 @@ static int forward_wakeup(int cpunum) { struct pcpu *pc; - cpumask_t dontuse, id, map, map2, me; + cpuset_t dontuse, id, map, map2, me; + int iscpuset; mtx_assert(&sched_lock, MA_OWNED); @@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum) /* * Check the idle mask we received against what we calculated * before in the old version. + * + * Also note that sched_lock is held now, thus no migration is + * expected. */ me = PCPU_GET(cpumask); /* Don't bother if we should be doing it ourself. 
*/ - if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum))) + if (CPU_OVERLAP(&me, &idle_cpus_mask) && + (cpunum == NOCPU || CPU_ISSET(cpunum, &me))) return (0); - dontuse = me | stopped_cpus | hlt_cpus_mask; - map2 = 0; + dontuse = me; + CPU_OR(&dontuse, &stopped_cpus); + CPU_OR(&dontuse, &hlt_cpus_mask); + CPU_ZERO(&map2); if (forward_wakeup_use_loop) { - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((id & dontuse) == 0 && + if (!CPU_OVERLAP(&id, &dontuse) && pc->pc_curthread == pc->pc_idlethread) { - map2 |= id; + CPU_OR(&map2, &id); } } } if (forward_wakeup_use_mask) { - map = 0; - map = idle_cpus_mask & ~dontuse; + map = idle_cpus_mask; + CPU_NAND(&map, &dontuse); /* If they are both on, compare and use loop if different. */ if (forward_wakeup_use_loop) { - if (map != map2) { + if (CPU_CMP(&map, &map2)) { printf("map != map2, loop method preferred\n"); map = map2; } @@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum) /* If we only allow a specific CPU, then mask off all the others. 
*/ if (cpunum != NOCPU) { KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum.")); - map &= (1 << cpunum); + iscpuset = CPU_ISSET(cpunum, &map); + if (iscpuset == 0) + CPU_ZERO(&map); + else + CPU_SETOF(cpunum, &map); } - if (map) { + if (!CPU_EMPTY(&map)) { forward_wakeups_delivered++; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((map & id) == 0) + if (!CPU_OVERLAP(&map, &id)) continue; if (cpu_idle_wakeup(pc->pc_cpuid)) - map &= ~id; + CPU_NAND(&map, &id); } - if (map) + if (!CPU_EMPTY(&map)) ipi_selected(map, IPI_AST); return (1); } @@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid) int cpri; pcpu = pcpu_find(cpuid); - if (idle_cpus_mask & pcpu->pc_cpumask) { + if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) { forward_wakeups_delivered++; if (!cpu_idle_wakeup(cpuid)) ipi_cpu(cpuid, IPI_AST); @@ -1193,6 +1205,7 @@ void sched_add(struct thread *td, int flags) #ifdef SMP { + cpuset_t idle, me, tidlemsk; struct td_sched *ts; int forwarded = 0; int cpu; @@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags) kick_other_cpu(td->td_priority, cpu); } else { if (!single_cpu) { - cpumask_t me = PCPU_GET(cpumask); - cpumask_t idle = idle_cpus_mask & me; - if (!idle && ((flags & SRQ_INTR) == 0) && - (idle_cpus_mask & ~(hlt_cpus_mask | me))) + /* + * Thread spinlock is held here, assume no + * migration is possible. 
+ */ + me = PCPU_GET(cpumask); + idle = idle_cpus_mask; + tidlemsk = idle; + CPU_AND(&idle, &me); + CPU_OR(&me, &hlt_cpus_mask); + CPU_NAND(&tidlemsk, &me); + + if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) && + !CPU_EMPTY(&tidlemsk)) forwarded = forward_wakeup(cpu); } diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index ac18e77..05267f3 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -564,7 +564,7 @@ struct cpu_search { #define CPUSET_FOREACH(cpu, mask) \ for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \ - if ((mask) & 1 << (cpu)) + if (CPU_ISSET(cpu, &mask)) static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low, struct cpu_search *high, const int match); @@ -2650,15 +2650,16 @@ static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg, int indent) { + char cpusetbuf[CPUSETBUFSIZ]; int i, first; sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent, "", 1 + indent / 2, cg->cg_level); - sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"0x%x\">", indent, "", - cg->cg_count, cg->cg_mask); + sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "", + cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask)); first = TRUE; for (i = 0; i < MAXCPU; i++) { - if ((cg->cg_mask & (1 << i)) != 0) { + if (CPU_ISSET(i, &cg->cg_mask)) { if (!first) sbuf_printf(sb, ", "); else diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index 24963d5..eaf6427 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -49,8 +49,9 @@ static long devstat_generation = 1; static int devstat_version = DEVSTAT_VERSION; static int devstat_current_devnumber; static struct mtx devstat_mutex; +MTX_SYSINIT(devstat_mutex, &devstat_mutex, "devstat", MTX_DEF); -static struct devstatlist device_statq; +static struct devstatlist device_statq = STAILQ_HEAD_INITIALIZER(device_statq); static struct devstat *devstat_alloc(void); static void devstat_free(struct devstat *); static void 
devstat_add_entry(struct devstat *ds, const void *dev_name, @@ -70,13 +71,7 @@ devstat_new_entry(const void *dev_name, devstat_priority priority) { struct devstat *ds; - static int once; - if (!once) { - STAILQ_INIT(&device_statq); - mtx_init(&devstat_mutex, "devstat", NULL, MTX_DEF); - once = 1; - } mtx_assert(&devstat_mutex, MA_NOTOWNED); ds = devstat_alloc(); @@ -476,8 +471,9 @@ devstat_alloc(void) mtx_assert(&devstat_mutex, MA_NOTOWNED); if (!once) { - make_dev_credf(MAKEDEV_ETERNAL, &devstat_cdevsw, 0, NULL, - UID_ROOT, GID_WHEEL, 0400, DEVSTAT_DEVICE_NAME); + make_dev_credf(MAKEDEV_ETERNAL | MAKEDEV_CHECKNAME, + &devstat_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0400, + DEVSTAT_DEVICE_NAME); once = 1; } spp2 = NULL; diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index 342c5ca..c2f6e99 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -244,29 +244,44 @@ kdb_reboot(void) #define KEY_CRTLP 16 /* ^P */ #define KEY_CRTLR 18 /* ^R */ +/* States of the KDB "alternate break sequence" detecting state machine. */ +enum { + KDB_ALT_BREAK_SEEN_NONE, + KDB_ALT_BREAK_SEEN_CR, + KDB_ALT_BREAK_SEEN_CR_TILDE, +}; + int kdb_alt_break(int key, int *state) { int brk; + /* All states transition to KDB_ALT_BREAK_SEEN_CR on a CR.
*/ + if (key == KEY_CR) { + *state = KDB_ALT_BREAK_SEEN_CR; + return (0); + } + brk = 0; switch (*state) { - case 0: - if (key == KEY_CR) - *state = 1; - break; - case 1: + case KDB_ALT_BREAK_SEEN_CR: + *state = KDB_ALT_BREAK_SEEN_NONE; if (key == KEY_TILDE) - *state = 2; + *state = KDB_ALT_BREAK_SEEN_CR_TILDE; break; - case 2: + case KDB_ALT_BREAK_SEEN_CR_TILDE: + *state = KDB_ALT_BREAK_SEEN_NONE; if (key == KEY_CRTLB) brk = KDB_REQ_DEBUGGER; else if (key == KEY_CRTLP) brk = KDB_REQ_PANIC; else if (key == KEY_CRTLR) brk = KDB_REQ_REBOOT; - *state = 0; + break; + case KDB_ALT_BREAK_SEEN_NONE: + default: + *state = KDB_ALT_BREAK_SEEN_NONE; + break; } return (brk); } @@ -412,8 +427,9 @@ kdb_thr_ctx(struct thread *thr) return (&kdb_pcb); #if defined(SMP) && defined(KDB_STOPPEDPCB) - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { - if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask)) + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (pc->pc_curthread == thr && + CPU_OVERLAP(&stopped_cpus, &pc->pc_cpumask)) return (KDB_STOPPEDPCB(pc)); } #endif diff --git a/sys/kern/subr_msgbuf.c b/sys/kern/subr_msgbuf.c index 14cd39d..cd9c551 100644 --- a/sys/kern/subr_msgbuf.c +++ b/sys/kern/subr_msgbuf.c @@ -31,8 +31,16 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/lock.h> +#include <sys/mutex.h> #include <sys/msgbuf.h> +/* + * Maximum number conversion buffer length: uintmax_t in base 2, plus <> + * around the priority, and a terminating NUL. + */ +#define MAXPRIBUF (sizeof(intmax_t) * NBBY + 3) + /* Read/write sequence numbers are modulo a multiple of the buffer size. 
*/ #define SEQMOD(size) ((size) * 16) @@ -51,6 +59,10 @@ msgbuf_init(struct msgbuf *mbp, void *ptr, int size) mbp->msg_seqmod = SEQMOD(size); msgbuf_clear(mbp); mbp->msg_magic = MSG_MAGIC; + mbp->msg_lastpri = -1; + mbp->msg_needsnl = 0; + bzero(&mbp->msg_lock, sizeof(mbp->msg_lock)); + mtx_init(&mbp->msg_lock, "msgbuf", NULL, MTX_SPIN); } /* @@ -80,6 +92,12 @@ msgbuf_reinit(struct msgbuf *mbp, void *ptr, int size) } msgbuf_clear(mbp); } + + mbp->msg_lastpri = -1; + /* Assume that the old message buffer didn't end in a newline. */ + mbp->msg_needsnl = 1; + bzero(&mbp->msg_lock, sizeof(mbp->msg_lock)); + mtx_init(&mbp->msg_lock, "msgbuf", NULL, MTX_SPIN); } /* @@ -110,25 +128,140 @@ msgbuf_getcount(struct msgbuf *mbp) } /* - * Append a character to a message buffer. This function can be - * considered fully reentrant so long as the number of concurrent - * callers is less than the number of characters in the buffer. - * However, the message buffer is only guaranteed to be consistent - * for reading when there are no callers in this function. + * Add a character into the message buffer, and update the checksum and + * sequence number. + * + * The caller should hold the message buffer spinlock. + */ +static inline void +msgbuf_do_addchar(struct msgbuf *mbp, u_int *seq, int c) +{ + u_int pos; + + /* Make sure we properly wrap the sequence number. */ + pos = MSGBUF_SEQ_TO_POS(mbp, *seq); + + mbp->msg_cksum += (u_int)c - + (u_int)(u_char)mbp->msg_ptr[pos]; + + mbp->msg_ptr[pos] = c; + + *seq = MSGBUF_SEQNORM(mbp, *seq + 1); +} + +/* + * Append a character to a message buffer. 
*/ void msgbuf_addchar(struct msgbuf *mbp, int c) { - u_int new_seq, pos, seq; - - do { - seq = mbp->msg_wseq; - new_seq = MSGBUF_SEQNORM(mbp, seq + 1); - } while (atomic_cmpset_rel_int(&mbp->msg_wseq, seq, new_seq) == 0); - pos = MSGBUF_SEQ_TO_POS(mbp, seq); - atomic_add_int(&mbp->msg_cksum, (u_int)(u_char)c - - (u_int)(u_char)mbp->msg_ptr[pos]); - mbp->msg_ptr[pos] = c; + mtx_lock_spin(&mbp->msg_lock); + + msgbuf_do_addchar(mbp, &mbp->msg_wseq, c); + + mtx_unlock_spin(&mbp->msg_lock); +} + +/* + * Append a NUL-terminated string with a priority to a message buffer. + * Filter carriage returns if the caller requests it. + * + * XXX The carriage return filtering behavior is present in the + * msglogchar() API, however testing has shown that we don't seem to send + * carriage returns down this path. So do we still need it? + */ +void +msgbuf_addstr(struct msgbuf *mbp, int pri, char *str, int filter_cr) +{ + u_int seq; + size_t len, prefix_len; + char prefix[MAXPRIBUF]; + int nl, i; + + len = strlen(str); + prefix_len = 0; + nl = 0; + + /* If we have a zero-length string, no need to do anything. */ + if (len == 0) + return; + + mtx_lock_spin(&mbp->msg_lock); + + /* + * If this is true, we may need to insert a new priority sequence, + * so prepare the prefix. + */ + if (pri != -1) + prefix_len = sprintf(prefix, "<%d>", pri); + + /* + * Starting write sequence number. + */ + seq = mbp->msg_wseq; + + /* + * Whenever there is a change in priority, we have to insert a + * newline, and a priority prefix if the priority is not -1. Here + * we detect whether there was a priority change, and whether we + * did not end with a newline. If that is the case, we need to + * insert a newline before this string. 
+ */ + if (mbp->msg_lastpri != pri && mbp->msg_needsnl != 0) { + + msgbuf_do_addchar(mbp, &seq, '\n'); + mbp->msg_needsnl = 0; + } + + for (i = 0; i < len; i++) { + /* + * If we just had a newline, and the priority is not -1 + * (and therefore prefix_len != 0), then we need a priority + * prefix for this line. + */ + if (mbp->msg_needsnl == 0 && prefix_len != 0) { + int j; + + for (j = 0; j < prefix_len; j++) + msgbuf_do_addchar(mbp, &seq, prefix[j]); + } + + /* + * Don't copy carriage returns if the caller requested + * filtering. + * + * XXX This matches the behavior of msglogchar(), but is it + * necessary? Testing has shown that we don't seem to get + * carriage returns here. + */ + if ((filter_cr != 0) && (str[i] == '\r')) + continue; + + /* + * Clear this flag if we see a newline. This affects whether + * we need to insert a new prefix or insert a newline later. + */ + if (str[i] == '\n') + mbp->msg_needsnl = 0; + else + mbp->msg_needsnl = 1; + + msgbuf_do_addchar(mbp, &seq, str[i]); + } + /* + * Update the write sequence number for the actual number of + * characters we put in the message buffer. (Depends on whether + * carriage returns are filtered.) + */ + mbp->msg_wseq = seq; + + /* + * Set the last priority. 
+ */ + mbp->msg_lastpri = pri; + + mtx_unlock_spin(&mbp->msg_lock); + } /* @@ -141,14 +274,21 @@ msgbuf_getchar(struct msgbuf *mbp) u_int len, wseq; int c; + mtx_lock_spin(&mbp->msg_lock); + wseq = mbp->msg_wseq; len = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq); - if (len == 0) + if (len == 0) { + mtx_unlock_spin(&mbp->msg_lock); return (-1); + } if (len > mbp->msg_size) mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size); c = (u_char)mbp->msg_ptr[MSGBUF_SEQ_TO_POS(mbp, mbp->msg_rseq)]; mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + 1); + + mtx_unlock_spin(&mbp->msg_lock); + return (c); } @@ -161,10 +301,14 @@ msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen) { u_int len, pos, wseq; + mtx_lock_spin(&mbp->msg_lock); + wseq = mbp->msg_wseq; len = MSGBUF_SEQSUB(mbp, wseq, mbp->msg_rseq); - if (len == 0) + if (len == 0) { + mtx_unlock_spin(&mbp->msg_lock); return (0); + } if (len > mbp->msg_size) { mbp->msg_rseq = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size); len = mbp->msg_size; @@ -175,6 +319,9 @@ msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen) bcopy(&mbp->msg_ptr[pos], buf, len); mbp->msg_rseq = MSGBUF_SEQNORM(mbp, mbp->msg_rseq + len); + + mtx_unlock_spin(&mbp->msg_lock); + return (len); } @@ -193,16 +340,21 @@ msgbuf_peekbytes(struct msgbuf *mbp, char *buf, int buflen, u_int *seqp) { u_int len, pos, wseq; + mtx_lock_spin(&mbp->msg_lock); + if (buf == NULL) { /* Just initialise *seqp. 
*/ *seqp = MSGBUF_SEQNORM(mbp, mbp->msg_wseq - mbp->msg_size); + mtx_unlock_spin(&mbp->msg_lock); return (0); } wseq = mbp->msg_wseq; len = MSGBUF_SEQSUB(mbp, wseq, *seqp); - if (len == 0) + if (len == 0) { + mtx_unlock_spin(&mbp->msg_lock); return (0); + } if (len > mbp->msg_size) { *seqp = MSGBUF_SEQNORM(mbp, wseq - mbp->msg_size); len = mbp->msg_size; @@ -212,6 +364,9 @@ msgbuf_peekbytes(struct msgbuf *mbp, char *buf, int buflen, u_int *seqp) len = min(len, (u_int)buflen); bcopy(&mbp->msg_ptr[MSGBUF_SEQ_TO_POS(mbp, *seqp)], buf, len); *seqp = MSGBUF_SEQNORM(mbp, *seqp + len); + + mtx_unlock_spin(&mbp->msg_lock); + return (len); } diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index de5cafc..a6b3ae0 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -74,7 +74,7 @@ static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head); static struct sx dpcpu_lock; uintptr_t dpcpu_off[MAXCPU]; struct pcpu *cpuid_to_pcpu[MAXCPU]; -struct cpuhead cpuhead = SLIST_HEAD_INITIALIZER(cpuhead); +struct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead); /* * Initialize the MI portions of a struct pcpu. 
@@ -87,9 +87,9 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) KASSERT(cpuid >= 0 && cpuid < MAXCPU, ("pcpu_init: invalid cpuid %d", cpuid)); pcpu->pc_cpuid = cpuid; - pcpu->pc_cpumask = 1 << cpuid; + CPU_SETOF(cpuid, &pcpu->pc_cpumask); cpuid_to_pcpu[cpuid] = pcpu; - SLIST_INSERT_HEAD(&cpuhead, pcpu, pc_allcpu); + STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu); cpu_pcpu_init(pcpu, cpuid, size); pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue; pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue; @@ -245,7 +245,7 @@ void pcpu_destroy(struct pcpu *pcpu) { - SLIST_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu); + STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu); cpuid_to_pcpu[pcpu->pc_cpuid] = NULL; dpcpu_off[pcpu->pc_cpuid] = 0; } diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index d0d2ad7..48f2dd9 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -94,6 +94,7 @@ struct snprintf_arg { extern int log_open; static void msglogchar(int c, int pri); +static void msglogstr(char *str, int pri, int filter_cr); static void putchar(int ch, void *arg); static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len, int upper); static void snprintf_func(int ch, void *arg); @@ -106,6 +107,14 @@ TUNABLE_INT("kern.log_console_output", &log_console_output); SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RW, &log_console_output, 0, "Duplicate console output to the syslog."); +/* + * See the comment in log_console() below for more explanation of this. + */ +static int log_console_add_linefeed = 0; +TUNABLE_INT("kern.log_console_add_linefeed", &log_console_add_linefeed); +SYSCTL_INT(_kern, OID_AUTO, log_console_add_linefeed, CTLFLAG_RW, + &log_console_add_linefeed, 0, "log_console() adds extra newlines."); + static int always_console_output = 0; TUNABLE_INT("kern.always_console_output", &always_console_output); SYSCTL_INT(_kern, OID_AUTO, always_console_output, CTLFLAG_RW, @@ -154,6 +163,7 @@ uprintf(const char *fmt, ...) 
goto out; } pca.flags = TOTTY; + pca.p_bufr = NULL; va_start(ap, fmt); tty_lock(pca.tty); retval = kvprintf(fmt, putchar, &pca, 10, ap); @@ -197,6 +207,7 @@ tprintf(struct proc *p, int pri, const char *fmt, ...) pca.pri = pri; pca.tty = tp; pca.flags = flags; + pca.p_bufr = NULL; va_start(ap, fmt); if (pca.tty != NULL) tty_lock(pca.tty); @@ -225,6 +236,7 @@ ttyprintf(struct tty *tp, const char *fmt, ...) va_start(ap, fmt); pca.tty = tp; pca.flags = TOTTY; + pca.p_bufr = NULL; retval = kvprintf(fmt, putchar, &pca, 10, ap); va_end(ap); return (retval); @@ -240,16 +252,37 @@ log(int level, const char *fmt, ...) { va_list ap; struct putchar_arg pca; +#ifdef PRINTF_BUFR_SIZE + char bufr[PRINTF_BUFR_SIZE]; +#endif pca.tty = NULL; pca.pri = level; pca.flags = log_open ? TOLOG : TOCONS; +#ifdef PRINTF_BUFR_SIZE + pca.p_bufr = bufr; + pca.p_next = pca.p_bufr; + pca.n_bufr = sizeof(bufr); + pca.remain = sizeof(bufr); + *pca.p_next = '\0'; +#else pca.p_bufr = NULL; +#endif va_start(ap, fmt); kvprintf(fmt, putchar, &pca, 10, ap); va_end(ap); +#ifdef PRINTF_BUFR_SIZE + /* Write any buffered console/log output: */ + if (*pca.p_bufr != '\0') { + if (pca.flags & TOLOG) + msglogstr(pca.p_bufr, level, /*filter_cr*/1); + + if (pca.flags & TOCONS) + cnputs(pca.p_bufr); + } +#endif msgbuftrigger = 1; } @@ -258,7 +291,7 @@ log(int level, const char *fmt, ...) 
void log_console(struct uio *uio) { - int c, i, error, nl; + int c, error, nl; char *consbuffer; int pri; @@ -271,20 +304,48 @@ log_console(struct uio *uio) nl = 0; while (uio->uio_resid > 0) { - c = imin(uio->uio_resid, CONSCHUNK); + c = imin(uio->uio_resid, CONSCHUNK - 1); error = uiomove(consbuffer, c, uio); if (error != 0) break; - for (i = 0; i < c; i++) { - msglogchar(consbuffer[i], pri); - if (consbuffer[i] == '\n') - nl = 1; - else - nl = 0; - } + /* Make sure we're NUL-terminated */ + consbuffer[c] = '\0'; + if (consbuffer[c - 1] == '\n') + nl = 1; + else + nl = 0; + msglogstr(consbuffer, pri, /*filter_cr*/ 1); + } + /* + * The previous behavior in log_console() is preserved when + * log_console_add_linefeed is non-zero. For that behavior, if an + * individual console write came in that was not terminated with a + * line feed, it would add a line feed. + * + * This results in different data in the message buffer than + * appears on the system console (which doesn't add extra line feed + * characters). + * + * A number of programs and rc scripts write a line feed, or a period + * and a line feed when they have completed their operation. On + * the console, this looks seamless, but when displayed with + * 'dmesg -a', you wind up with output that looks like this: + * + * Updating motd: + * . + * + * On the console, it looks like this: + * Updating motd:. + * + * We could add logic to detect that situation, or just not insert + * the extra newlines. Set the kern.log_console_add_linefeed + * sysctl/tunable variable to get the old behavior. 
+ */ + if (!nl && log_console_add_linefeed) { + consbuffer[0] = '\n'; + consbuffer[1] = '\0'; + msglogstr(consbuffer, pri, /*filter_cr*/ 1); } - if (!nl) - msglogchar('\n', pri); msgbuftrigger = 1; free(uio, M_IOV); free(consbuffer, M_TEMP); @@ -330,9 +391,11 @@ vprintf(const char *fmt, va_list ap) retval = kvprintf(fmt, putchar, &pca, 10, ap); #ifdef PRINTF_BUFR_SIZE - /* Write any buffered console output: */ - if (*pca.p_bufr != '\0') + /* Write any buffered console/log output: */ + if (*pca.p_bufr != '\0') { cnputs(pca.p_bufr); + msglogstr(pca.p_bufr, pca.pri, /*filter_cr*/ 1); + } #endif if (!panicstr) @@ -342,18 +405,18 @@ vprintf(const char *fmt, va_list ap) } static void -putcons(int c, struct putchar_arg *ap) +putbuf(int c, struct putchar_arg *ap) { /* Check if no console output buffer was provided. */ - if (ap->p_bufr == NULL) + if (ap->p_bufr == NULL) { /* Output direct to the console. */ - cnputc(c); - else { + if (ap->flags & TOCONS) + cnputc(c); + + if (ap->flags & TOLOG) + msglogchar(c, ap->pri); + } else { /* Buffer the character: */ - if (c == '\n') { - *ap->p_next++ = '\r'; - ap->remain--; - } *ap->p_next++ = c; ap->remain--; @@ -361,12 +424,35 @@ putcons(int c, struct putchar_arg *ap) *ap->p_next = '\0'; /* Check if the buffer needs to be flushed. */ - if (ap->remain < 3 || c == '\n') { - cnputs(ap->p_bufr); + if (ap->remain == 2 || c == '\n') { + + if (ap->flags & TOLOG) + msglogstr(ap->p_bufr, ap->pri, /*filter_cr*/1); + + if (ap->flags & TOCONS) { + if ((panicstr == NULL) && (constty != NULL)) + msgbuf_addstr(&consmsgbuf, -1, + ap->p_bufr, /*filter_cr*/ 0); + + if ((constty == NULL) ||(always_console_output)) + cnputs(ap->p_bufr); + } + ap->p_next = ap->p_bufr; ap->remain = ap->n_bufr; *ap->p_next = '\0'; } + + /* + * Since we fill the buffer up one character at a time, + * this should not happen. We should always catch it when + * ap->remain == 2 (if not sooner due to a newline), flush + * the buffer and move on. 
One way this could happen is + * if someone sets PRINTF_BUFR_SIZE to 1 or something + * similarly silly. + */ + KASSERT(ap->remain > 2, ("Bad buffer logic, remain = %zd", + ap->remain)); } } @@ -381,26 +467,25 @@ putchar(int c, void *arg) struct putchar_arg *ap = (struct putchar_arg*) arg; struct tty *tp = ap->tty; int flags = ap->flags; + int putbuf_done = 0; /* Don't use the tty code after a panic or while in ddb. */ if (kdb_active) { if (c != '\0') cnputc(c); - } else if (panicstr || ((flags & TOCONS) && constty == NULL)) { - if (c != '\0') - putcons(c, ap); } else { - if ((flags & TOTTY) && tp != NULL) + if ((panicstr == NULL) && (flags & TOTTY) && (tp != NULL)) tty_putchar(tp, c); + if (flags & TOCONS) { - if (constty != NULL) - msgbuf_addchar(&consmsgbuf, c); - if (always_console_output && c != '\0') - putcons(c, ap); + putbuf(c, ap); + putbuf_done = 1; } } - if ((flags & TOLOG)) - msglogchar(c, ap->pri); + if ((flags & TOLOG) && (putbuf_done == 0)) { + if (c != '\0') + putbuf(c, ap); + } } /* @@ -890,6 +975,15 @@ msglogchar(int c, int pri) } } +static void +msglogstr(char *str, int pri, int filter_cr) +{ + if (!msgbufmapped) + return; + + msgbuf_addstr(msgbufp, pri, str, filter_cr); +} + void msgbufinit(void *ptr, int size) { diff --git a/sys/kern/subr_rman.c b/sys/kern/subr_rman.c index 3014b19..abd72c0 100644 --- a/sys/kern/subr_rman.c +++ b/sys/kern/subr_rman.c @@ -839,6 +839,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r) * without freeing anything. 
*/ r->r_flags &= ~RF_ALLOCATED; + r->r_dev = NULL; return 0; } diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 67774d8..c38177b 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -53,15 +53,15 @@ __FBSDID("$FreeBSD$"); #include "opt_sched.h" #ifdef SMP -volatile cpumask_t stopped_cpus; -volatile cpumask_t started_cpus; -cpumask_t hlt_cpus_mask; -cpumask_t logical_cpus_mask; +volatile cpuset_t stopped_cpus; +volatile cpuset_t started_cpus; +cpuset_t hlt_cpus_mask; +cpuset_t logical_cpus_mask; void (*cpustop_restartfunc)(void); #endif /* This is used in modules that need to work in both SMP and UP. */ -cpumask_t all_cpus; +cpuset_t all_cpus; int mp_ncpus; /* export this for libkvm consumers. */ @@ -200,8 +200,11 @@ forward_signal(struct thread *td) * */ static int -generic_stop_cpus(cpumask_t map, u_int type) +generic_stop_cpus(cpuset_t map, u_int type) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif static volatile u_int stopping_cpu = NOCPU; int i; @@ -216,7 +219,8 @@ generic_stop_cpus(cpumask_t map, u_int type) if (!smp_started) return (0); - CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type); + CTR2(KTR_SMP, "stop_cpus(%s) with %u type", + cpusetobj_strprint(cpusetbuf, &map), type); if (stopping_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&stopping_cpu, NOCPU, @@ -228,7 +232,7 @@ generic_stop_cpus(cpumask_t map, u_int type) ipi_selected(map, type); i = 0; - while ((stopped_cpus & map) != map) { + while (!CPU_SUBSET(&stopped_cpus, &map)) { /* spin */ cpu_spinwait(); i++; @@ -245,14 +249,14 @@ generic_stop_cpus(cpumask_t map, u_int type) } int -stop_cpus(cpumask_t map) +stop_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP)); } int -stop_cpus_hard(cpumask_t map) +stop_cpus_hard(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP_HARD)); @@ -260,7 +264,7 @@ stop_cpus_hard(cpumask_t map) #if defined(__amd64__) int -suspend_cpus(cpumask_t map) +suspend_cpus(cpuset_t map) { return (generic_stop_cpus(map, 
IPI_SUSPEND)); @@ -281,19 +285,22 @@ suspend_cpus(cpumask_t map) * 1: ok */ int -restart_cpus(cpumask_t map) +restart_cpus(cpuset_t map) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif if (!smp_started) return 0; - CTR1(KTR_SMP, "restart_cpus(%x)", map); + CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); /* signal other cpus to restart */ - atomic_store_rel_int(&started_cpus, map); + CPU_COPY_STORE_REL(&map, &started_cpus); /* wait for each to clear its bit */ - while ((stopped_cpus & map) != 0) + while (CPU_OVERLAP(&stopped_cpus, &map)) cpu_spinwait(); return 1; @@ -348,11 +355,11 @@ smp_rendezvous_action(void) * cannot use a regular critical section however as having * critical_exit() preempt from this routine would also be * problematic (the preemption must not occur before the IPI - * has been acknowleged via an EOI). Instead, we + * has been acknowledged via an EOI). Instead, we * intentionally ignore td_owepreempt when leaving the - * critical setion. This should be harmless because we do not - * permit rendezvous action routines to schedule threads, and - * thus td_owepreempt should never transition from 0 to 1 + * critical section. This should be harmless because we do + * not permit rendezvous action routines to schedule threads, + * and thus td_owepreempt should never transition from 0 to 1 * during this routine. 
*/ td = curthread; @@ -409,13 +416,13 @@ smp_rendezvous_action(void) } void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { - int i, ncpus = 0; + int curcpumap, i, ncpus = 0; if (!smp_started) { if (setup_func != NULL) @@ -428,11 +435,11 @@ smp_rendezvous_cpus(cpumask_t map, } CPU_FOREACH(i) { - if (((1 << i) & map) != 0) + if (CPU_ISSET(i, &map)) ncpus++; } if (ncpus == 0) - panic("ncpus is 0 with map=0x%x", map); + panic("ncpus is 0 with non-zero map"); mtx_lock_spin(&smp_ipi_mtx); @@ -452,10 +459,12 @@ smp_rendezvous_cpus(cpumask_t map, * Signal other processors, which will enter the IPI with * interrupts off. */ - ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS); + curcpumap = CPU_ISSET(curcpu, &map); + CPU_CLR(curcpu, &map); + ipi_selected(map, IPI_RENDEZVOUS); /* Check if the current CPU is in the map */ - if ((map & (1 << curcpu)) != 0) + if (curcpumap != 0) smp_rendezvous_action(); /* @@ -484,6 +493,7 @@ static struct cpu_group group[MAXCPU]; struct cpu_group * smp_topo(void) { + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; struct cpu_group *top; /* @@ -530,9 +540,10 @@ smp_topo(void) if (top->cg_count != mp_ncpus) panic("Built bad topology at %p. CPU count %d != %d", top, top->cg_count, mp_ncpus); - if (top->cg_mask != all_cpus) - panic("Built bad topology at %p. CPU mask 0x%X != 0x%X", - top, top->cg_mask, all_cpus); + if (CPU_CMP(&top->cg_mask, &all_cpus)) + panic("Built bad topology at %p. 
CPU mask (%s) != (%s)", + top, cpusetobj_strprint(cpusetbuf, &top->cg_mask), + cpusetobj_strprint(cpusetbuf2, &all_cpus)); return (top); } @@ -557,11 +568,13 @@ static int smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, int count, int flags, int start) { - cpumask_t mask; + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; + cpuset_t mask; int i; - for (mask = 0, i = 0; i < count; i++, start++) - mask |= (1 << start); + CPU_ZERO(&mask); + for (i = 0; i < count; i++, start++) + CPU_SET(start, &mask); child->cg_parent = parent; child->cg_child = NULL; child->cg_children = 0; @@ -571,10 +584,12 @@ smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, child->cg_mask = mask; parent->cg_children++; for (; parent != NULL; parent = parent->cg_parent) { - if ((parent->cg_mask & child->cg_mask) != 0) - panic("Duplicate children in %p. mask 0x%X child 0x%X", - parent, parent->cg_mask, child->cg_mask); - parent->cg_mask |= child->cg_mask; + if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask)) + panic("Duplicate children in %p. 
mask (%s) child (%s)", + parent, + cpusetobj_strprint(cpusetbuf, &parent->cg_mask), + cpusetobj_strprint(cpusetbuf2, &child->cg_mask)); + CPU_OR(&parent->cg_mask, &child->cg_mask); parent->cg_count += child->cg_count; } @@ -634,20 +649,20 @@ struct cpu_group * smp_topo_find(struct cpu_group *top, int cpu) { struct cpu_group *cg; - cpumask_t mask; + cpuset_t mask; int children; int i; - mask = (1 << cpu); + CPU_SETOF(cpu, &mask); cg = top; for (;;) { - if ((cg->cg_mask & mask) == 0) + if (!CPU_OVERLAP(&cg->cg_mask, &mask)) return (NULL); if (cg->cg_children == 0) return (cg); children = cg->cg_children; for (i = 0, cg = cg->cg_child; i < children; cg++, i++) - if ((cg->cg_mask & mask) != 0) + if (CPU_OVERLAP(&cg->cg_mask, &mask)) break; } return (NULL); @@ -655,7 +670,7 @@ smp_topo_find(struct cpu_group *top, int cpu) #else /* !SMP */ void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index a7f280a..ee36b35 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -829,10 +829,22 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) case PT_ATTACH: /* security check done above */ + /* + * It would be nice if the tracing relationship was separate + * from the parent relationship but that would require + * another set of links in the proc struct or for "wait" + * to scan the entire proc table. To make life easier, + * we just re-parent the process we're trying to trace. + * The old parent is remembered so we can put things back + * on a "detach". + */ p->p_flag |= P_TRACED; p->p_oppid = p->p_pptr->p_pid; - if (p->p_pptr != td->td_proc) + if (p->p_pptr != td->td_proc) { + /* Remember that a child is being debugged(traced). 
*/ + p->p_pptr->p_dbg_child++; proc_reparent(p, td->td_proc); + } data = SIGSTOP; goto sendsig; /* in PT_CONTINUE below */ @@ -919,11 +931,12 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) PROC_UNLOCK(pp); PROC_LOCK(p); proc_reparent(p, pp); + p->p_pptr->p_dbg_child--; if (pp == initproc) p->p_sigparent = SIGCHLD; } - p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK); p->p_oppid = 0; + p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK); /* should we send SIGCHLD? */ /* childproc_continued(p); */ diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 54a050f..3334fc2 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1845,10 +1845,16 @@ dontblock: } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); - error = sbwait(&so->so_rcv); - if (error) { - SOCKBUF_UNLOCK(&so->so_rcv); - goto release; + /* + * We could have received some data while we were notifying + * the protocol. Skip blocking in this case. + */ + if (so->so_rcv.sb_mb == NULL) { + error = sbwait(&so->so_rcv); + if (error) { + SOCKBUF_UNLOCK(&so->so_rcv); + goto release; + } } m = so->so_rcv.sb_mb; if (m != NULL) diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index a4bbdba..19aaee0 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -747,6 +747,10 @@ kern_sendit(td, s, mp, flags, control, segflg) return (error); so = (struct socket *)fp->f_data; +#ifdef KTRACE + if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) + ktrsockaddr(mp->msg_name); +#endif #ifdef MAC if (mp->msg_name != NULL) { error = mac_socket_check_connect(td->td_ucred, so, diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 00681ca..2743089 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -3999,10 +3999,11 @@ DB_SHOW_COMMAND(buffer, db_show_buffer) db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS); db_printf( "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n" - "b_bufobj = (%p), b_data = %p, b_blkno 
= %jd, b_dep = %p\n", + "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_lblkno = %jd, " + "b_dep = %p\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno, - bp->b_dep.lh_first); + (intmax_t)bp->b_lblkno, bp->b_dep.lh_first); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); diff --git a/sys/kgssapi/gss_impl.c b/sys/kgssapi/gss_impl.c index 01d940a..09b0a4b 100644 --- a/sys/kgssapi/gss_impl.c +++ b/sys/kgssapi/gss_impl.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <kgssapi/gssapi_impl.h> #include <rpc/rpc.h> #include <rpc/rpc_com.h> +#include <rpc/rpcsec_gss.h> #include "gssd.h" #include "kgss_if.h" @@ -253,8 +254,40 @@ kgss_copy_buffer(const gss_buffer_t from, gss_buffer_t to) static int kgssapi_modevent(module_t mod, int type, void *data) { - - return (0); + int error = 0; + + switch (type) { + case MOD_LOAD: + rpc_gss_entries.rpc_gss_secfind = rpc_gss_secfind; + rpc_gss_entries.rpc_gss_secpurge = rpc_gss_secpurge; + rpc_gss_entries.rpc_gss_seccreate = rpc_gss_seccreate; + rpc_gss_entries.rpc_gss_set_defaults = rpc_gss_set_defaults; + rpc_gss_entries.rpc_gss_max_data_length = + rpc_gss_max_data_length; + rpc_gss_entries.rpc_gss_get_error = rpc_gss_get_error; + rpc_gss_entries.rpc_gss_mech_to_oid = rpc_gss_mech_to_oid; + rpc_gss_entries.rpc_gss_oid_to_mech = rpc_gss_oid_to_mech; + rpc_gss_entries.rpc_gss_qop_to_num = rpc_gss_qop_to_num; + rpc_gss_entries.rpc_gss_get_mechanisms = rpc_gss_get_mechanisms; + rpc_gss_entries.rpc_gss_get_versions = rpc_gss_get_versions; + rpc_gss_entries.rpc_gss_is_installed = rpc_gss_is_installed; + rpc_gss_entries.rpc_gss_set_svc_name = rpc_gss_set_svc_name; + rpc_gss_entries.rpc_gss_clear_svc_name = rpc_gss_clear_svc_name; + rpc_gss_entries.rpc_gss_getcred = rpc_gss_getcred; + rpc_gss_entries.rpc_gss_set_callback = rpc_gss_set_callback; + rpc_gss_entries.rpc_gss_clear_callback = rpc_gss_clear_callback; + 
rpc_gss_entries.rpc_gss_get_principal_name = + rpc_gss_get_principal_name; + rpc_gss_entries.rpc_gss_svc_max_data_length = + rpc_gss_svc_max_data_length; + break; + case MOD_UNLOAD: + /* Unloading of the kgssapi module isn't supported. */ + /* FALLTHROUGH */ + default: + error = EOPNOTSUPP; + }; + return (error); } static moduledata_t kgssapi_mod = { "kgssapi", diff --git a/sys/mips/cavium/octeon_ebt3000_cf.c b/sys/mips/cavium/octeon_ebt3000_cf.c index 7955d19..f5a44c4 100644 --- a/sys/mips/cavium/octeon_ebt3000_cf.c +++ b/sys/mips/cavium/octeon_ebt3000_cf.c @@ -104,12 +104,40 @@ __FBSDID("$FreeBSD$"); extern cvmx_bootinfo_t *octeon_bootinfo; /* Globals */ -int bus_width; +/* + * There are three bus types supported by this driver. + * + * CF_8 -- Traditional PC Card IDE interface on an 8-bit wide bus. We assume + * the bootloader has configured attribute memory properly. We then access + * the device like an old-school 8-bit IDE card (which is all a traditional PC Card + * interface really is). + * CF_16 -- Traditional PC Card IDE interface on a 16-bit wide bus. Registers on + * this bus are 16 bits wide too. When accessing registers in the task file, you + * have to do it in 16-bit chunks, and worry about masking out what you don't want + * or ORing together the traditional 8-bit values. We assume the bootloader does + * the right attribute memory initialization dance. + * CF_TRUE_IDE_8 - CF Card wired to True IDE mode. There's no Attribute memory + * space at all. Instead all the traditional 8-bit registers are there, but + * on a 16-bit bus where addr0 isn't wired. This means we need to read/write them + * in 16-bit chunks, but only the lower 8 bits are valid. We do not (and cannot) + * access this like CF_16 with the commingled registers. Yet we can't access + * this like CF_8 because of the register offset. Except the TF_DATA register + * appears to be full width? 
+ */ void *base_addr; +int bus_type; +#define CF_8 1 /* 8-bit bus, no offsets - PC Card */ +#define CF_16 2 /* 16-bit bus, registers shared - PC Card */ +#define CF_TRUE_IDE_8 3 /* 16-bit bus, only lower 8-bits, TrueIDE */ +const char *const cf_type[] = { + "impossible type", + "CF 8-bit", + "CF 16-bit", + "True IDE" +}; /* Device softc */ struct cf_priv { - device_t dev; struct drive_param *drive_param; @@ -230,9 +258,65 @@ static void cf_start (struct bio *bp) static int cf_ioctl (struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) { - return (0); + return (0); +} + + +static uint8_t cf_inb_8(int port) +{ + /* + * Traditional 8-bit PC Card/CF bus access. + */ + if (bus_type == CF_8) { + volatile uint8_t *task_file = (volatile uint8_t *)base_addr; + return task_file[port]; + } + + /* + * True IDE access. lower 8 bits on a 16-bit bus (see above). + */ + volatile uint16_t *task_file = (volatile uint16_t *)base_addr; + return task_file[port] & 0xff; +} + +static void cf_outb_8(int port, uint8_t val) +{ + /* + * Traditional 8-bit PC Card/CF bus access. + */ + if (bus_type == CF_8) { + volatile uint8_t *task_file = (volatile uint8_t *)base_addr; + task_file[port] = val; + } + + /* + * True IDE access. lower 8 bits on a 16-bit bus (see above). 
+ */ + volatile uint16_t *task_file = (volatile uint16_t *)base_addr; + task_file[port] = val & 0xff; +} + +static uint8_t cf_inb_16(int port) +{ + volatile uint16_t *task_file = (volatile uint16_t *)base_addr; + uint16_t val = task_file[port / 2]; + if (port & 1) + return (val >> 8) & 0xff; + return val & 0xff; } +static uint16_t cf_inw_16(int port) +{ + volatile uint16_t *task_file = (volatile uint16_t *)base_addr; + uint16_t val = task_file[port / 2]; + return val; +} + +static void cf_outw_16(int port, uint16_t val) +{ + volatile uint16_t *task_file = (volatile uint16_t *)base_addr; + task_file[port / 2] = val; +} /* ------------------------------------------------------------------- * * cf_cmd_read() * @@ -264,25 +348,29 @@ static int cf_cmd_read (uint32_t nr_sectors, uint32_t start_sector, void *buf) return (error); } - if (bus_width == 8) { - volatile uint8_t *task_file = (volatile uint8_t*)base_addr; - volatile uint8_t dummy; + switch (bus_type) + { + case CF_8: for (count = 0; count < SECTOR_SIZE; count++) { - *ptr_8++ = task_file[TF_DATA]; - if ((count & 0xf) == 0) dummy = task_file[TF_STATUS]; + *ptr_8++ = cf_inb_8(TF_DATA); + if ((count & 0xf) == 0) + (void)cf_inb_8(TF_STATUS); } - } else { - volatile uint16_t *task_file = (volatile uint16_t*)base_addr; - volatile uint16_t dummy; + break; + case CF_TRUE_IDE_8: + case CF_16: + default: for (count = 0; count < SECTOR_SIZE; count+=2) { uint16_t temp; - temp = task_file[TF_DATA]; + temp = cf_inw_16(TF_DATA); *ptr_16++ = SWAP_SHORT(temp); - if ((count & 0xf) == 0) dummy = task_file[TF_STATUS/2]; + if ((count & 0xf) == 0) + (void)cf_inb_16(TF_STATUS); } + break; } - lba ++; + lba++; } #ifdef OCTEON_VISUAL_CF_0 octeon_led_write_char(0, ' '); @@ -320,28 +408,28 @@ static int cf_cmd_write (uint32_t nr_sectors, uint32_t start_sector, void *buf) return (error); } - if (bus_width == 8) { - volatile uint8_t *task_file; - volatile uint8_t dummy; - - task_file = (volatile uint8_t *) base_addr; + switch (bus_type) + { 
+ case CF_8: for (count = 0; count < SECTOR_SIZE; count++) { - task_file[TF_DATA] = *ptr_8++; - if ((count & 0xf) == 0) dummy = task_file[TF_STATUS]; + cf_outb_8(TF_DATA, *ptr_8++); + if ((count & 0xf) == 0) + (void)cf_inb_8(TF_STATUS); } - } else { - volatile uint16_t *task_file; - volatile uint16_t dummy; - - task_file = (volatile uint16_t *) base_addr; + break; + case CF_TRUE_IDE_8: + case CF_16: + default: for (count = 0; count < SECTOR_SIZE; count+=2) { uint16_t temp = *ptr_16++; - task_file[TF_DATA] = SWAP_SHORT(temp); - if ((count & 0xf) == 0) dummy = task_file[TF_STATUS/2]; + cf_outw_16(TF_DATA, SWAP_SHORT(temp)); + if ((count & 0xf) == 0) + (void)cf_inb_16(TF_STATUS); } + break; } - lba ++; + lba++; } #ifdef OCTEON_VISUAL_CF_1 octeon_led_write_char(1, ' '); @@ -361,59 +449,32 @@ static int cf_cmd_write (uint32_t nr_sectors, uint32_t start_sector, void *buf) static int cf_cmd_identify (void) { int count; - uint8_t status; int error; - if (bus_width == 8) { - volatile uint8_t *task_file; - - task_file = (volatile uint8_t *) base_addr; - - while ((status = task_file[TF_STATUS]) & STATUS_BSY) { - DELAY(WAIT_DELAY); - } - - task_file[TF_SECTOR_COUNT] = 0; - task_file[TF_SECTOR_NUMBER] = 0; - task_file[TF_CYL_LSB] = 0; - task_file[TF_CYL_MSB] = 0; - task_file[TF_DRV_HEAD] = 0; - task_file[TF_COMMAND] = CMD_IDENTIFY; - - error = cf_wait_busy(); - if (error == 0) { - for (count = 0; count < SECTOR_SIZE; count++) - drive_param.u.buf[count] = task_file[TF_DATA]; - } - } else { - volatile uint16_t *task_file; - - task_file = (volatile uint16_t *) base_addr; - - while ((status = (task_file[TF_STATUS/2]>>8)) & STATUS_BSY) { - DELAY(WAIT_DELAY); - } - - task_file[TF_SECTOR_COUNT/2] = 0; /* this includes TF_SECTOR_NUMBER */ - task_file[TF_CYL_LSB/2] = 0; /* this includes TF_CYL_MSB */ - task_file[TF_DRV_HEAD/2] = 0 | (CMD_IDENTIFY<<8); /* this includes TF_COMMAND */ - - error = cf_wait_busy(); - if (error == 0) { - for (count = 0; count < SECTOR_SIZE; count+=2) { - 
uint16_t temp; - temp = task_file[TF_DATA]; - - /* endianess will be swapped below */ - drive_param.u.buf[count] = (temp & 0xff); - drive_param.u.buf[count+1] = (temp & 0xff00)>>8; - } - } - } + error = cf_send_cmd(0, CMD_IDENTIFY); if (error != 0) { printf("%s: identify failed: %d\n", __func__, error); return (error); } + switch (bus_type) + { + case CF_8: + for (count = 0; count < SECTOR_SIZE; count++) + drive_param.u.buf[count] = cf_inb_8(TF_DATA); + break; + case CF_TRUE_IDE_8: + case CF_16: + default: + for (count = 0; count < SECTOR_SIZE; count += 2) { + uint16_t temp; + temp = cf_inw_16(TF_DATA); + + /* endianess will be swapped below */ + drive_param.u.buf[count] = (temp & 0xff); + drive_param.u.buf[count + 1] = (temp & 0xff00) >> 8; + } + break; + } cf_swap_ascii(drive_param.u.driveid.model, drive_param.model); @@ -423,6 +484,7 @@ static int cf_cmd_identify (void) drive_param.sec_track = SWAP_SHORT (drive_param.u.driveid.current_sectors); drive_param.nr_sectors = (uint32_t)SWAP_SHORT (drive_param.u.driveid.lba_size_1) | ((uint32_t)SWAP_SHORT (drive_param.u.driveid.lba_size_2)); + printf("cf0: <%s> %lld sectors\n", drive_param.model, (long long)drive_param.nr_sectors); return (0); } @@ -437,37 +499,27 @@ static int cf_cmd_identify (void) */ static int cf_send_cmd (uint32_t lba, uint8_t cmd) { - uint8_t status; - - if (bus_width == 8) { - volatile uint8_t *task_file; - - task_file = (volatile uint8_t *) base_addr; - - while ( (status = task_file[TF_STATUS]) & STATUS_BSY) { + switch (bus_type) + { + case CF_8: + case CF_TRUE_IDE_8: + while (cf_inb_8(TF_STATUS) & STATUS_BSY) DELAY(WAIT_DELAY); - } - - task_file[TF_SECTOR_COUNT] = 1; - task_file[TF_SECTOR_NUMBER] = (lba & 0xff); - task_file[TF_CYL_LSB] = ((lba >> 8) & 0xff); - task_file[TF_CYL_MSB] = ((lba >> 16) & 0xff); - task_file[TF_DRV_HEAD] = ((lba >> 24) & 0xff) | 0xe0; - task_file[TF_COMMAND] = cmd; - - } else { - volatile uint16_t *task_file; - - task_file = (volatile uint16_t *) base_addr; - - while ( 
(status = (task_file[TF_STATUS/2]>>8)) & STATUS_BSY) { + cf_outb_8(TF_SECTOR_COUNT, 1); + cf_outb_8(TF_SECTOR_NUMBER, lba & 0xff); + cf_outb_8(TF_CYL_LSB, (lba >> 8) & 0xff); + cf_outb_8(TF_CYL_MSB, (lba >> 16) & 0xff); + cf_outb_8(TF_DRV_HEAD, ((lba >> 24) & 0xff) | 0xe0); + cf_outb_8(TF_COMMAND, cmd); + break; + case CF_16: + default: + while (cf_inb_16(TF_STATUS) & STATUS_BSY) DELAY(WAIT_DELAY); - } - - task_file[TF_SECTOR_COUNT/2] = 1 | ((lba & 0xff) << 8); - task_file[TF_CYL_LSB/2] = ((lba >> 8) & 0xff) | (((lba >> 16) & 0xff) << 8); - task_file[TF_DRV_HEAD/2] = (((lba >> 24) & 0xff) | 0xe0) | (cmd << 8); - + cf_outw_16(TF_SECTOR_COUNT, 1 | ((lba & 0xff) << 8)); + cf_outw_16(TF_CYL_LSB, ((lba >> 8) & 0xff) | (((lba >> 16) & 0xff) << 8)); + cf_outw_16(TF_DRV_HEAD, (((lba >> 24) & 0xff) | 0xe0) | (cmd << 8)); + break; } return (cf_wait_busy()); @@ -499,32 +551,32 @@ static int cf_wait_busy (void) octeon_led_run_wheel(&where0, 2); #endif - if (bus_width == 8) { - volatile uint8_t *task_file; - task_file = (volatile uint8_t *)base_addr; - - status = task_file[TF_STATUS]; + switch (bus_type) + { + case CF_8: + case CF_TRUE_IDE_8: + status = cf_inb_8(TF_STATUS); while ((status & STATUS_BSY) == STATUS_BSY) { if ((status & STATUS_DF) != 0) { printf("%s: device fault (status=%x)\n", __func__, status); return (EIO); } DELAY(WAIT_DELAY); - status = task_file[TF_STATUS]; + status = cf_inb_8(TF_STATUS); } - } else { - volatile uint16_t *task_file; - task_file = (volatile uint16_t *)base_addr; - - status = task_file[TF_STATUS/2]>>8; + break; + case CF_16: + default: + status = cf_inb_16(TF_STATUS); while ((status & STATUS_BSY) == STATUS_BSY) { if ((status & STATUS_DF) != 0) { printf("%s: device fault (status=%x)\n", __func__, status); return (EIO); } DELAY(WAIT_DELAY); - status = (uint8_t)(task_file[TF_STATUS/2]>>8); + status = cf_inb_16(TF_STATUS); } + break; } if ((status & STATUS_DRQ) == 0) { printf("%s: device not ready (status=%x)\n", __func__, status); @@ -550,9 
+602,8 @@ static void cf_swap_ascii (unsigned char str1[], char str2[]) { int i; - for(i = 0; i < MODEL_STR_SIZE; i++) { - str2[i] = str1[i^1]; - } + for(i = 0; i < MODEL_STR_SIZE; i++) + str2[i] = str1[i ^ 1]; } @@ -562,7 +613,8 @@ static void cf_swap_ascii (unsigned char str1[], char str2[]) static int cf_probe (device_t dev) { - if (octeon_is_simulation()) return 1; + if (octeon_is_simulation()) + return (ENXIO); if (device_get_unit(dev) != 0) { panic("can't attach more devices\n"); @@ -582,9 +634,9 @@ static int cf_probe (device_t dev) * inserted. * */ +typedef unsigned long long llu; static void cf_identify (driver_t *drv, device_t parent) { - uint8_t status; int bus_region; int count = 0; cvmx_mio_boot_reg_cfgx_t cfg; @@ -599,34 +651,39 @@ static void cf_identify (driver_t *drv, device_t parent) cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(bus_region)); if (cfg.s.base == octeon_bootinfo->compact_flash_common_base_addr >> 16) { - bus_width = (cfg.s.width) ? 16: 8; - printf("Compact flash found in bootbus region %d (%d bit).\n", bus_region, bus_width); + if (octeon_bootinfo->compact_flash_attribute_base_addr == 0) + bus_type = CF_TRUE_IDE_8; + else + bus_type = (cfg.s.width) ? 
CF_16 : CF_8; + printf("Compact flash found in bootbus region %d (%s).\n", bus_region, cf_type[bus_type]); break; } } - if (bus_width == 8) { - volatile uint8_t *task_file; - task_file = (volatile uint8_t *) base_addr; + switch (bus_type) + { + case CF_8: + case CF_TRUE_IDE_8: /* Check if CF is inserted */ - while ( (status = task_file[TF_STATUS]) & STATUS_BSY){ - if ((count++) == NR_TRIES ) { + while (cf_inb_8(TF_STATUS) & STATUS_BSY) { + if ((count++) == NR_TRIES ) { printf("Compact Flash not present\n"); return; } DELAY(WAIT_DELAY); } - } else { - volatile uint16_t *task_file; - task_file = (volatile uint16_t *) base_addr; + break; + case CF_16: + default: /* Check if CF is inserted */ - while ( (status = (task_file[TF_STATUS/2]>>8)) & STATUS_BSY){ - if ((count++) == NR_TRIES ) { + while (cf_inb_16(TF_STATUS) & STATUS_BSY) { + if ((count++) == NR_TRIES ) { printf("Compact Flash not present\n"); return; } DELAY(WAIT_DELAY); } + break; } BUS_ADD_CHILD(parent, 0, "cf", 0); @@ -655,7 +712,7 @@ static int cf_attach_geom (void *arg, int flag) * ------------------------------------------------------------------- */ static void cf_attach_geom_proxy (void *arg, int flag) { - cf_attach_geom(arg, flag); + cf_attach_geom(arg, flag); } @@ -668,7 +725,8 @@ static int cf_attach (device_t dev) { struct cf_priv *cf_priv; - if (octeon_is_simulation()) return 1; + if (octeon_is_simulation()) + return (ENXIO); cf_priv = device_get_softc(dev); cf_priv->dev = dev; @@ -701,4 +759,3 @@ static driver_t cf_driver = { static devclass_t cf_devclass; DRIVER_MODULE(cf, nexus, cf_driver, cf_devclass, 0, 0); - diff --git a/sys/mips/cavium/octeon_mp.c b/sys/mips/cavium/octeon_mp.c index 78eafa6..efddee8 100644 --- a/sys/mips/cavium/octeon_mp.c +++ b/sys/mips/cavium/octeon_mp.c @@ -102,10 +102,18 @@ platform_init_ap(int cpuid) mips_wbflush(); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { - return (octeon_bootinfo->core_mask); + + CPU_ZERO(mask); + + /* + * XXX: 
hack in order to simplify CPU set building, assuming that + * core_mask is 32-bits. + */ + memcpy(mask, &octeon_bootinfo->core_mask, + sizeof(octeon_bootinfo->core_mask)); } struct cpu_group * diff --git a/sys/mips/include/_types.h b/sys/mips/include/_types.h index 4d57e20..2f23db6 100644 --- a/sys/mips/include/_types.h +++ b/sys/mips/include/_types.h @@ -73,7 +73,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef double __double_t; typedef double __float_t; #ifdef __mips_n64 diff --git a/sys/mips/include/hwfunc.h b/sys/mips/include/hwfunc.h index 683aedb..a9e3285 100644 --- a/sys/mips/include/hwfunc.h +++ b/sys/mips/include/hwfunc.h @@ -28,6 +28,8 @@ #ifndef _MACHINE_HWFUNC_H_ #define _MACHINE_HWFUNC_H_ +#include <sys/_cpuset.h> + struct trapframe; struct timecounter; /* @@ -91,7 +93,7 @@ extern int platform_processor_id(void); /* * Return the cpumask of available processors. 
*/ -extern cpumask_t platform_cpu_mask(void); +extern void platform_cpu_mask(cpuset_t *mask); /* * Return the topology of processors on this platform diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h index e710635..90375eb 100644 --- a/sys/mips/include/pmap.h +++ b/sys/mips/include/pmap.h @@ -58,6 +58,7 @@ #ifndef LOCORE #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> @@ -83,7 +84,7 @@ struct pmap { pd_entry_t *pm_segtab; /* KVA of segment table */ TAILQ_HEAD(, pv_entry) pm_pvlist; /* list of mappings in * pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct { u_int32_t asid:ASID_BITS; /* TLB address space tag */ u_int32_t gen:ASIDGEN_BITS; /* its generation number */ diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h index 58aaf03..0fcca9a 100644 --- a/sys/mips/include/smp.h +++ b/sys/mips/include/smp.h @@ -17,6 +17,8 @@ #ifdef _KERNEL +#include <sys/_cpuset.h> + #include <machine/pcb.h> /* @@ -33,7 +35,7 @@ void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); void smp_init_secondary(u_int32_t cpuid); void mpentry(void); diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c index e945736..79a3476 100644 --- a/sys/mips/mips/mp_machdep.c +++ b/sys/mips/mips/mp_machdep.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/cpuset.h> #include <sys/ktr.h> #include <sys/proc.h> #include <sys/lock.h> @@ -80,15 +81,16 @@ ipi_all_but_self(int ipi) /* Send an IPI to a set of cpus. 
*/ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; - CTR3(KTR_SMP, "%s: cpus: %x, ipi: %x\n", __func__, cpus, ipi); - - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { - if ((cpus & pc->pc_cpumask) != 0) + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) { + CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc, + ipi); ipi_send(pc, ipi); + } } } @@ -108,7 +110,7 @@ static int mips_ipi_handler(void *arg) { int cpu; - cpumask_t cpumask; + cpuset_t cpumask; u_int ipi, ipi_bitmap; int bit; @@ -148,14 +150,14 @@ mips_ipi_handler(void *arg) tlb_save(); /* Indicate we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while ((started_cpus & cpumask) == 0) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) cpu_spinwait(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); CTR0(KTR_SMP, "IPI_STOP (restart)"); break; case IPI_PREEMPT: @@ -200,14 +202,22 @@ start_ap(int cpuid) void cpu_mp_setmaxid(void) { - cpumask_t cpumask; - - cpumask = platform_cpu_mask(); - mp_ncpus = bitcount32(cpumask); + cpuset_t cpumask; + int cpu, last; + + platform_cpu_mask(&cpumask); + mp_ncpus = 0; + last = 1; + while ((cpu = cpusetobj_ffs(&cpumask)) != 0) { + last = cpu; + cpu--; + CPU_CLR(cpu, &cpumask); + mp_ncpus++; + } if (mp_ncpus <= 0) mp_ncpus = 1; - mp_maxid = min(fls(cpumask), MAXCPU) - 1; + mp_maxid = min(last, MAXCPU) - 1; } void @@ -233,16 +243,16 @@ void cpu_mp_start(void) { int error, cpuid; - cpumask_t cpumask; + cpuset_t cpumask, ocpus; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); - all_cpus = 0; - cpumask = platform_cpu_mask(); + CPU_ZERO(&all_cpus); + platform_cpu_mask(&cpumask); - while (cpumask != 0) { - cpuid = ffs(cpumask) - 1; - cpumask &= ~(1 << cpuid); + while (!CPU_EMPTY(&cpumask)) { + cpuid = 
cpusetobj_ffs(&cpumask) - 1; + CPU_CLR(cpuid, &cpumask); if (cpuid >= MAXCPU) { printf("cpu_mp_start: ignoring AP #%d.\n", cpuid); @@ -257,15 +267,19 @@ cpu_mp_start(void) if (bootverbose) printf("AP #%d started!\n", cpuid); } - all_cpus |= 1 << cpuid; + CPU_SET(cpuid, &all_cpus); } - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + ocpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ocpus); + PCPU_SET(other_cpus, ocpus); } void smp_init_secondary(u_int32_t cpuid) { + cpuset_t ocpus; + /* TLB */ mips_wr_wired(0); tlb_invalidate_all(); @@ -303,7 +317,9 @@ smp_init_secondary(u_int32_t cpuid) CTR1(KTR_SMP, "SMP: AP CPU #%d launched", PCPU_GET(cpuid)); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + ocpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ocpus); + PCPU_SET(other_cpus, ocpus); if (bootverbose) printf("SMP: AP CPU #%d launched.\n", PCPU_GET(cpuid)); diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 7f0f4f0..f7ea660 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -471,7 +471,7 @@ pmap_create_kernel_pagetable(void) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_segtab = kernel_segmap; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); TAILQ_INIT(&kernel_pmap->pm_pvlist); kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; kernel_pmap->pm_asid[0].gen = 0; @@ -630,10 +630,14 @@ pmap_invalidate_all_local(pmap_t pmap) tlb_invalidate_all(); return; } - if (pmap->pm_active & PCPU_GET(cpumask)) + sched_pin(); + if (CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { + sched_unpin(); tlb_invalidate_all_user(pmap); - else + } else { + sched_unpin(); pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + } } #ifdef SMP @@ -667,12 +671,16 @@ pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va) tlb_invalidate_address(pmap, va); return; } - if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + sched_pin(); + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { + 
sched_unpin(); return; - else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + sched_unpin(); return; } + sched_unpin(); tlb_invalidate_address(pmap, va); } @@ -716,12 +724,16 @@ pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte) tlb_update(pmap, va, pte); return; } - if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + sched_pin(); + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { + sched_unpin(); return; - else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + sched_unpin(); return; } + sched_unpin(); tlb_update(pmap, va, pte); } @@ -1041,7 +1053,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_segtab = kernel_segmap; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); pmap->pm_ptphint = NULL; for (i = 0; i < MAXCPU; i++) { pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; @@ -1102,7 +1114,7 @@ pmap_pinit(pmap_t pmap) ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); pmap->pm_segtab = (pd_entry_t *)ptdva; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); pmap->pm_ptphint = NULL; for (i = 0; i < MAXCPU; i++) { pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; @@ -2948,8 +2960,8 @@ pmap_activate(struct thread *td) oldpmap = PCPU_GET(curpmap); if (oldpmap) - atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); pmap_asid_alloc(pmap); if (td == curthread) { PCPU_SET(segbase, pmap->pm_segtab); @@ -3283,7 +3295,7 @@ pmap_kextract(vm_offset_t va) pt_entry_t *ptep; /* Is the kernel pmap initialized? 
*/ - if (kernel_pmap->pm_active) { + if (!CPU_EMPTY(&kernel_pmap->pm_active)) { /* It's inside the virtual address range */ ptep = pmap_pte(kernel_pmap, va); if (ptep) { diff --git a/sys/mips/rmi/xlr_machdep.c b/sys/mips/rmi/xlr_machdep.c index 4a1734a..836c605 100644 --- a/sys/mips/rmi/xlr_machdep.c +++ b/sys/mips/rmi/xlr_machdep.c @@ -614,11 +614,15 @@ platform_processor_id(void) return (xlr_hwtid_to_cpuid[xlr_cpu_id()]); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { + int i, s; - return (~0U >> (32 - (xlr_ncores * xlr_threads_per_core))); + CPU_ZERO(mask); + s = xlr_ncores * xlr_threads_per_core; + for (i = 0; i < s; i++) + CPU_SET(i, mask); } struct cpu_group * diff --git a/sys/mips/sibyte/sb_scd.c b/sys/mips/sibyte/sb_scd.c index e5ac23c..50b9987 100644 --- a/sys/mips/sibyte/sb_scd.c +++ b/sys/mips/sibyte/sb_scd.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/module.h> #include <sys/bus.h> +#include <sys/cpuset.h> #include <machine/resource.h> #include <machine/hwfunc.h> @@ -242,11 +243,15 @@ sb_clear_mailbox(int cpu, uint64_t val) sb_store64(regaddr, val); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { + int i, s; - return (~0U >> (32 - SYSREV_NUM_PROCESSORS(sb_read_sysrev()))); + CPU_ZERO(mask); + s = SYSREV_NUM_PROCESSORS(sb_read_sysrev()); + for (i = 0; i < s; i++) + CPU_SET(i, mask); } #endif /* SMP */ diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 22e0f20..8775e19 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -156,6 +156,8 @@ SUBDIR= ${_3dfx} \ jme \ joy \ kbdmux \ + kgssapi \ + kgssapi_krb5 \ khelp \ krpc \ ksyms \ diff --git a/sys/modules/ath/Makefile b/sys/modules/ath/Makefile index b66ec09..49f7fa6 100644 --- a/sys/modules/ath/Makefile +++ b/sys/modules/ath/Makefile @@ -116,6 +116,12 @@ SRCS+= ar9280.c ar9280_attach.c ar9280_olc.c SRCS+= ar9285.c ar9285_reset.c ar9285_attach.c ar9285_cal.c ar9285_phy.c SRCS+= 
ar9285_diversity.c +# + AR9287 - Kiwi +.PATH: ${.CURDIR}/../../dev/ath/ath_hal +SRCS+= ah_eeprom_9287.c +.PATH: ${.CURDIR}/../../dev/ath/ath_hal/ar9002 +SRCS+= ar9287.c ar9287_reset.c ar9287_attach.c ar9287_cal.c ar9287_olc.c + # NB: rate control is bound to the driver by symbol names so only pick one .if ${ATH_RATE} == "sample" .PATH: ${.CURDIR}/../../dev/ath/ath_rate/sample @@ -128,6 +134,10 @@ SRCS+= onoe.c SRCS+= amrr.c .endif +# DFS +.PATH: ${.CURDIR}/../../dev/ath/ath_dfs/null +SRCS+= dfs_null.c + CFLAGS+= -I. -I${.CURDIR}/../../dev/ath -I${.CURDIR}/../../dev/ath/ath_hal opt_ah.h: diff --git a/sys/modules/cxgbe/if_cxgbe/Makefile b/sys/modules/cxgbe/if_cxgbe/Makefile index a524cde..ac1d22c 100644 --- a/sys/modules/cxgbe/if_cxgbe/Makefile +++ b/sys/modules/cxgbe/if_cxgbe/Makefile @@ -6,7 +6,7 @@ CXGBE = ${.CURDIR}/../../../dev/cxgbe .PATH: ${CXGBE} ${CXGBE}/common KMOD = if_cxgbe -SRCS = t4_main.c t4_sge.c +SRCS = t4_main.c t4_sge.c t4_l2t.c SRCS+= t4_hw.c SRCS+= device_if.h bus_if.h pci_if.h SRCS+= opt_inet.h diff --git a/sys/modules/dtrace/Makefile b/sys/modules/dtrace/Makefile index 09fe55e..316b859 100644 --- a/sys/modules/dtrace/Makefile +++ b/sys/modules/dtrace/Makefile @@ -4,6 +4,7 @@ .include "Makefile.inc" SUBDIR= dtmalloc \ + dtnfscl \ dtnfsclient \ dtrace \ dtraceall \ diff --git a/sys/modules/dtrace/dtnfscl/Makefile b/sys/modules/dtrace/dtnfscl/Makefile new file mode 100644 index 0000000..0296232 --- /dev/null +++ b/sys/modules/dtrace/dtnfscl/Makefile @@ -0,0 +1,13 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../fs/nfsclient + +KMOD= dtnfscl +SRCS= nfs_clkdtrace.c \ + vnode_if.h + +CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris \ + -I${.CURDIR}/../../../cddl/contrib/opensolaris/uts/common \ + -I${.CURDIR}/../../.. 
+ +.include <bsd.kmod.mk> diff --git a/sys/modules/dtrace/dtraceall/dtraceall.c b/sys/modules/dtrace/dtraceall/dtraceall.c index 76172b7..61896bf 100644 --- a/sys/modules/dtrace/dtraceall/dtraceall.c +++ b/sys/modules/dtrace/dtraceall/dtraceall.c @@ -66,6 +66,7 @@ MODULE_DEPEND(dtraceall, cyclic, 1, 1, 1); MODULE_DEPEND(dtraceall, opensolaris, 1, 1, 1); MODULE_DEPEND(dtraceall, dtrace, 1, 1, 1); MODULE_DEPEND(dtraceall, dtmalloc, 1, 1, 1); +MODULE_DEPEND(dtraceall, dtnfscl, 1, 1, 1); MODULE_DEPEND(dtraceall, dtnfsclient, 1, 1, 1); #if defined(__amd64__) || defined(__i386__) MODULE_DEPEND(dtraceall, fbt, 1, 1, 1); diff --git a/sys/modules/kgssapi_krb5/Makefile b/sys/modules/kgssapi_krb5/Makefile index c2ee417..e5c3e30 100644 --- a/sys/modules/kgssapi_krb5/Makefile +++ b/sys/modules/kgssapi_krb5/Makefile @@ -8,7 +8,8 @@ SRCS= krb5_mech.c \ kcrypto_des.c \ kcrypto_des3.c \ kcrypto_aes.c \ - kcrypto_arcfour.c + kcrypto_arcfour.c \ + opt_inet6.h SRCS+= kgss_if.h gssd.h MFILES= kgssapi/kgss_if.m diff --git a/sys/modules/usb/Makefile b/sys/modules/usb/Makefile index 38f56cb..6288d66 100644 --- a/sys/modules/usb/Makefile +++ b/sys/modules/usb/Makefile @@ -30,7 +30,7 @@ SUBDIR += ehci musb ohci uhci xhci uss820dci ${_at91dci} ${_atmegadci} SUBDIR += rum run uath upgt ural zyd ${_urtw} SUBDIR += atp uhid ukbd ums udbp ufm uep SUBDIR += ucom u3g uark ubsa ubser uchcom ucycom ufoma uftdi ugensa uipaq ulpt \ - umct umodem umoscom uplcom uslcom uvisor uvscom + umct umcs umodem umoscom uplcom uslcom uvisor uvscom SUBDIR += uether aue axe cdce cue kue mos rue udav uhso ipheth SUBDIR += usfs umass urio SUBDIR += quirk template diff --git a/sys/modules/usb/umcs/Makefile b/sys/modules/usb/umcs/Makefile new file mode 100644 index 0000000..be75fb5 --- /dev/null +++ b/sys/modules/usb/umcs/Makefile @@ -0,0 +1,36 @@ +# +# $FreeBSD$ +# +# Copyright (c) 2011 Hans Petter Selasky. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +S= ${.CURDIR}/../../.. 
+ +.PATH: $S/dev/usb/serial + +KMOD= umcs +SRCS= opt_bus.h opt_usb.h device_if.h bus_if.h usb_if.h usbdevs.h \ + umcs.c + +.include <bsd.kmod.mk> diff --git a/sys/net/bridgestp.c b/sys/net/bridgestp.c index 2993838..e263b0b 100644 --- a/sys/net/bridgestp.c +++ b/sys/net/bridgestp.c @@ -1860,6 +1860,8 @@ bstp_tick(void *arg) if (bs->bs_running == 0) return; + CURVNET_SET(bs->bs_vnet); + /* slow timer to catch missed link events */ if (bstp_timer_expired(&bs->bs_link_timer)) { LIST_FOREACH(bp, &bs->bs_bplist, bp_next) @@ -1893,6 +1895,8 @@ bstp_tick(void *arg) bp->bp_txcount--; } + CURVNET_RESTORE(); + callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs); } @@ -2126,6 +2130,7 @@ bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb) bs->bs_protover = BSTP_PROTO_RSTP; bs->bs_state_cb = cb->bcb_state; bs->bs_rtage_cb = cb->bcb_rtage; + bs->bs_vnet = curvnet; getmicrotime(&bs->bs_last_tc_time); diff --git a/sys/net/bridgestp.h b/sys/net/bridgestp.h index 74086fc..fdf16aa 100644 --- a/sys/net/bridgestp.h +++ b/sys/net/bridgestp.h @@ -358,6 +358,7 @@ struct bstp_state { LIST_HEAD(, bstp_port) bs_bplist; bstp_state_cb_t bs_state_cb; bstp_rtage_cb_t bs_rtage_cb; + struct vnet *bs_vnet; }; #define BSTP_LOCK_INIT(_bs) mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF) diff --git a/sys/net/if.h b/sys/net/if.h index d291da8..06521cb 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -199,6 +199,13 @@ struct if_data { * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differenciated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h + * + * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl + * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. + * This is not strictly necessary because the common code never + * changes capabilities, and it is left to the individual driver + * to do the right thing. However, having the filter here + * avoids replication of the same code in all individual drivers. 
*/ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ @@ -220,12 +227,15 @@ struct if_data { #define IFCAP_POLLING_NOCOUNT 0x20000 /* polling ticks cannot be fragmented */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ +#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) +#define IFCAP_CANTCHANGE (IFCAP_NETMAP) + #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 5e5a548..be90f5a 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -561,7 +561,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared) * mbuf chain m with the ethernet header at the front. */ static void -ether_input(struct ifnet *ifp, struct mbuf *m) +ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; @@ -755,6 +755,46 @@ ether_input(struct ifnet *ifp, struct mbuf *m) } /* + * Ethernet input dispatch; by default, direct dispatch here regardless of + * global configuration. 
+ */ +static void +ether_nh_input(struct mbuf *m) +{ + + ether_input_internal(m->m_pkthdr.rcvif, m); +} + +static struct netisr_handler ether_nh = { + .nh_name = "ether", + .nh_handler = ether_nh_input, + .nh_proto = NETISR_ETHER, + .nh_policy = NETISR_POLICY_SOURCE, + .nh_dispatch = NETISR_DISPATCH_DIRECT, +}; + +static void +ether_init(__unused void *arg) +{ + + netisr_register(&ether_nh); +} +SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); + +static void +ether_input(struct ifnet *ifp, struct mbuf *m) +{ + + /* + * We will rely on rcvif being set properly in the deferred context, + * so assert it is correct here. + */ + KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__)); + + netisr_dispatch(NETISR_ETHER, m); +} + +/* * Upper layer processing for a received Ethernet packet. */ void diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c index de968af..5f8156b 100644 --- a/sys/net/if_gre.c +++ b/sys/net/if_gre.c @@ -48,6 +48,7 @@ #include <sys/param.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <sys/libkern.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mbuf.h> @@ -91,6 +92,14 @@ #define GRENAME "gre" +#define MTAG_COOKIE_GRE 1307983903 +#define MTAG_GRE_NESTING 1 +struct mtag_gre_nesting { + uint16_t count; + uint16_t max; + struct ifnet *ifp[]; +}; + /* * gre_mtx protects all global variables in if_gre.c. * XXX: gre_softc data not protected yet. 
@@ -196,7 +205,6 @@ gre_clone_create(ifc, unit, params) sc->g_proto = IPPROTO_GRE; GRE2IFP(sc)->if_flags |= IFF_LINK0; sc->encap = NULL; - sc->called = 0; sc->gre_fibnum = curthread->td_proc->p_fibnum; sc->wccp_ver = WCCP_V1; sc->key = 0; @@ -240,23 +248,77 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct gre_softc *sc = ifp->if_softc; struct greip *gh; struct ip *ip; + struct m_tag *mtag; + struct mtag_gre_nesting *gt; + size_t len; u_short gre_ip_id = 0; uint8_t gre_ip_tos = 0; u_int16_t etype = 0; struct mobile_h mob_h; u_int32_t af; - int extra = 0; + int extra = 0, max; /* - * gre may cause infinite recursion calls when misconfigured. - * We'll prevent this by introducing upper limit. + * gre may cause infinite recursion calls when misconfigured. High + * nesting level may cause stack exhaustion. We'll prevent this by + * detecting loops and by introducing upper limit. */ - if (++(sc->called) > max_gre_nesting) { - printf("%s: gre_output: recursively called too many " - "times(%d)\n", if_name(GRE2IFP(sc)), sc->called); - m_freem(m); - error = EIO; /* is there better errno? */ - goto end; + mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); + if (mtag != NULL) { + struct ifnet **ifp2; + + gt = (struct mtag_gre_nesting *)(mtag + 1); + gt->count++; + if (gt->count > min(gt->max,max_gre_nesting)) { + printf("%s: hit maximum recursion limit %u on %s\n", + __func__, gt->count - 1, ifp->if_xname); + m_freem(m); + error = EIO; /* is there better errno? */ + goto end; + } + + ifp2 = gt->ifp; + for (max = gt->count - 1; max > 0; max--) { + if (*ifp2 == ifp) + break; + ifp2++; + } + if (*ifp2 == ifp) { + printf("%s: detected loop with nexting %u on %s\n", + __func__, gt->count-1, ifp->if_xname); + m_freem(m); + error = EIO; /* is there better errno? 
*/ + goto end; + } + *ifp2 = ifp; + + } else { + /* + * Given that people should NOT increase max_gre_nesting beyond + * their real needs, we allocate once per packet rather than + * allocating an mtag once per passing through gre. + * + * Note: the sysctl does not actually check for saneness, so we + * limit the maximum numbers of possible recursions here. + */ + max = imin(max_gre_nesting, 256); + /* If someone sets the sysctl <= 0, we want at least 1. */ + max = imax(max, 1); + len = sizeof(struct mtag_gre_nesting) + + max * sizeof(struct ifnet *); + mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, + M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + error = ENOMEM; + goto end; + } + gt = (struct mtag_gre_nesting *)(mtag + 1); + bzero(gt, len); + gt->count = 1; + gt->max = max; + *gt->ifp = ifp; + m_tag_prepend(m, mtag); } if (!((ifp->if_flags & IFF_UP) && @@ -444,7 +506,6 @@ gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, error = ip_output(m, NULL, &sc->route, IP_FORWARDING, (struct ip_moptions *)NULL, (struct inpcb *)NULL); end: - sc->called = 0; if (error) ifp->if_oerrors++; return (error); diff --git a/sys/net/if_gre.h b/sys/net/if_gre.h index e23daef..74d16b1 100644 --- a/sys/net/if_gre.h +++ b/sys/net/if_gre.h @@ -61,8 +61,6 @@ struct gre_softc { const struct encaptab *encap; /* encapsulation cookie */ - int called; /* infinite recursion preventer */ - uint32_t key; /* key included in outgoing GRE packets */ /* zero means none */ diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c index 4e727d9..49a5249 100644 --- a/sys/net/if_tun.c +++ b/sys/net/if_tun.c @@ -126,7 +126,7 @@ static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev); static void tuncreate(const char *name, struct cdev *dev); static int tunifioctl(struct ifnet *, u_long, caddr_t); -static int tuninit(struct ifnet *); +static void tuninit(struct ifnet *); static int tunmodevent(module_t, int, void *); static int tunoutput(struct 
ifnet *, struct mbuf *, struct sockaddr *, struct route *ro); @@ -494,14 +494,13 @@ tunclose(struct cdev *dev, int foo, int bar, struct thread *td) return (0); } -static int +static void tuninit(struct ifnet *ifp) { struct tun_softc *tp = ifp->if_softc; #ifdef INET struct ifaddr *ifa; #endif - int error = 0; TUNDEBUG(ifp, "tuninit\n"); @@ -528,7 +527,6 @@ tuninit(struct ifnet *ifp) if_addr_runlock(ifp); #endif mtx_unlock(&tp->tun_mtx); - return (error); } /* @@ -552,12 +550,12 @@ tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) mtx_unlock(&tp->tun_mtx); break; case SIOCSIFADDR: - error = tuninit(ifp); - TUNDEBUG(ifp, "address set, error=%d\n", error); + tuninit(ifp); + TUNDEBUG(ifp, "address set\n"); break; case SIOCSIFDSTADDR: - error = tuninit(ifp); - TUNDEBUG(ifp, "destination address set, error=%d\n", error); + tuninit(ifp); + TUNDEBUG(ifp, "destination address set\n"); break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; @@ -857,7 +855,6 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); struct mbuf *m; - int error = 0; uint32_t family; int isr; @@ -877,7 +874,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) if ((m = m_uiotombuf(uio, M_DONTWAIT, 0, 0, M_PKTHDR)) == NULL) { ifp->if_ierrors++; - return (error); + return (ENOBUFS); } m->m_pkthdr.rcvif = ifp; diff --git a/sys/net/netisr.c b/sys/net/netisr.c index 67ec160..127cf67 100644 --- a/sys/net/netisr.c +++ b/sys/net/netisr.c @@ -1221,7 +1221,7 @@ netisr_start(void *arg) { struct pcpu *pc; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (nws_count >= netisr_maxthreads) break; /* XXXRW: Is skipping absent CPUs still required here? 
*/ diff --git a/sys/net/route.c b/sys/net/route.c index a41efa9..a6f910e 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1189,6 +1189,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, rt0 = NULL; /* XXX * "flow-table" only support IPv4 at the moment. + * XXX-BZ as of r205066 it would support IPv6. */ #ifdef INET if (dst->sa_family == AF_INET) { diff --git a/sys/net80211/ieee80211_acl.c b/sys/net80211/ieee80211_acl.c index cb20b87..da505e3 100644 --- a/sys/net80211/ieee80211_acl.c +++ b/sys/net80211/ieee80211_acl.c @@ -77,7 +77,7 @@ struct acl { struct aclstate { acl_lock_t as_lock; int as_policy; - int as_nacls; + uint32_t as_nacls; TAILQ_HEAD(, acl) as_list; /* list of all ACL's */ LIST_HEAD(, acl) as_hash[ACL_HASHSIZE]; struct ieee80211vap *as_vap; @@ -289,7 +289,8 @@ acl_getioctl(struct ieee80211vap *vap, struct ieee80211req *ireq) struct aclstate *as = vap->iv_as; struct acl *acl; struct ieee80211req_maclist *ap; - int error, space, i; + int error; + uint32_t i, space; switch (ireq->i_val) { case IEEE80211_MACCMD_POLICY: diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index 8c4d7d3..61c84e9 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -217,6 +217,9 @@ static int ieee80211_addba_response(struct ieee80211_node *ni, int code, int baparamset, int batimeout); static void ieee80211_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap); +static void null_addba_response_timeout(struct ieee80211_node *ni, + struct ieee80211_tx_ampdu *tap); + static void ieee80211_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int status); static void ampdu_tx_stop(struct ieee80211_tx_ampdu *tap); @@ -234,6 +237,7 @@ ieee80211_ht_attach(struct ieee80211com *ic) ic->ic_ampdu_enable = ieee80211_ampdu_enable; ic->ic_addba_request = ieee80211_addba_request; ic->ic_addba_response = ieee80211_addba_response; + ic->ic_addba_response_timeout = 
null_addba_response_timeout; ic->ic_addba_stop = ieee80211_addba_stop; ic->ic_bar_response = ieee80211_bar_response; ic->ic_ampdu_rx_start = ampdu_rx_start; @@ -1691,14 +1695,23 @@ ampdu_tx_stop(struct ieee80211_tx_ampdu *tap) tap->txa_flags &= ~(IEEE80211_AGGR_SETUP | IEEE80211_AGGR_NAK); } +/* + * ADDBA response timeout. + * + * If software aggregation and per-TID queue management was done here, + * that queue would be unpaused after the ADDBA timeout occurs. + */ static void addba_timeout(void *arg) { struct ieee80211_tx_ampdu *tap = arg; + struct ieee80211_node *ni = tap->txa_ni; + struct ieee80211com *ic = ni->ni_ic; /* XXX ? */ tap->txa_flags &= ~IEEE80211_AGGR_XCHGPEND; tap->txa_attempts++; + ic->ic_addba_response_timeout(ni, tap); } static void @@ -1721,6 +1734,12 @@ addba_stop_timeout(struct ieee80211_tx_ampdu *tap) } } +static void +null_addba_response_timeout(struct ieee80211_node *ni, + struct ieee80211_tx_ampdu *tap) +{ +} + /* * Default method for requesting A-MPDU tx aggregation. * We setup the specified state block and start a timer @@ -2520,6 +2539,7 @@ ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni) frm[1] = (v) >> 8; \ frm += 2; \ } while (0) + struct ieee80211com *ic = ni->ni_ic; struct ieee80211vap *vap = ni->ni_vap; uint16_t caps, extcaps; int rxmax, density; @@ -2543,6 +2563,17 @@ ieee80211_add_htcap_body(uint8_t *frm, struct ieee80211_node *ni) /* use advertised setting (XXX locally constraint) */ rxmax = MS(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU); density = MS(ni->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY); + + /* + * NB: Hardware might support HT40 on some but not all + * channels. We can't determine this earlier because only + * after association the channel is upgraded to HT based + * on the negotiated capabilities. 
+ */ + if (ni->ni_chan != IEEE80211_CHAN_ANYC && + findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40U) == NULL && + findhtchan(ic, ni->ni_chan, IEEE80211_CHAN_HT40D) == NULL) + caps &= ~IEEE80211_HTCAP_CHWIDTH40; } else { /* override 20/40 use based on current channel */ if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) diff --git a/sys/net80211/ieee80211_ioctl.c b/sys/net80211/ieee80211_ioctl.c index 37d5dbe..f8e1785 100644 --- a/sys/net80211/ieee80211_ioctl.c +++ b/sys/net80211/ieee80211_ioctl.c @@ -143,7 +143,7 @@ static __noinline int ieee80211_ioctl_getchaninfo(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; - int space; + uint32_t space; space = __offsetof(struct ieee80211req_chaninfo, ic_chans[ic->ic_nchans]); @@ -207,7 +207,7 @@ ieee80211_ioctl_getstastats(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; uint8_t macaddr[IEEE80211_ADDR_LEN]; - const int off = __offsetof(struct ieee80211req_sta_stats, is_stats); + const size_t off = __offsetof(struct ieee80211req_sta_stats, is_stats); int error; if (ireq->i_len < off) @@ -323,7 +323,7 @@ ieee80211_ioctl_getscanresults(struct ieee80211vap *vap, if (req.space > ireq->i_len) req.space = ireq->i_len; if (req.space > 0) { - size_t space; + uint32_t space; void *p; space = req.space; @@ -458,7 +458,7 @@ get_sta_info(void *arg, struct ieee80211_node *ni) static __noinline int getstainfo_common(struct ieee80211vap *vap, struct ieee80211req *ireq, - struct ieee80211_node *ni, int off) + struct ieee80211_node *ni, size_t off) { struct ieee80211com *ic = vap->iv_ic; struct stainforeq req; @@ -503,7 +503,7 @@ static __noinline int ieee80211_ioctl_getstainfo(struct ieee80211vap *vap, struct ieee80211req *ireq) { uint8_t macaddr[IEEE80211_ADDR_LEN]; - const int off = __offsetof(struct ieee80211req_sta_req, info); + const size_t off = __offsetof(struct ieee80211req_sta_req, info); struct ieee80211_node *ni; int error; diff --git 
a/sys/net80211/ieee80211_ioctl.h b/sys/net80211/ieee80211_ioctl.h index 7215a5e..cad5576 100644 --- a/sys/net80211/ieee80211_ioctl.h +++ b/sys/net80211/ieee80211_ioctl.h @@ -578,7 +578,7 @@ struct ieee80211req { char i_name[IFNAMSIZ]; /* if_name, e.g. "wi0" */ uint16_t i_type; /* req type */ int16_t i_val; /* Index or simple value */ - int16_t i_len; /* Index or simple value */ + uint16_t i_len; /* Index or simple value */ void *i_data; /* Extra data */ }; #define SIOCS80211 _IOW('i', 234, struct ieee80211req) diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c index b689310..6020144 100644 --- a/sys/net80211/ieee80211_output.c +++ b/sys/net80211/ieee80211_output.c @@ -516,6 +516,7 @@ ieee80211_send_setup( { #define WH4(wh) ((struct ieee80211_frame_addr4 *)wh) struct ieee80211vap *vap = ni->ni_vap; + struct ieee80211_tx_ampdu *tap; struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *); ieee80211_seq seqno; @@ -583,9 +584,15 @@ ieee80211_send_setup( } *(uint16_t *)&wh->i_dur[0] = 0; - seqno = ni->ni_txseqs[tid]++; - *(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT); - M_SEQNO_SET(m, seqno); + tap = &ni->ni_tx_ampdu[TID_TO_WME_AC(tid)]; + if (tid != IEEE80211_NONQOS_TID && IEEE80211_AMPDU_RUNNING(tap)) + m->m_flags |= M_AMPDU_MPDU; + else { + seqno = ni->ni_txseqs[tid]++; + *(uint16_t *)&wh->i_seq[0] = + htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT); + M_SEQNO_SET(m, seqno); + } if (IEEE80211_IS_MULTICAST(wh->i_addr1)) m->m_flags |= M_MCAST; diff --git a/sys/net80211/ieee80211_var.h b/sys/net80211/ieee80211_var.h index 39cf347..a3dcd9f 100644 --- a/sys/net80211/ieee80211_var.h +++ b/sys/net80211/ieee80211_var.h @@ -307,6 +307,8 @@ struct ieee80211com { int status, int baparamset, int batimeout); void (*ic_addba_stop)(struct ieee80211_node *, struct ieee80211_tx_ampdu *); + void (*ic_addba_response_timeout)(struct ieee80211_node *, + struct ieee80211_tx_ampdu *); /* BAR response received */ void 
(*ic_bar_response)(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int status); diff --git a/sys/netgraph/ng_nat.c b/sys/netgraph/ng_nat.c index 84da500..59818d9 100644 --- a/sys/netgraph/ng_nat.c +++ b/sys/netgraph/ng_nat.c @@ -43,6 +43,7 @@ #include <machine/in_cksum.h> #include <netinet/libalias/alias.h> +#include <netinet/libalias/alias_local.h> #include <netgraph/ng_message.h> #include <netgraph/ng_parse.h> @@ -696,22 +697,35 @@ ng_nat_rcvdata(hook_p hook, item_p item ) KASSERT(m->m_pkthdr.len == ntohs(ip->ip_len), ("ng_nat: ip_len != m_pkthdr.len")); + /* + * We drop packet when: + * 1. libalias returns PKT_ALIAS_ERROR; + * 2. For incoming packets: + * a) for unresolved fragments; + * b) libalias returns PKT_ALIAS_IGNORED and + * PKT_ALIAS_DENY_INCOMING flag is set. + */ if (hook == priv->in) { rval = LibAliasIn(priv->lib, c, m->m_len + M_TRAILINGSPACE(m)); - if (rval != PKT_ALIAS_OK && - rval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { + if (rval == PKT_ALIAS_ERROR || + rval == PKT_ALIAS_UNRESOLVED_FRAGMENT || + (rval == PKT_ALIAS_IGNORED && + (priv->lib->packetAliasMode & + PKT_ALIAS_DENY_INCOMING) != 0)) { NG_FREE_ITEM(item); return (EINVAL); } } else if (hook == priv->out) { rval = LibAliasOut(priv->lib, c, m->m_len + M_TRAILINGSPACE(m)); - if (rval != PKT_ALIAS_OK) { + if (rval == PKT_ALIAS_ERROR) { NG_FREE_ITEM(item); return (EINVAL); } } else panic("ng_nat: unknown hook!\n"); + if (rval == PKT_ALIAS_RESPOND) + m->m_flags |= M_SKIP_FIREWALL; m->m_pkthdr.len = m->m_len = ntohs(ip->ip_len); if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && diff --git a/sys/netgraph/ng_pipe.c b/sys/netgraph/ng_pipe.c index b5bab3c..11ea814 100644 --- a/sys/netgraph/ng_pipe.c +++ b/sys/netgraph/ng_pipe.c @@ -298,11 +298,12 @@ ngp_rcvmsg(node_p node, item_p item, hook_p lasthook) { const priv_p priv = NG_NODE_PRIVATE(node); struct ng_mesg *resp = NULL; - struct ng_mesg *msg; + struct ng_mesg *msg, *flow_msg; struct ng_pipe_stats *stats; struct ng_pipe_run *run; struct ng_pipe_cfg 
*cfg; int error = 0; + int prev_down, now_down, cmd; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { @@ -403,10 +404,38 @@ ngp_rcvmsg(node_p node, item_p item, hook_p lasthook) cfg->header_offset < 64) priv->header_offset = cfg->header_offset; + prev_down = priv->upper.cfg.ber == 1 || + priv->lower.cfg.ber == 1; parse_cfg(&priv->upper.cfg, &cfg->downstream, &priv->upper, priv); parse_cfg(&priv->lower.cfg, &cfg->upstream, &priv->lower, priv); + now_down = priv->upper.cfg.ber == 1 || + priv->lower.cfg.ber == 1; + + if (prev_down != now_down) { + if (now_down) + cmd = NGM_LINK_IS_DOWN; + else + cmd = NGM_LINK_IS_UP; + + if (priv->lower.hook != NULL) { + NG_MKMESSAGE(flow_msg, NGM_FLOW_COOKIE, + cmd, 0, M_NOWAIT); + if (flow_msg != NULL) + NG_SEND_MSG_HOOK(error, node, + flow_msg, priv->lower.hook, + 0); + } + if (priv->upper.hook != NULL) { + NG_MKMESSAGE(flow_msg, NGM_FLOW_COOKIE, + cmd, 0, M_NOWAIT); + if (flow_msg != NULL) + NG_SEND_MSG_HOOK(error, node, + flow_msg, priv->upper.hook, + 0); + } + } break; default: error = EINVAL; diff --git a/sys/netinet/icmp6.h b/sys/netinet/icmp6.h index 5faae7c..c9da86a 100644 --- a/sys/netinet/icmp6.h +++ b/sys/netinet/icmp6.h @@ -297,8 +297,9 @@ struct nd_opt_hdr { /* Neighbor discovery option header */ #define ND_OPT_PREFIX_INFORMATION 3 #define ND_OPT_REDIRECTED_HEADER 4 #define ND_OPT_MTU 5 - -#define ND_OPT_ROUTE_INFO 200 /* draft-ietf-ipngwg-router-preference, not officially assigned yet */ +#define ND_OPT_ROUTE_INFO 24 /* RFC 4191 */ +#define ND_OPT_RDNSS 25 /* RFC 6106 */ +#define ND_OPT_DNSSL 31 /* RFC 6106 */ struct nd_opt_prefix_info { /* prefix information */ u_int8_t nd_opt_pi_type; @@ -338,6 +339,22 @@ struct nd_opt_route_info { /* route info */ /* prefix follows */ } __packed; +struct nd_opt_rdnss { /* RDNSS option (RFC 6106) */ + u_int8_t nd_opt_rdnss_type; + u_int8_t nd_opt_rdnss_len; + u_int16_t nd_opt_rdnss_reserved; + u_int32_t nd_opt_rdnss_lifetime; + /* followed by list of recursive DNS servers */ 
+} __packed; + +struct nd_opt_dnssl { /* DNSSL option (RFC 6106) */ + u_int8_t nd_opt_dnssl_type; + u_int8_t nd_opt_dnssl_len; + u_int16_t nd_opt_dnssl_reserved; + u_int32_t nd_opt_dnssl_lifetime; + /* followed by list of DNS search domains */ +} __packed; + /* * icmp6 namelookup */ diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 3afdc7d..6a66c05 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -759,7 +759,7 @@ match: } } else LLE_WUNLOCK(la); - } /* end of FIB loop */ + } reply: if (op != ARPOP_REQUEST) goto drop; diff --git a/sys/netinet/in.c b/sys/netinet/in.c index 684d808..7ae8477 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -548,7 +548,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * is the same as before, then the call is * un-necessarily executed here. */ - in_ifscrub(ifp, ia, 0); + in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_sockmask = ifra->ifra_mask; ia->ia_sockmask.sin_family = AF_INET; ia->ia_subnetmask = @@ -557,7 +557,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, } if ((ifp->if_flags & IFF_POINTOPOINT) && (ifra->ifra_dstaddr.sin_family == AF_INET)) { - in_ifscrub(ifp, ia, 0); + in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_dstaddr = ifra->ifra_dstaddr; maskIsNew = 1; /* We lie; but the effect's the same */ } @@ -1179,14 +1179,20 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags) && (ia->ia_ifp->if_type != IFT_CARP)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); - rtinit(&(target->ia_ifa), (int)RTM_DELETE, + error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); - target->ia_flags &= ~IFA_ROUTE; - + if (error == 0) + target->ia_flags &= ~IFA_ROUTE; + else + log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", + error); error = rtinit(&ia->ia_ifa, (int)RTM_ADD, rtinitflags(ia) | RTF_UP); if (error == 0) ia->ia_flags |= IFA_ROUTE; + else + log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", + 
error); ifa_free(&ia->ia_ifa); return (error); } @@ -1210,9 +1216,12 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags) /* * As no-one seem to have this prefix, we can remove the route. */ - rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); - target->ia_flags &= ~IFA_ROUTE; - return (0); + error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); + if (error == 0) + target->ia_flags &= ~IFA_ROUTE; + else + log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); + return (error); } #undef rtinitflags diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 85e31dc..4eb309a 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_pcbgroup.h" #include <sys/param.h> #include <sys/systm.h> @@ -128,8 +129,12 @@ static VNET_DEFINE(int, ipport_tcplastcount); #define V_ipport_tcplastcount VNET(ipport_tcplastcount) static void in_pcbremlists(struct inpcb *inp); - #ifdef INET +static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, + struct in_addr faddr, u_int fport_arg, + struct in_addr laddr, u_int lport_arg, + int lookupflags, struct ifnet *ifp); + #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } @@ -208,19 +213,24 @@ void in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, - uint32_t inpcbzone_flags) + uint32_t inpcbzone_flags, u_int hashfields) { INP_INFO_LOCK_INIT(pcbinfo, name); + INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? 
*/ #ifdef VIMAGE pcbinfo->ipi_vnet = curvnet; #endif pcbinfo->ipi_listhead = listhead; LIST_INIT(pcbinfo->ipi_listhead); + pcbinfo->ipi_count = 0; pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, &pcbinfo->ipi_hashmask); pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, &pcbinfo->ipi_porthashmask); +#ifdef PCBGROUP + in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); +#endif pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, inpcbzone_flags); @@ -234,10 +244,17 @@ void in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) { + KASSERT(pcbinfo->ipi_count == 0, + ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count)); + hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, pcbinfo->ipi_porthashmask); +#ifdef PCBGROUP + in_pcbgroup_destroy(pcbinfo); +#endif uma_zdestroy(pcbinfo->ipi_zone); + INP_HASH_LOCK_DESTROY(pcbinfo); INP_INFO_LOCK_DESTROY(pcbinfo); } @@ -309,8 +326,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { int anonport, error; - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); @@ -351,8 +368,8 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, * Because no actual state changes occur here, a global write lock on * the pcbinfo isn't required. */ - INP_INFO_LOCK_ASSERT(pcbinfo); INP_LOCK_ASSERT(inp); + INP_HASH_LOCK_ASSERT(pcbinfo); if (inp->inp_flags & INP_HIGHPORT) { first = V_ipport_hifirstauto; /* sysctl */ @@ -473,11 +490,10 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, int error; /* - * Because no actual state changes occur here, a global write lock on - * the pcbinfo isn't required. + * No state changes, so read locks are sufficient here. 
*/ - INP_INFO_LOCK_ASSERT(pcbinfo); INP_LOCK_ASSERT(inp); + INP_HASH_LOCK_ASSERT(pcbinfo); if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); @@ -612,14 +628,15 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, * then pick one. */ int -in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) +in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam, + struct ucred *cred, struct mbuf *m) { u_short lport, fport; in_addr_t laddr, faddr; int anonport, error; - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); lport = inp->inp_lport; laddr = inp->inp_laddr.s_addr; @@ -645,13 +662,20 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) inp->inp_laddr.s_addr = laddr; inp->inp_faddr.s_addr = faddr; inp->inp_fport = fport; - in_pcbrehash(inp); + in_pcbrehash_mbuf(inp, m); if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); } +int +in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) +{ + + return (in_pcbconnect_mbuf(inp, nam, cred, NULL)); +} + /* * Do proper source address selection on an unbound socket in case * of connect. Take jails into account as well. @@ -907,8 +931,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, * Because a global state change doesn't actually occur here, a read * lock is sufficient. 
*/ - INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo); INP_LOCK_ASSERT(inp); + INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo); if (oinpp != NULL) *oinpp = NULL; @@ -983,8 +1007,8 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, if (error) return (error); } - oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, - 0, NULL); + oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport, + laddr, lport, 0, NULL); if (oinp != NULL) { if (oinpp != NULL) *oinpp = oinp; @@ -1007,8 +1031,8 @@ void in_pcbdisconnect(struct inpcb *inp) { - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; @@ -1036,7 +1060,8 @@ in_pcbdetach(struct inpcb *inp) * in_pcbref() bumps the reference count on an inpcb in order to maintain * stability of an inpcb pointer despite the inpcb lock being released. This * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, - * but where the inpcb lock is already held. + * but where the inpcb lock may already be held, or when acquiring a reference + * via a pcbgroup. * * in_pcbref() should be used only to provide brief memory stability, and * must always be followed by a call to INP_WLOCK() and in_pcbrele() to @@ -1187,20 +1212,28 @@ void in_pcbdrop(struct inpcb *inp) { - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); + /* + * XXXRW: Possibly we should protect the setting of INP_DROPPED with + * the hash lock...? 
+ */ inp->inp_flags |= INP_DROPPED; if (inp->inp_flags & INP_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; + INP_HASH_WLOCK(inp->inp_pcbinfo); LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } + INP_HASH_WUNLOCK(inp->inp_pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; +#ifdef PCBGROUP + in_pcbgroup_remove(inp); +#endif } } @@ -1328,7 +1361,8 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) } /* - * Lookup a PCB based on the local address and port. + * Lookup a PCB based on the local address and port. Caller must hold the + * hash lock. No inpcb locks or references are acquired. */ #define INP_LOOKUP_MAPPED_PCB_COST 3 struct inpcb * @@ -1346,7 +1380,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - INP_INFO_LOCK_ASSERT(pcbinfo); + INP_HASH_LOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; @@ -1449,11 +1483,155 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, } #undef INP_LOOKUP_MAPPED_PCB_COST +#ifdef PCBGROUP /* - * Lookup PCB in hash list. + * Lookup PCB in hash list, using pcbgroup tables. */ -struct inpcb * -in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, +static struct inpcb * +in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, + struct in_addr faddr, u_int fport_arg, struct in_addr laddr, + u_int lport_arg, int lookupflags, struct ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp, *tmpinp; + u_short fport = fport_arg, lport = lport_arg; + + /* + * First look for an exact match. 
+ */ + tmpinp = NULL; + INP_GROUP_LOCK(pcbgroup); + head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, + pcbgroup->ipg_hashmask)]; + LIST_FOREACH(inp, head, inp_pcbgrouphash) { +#ifdef INET6 + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV4) == 0) + continue; +#endif + if (inp->inp_faddr.s_addr == faddr.s_addr && + inp->inp_laddr.s_addr == laddr.s_addr && + inp->inp_fport == fport && + inp->inp_lport == lport) { + /* + * XXX We should be able to directly return + * the inp here, without any checks. + * Well unless both bound with SO_REUSEPORT? + */ + if (prison_flag(inp->inp_cred, PR_IP4)) + goto found; + if (tmpinp == NULL) + tmpinp = inp; + } + } + if (tmpinp != NULL) { + inp = tmpinp; + goto found; + } + + /* + * Then look for a wildcard match, if requested. + */ + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { + struct inpcb *local_wild = NULL, *local_exact = NULL; +#ifdef INET6 + struct inpcb *local_wild_mapped = NULL; +#endif + struct inpcb *jail_wild = NULL; + struct inpcbhead *head; + int injail; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. 
+ */ + head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, + 0, pcbinfo->ipi_wildmask)]; + LIST_FOREACH(inp, head, inp_pcbgroup_wild) { +#ifdef INET6 + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV4) == 0) + continue; +#endif + if (inp->inp_faddr.s_addr != INADDR_ANY || + inp->inp_lport != lport) + continue; + + /* XXX inp locking */ + if (ifp && ifp->if_type == IFT_FAITH && + (inp->inp_flags & INP_FAITH) == 0) + continue; + + injail = prison_flag(inp->inp_cred, PR_IP4); + if (injail) { + if (prison_check_ip4(inp->inp_cred, + &laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } + + if (inp->inp_laddr.s_addr == laddr.s_addr) { + if (injail) + goto found; + else + local_exact = inp; + } else if (inp->inp_laddr.s_addr == INADDR_ANY) { +#ifdef INET6 + /* XXX inp locking, NULL check */ + if (inp->inp_vflag & INP_IPV6PROTO) + local_wild_mapped = inp; + else +#endif /* INET6 */ + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* LIST_FOREACH */ + inp = jail_wild; + if (inp == NULL) + inp = local_exact; + if (inp == NULL) + inp = local_wild; +#ifdef INET6 + if (inp == NULL) + inp = local_wild_mapped; +#endif /* defined(INET6) */ + if (inp != NULL) + goto found; + } /* if (lookupflags & INPLOOKUP_WILDCARD) */ + INP_GROUP_UNLOCK(pcbgroup); + return (NULL); + +found: + in_pcbref(inp); + INP_GROUP_UNLOCK(pcbgroup); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("%s: locking bug", __func__); + return (inp); +} +#endif /* PCBGROUP */ + +/* + * Lookup PCB in hash list, using pcbinfo tables. This variation assumes + * that the caller has locked the hash list, and will not perform any further + * locking or reference operations on either the hash list or the connection. 
+ */ +static struct inpcb * +in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { @@ -1464,7 +1642,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - INP_INFO_LOCK_ASSERT(pcbinfo); + INP_HASH_LOCK_ASSERT(pcbinfo); /* * First look for an exact match. @@ -1574,13 +1752,108 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, return (NULL); } + +/* + * Lookup PCB in hash list, using pcbinfo tables. This variation locks the + * hash list lock, and will return the inpcb locked (i.e., requires + * INPLOOKUP_LOCKPCB). + */ +static struct inpcb * +in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, + u_int fport, struct in_addr laddr, u_int lport, int lookupflags, + struct ifnet *ifp) +{ + struct inpcb *inp; + + INP_HASH_RLOCK(pcbinfo); + inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, + (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); + if (inp != NULL) { + in_pcbref(inp); + INP_HASH_RUNLOCK(pcbinfo); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("%s: locking bug", __func__); + } else + INP_HASH_RUNLOCK(pcbinfo); + return (inp); +} + +/* + * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf + * from which a pre-calculated hash value may be extracted. + * + * Possibly more of this logic should be in in_pcbgroup.c. 
+ */ +struct inpcb * +in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, + struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp) +{ +#if defined(PCBGROUP) + struct inpcbgroup *pcbgroup; +#endif + + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, + ("%s: LOCKPCB not set", __func__)); + +#if defined(PCBGROUP) + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif + return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, + lookupflags, ifp)); +} + +struct inpcb * +in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr, + u_int fport, struct in_addr laddr, u_int lport, int lookupflags, + struct ifnet *ifp, struct mbuf *m) +{ +#ifdef PCBGROUP + struct inpcbgroup *pcbgroup; +#endif + + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, + ("%s: LOCKPCB not set", __func__)); + +#ifdef PCBGROUP + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid); + if (pcbgroup != NULL) + return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, + fport, laddr, lport, lookupflags, ifp)); + pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif + return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, + lookupflags, ifp)); +} #endif /* INET */ /* * Insert PCB onto various hash lists. 
*/ -int -in_pcbinshash(struct inpcb *inp) +static int +in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; @@ -1588,8 +1861,9 @@ in_pcbinshash(struct inpcb *inp) struct inpcbport *phd; u_int32_t hashkey_faddr; - INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); + KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, ("in_pcbinshash: INP_INHASHLIST")); @@ -1629,24 +1903,54 @@ in_pcbinshash(struct inpcb *inp) LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); inp->inp_flags |= INP_INHASHLIST; +#ifdef PCBGROUP + if (do_pcbgroup_update) + in_pcbgroup_update(inp); +#endif return (0); } /* + * For now, there are two public interfaces to insert an inpcb into the hash + * lists -- one that does update pcbgroups, and one that doesn't. The latter + * is used only in the TCP syncache, where in_pcbinshash is called before the + * full 4-tuple is set for the inpcb, and we don't want to install in the + * pcbgroup until later. + * + * XXXRW: This seems like a misfeature. in_pcbinshash should always update + * connection groups, and partially initialised inpcbs should not be exposed + * to either reservation hash tables or pcbgroups. + */ +int +in_pcbinshash(struct inpcb *inp) +{ + + return (in_pcbinshash_internal(inp, 1)); +} + +int +in_pcbinshash_nopcbgroup(struct inpcb *inp) +{ + + return (in_pcbinshash_internal(inp, 0)); +} + +/* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. 
*/ void -in_pcbrehash(struct inpcb *inp) +in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *head; u_int32_t hashkey_faddr; - INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); + KASSERT(inp->inp_flags & INP_INHASHLIST, ("in_pcbrehash: !INP_INHASHLIST")); @@ -1662,6 +1966,20 @@ in_pcbrehash(struct inpcb *inp) LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); + +#ifdef PCBGROUP + if (m != NULL) + in_pcbgroup_update_mbuf(inp, m); + else + in_pcbgroup_update(inp); +#endif +} + +void +in_pcbrehash(struct inpcb *inp) +{ + + in_pcbrehash_mbuf(inp, NULL); } /* @@ -1679,16 +1997,21 @@ in_pcbremlists(struct inpcb *inp) if (inp->inp_flags & INP_INHASHLIST) { struct inpcbport *phd = inp->inp_phd; + INP_HASH_WLOCK(pcbinfo); LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (LIST_FIRST(&phd->phd_pcblist) == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } + INP_HASH_WUNLOCK(pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; } LIST_REMOVE(inp, inp_list); pcbinfo->ipi_count--; +#ifdef PCBGROUP + in_pcbgroup_remove(inp); +#endif } /* diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 14d4ea2..dfef963 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -44,6 +44,7 @@ #include <sys/_rwlock.h> #ifdef _KERNEL +#include <sys/lock.h> #include <sys/rwlock.h> #include <net/vnet.h> #include <vm/uma.h> @@ -141,6 +142,7 @@ struct icmp6_filter; * * Key: * (c) - Constant after initialization + * (g) - Protected by the pcbgroup lock * (i) - Protected by the inpcb lock * (p) - Protected by the pcbinfo lock for the inpcb * (s) - Protected by another subsystem's locks @@ -160,9 +162,12 @@ struct icmp6_filter; */ struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */ + LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */ void *inp_ppcb; /* (i) pointer to 
per-protocol pcb */ struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ + struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ + LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */ struct socket *inp_socket; /* (i) back pointer to socket */ struct ucred *inp_cred; /* (c) cache of socket cred */ u_int32_t inp_flow; /* (i) IPv6 flow information */ @@ -268,22 +273,23 @@ struct inpcbport { * Global data structure for each high-level protocol (UDP, TCP, ...) in both * IPv4 and IPv6. Holds inpcb lists and information for managing them. * - * Each pcbinfo is protected by ipi_lock, covering mutable global fields (such - * as the global pcb list) and hashed lookup tables. The lock order is: + * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock, + * the former covering mutable global fields (such as the global pcb list), + * and the latter covering the hashed lookup tables. The lock order is: * - * ipi_lock (before) inpcb locks + * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks} * * Locking key: * * (c) Constant or nearly constant after initialisation * (g) Locked by ipi_lock - * (h) Read using either ipi_lock or inpcb lock; write requires both. + * (h) Read using either ipi_hash_lock or inpcb lock; write requires both + * (p) Protected by one or more pcbgroup locks * (x) Synchronisation properties poorly defined */ struct inpcbinfo { /* - * Global lock protecting global inpcb list, inpcb count, hash tables, - * etc. + * Global lock protecting global inpcb list, inpcb count, etc. */ struct rwlock ipi_lock; @@ -312,17 +318,39 @@ struct inpcbinfo { struct uma_zone *ipi_zone; /* (c) */ /* + * Connection groups associated with this protocol. These fields are + * constant, but pcbgroup structures themselves are protected by + * per-pcbgroup locks. 
+ */ + struct inpcbgroup *ipi_pcbgroups; /* (c) */ + u_int ipi_npcbgroups; /* (c) */ + u_int ipi_hashfields; /* (c) */ + + /* + * Global lock protecting non-pcbgroup hash lookup tables. + */ + struct rwlock ipi_hash_lock; + + /* * Global hash of inpcbs, hashed by local and foreign addresses and * port numbers. */ - struct inpcbhead *ipi_hashbase; /* (g) */ - u_long ipi_hashmask; /* (g) */ + struct inpcbhead *ipi_hashbase; /* (h) */ + u_long ipi_hashmask; /* (h) */ /* * Global hash of inpcbs, hashed by only local port number. */ - struct inpcbporthead *ipi_porthashbase; /* (g) */ - u_long ipi_porthashmask; /* (g) */ + struct inpcbporthead *ipi_porthashbase; /* (h) */ + u_long ipi_porthashmask; /* (h) */ + + /* + * List of wildcard inpcbs for use with pcbgroups. In the past, was + * per-pcbgroup but is now global. All pcbgroup locks must be held + * to modify the list, so any is sufficient to read it. + */ + struct inpcbhead *ipi_wildbase; /* (p) */ + u_long ipi_wildmask; /* (p) */ /* * Pointer to network stack instance @@ -335,6 +363,31 @@ struct inpcbinfo { void *ipi_pspare[2]; }; +/* + * Connection groups hold sets of connections that have similar CPU/thread + * affinity. Each connection belongs to exactly one connection group. + */ +struct inpcbgroup { + /* + * Per-connection group hash of inpcbs, hashed by local and foreign + * addresses and port numbers. + */ + struct inpcbhead *ipg_hashbase; /* (c) */ + u_long ipg_hashmask; /* (c) */ + + /* + * Notional affinity of this pcbgroup. + */ + u_int ipg_cpu; /* (p) */ + + /* + * Per-connection group lock, not to be confused with ipi_lock. + * Protects the hash table hung off the group, but also the global + * wildcard list in inpcbinfo. 
+ */ + struct mtx ipg_lock; +} __aligned(CACHE_LINE_SIZE); + #define INP_LOCK_INIT(inp, d, t) \ rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK) #define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) @@ -406,6 +459,26 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED) #define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED) +#define INP_HASH_LOCK_INIT(ipi, d) \ + rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0) +#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock) +#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock) +#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ + RA_LOCKED) +#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ + RA_WLOCKED) + +#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ + MTX_DEF | MTX_DUPOK) +#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock) + +#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock) +#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED) +#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock) + #define INP_PCBHASH(faddr, lport, fport, mask) \ (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) #define INP_PCBPORTHASH(lport, mask) \ @@ -465,8 +538,18 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, */ #define INP_LLE_VALID 0x00000001 /* cached lle is valid */ #define INP_RT_VALID 0x00000002 /* cached rtentry is valid */ +#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */ + +/* + * Flags passed to in_pcblookup*() functions. + */ +#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. 
*/ +#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */ +#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */ + +#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \ + INPLOOKUP_WLOCKPCB) -#define INPLOOKUP_WILDCARD 1 #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) #define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ @@ -474,6 +557,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) +/* + * Constants for pcbinfo.ipi_hashfields. + */ +#define IPI_HASHFIELDS_NONE 0 +#define IPI_HASHFIELDS_2TUPLE 1 +#define IPI_HASHFIELDS_4TUPLE 2 + #ifdef _KERNEL VNET_DECLARE(int, ipport_reservedhigh); VNET_DECLARE(int, ipport_reservedlow); @@ -505,7 +595,21 @@ VNET_DECLARE(int, ipport_tcpallocs); void in_pcbinfo_destroy(struct inpcbinfo *); void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *, - int, int, char *, uma_init, uma_fini, uint32_t); + int, int, char *, uma_init, uma_fini, uint32_t, u_int); + +struct inpcbgroup * + in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t); +struct inpcbgroup * + in_pcbgroup_byinpcb(struct inpcb *); +struct inpcbgroup * + in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short, + struct in_addr, u_short); +void in_pcbgroup_destroy(struct inpcbinfo *); +int in_pcbgroup_enabled(struct inpcbinfo *); +void in_pcbgroup_init(struct inpcbinfo *, u_int, int); +void in_pcbgroup_remove(struct inpcb *); +void in_pcbgroup_update(struct inpcb *); +void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *); void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *); @@ -515,6 +619,8 @@ int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *, int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, struct ucred *); int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); +int 
in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *, + struct mbuf *); int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *, u_short *, in_addr_t *, u_short *, struct inpcb **, struct ucred *); @@ -523,16 +629,21 @@ void in_pcbdisconnect(struct inpcb *); void in_pcbdrop(struct inpcb *); void in_pcbfree(struct inpcb *); int in_pcbinshash(struct inpcb *); +int in_pcbinshash_nopcbgroup(struct inpcb *); struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_short, int, struct ucred *); struct inpcb * - in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int, + in_pcblookup(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *); +struct inpcb * + in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int, + struct in_addr, u_int, int, struct ifnet *, struct mbuf *); void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, int, struct inpcb *(*)(struct inpcb *, int)); void in_pcbref(struct inpcb *); void in_pcbrehash(struct inpcb *); +void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *); int in_pcbrele(struct inpcb *); int in_pcbrele_rlocked(struct inpcb *); int in_pcbrele_wlocked(struct inpcb *); diff --git a/sys/netinet/in_pcbgroup.c b/sys/netinet/in_pcbgroup.c new file mode 100644 index 0000000..c9f5c70 --- /dev/null +++ b/sys/netinet/in_pcbgroup.c @@ -0,0 +1,457 @@ +/*- + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * This software was developed by Robert N. M. Watson under contract + * to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +__FBSDID("$FreeBSD$"); + +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/smp.h> +#include <sys/socketvar.h> + +#include <netinet/in.h> +#include <netinet/in_pcb.h> +#ifdef INET6 +#include <netinet6/in6_pcb.h> +#endif /* INET6 */ + +/* + * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's + * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization + * Strategies in Modern Operating Systems". This implementation differs + * significantly from that described in the paper, in that it attempts to + * introduce not just notions of affinity for connections and distribute work + * so as to reduce lock contention, but also align those notions with + * hardware work distribution strategies such as RSS. 
In this construction, + * connection groups supplement, rather than replace, existing reservation + * tables for protocol 4-tuples, offering CPU-affine lookup tables with + * minimal cache line migration and lock contention during steady state + * operation. + * + * Internet protocols, such as UDP and TCP, register to use connection groups + * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this + * indicates to the connection group code whether a 2-tuple or 4-tuple is + * used as an argument to hashes that assign a connection to a particular + * group. This must be aligned with any hardware offloaded distribution + * model, such as RSS or similar approaches taken in embedded network boards. + * Wildcard sockets require special handling, as in Willman 2006, and are + * shared between connection groups -- while being protected by group-local + * locks. This means that connection establishment and teardown can be + * significantly more expensive than without connection groups, but that + * steady-state processing can be significantly faster. + * + * Most of the implementation of connection groups is in this file; however, + * connection group lookup is implemented in in_pcb.c alongside reservation + * table lookups -- see in_pcblookup_group(). + * + * TODO: + * + * Implement dynamic rebalancing of buckets with connection groups; when + * load is unevenly distributed, search for more optimal balancing on + * demand. This might require scaling up the number of connection groups + * by <<1. + * + * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection + * groups for ip_input and ip6_input, allowing non-offloaded work + * distribution. + * + * Expose effective CPU affinity of connections to userspace using socket + * options.
+ * + * Investigate per-connection affinity overrides based on socket options; an + * option could be set, certainly resulting in work being distributed + * differently in software, and possibly propagated to supporting hardware + * with TCAMs or hardware hash tables. This might require connections to + * exist in more than one connection group at a time. + * + * Hook netisr thread reconfiguration events, and propagate those to RSS so + * that rebalancing can occur when the thread pool grows or shrinks. + * + * Expose per-pcbgroup statistics to userspace monitoring tools such as + * netstat, in order to allow better debugging and profiling. + */ + +void +in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields, + int hash_nelements) +{ + struct inpcbgroup *pcbgroup; + u_int numpcbgroups, pgn; + + /* + * Only enable connection groups for a protocol if it has been + * specifically requested. + */ + if (hashfields == IPI_HASHFIELDS_NONE) + return; + + /* + * Connection groups are about multi-processor load distribution, + * lock contention, and connection CPU affinity. As such, no point + * in turning them on for a uniprocessor machine, it only wastes + * memory. + */ + if (mp_ncpus == 1) + return; + + /* + * Use one group per CPU for now. If we decide to do dynamic + * rebalancing a la RSS, we'll need to shift left by at least 1. 
+ */ + numpcbgroups = mp_ncpus; + + pcbinfo->ipi_hashfields = hashfields; + pcbinfo->ipi_pcbgroups = malloc(numpcbgroups * + sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO); + pcbinfo->ipi_npcbgroups = numpcbgroups; + pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB, + &pcbinfo->ipi_wildmask); + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) { + pcbgroup = &pcbinfo->ipi_pcbgroups[pgn]; + pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB, + &pcbgroup->ipg_hashmask); + INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup"); + + /* + * Initialise notional affinity of the pcbgroup -- for RSS, + * we want the same notion of affinity as NICs to be used. + * Just round robin for the time being. + */ + pcbgroup->ipg_cpu = (pgn % mp_ncpus); + } +} + +void +in_pcbgroup_destroy(struct inpcbinfo *pcbinfo) +{ + struct inpcbgroup *pcbgroup; + u_int pgn; + + if (pcbinfo->ipi_npcbgroups == 0) + return; + + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) { + pcbgroup = &pcbinfo->ipi_pcbgroups[pgn]; + KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead), + ("in_pcbinfo_destroy: listhead not empty")); + INP_GROUP_LOCK_DESTROY(pcbgroup); + hashdestroy(pcbgroup->ipg_hashbase, M_PCB, + pcbgroup->ipg_hashmask); + } + hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask); + free(pcbinfo->ipi_pcbgroups, M_PCB); + pcbinfo->ipi_pcbgroups = NULL; + pcbinfo->ipi_npcbgroups = 0; + pcbinfo->ipi_hashfields = 0; +} + +/* + * Given a hash of whatever the covered tuple might be, return a pcbgroup + * index. + */ +static __inline u_int +in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash) +{ + + return (hash % pcbinfo->ipi_npcbgroups); +} + +/* + * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash + * information is insufficient to identify the pcbgroup. 
+ */ +struct inpcbgroup * +in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash) +{ + + return (NULL); +} + +static struct inpcbgroup * +in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m) +{ + + return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid)); +} + +struct inpcbgroup * +in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr, + u_short lport, struct in_addr faddr, u_short fport) +{ + uint32_t hash; + + switch (pcbinfo->ipi_hashfields) { + case IPI_HASHFIELDS_4TUPLE: + hash = faddr.s_addr ^ fport; + break; + + case IPI_HASHFIELDS_2TUPLE: + hash = faddr.s_addr ^ laddr.s_addr; + break; + + default: + hash = 0; + } + return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo, + hash)]); +} + +struct inpcbgroup * +in_pcbgroup_byinpcb(struct inpcb *inp) +{ + + return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr, + inp->inp_lport, inp->inp_faddr, inp->inp_fport)); +} + +static void +in_pcbwild_add(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo; + struct inpcbhead *head; + u_int pgn; + + INP_WLOCK_ASSERT(inp); + KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD), + ("%s: is wild",__func__)); + + pcbinfo = inp->inp_pcbinfo; + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]); + head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport, + 0, pcbinfo->ipi_wildmask)]; + LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild); + inp->inp_flags2 |= INP_PCBGROUPWILD; + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]); +} + +static void +in_pcbwild_remove(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo; + u_int pgn; + + INP_WLOCK_ASSERT(inp); + KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD), + ("%s: not wild", __func__)); + + pcbinfo = inp->inp_pcbinfo; + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]); + LIST_REMOVE(inp, inp_pcbgroup_wild); + 
for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]); + inp->inp_flags2 &= ~INP_PCBGROUPWILD; +} + +static __inline int +in_pcbwild_needed(struct inpcb *inp) +{ + +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) + return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)); + else +#endif + return (inp->inp_faddr.s_addr == htonl(INADDR_ANY)); +} + +static void +in_pcbwild_update_internal(struct inpcb *inp) +{ + int wildcard_needed; + + wildcard_needed = in_pcbwild_needed(inp); + if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD)) + in_pcbwild_add(inp); + else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD)) + in_pcbwild_remove(inp); +} + +/* + * Update the pcbgroup of an inpcb, which might include removing an old + * pcbgroup reference and/or adding a new one. Wildcard processing is not + * performed here, although ideally we'll never install a pcbgroup for a + * wildcard inpcb (asserted below). + */ +static void +in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo, + struct inpcbgroup *newpcbgroup, struct inpcb *inp) +{ + struct inpcbgroup *oldpcbgroup; + struct inpcbhead *pcbhash; + uint32_t hashkey_faddr; + + INP_WLOCK_ASSERT(inp); + + oldpcbgroup = inp->inp_pcbgroup; + if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) { + INP_GROUP_LOCK(oldpcbgroup); + LIST_REMOVE(inp, inp_pcbgrouphash); + inp->inp_pcbgroup = NULL; + INP_GROUP_UNLOCK(oldpcbgroup); + } + if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) { +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) + hashkey_faddr = inp->in6p_faddr.s6_addr32[3]; /* XXX */ + else +#endif + hashkey_faddr = inp->inp_faddr.s_addr; + INP_GROUP_LOCK(newpcbgroup); + pcbhash = &newpcbgroup->ipg_hashbase[ + INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, + newpcbgroup->ipg_hashmask)]; + LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash); + inp->inp_pcbgroup = newpcbgroup; + INP_GROUP_UNLOCK(newpcbgroup); + } + + KASSERT(!(newpcbgroup != NULL && 
in_pcbwild_needed(inp)), + ("%s: pcbgroup and wildcard!", __func__)); +} + +/* + * Two update paths: one in which the 4-tuple on an inpcb has been updated + * and therefore connection groups may need to change (or a wildcard entry + * may needed to be installed), and another in which the 4-tuple has been + * set as a result of a packet received, in which case we may be able to use + * the hash on the mbuf to avoid doing a software hash calculation for RSS. + * + * In each case: first, let the wildcard code have a go at placing it as a + * wildcard socket. If it was a wildcard, or if the connection has been + * dropped, then no pcbgroup is required (so potentially clear it); + * otherwise, calculate and update the pcbgroup for the inpcb. + */ +void +in_pcbgroup_update(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo; + struct inpcbgroup *newpcbgroup; + + INP_WLOCK_ASSERT(inp); + + pcbinfo = inp->inp_pcbinfo; + if (!in_pcbgroup_enabled(pcbinfo)) + return; + + in_pcbwild_update_internal(inp); + if (!(inp->inp_flags2 & INP_PCBGROUPWILD) && + !(inp->inp_flags & INP_DROPPED)) { +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) + newpcbgroup = in6_pcbgroup_byinpcb(inp); + else +#endif + newpcbgroup = in_pcbgroup_byinpcb(inp); + } else + newpcbgroup = NULL; + in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp); +} + +void +in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m) +{ + struct inpcbinfo *pcbinfo; + struct inpcbgroup *newpcbgroup; + + INP_WLOCK_ASSERT(inp); + + pcbinfo = inp->inp_pcbinfo; + if (!in_pcbgroup_enabled(pcbinfo)) + return; + + /* + * Possibly should assert !INP_PCBGROUPWILD rather than testing for + * it; presumably this function should never be called for anything + * other than non-wildcard socket? 
+ */ + in_pcbwild_update_internal(inp); + if (!(inp->inp_flags2 & INP_PCBGROUPWILD) && + !(inp->inp_flags & INP_DROPPED)) { + newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m); +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + if (newpcbgroup == NULL) + newpcbgroup = in6_pcbgroup_byinpcb(inp); + } else { +#endif + if (newpcbgroup == NULL) + newpcbgroup = in_pcbgroup_byinpcb(inp); +#ifdef INET6 + } +#endif + } else + newpcbgroup = NULL; + in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp); +} + +/* + * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb. + */ +void +in_pcbgroup_remove(struct inpcb *inp) +{ + struct inpcbgroup *pcbgroup; + + INP_WLOCK_ASSERT(inp); + + if (!in_pcbgroup_enabled(inp->inp_pcbinfo)) + return; + + if (inp->inp_flags2 & INP_PCBGROUPWILD) + in_pcbwild_remove(inp); + + pcbgroup = inp->inp_pcbgroup; + if (pcbgroup != NULL) { + INP_GROUP_LOCK(pcbgroup); + LIST_REMOVE(inp, inp_pcbgrouphash); + inp->inp_pcbgroup = NULL; + INP_GROUP_UNLOCK(pcbgroup); + } +} + +/* + * Query whether or not it is appropriate to use pcbgroups to look up inpcbs + * for a protocol. + */ +int +in_pcbgroup_enabled(struct inpcbinfo *pcbinfo) +{ + + return (pcbinfo->ipi_npcbgroups > 0); +} diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index 2827c22..d2a772f 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -106,6 +106,8 @@ static struct pr_usrreqs nousrreqs; #include <net/if_pfsync.h> #endif +FEATURE(inet, "Internet Protocol version 4"); + extern struct domain inetdomain; /* Spacer for loadable protocols. */ diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index de88556..527ce56 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -153,7 +153,8 @@ div_init(void) * place for hashbase == NULL. 
*/ in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb", - div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE); + div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_NONE); } static void @@ -530,7 +531,9 @@ div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; INP_INFO_WLOCK(&V_divcbinfo); INP_WLOCK(inp); + INP_HASH_WLOCK(&V_divcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_divcbinfo); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_divcbinfo); return error; @@ -659,9 +662,9 @@ div_pcblist(SYSCTL_HANDLER_ARGS) INP_INFO_WLOCK(&V_divcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; - INP_WLOCK(inp); - if (!in_pcbrele(inp)) - INP_WUNLOCK(inp); + INP_RLOCK(inp); + if (!in_pcbrele_rlocked(inp)) + INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_divcbinfo); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index ac1c723..67fcb74 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -488,7 +488,7 @@ tooshort: } #ifdef IPSEC /* - * Bypass packet filtering for packets from a tunnel (gif). + * Bypass packet filtering for packets previously handled by IPsec. */ if (ip_ipsec_filtertunnel(m)) goto passin; diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c index 50a6ce4..a3c87f5 100644 --- a/sys/netinet/ip_ipsec.c +++ b/sys/netinet/ip_ipsec.c @@ -95,7 +95,7 @@ ip_ipsec_filtertunnel(struct mbuf *m) #if defined(IPSEC) /* - * Bypass packet filtering for packets from a tunnel. + * Bypass packet filtering for packets previously handled by IPsec. 
*/ if (!V_ip4_ipsec_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) diff --git a/sys/netinet/ipfw/ip_dummynet.c b/sys/netinet/ipfw/ip_dummynet.c index ba6e892..e23ba3a 100644 --- a/sys/netinet/ipfw/ip_dummynet.c +++ b/sys/netinet/ipfw/ip_dummynet.c @@ -1045,7 +1045,7 @@ config_red(struct dn_fsk *fs) fs->w_q = fs->fs.w_q; fs->max_p = fs->fs.max_p; - D("called"); + ND("called"); /* Doing stuff that was in userland */ i = fs->sched->link.bandwidth; s = (i <= 0) ? 0 : @@ -1109,7 +1109,7 @@ config_red(struct dn_fsk *fs) if (dn_cfg.red_max_pkt_size < 1) dn_cfg.red_max_pkt_size = 1500; fs->max_pkt_size = dn_cfg.red_max_pkt_size; - D("exit"); + ND("exit"); return 0; } @@ -2176,7 +2176,7 @@ ip_dn_destroy(int last) DN_BH_WLOCK(); if (last) { - printf("%s removing last instance\n", __FUNCTION__); + ND("removing last instance\n"); ip_dn_ctl_ptr = NULL; ip_dn_io_ptr = NULL; } @@ -2256,13 +2256,13 @@ unload_dn_sched(struct dn_alg *s) struct dn_alg *tmp, *r; int err = EINVAL; - D("called for %s", s->name); + ND("called for %s", s->name); DN_BH_WLOCK(); SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) { if (strcmp(s->name, r->name) != 0) continue; - D("ref_count = %d", r->ref_count); + ND("ref_count = %d", r->ref_count); err = (r->ref_count != 0) ? EBUSY : 0; if (err == 0) SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next); diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index 9a75cf5..9e5c737 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #include <netinet/icmp6.h> #ifdef INET6 +#include <netinet6/in6_pcb.h> #include <netinet6/scope6_var.h> #include <netinet6/ip6_var.h> #endif @@ -646,21 +647,27 @@ send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) * we tried and failed, or any other value if successful. 
*/ static int -check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, - struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, - u_int16_t src_port, int *ugid_lookupp, - struct ucred **uc, struct inpcb *inp) +check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, + struct ucred **uc) { #ifndef __FreeBSD__ + /* XXX */ return cred_check(insn, proto, oif, dst_ip, dst_port, src_ip, src_port, (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); #else /* FreeBSD */ + struct in_addr src_ip, dst_ip; struct inpcbinfo *pi; - int wildcard; - struct inpcb *pcb; + struct ipfw_flow_id *id; + struct inpcb *pcb, *inp; + struct ifnet *oif; + int lookupflags; int match; + id = &args->f_id; + inp = args->inp; + oif = args->oif; + /* * Check to see if the UDP or TCP stack supplied us with * the PCB. If so, rather then holding a lock and looking @@ -681,31 +688,53 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, */ if (*ugid_lookupp == -1) return (0); - if (proto == IPPROTO_TCP) { - wildcard = 0; + if (id->proto == IPPROTO_TCP) { + lookupflags = 0; pi = &V_tcbinfo; - } else if (proto == IPPROTO_UDP) { - wildcard = INPLOOKUP_WILDCARD; + } else if (id->proto == IPPROTO_UDP) { + lookupflags = INPLOOKUP_WILDCARD; pi = &V_udbinfo; } else return 0; + lookupflags |= INPLOOKUP_RLOCKPCB; match = 0; if (*ugid_lookupp == 0) { - INP_INFO_RLOCK(pi); - pcb = (oif) ? 
- in_pcblookup_hash(pi, - dst_ip, htons(dst_port), - src_ip, htons(src_port), - wildcard, oif) : - in_pcblookup_hash(pi, - src_ip, htons(src_port), - dst_ip, htons(dst_port), - wildcard, NULL); + if (id->addr_type == 6) { +#ifdef INET6 + if (oif == NULL) + pcb = in6_pcblookup_mbuf(pi, + &id->src_ip6, htons(id->src_port), + &id->dst_ip6, htons(id->dst_port), + lookupflags, oif, args->m); + else + pcb = in6_pcblookup_mbuf(pi, + &id->dst_ip6, htons(id->dst_port), + &id->src_ip6, htons(id->src_port), + lookupflags, oif, args->m); +#else + *ugid_lookupp = -1; + return (0); +#endif + } else { + src_ip.s_addr = htonl(id->src_ip); + dst_ip.s_addr = htonl(id->dst_ip); + if (oif == NULL) + pcb = in_pcblookup_mbuf(pi, + src_ip, htons(id->src_port), + dst_ip, htons(id->dst_port), + lookupflags, oif, args->m); + else + pcb = in_pcblookup_mbuf(pi, + dst_ip, htons(id->dst_port), + src_ip, htons(id->src_port), + lookupflags, oif, args->m); + } if (pcb != NULL) { + INP_RLOCK_ASSERT(pcb); *uc = crhold(pcb->inp_cred); *ugid_lookupp = 1; + INP_RUNLOCK(pcb); } - INP_INFO_RUNLOCK(pi); if (*ugid_lookupp == 0) { /* * We tried and failed, set the variable to -1 @@ -714,14 +743,14 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, *ugid_lookupp = -1; return (0); } - } + } if (insn->o.opcode == O_UID) match = ((*uc)->cr_uid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_GID) match = groupmember((gid_t)insn->d[0], *uc); else if (insn->o.opcode == O_JAIL) match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); - return match; + return (match); #endif /* __FreeBSD__ */ } @@ -1259,22 +1288,17 @@ do { \ * as this ensures that we have a * packet with the ports info. 
*/ - if (offset!=0) - break; - if (is_ipv6) /* XXX to be fixed later */ + if (offset != 0) break; if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) match = check_uidgid( (ipfw_insn_u32 *)cmd, - proto, oif, - dst_ip, dst_port, - src_ip, src_port, &ucred_lookup, + args, &ucred_lookup, #ifdef __FreeBSD__ - &ucred_cache, args->inp); + &ucred_cache); #else - (void *)&ucred_cache, - (struct inpcb *)args->m); + (void *)&ucred_cache); #endif break; @@ -1389,18 +1413,15 @@ do { \ else if (v == 4 || v == 5) { check_uidgid( (ipfw_insn_u32 *)cmd, - proto, oif, - dst_ip, dst_port, - src_ip, src_port, &ucred_lookup, + args, &ucred_lookup, #ifdef __FreeBSD__ - &ucred_cache, args->inp); + &ucred_cache); if (v == 4 /* O_UID */) key = ucred_cache->cr_uid; else if (v == 5 /* O_JAIL */) key = ucred_cache->cr_prison->pr_id; #else /* !__FreeBSD__ */ - (void *)&ucred_cache, - (struct inpcb *)args->m); + (void *)&ucred_cache); if (v ==4 /* O_UID */) key = ucred_cache.uid; else if (v == 5 /* O_JAIL */) @@ -1827,21 +1848,32 @@ do { \ else break; + /* + * XXXRW: so_user_cookie should almost + * certainly be inp_user_cookie? 
+ */ + /* For incomming packet, lookup up the inpcb using the src/dest ip/port tuple */ if (inp == NULL) { - INP_INFO_RLOCK(pi); - inp = in_pcblookup_hash(pi, + inp = in_pcblookup(pi, src_ip, htons(src_port), dst_ip, htons(dst_port), - 0, NULL); - INP_INFO_RUNLOCK(pi); - } - - if (inp && inp->inp_socket) { - tablearg = inp->inp_socket->so_user_cookie; - if (tablearg) - match = 1; + INPLOOKUP_RLOCKPCB, NULL); + if (inp != NULL) { + tablearg = + inp->inp_socket->so_user_cookie; + if (tablearg) + match = 1; + INP_RUNLOCK(inp); + } + } else { + if (inp->inp_socket) { + tablearg = + inp->inp_socket->so_user_cookie; + if (tablearg) + match = 1; + } } break; } @@ -2106,7 +2138,8 @@ do { \ case O_FORWARD_IP: if (args->eh) /* not valid on layer2 pkts */ break; - if (!q || dyn_dir == MATCH_FORWARD) { + if (q == NULL || q->rule != f || + dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; sa = &(((ipfw_insn_sa *)cmd)->sa); if (sa->sin_addr.s_addr == INADDR_ANY) { @@ -2137,14 +2170,21 @@ do { \ done = 1; /* exit outer loop */ break; - case O_SETFIB: + case O_SETFIB: { + uint32_t fib; + f->pcnt++; /* update stats */ f->bcnt += pktlen; f->timestamp = time_uptime; - M_SETFIB(m, cmd->arg1); - args->f_id.fib = cmd->arg1; + fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg: + cmd->arg1; + if (fib >= rt_numfibs) + fib = 0; + M_SETFIB(m, fib); + args->f_id.fib = fib; l = 0; /* exit inner loop */ break; + } case O_NAT: if (!IPFW_NAT_LOADED) { @@ -2154,6 +2194,13 @@ do { \ int nat_id; set_match(args, f_pos, chain); + /* Check if this is 'global' nat rule */ + if (cmd->arg1 == 0) { + retval = ipfw_nat_ptr(args, NULL, m); + l = 0; + done = 1; + break; + } t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? 
diff --git a/sys/netinet/ipfw/ip_fw_dynamic.c b/sys/netinet/ipfw/ip_fw_dynamic.c index 7f0feb4..0bc4cc1 100644 --- a/sys/netinet/ipfw/ip_fw_dynamic.c +++ b/sys/netinet/ipfw/ip_fw_dynamic.c @@ -753,11 +753,12 @@ ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); if (q != NULL) { /* should never occur */ + DEB( if (last_log != time_uptime) { last_log = time_uptime; printf("ipfw: %s: entry already present, done\n", __func__); - } + }) IPFW_DYN_UNLOCK(); return (0); } diff --git a/sys/netinet/ipfw/ip_fw_nat.c b/sys/netinet/ipfw/ip_fw_nat.c index f8c3e63..1679a97 100644 --- a/sys/netinet/ipfw/ip_fw_nat.c +++ b/sys/netinet/ipfw/ip_fw_nat.c @@ -207,7 +207,8 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) struct mbuf *mcl; struct ip *ip; /* XXX - libalias duct tape */ - int ldt, retval; + int ldt, retval, found; + struct ip_fw_chain *chain; char *c; ldt = 0; @@ -256,23 +257,65 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) ldt = 1; c = mtod(mcl, char *); - if (args->oif == NULL) - retval = LibAliasIn(t->lib, c, - mcl->m_len + M_TRAILINGSPACE(mcl)); - else - retval = LibAliasOut(t->lib, c, - mcl->m_len + M_TRAILINGSPACE(mcl)); - if (retval == PKT_ALIAS_RESPOND) { - m->m_flags |= M_SKIP_FIREWALL; - retval = PKT_ALIAS_OK; + + /* Check if this is 'global' instance */ + if (t == NULL) { + if (args->oif == NULL) { + /* Wrong direction, skip processing */ + args->m = mcl; + return (IP_FW_NAT); + } + + found = 0; + chain = &V_layer3_chain; + IPFW_RLOCK(chain); + /* Check every nat entry... 
*/ + LIST_FOREACH(t, &chain->nat, _next) { + if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0) + continue; + retval = LibAliasOutTry(t->lib, c, + mcl->m_len + M_TRAILINGSPACE(mcl), 0); + if (retval == PKT_ALIAS_OK) { + /* Nat instance recognises state */ + found = 1; + break; + } + } + IPFW_RUNLOCK(chain); + if (found != 1) { + /* No instance found, return ignore */ + args->m = mcl; + return (IP_FW_NAT); + } + } else { + if (args->oif == NULL) + retval = LibAliasIn(t->lib, c, + mcl->m_len + M_TRAILINGSPACE(mcl)); + else + retval = LibAliasOut(t->lib, c, + mcl->m_len + M_TRAILINGSPACE(mcl)); } - if (retval != PKT_ALIAS_OK && - retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { + + /* + * We drop packet when: + * 1. libalias returns PKT_ALIAS_ERROR; + * 2. For incoming packets: + * a) for unresolved fragments; + * b) libalias returns PKT_ALIAS_IGNORED and + * PKT_ALIAS_DENY_INCOMING flag is set. + */ + if (retval == PKT_ALIAS_ERROR || + (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT || + (retval == PKT_ALIAS_IGNORED && + (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) { /* XXX - should i add some logging? */ m_free(mcl); args->m = NULL; return (IP_FW_DENY); } + + if (retval == PKT_ALIAS_RESPOND) + m->m_flags |= M_SKIP_FIREWALL; mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); /* diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c index 0c903ee..2347456 100644 --- a/sys/netinet/ipfw/ip_fw_sockopt.c +++ b/sys/netinet/ipfw/ip_fw_sockopt.c @@ -349,12 +349,13 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg) } if (n == 0) { - /* A flush request (arg == 0) on empty ruleset - * returns with no error. On the contrary, + /* A flush request (arg == 0 or cmd == 1) on empty + * ruleset returns with no error. On the contrary, * if there is no match on a specific request, * we return EINVAL. */ - error = (arg == 0) ? 
0 : EINVAL; + if (arg != 0 && cmd != 1) + error = EINVAL; break; } @@ -606,7 +607,8 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_SETFIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; - if (cmd->arg1 >= rt_numfibs) { + if ((cmd->arg1 != IP_FW_TABLEARG) && + (cmd->arg1 >= rt_numfibs)) { printf("ipfw: invalid fib number %d\n", cmd->arg1); return EINVAL; diff --git a/sys/netinet/libalias/alias.h b/sys/netinet/libalias/alias.h index 2aed829..b12b353 100644 --- a/sys/netinet/libalias/alias.h +++ b/sys/netinet/libalias/alias.h @@ -197,6 +197,18 @@ struct mbuf *m_megapullup(struct mbuf *, int); */ #define PKT_ALIAS_RESET_ON_ADDR_CHANGE 0x20 +/* + * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only + * transparent proxying is performed. + */ +#define PKT_ALIAS_PROXY_ONLY 0x40 + +/* + * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and + * PacketAliasOut() are reversed. + */ +#define PKT_ALIAS_REVERSE 0x80 + #ifndef NO_FW_PUNCH /* * If PKT_ALIAS_PUNCH_FW is set, active FTP and IRC DCC connections will @@ -209,16 +221,10 @@ struct mbuf *m_megapullup(struct mbuf *, int); #endif /* - * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only - * transparent proxying is performed. - */ -#define PKT_ALIAS_PROXY_ONLY 0x40 - -/* - * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and - * PacketAliasOut() are reversed. + * If PKT_ALIAS_SKIP_GLOBAL is set, nat instance is not checked for matching + * states in 'ipfw nat global' rule. */ -#define PKT_ALIAS_REVERSE 0x80 +#define PKT_ALIAS_SKIP_GLOBAL 0x200 /* Function return codes. 
*/ #define PKT_ALIAS_ERROR -1 diff --git a/sys/netinet/libalias/alias_sctp.h b/sys/netinet/libalias/alias_sctp.h index 80ed965..99d54ce 100644 --- a/sys/netinet/libalias/alias_sctp.h +++ b/sys/netinet/libalias/alias_sctp.h @@ -135,13 +135,13 @@ struct sctp_nat_assoc { struct in_addr a_addr; /**< alias ip address */ int state; /**< current state of NAT association */ int TableRegister; /**< stores which look up tables association is registered in */ - int exp; /**< timer expiration in seconds from uptime */ + int exp; /**< timer expiration in seconds from uptime */ int exp_loc; /**< current location in timer_Q */ int num_Gaddr; /**< number of global IP addresses in the list */ LIST_HEAD(sctpGlobalAddresshead,sctp_GlobalAddress) Gaddr; /**< List of global addresses */ - LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/ - LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */ - LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */ + LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/ + LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */ + LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */ //Using libalias locking }; diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index be099a8..e754b88 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -205,7 +205,8 @@ rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, - 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE); + 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } @@ -226,7 +227,7 @@ rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, { int policyfail = 0; - INP_RLOCK_ASSERT(last); + INP_LOCK_ASSERT(last); #ifdef IPSEC /* check AH/ESP integrity. 
*/ @@ -834,16 +835,19 @@ rip_detach(struct socket *so) static void rip_dodisconnect(struct socket *so, struct inpcb *inp) { + struct inpcbinfo *pcbinfo; - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); - INP_WLOCK_ASSERT(inp); - + pcbinfo = inp->inp_pcbinfo; + INP_INFO_WLOCK(pcbinfo); + INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr.s_addr = INADDR_ANY; rip_inshash(inp); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(pcbinfo); } static void @@ -854,11 +858,7 @@ rip_abort(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_abort: inp == NULL")); - INP_INFO_WLOCK(&V_ripcbinfo); - INP_WLOCK(inp); rip_dodisconnect(so, inp); - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_ripcbinfo); } static void @@ -869,11 +869,7 @@ rip_close(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_close: inp == NULL")); - INP_INFO_WLOCK(&V_ripcbinfo); - INP_WLOCK(inp); rip_dodisconnect(so, inp); - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_ripcbinfo); } static int @@ -887,11 +883,7 @@ rip_disconnect(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); - INP_INFO_WLOCK(&V_ripcbinfo); - INP_WLOCK(inp); rip_dodisconnect(so, inp); - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } @@ -1077,9 +1069,9 @@ rip_pcblist(SYSCTL_HANDLER_ARGS) INP_INFO_WLOCK(&V_ripcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; - INP_WLOCK(inp); - if (!in_pcbrele(inp)) - INP_WUNLOCK(inp); + INP_RLOCK(inp); + if (!in_pcbrele_rlocked(inp)) + INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_ripcbinfo); diff --git a/sys/netinet/sctp.h b/sys/netinet/sctp.h index fa29a75..3c8cf36 100644 --- a/sys/netinet/sctp.h +++ b/sys/netinet/sctp.h @@ -91,7 +91,7 @@ struct sctp_paramhdr { #define SCTP_PEER_ADDR_PARAMS 0x0000000a #define SCTP_DEFAULT_SEND_PARAM 0x0000000b /* ancillary data/notification interest options */ -#define SCTP_EVENTS 0x0000000c +#define SCTP_EVENTS 0x0000000c /* deprecated */ /* 
Without this applied we will give V4 and V6 addresses on a V6 socket */ #define SCTP_I_WANT_MAPPED_V4_ADDR 0x0000000d #define SCTP_MAXSEG 0x0000000e @@ -114,6 +114,11 @@ struct sctp_paramhdr { #define SCTP_EXPLICIT_EOR 0x0000001b #define SCTP_REUSE_PORT 0x0000001c /* rw */ #define SCTP_AUTH_DEACTIVATE_KEY 0x0000001d +#define SCTP_EVENT 0x0000001e +#define SCTP_RECVRCVINFO 0x0000001f +#define SCTP_RECVNXTINFO 0x00000020 +#define SCTP_DEFAULT_SNDINFO 0x00000021 +#define SCTP_DEFAULT_PRINFO 0x00000022 /* * read-only options @@ -490,7 +495,7 @@ struct sctp_error_unrecognized_chunk { /* * PCB Features (in sctp_features bitmask) */ -#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 +#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002 /* deprecated */ #define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004 #define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008 #define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010 @@ -500,7 +505,7 @@ struct sctp_error_unrecognized_chunk { /* socket options */ #define SCTP_PCB_FLAGS_NODELAY 0x00000100 #define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200 -#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 +#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400 /* deprecated */ #define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800 #define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000 #define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000 @@ -516,6 +521,9 @@ struct sctp_error_unrecognized_chunk { #define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x01000000 #define SCTP_PCB_FLAGS_PORTREUSE 0x02000000 #define SCTP_PCB_FLAGS_DRYEVNT 0x04000000 +#define SCTP_PCB_FLAGS_RECVRCVINFO 0x08000000 +#define SCTP_PCB_FLAGS_RECVNXTINFO 0x10000000 + /*- * mobility_features parameters (by micchie).Note * these features are applied against the diff --git a/sys/netinet/sctp_auth.c b/sys/netinet/sctp_auth.c index 91e3f78..b68c840 100644 --- a/sys/netinet/sctp_auth.c +++ b/sys/netinet/sctp_auth.c @@ -1866,7 +1866,7 @@ sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication, /* If the socket is gone we are out of here */ 
return; } - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTHEVNT)) + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_AUTHEVNT)) /* event not enabled */ return; diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index 9734aea..e142a3e 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -201,95 +201,114 @@ failed_build: struct mbuf * -sctp_build_ctl_nchunk(struct sctp_inpcb *inp, - struct sctp_sndrcvinfo *sinfo) +sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo) { + struct sctp_extrcvinfo *seinfo; struct sctp_sndrcvinfo *outinfo; + struct sctp_rcvinfo *rcvinfo; + struct sctp_nxtinfo *nxtinfo; struct cmsghdr *cmh; struct mbuf *ret; int len; - int use_extended = 0; + int use_extended; + int provide_nxt; - if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { - /* user does not want the sndrcv ctl */ + if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) { + /* user does not want any ancillary data */ return (NULL); } - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { - use_extended = 1; - len = CMSG_LEN(sizeof(struct sctp_extrcvinfo)); + len = 0; + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) { + len += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); + } + seinfo = (struct sctp_extrcvinfo *)sinfo; + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO) && + (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_AVAIL)) { + provide_nxt = 1; + len += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); } else { - len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); + provide_nxt = 0; + } + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { + use_extended = 1; + len += CMSG_SPACE(sizeof(struct sctp_extrcvinfo)); + } else { + use_extended = 0; + len += CMSG_SPACE(sizeof(struct 
sctp_sndrcvinfo)); + } + } else { + use_extended = 0; } - - ret = sctp_get_mbuf_for_msg(len, - 0, M_DONTWAIT, 1, MT_DATA); - + ret = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA); if (ret == NULL) { /* No space */ return (ret); } - /* We need a CMSG header followed by the struct */ + SCTP_BUF_LEN(ret) = 0; + + /* We need a CMSG header followed by the struct */ cmh = mtod(ret, struct cmsghdr *); - outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh); - cmh->cmsg_level = IPPROTO_SCTP; - if (use_extended) { - cmh->cmsg_type = SCTP_EXTRCV; - cmh->cmsg_len = len; - memcpy(outinfo, sinfo, len); - } else { - cmh->cmsg_type = SCTP_SNDRCV; - cmh->cmsg_len = len; - *outinfo = *sinfo; + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO)) { + cmh->cmsg_level = IPPROTO_SCTP; + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_rcvinfo)); + cmh->cmsg_type = SCTP_RCVINFO; + rcvinfo = (struct sctp_rcvinfo *)CMSG_DATA(cmh); + rcvinfo->rcv_sid = sinfo->sinfo_stream; + rcvinfo->rcv_ssn = sinfo->sinfo_ssn; + rcvinfo->rcv_flags = sinfo->sinfo_flags; + rcvinfo->rcv_ppid = sinfo->sinfo_ppid; + rcvinfo->rcv_tsn = sinfo->sinfo_tsn; + rcvinfo->rcv_cumtsn = sinfo->sinfo_cumtsn; + rcvinfo->rcv_context = sinfo->sinfo_context; + rcvinfo->rcv_assoc_id = sinfo->sinfo_assoc_id; + cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_rcvinfo))); + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_rcvinfo)); + } + if (provide_nxt) { + cmh->cmsg_level = IPPROTO_SCTP; + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_nxtinfo)); + cmh->cmsg_type = SCTP_NXTINFO; + nxtinfo = (struct sctp_nxtinfo *)CMSG_DATA(cmh); + nxtinfo->nxt_sid = seinfo->sreinfo_next_stream; + nxtinfo->nxt_flags = 0; + if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_UNORDERED) { + nxtinfo->nxt_flags |= SCTP_UNORDERED; + } + if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_IS_NOTIFICATION) { + nxtinfo->nxt_flags |= SCTP_NOTIFICATION; + } + if (seinfo->sreinfo_next_flags & SCTP_NEXT_MSG_ISCOMPLETE) { + 
nxtinfo->nxt_flags |= SCTP_COMPLETE; + } + nxtinfo->nxt_ppid = seinfo->sreinfo_next_ppid; + nxtinfo->nxt_length = seinfo->sreinfo_next_length; + nxtinfo->nxt_assoc_id = seinfo->sreinfo_next_aid; + cmh = (struct cmsghdr *)((caddr_t)cmh + CMSG_SPACE(sizeof(struct sctp_nxtinfo))); + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_nxtinfo)); + } + if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { + cmh->cmsg_level = IPPROTO_SCTP; + outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh); + if (use_extended) { + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_extrcvinfo)); + cmh->cmsg_type = SCTP_EXTRCV; + memcpy(outinfo, sinfo, sizeof(struct sctp_extrcvinfo)); + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_extrcvinfo)); + } else { + cmh->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); + cmh->cmsg_type = SCTP_SNDRCV; + *outinfo = *sinfo; + SCTP_BUF_LEN(ret) += CMSG_SPACE(sizeof(struct sctp_sndrcvinfo)); + } } - SCTP_BUF_LEN(ret) = cmh->cmsg_len; return (ret); } -char * -sctp_build_ctl_cchunk(struct sctp_inpcb *inp, - int *control_len, - struct sctp_sndrcvinfo *sinfo) -{ - struct sctp_sndrcvinfo *outinfo; - struct cmsghdr *cmh; - char *buf; - int len; - int use_extended = 0; - - if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) { - /* user does not want the sndrcv ctl */ - return (NULL); - } - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { - use_extended = 1; - len = CMSG_LEN(sizeof(struct sctp_extrcvinfo)); - } else { - len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); - } - SCTP_MALLOC(buf, char *, len, SCTP_M_CMSG); - if (buf == NULL) { - /* No space */ - return (buf); - } - /* We need a CMSG header followed by the struct */ - cmh = (struct cmsghdr *)buf; - outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh); - cmh->cmsg_level = IPPROTO_SCTP; - if (use_extended) { - cmh->cmsg_type = SCTP_EXTRCV; - cmh->cmsg_len = len; - memcpy(outinfo, sinfo, len); - } else { - cmh->cmsg_type = SCTP_SNDRCV; - cmh->cmsg_len = len; - *outinfo = 
*sinfo; - } - *control_len = len; - return (buf); -} - static void sctp_mark_non_revokable(struct sctp_association *asoc, uint32_t tsn) { diff --git a/sys/netinet/sctp_indata.h b/sys/netinet/sctp_indata.h index 34090df..1dbd364 100644 --- a/sys/netinet/sctp_indata.h +++ b/sys/netinet/sctp_indata.h @@ -83,11 +83,6 @@ struct mbuf * sctp_build_ctl_nchunk(struct sctp_inpcb *inp, struct sctp_sndrcvinfo *sinfo); -char * -sctp_build_ctl_cchunk(struct sctp_inpcb *inp, - int *control_len, - struct sctp_sndrcvinfo *sinfo); - void sctp_set_rwnd(struct sctp_tcb *, struct sctp_association *); uint32_t diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index a7d22bd..043b3b2 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -3355,54 +3355,338 @@ sctp_source_address_selection(struct sctp_inpcb *inp, } static int -sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize) +sctp_find_cmsg(int c_type, void *data, struct mbuf *control, size_t cpsize) { struct cmsghdr cmh; - int tlen, at; + int tlen, at, found; + struct sctp_sndinfo sndinfo; + struct sctp_prinfo prinfo; + struct sctp_authinfo authinfo; tlen = SCTP_BUF_LEN(control); at = 0; + found = 0; /* * Independent of how many mbufs, find the c_type inside the control * structure and copy out the data. */ while (at < tlen) { if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { - /* not enough room for one more we are done. */ - return (0); + /* There is not enough room for one more. */ + return (found); } m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) { + /* We dont't have a complete CMSG header. */ + return (found); + } if (((int)cmh.cmsg_len + at) > tlen) { - /* - * this is real messed up since there is not enough - * data here to cover the cmsg header. We are done. - */ - return (0); + /* We don't have the complete CMSG. 
*/ + return (found); } if ((cmh.cmsg_level == IPPROTO_SCTP) && - (c_type == cmh.cmsg_type)) { - /* found the one we want, copy it out */ - at += CMSG_ALIGN(sizeof(struct cmsghdr)); - if ((int)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) { - /* - * space of cmsg_len after header not big - * enough - */ - return (0); + ((c_type == cmh.cmsg_type) || + ((c_type == SCTP_SNDRCV) && + ((cmh.cmsg_type == SCTP_SNDINFO) || + (cmh.cmsg_type == SCTP_PRINFO) || + (cmh.cmsg_type == SCTP_AUTHINFO))))) { + if (c_type == cmh.cmsg_type) { + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) { + return (found); + } + /* It is exactly what we want. Copy it out. */ + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), cpsize, (caddr_t)data); + return (1); + } else { + struct sctp_sndrcvinfo *sndrcvinfo; + + sndrcvinfo = (struct sctp_sndrcvinfo *)data; + if (found == 0) { + if (cpsize < sizeof(struct sctp_sndrcvinfo)) { + return (found); + } + memset(sndrcvinfo, 0, sizeof(struct sctp_sndrcvinfo)); + } + switch (cmh.cmsg_type) { + case SCTP_SNDINFO: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_sndinfo)) { + return (found); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo); + sndrcvinfo->sinfo_stream = sndinfo.snd_sid; + sndrcvinfo->sinfo_flags = sndinfo.snd_flags; + sndrcvinfo->sinfo_ppid = sndinfo.snd_ppid; + sndrcvinfo->sinfo_context = sndinfo.snd_context; + sndrcvinfo->sinfo_assoc_id = sndinfo.snd_assoc_id; + break; + case SCTP_PRINFO: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_prinfo)) { + return (found); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_prinfo), (caddr_t)&prinfo); + sndrcvinfo->sinfo_timetolive = prinfo.pr_value; + sndrcvinfo->sinfo_flags |= prinfo.pr_policy; + break; + case SCTP_AUTHINFO: + if ((size_t)(cmh.cmsg_len - 
CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_authinfo)) { + return (found); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_authinfo), (caddr_t)&authinfo); + sndrcvinfo->sinfo_keynumber_valid = 1; + sndrcvinfo->sinfo_keynumber = authinfo.auth_keyid; + break; + default: + return (found); + } + found = 1; } - m_copydata(control, at, cpsize, data); + } + at += CMSG_ALIGN(cmh.cmsg_len); + } + return (found); +} + +static int +sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error) +{ + struct cmsghdr cmh; + int tlen, at; + struct sctp_initmsg initmsg; + +#ifdef INET + struct sockaddr_in sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif + + tlen = SCTP_BUF_LEN(control); + at = 0; + while (at < tlen) { + if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { + /* There is not enough room for one more. */ + *error = EINVAL; return (1); - } else { - at += CMSG_ALIGN(cmh.cmsg_len); - if (cmh.cmsg_len == 0) { + } + m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) { + /* We dont't have a complete CMSG header. */ + *error = EINVAL; + return (1); + } + if (((int)cmh.cmsg_len + at) > tlen) { + /* We don't have the complete CMSG. 
*/ + *error = EINVAL; + return (1); + } + if (cmh.cmsg_level == IPPROTO_SCTP) { + switch (cmh.cmsg_type) { + case SCTP_INIT: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct sctp_initmsg)) { + *error = EINVAL; + return (1); + } + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg); + if (initmsg.sinit_max_attempts) + stcb->asoc.max_init_times = initmsg.sinit_max_attempts; + if (initmsg.sinit_num_ostreams) + stcb->asoc.pre_open_streams = initmsg.sinit_num_ostreams; + if (initmsg.sinit_max_instreams) + stcb->asoc.max_inbound_streams = initmsg.sinit_max_instreams; + if (initmsg.sinit_max_init_timeo) + stcb->asoc.initial_init_rto_max = initmsg.sinit_max_init_timeo; + if (stcb->asoc.streamoutcnt < stcb->asoc.pre_open_streams) { + struct sctp_stream_out *tmp_str; + unsigned int i; + + /* Default is NOT correct */ + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n", + stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams); + SCTP_TCB_UNLOCK(stcb); + SCTP_MALLOC(tmp_str, + struct sctp_stream_out *, + (stcb->asoc.pre_open_streams * sizeof(struct sctp_stream_out)), + SCTP_M_STRMO); + SCTP_TCB_LOCK(stcb); + if (tmp_str != NULL) { + SCTP_FREE(stcb->asoc.strmout, SCTP_M_STRMO); + stcb->asoc.strmout = tmp_str; + stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt = stcb->asoc.pre_open_streams; + } else { + stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt; + } + for (i = 0; i < stcb->asoc.streamoutcnt; i++) { + stcb->asoc.strmout[i].next_sequence_sent = 0; + TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); + stcb->asoc.strmout[i].stream_no = i; + stcb->asoc.strmout[i].last_msg_incomplete = 0; + stcb->asoc.ss_functions.sctp_ss_init_stream(&stcb->asoc.strmout[i], NULL); + } + } + break; +#ifdef INET + case SCTP_DSTADDRV4: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) { + *error = EINVAL; + return (1); + } + memset(&sin, 0, sizeof(struct 
sockaddr_in)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_port = stcb->rport; + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); + if ((sin.sin_addr.s_addr == INADDR_ANY) || + (sin.sin_addr.s_addr == INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { + *error = EINVAL; + return (-1); + } + if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + *error = ENOBUFS; + return (1); + } + break; +#endif +#ifdef INET6 + case SCTP_DSTADDRV6: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) { + *error = EINVAL; + return (1); + } + memset(&sin6, 0, sizeof(struct sockaddr_in6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_port = stcb->rport; + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); + if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { + *error = EINVAL; + return (-1); + } +#ifdef INET + if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { + in6_sin6_2_sin(&sin, &sin6); + if ((sin.sin_addr.s_addr == INADDR_ANY) || + (sin.sin_addr.s_addr == INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { + *error = EINVAL; + return (-1); + } + if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + *error = ENOBUFS; + return (1); + } + } else +#endif + if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { + *error = ENOBUFS; + return (1); + } + break; +#endif + default: break; } } + at += CMSG_ALIGN(cmh.cmsg_len); } - /* not found */ return (0); } +static struct sctp_tcb * +sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, + in_port_t port, + struct mbuf *control, + struct sctp_nets **net_p, + int *error) +{ + struct 
cmsghdr cmh; + int tlen, at; + struct sctp_tcb *stcb; + struct sockaddr *addr; + +#ifdef INET + struct sockaddr_in sin; + +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; + +#endif + + tlen = SCTP_BUF_LEN(control); + at = 0; + while (at < tlen) { + if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { + /* There is not enough room for one more. */ + *error = EINVAL; + return (NULL); + } + m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + if (cmh.cmsg_len < CMSG_ALIGN(sizeof(struct cmsghdr))) { + /* We dont't have a complete CMSG header. */ + *error = EINVAL; + return (NULL); + } + if (((int)cmh.cmsg_len + at) > tlen) { + /* We don't have the complete CMSG. */ + *error = EINVAL; + return (NULL); + } + if (cmh.cmsg_level == IPPROTO_SCTP) { + switch (cmh.cmsg_type) { +#ifdef INET + case SCTP_DSTADDRV4: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in_addr)) { + *error = EINVAL; + return (NULL); + } + memset(&sin, 0, sizeof(struct sockaddr_in)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_port = port; + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); + addr = (struct sockaddr *)&sin; + break; +#endif +#ifdef INET6 + case SCTP_DSTADDRV6: + if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < sizeof(struct in6_addr)) { + *error = EINVAL; + return (NULL); + } + memset(&sin6, 0, sizeof(struct sockaddr_in6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_port = port; + m_copydata(control, at + CMSG_ALIGN(sizeof(struct cmsghdr)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); +#ifdef INET + if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { + in6_sin6_2_sin(&sin, &sin6); + addr = (struct sockaddr *)&sin; + } else +#endif + addr = (struct sockaddr *)&sin6; + break; +#endif + default: + addr = NULL; + break; + } + if (addr) { + stcb = sctp_findassociation_ep_addr(inp_p, addr, net_p, 
NULL, NULL); + if (stcb != NULL) { + return (stcb); + } + } + } + at += CMSG_ALIGN(cmh.cmsg_len); + } + return (NULL); +} + static struct mbuf * sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset, struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t ** signature) @@ -5989,19 +6273,26 @@ sctp_msg_append(struct sctp_tcb *stcb, sp->some_taken = 0; sp->data = m; sp->tail_mbuf = NULL; - sp->length = 0; - at = m; sctp_set_prsctp_policy(sp); /* * We could in theory (for sendall) sifa the length in, but we would * still have to hunt through the chain since we need to setup the * tail_mbuf */ - while (at) { + sp->length = 0; + for (at = m; at; at = SCTP_BUF_NEXT(at)) { if (SCTP_BUF_NEXT(at) == NULL) sp->tail_mbuf = at; sp->length += SCTP_BUF_LEN(at); - at = SCTP_BUF_NEXT(at); + } + if (srcv->sinfo_keynumber_valid) { + sp->auth_keyid = srcv->sinfo_keynumber; + } else { + sp->auth_keyid = stcb->asoc.authinfo.active_keyid; + } + if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) { + sctp_auth_key_acquire(stcb, sp->auth_keyid); + sp->holds_key_ref = 1; } SCTP_TCB_SEND_LOCK(stcb); sctp_snd_sb_alloc(stcb, sp->length); @@ -6478,7 +6769,9 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, memset(ca, 0, sizeof(struct sctp_copy_all)); ca->inp = inp; - memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo)); + if (srcv) { + memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo)); + } /* * take off the sendall flag, it would be bad if we failed to do * this :-0 @@ -12229,9 +12522,13 @@ sctp_copy_it_in(struct sctp_tcb *stcb, *error = 0; goto skip_copy; } - sp->auth_keyid = stcb->asoc.authinfo.active_keyid; + if (srcv->sinfo_keynumber_valid) { + sp->auth_keyid = srcv->sinfo_keynumber; + } else { + sp->auth_keyid = stcb->asoc.authinfo.active_keyid; + } if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) { - sctp_auth_key_acquire(stcb, 
stcb->asoc.authinfo.active_keyid); + sctp_auth_key_acquire(stcb, sp->auth_keyid); sp->holds_key_ref = 1; } *error = sctp_copy_one(sp, uio, resv_in_first); @@ -12263,8 +12560,8 @@ sctp_sosend(struct socket *so, struct thread *p ) { - int error, use_rcvinfo = 0; - struct sctp_sndrcvinfo srcv; + int error, use_sndinfo = 0; + struct sctp_sndrcvinfo sndrcvninfo; struct sockaddr *addr_to_use; #if defined(INET) && defined(INET6) @@ -12274,10 +12571,10 @@ sctp_sosend(struct socket *so, if (control) { /* process cmsg snd/rcv info (maybe a assoc-id) */ - if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&srcv, control, - sizeof(srcv))) { + if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&sndrcvninfo, control, + sizeof(sndrcvninfo))) { /* got one */ - use_rcvinfo = 1; + use_sndinfo = 1; } } addr_to_use = addr; @@ -12295,7 +12592,7 @@ sctp_sosend(struct socket *so, error = sctp_lower_sosend(so, addr_to_use, uio, top, control, flags, - use_rcvinfo ? &srcv : NULL + use_sndinfo ? &sndrcvninfo : NULL ,p ); return (error); @@ -12500,6 +12797,9 @@ sctp_lower_sosend(struct socket *so, SCTP_INP_WUNLOCK(inp); /* With the lock applied look again */ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL); + if ((stcb == NULL) && (control != NULL) && (port > 0)) { + stcb = sctp_findassociation_cmsgs(&t_inp, port, control, &net, &error); + } if (stcb == NULL) { SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); @@ -12507,6 +12807,9 @@ sctp_lower_sosend(struct socket *so, } else { hold_tcblock = 1; } + if (error) { + goto out_unlocked; + } if (t_inp != inp) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN); error = ENOTCONN; @@ -12555,6 +12858,7 @@ sctp_lower_sosend(struct socket *so, /* Error is setup for us in the call */ goto out_unlocked; } + hold_tcblock = 1; if (create_lock_applied) { SCTP_ASOC_CREATE_UNLOCK(inp); create_lock_applied = 0; @@ -12574,84 +12878,13 @@ sctp_lower_sosend(struct socket *so, sctp_initialize_auth_params(inp, stcb); if (control) { - /* - * see if a 
init structure exists in cmsg - * headers - */ - struct sctp_initmsg initm; - int i; - - if (sctp_find_cmsg(SCTP_INIT, (void *)&initm, control, - sizeof(initm))) { - /* - * we have an INIT override of the - * default - */ - if (initm.sinit_max_attempts) - asoc->max_init_times = initm.sinit_max_attempts; - if (initm.sinit_num_ostreams) - asoc->pre_open_streams = initm.sinit_num_ostreams; - if (initm.sinit_max_instreams) - asoc->max_inbound_streams = initm.sinit_max_instreams; - if (initm.sinit_max_init_timeo) - asoc->initial_init_rto_max = initm.sinit_max_init_timeo; - if (asoc->streamoutcnt < asoc->pre_open_streams) { - struct sctp_stream_out *tmp_str; - int had_lock = 0; - - /* Default is NOT correct */ - SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, defout:%d pre_open:%d\n", - asoc->streamoutcnt, asoc->pre_open_streams); - /* - * What happens if this - * fails? we panic ... - */ - - if (hold_tcblock) { - had_lock = 1; - SCTP_TCB_UNLOCK(stcb); - } - SCTP_MALLOC(tmp_str, - struct sctp_stream_out *, - (asoc->pre_open_streams * - sizeof(struct sctp_stream_out)), - SCTP_M_STRMO); - if (had_lock) { - SCTP_TCB_LOCK(stcb); - } - if (tmp_str != NULL) { - SCTP_FREE(asoc->strmout, SCTP_M_STRMO); - asoc->strmout = tmp_str; - asoc->strm_realoutsize = asoc->streamoutcnt = asoc->pre_open_streams; - } else { - asoc->pre_open_streams = asoc->streamoutcnt; - } - for (i = 0; i < asoc->streamoutcnt; i++) { - /*- - * inbound side must be set - * to 0xffff, also NOTE when - * we get the INIT-ACK back - * (for INIT sender) we MUST - * reduce the count - * (streamoutcnt) but first - * check if we sent to any - * of the upper streams that - * were dropped (if some - * were). Those that were - * dropped must be notified - * to the upper layer as - * failed to send. 
- */ - asoc->strmout[i].next_sequence_sent = 0x0; - TAILQ_INIT(&asoc->strmout[i].outqueue); - asoc->strmout[i].stream_no = i; - asoc->strmout[i].last_msg_incomplete = 0; - asoc->ss_functions.sctp_ss_init_stream(&asoc->strmout[i], NULL); - } - } + if (sctp_process_cmsgs_for_init(stcb, control, &error)) { + sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_7); + hold_tcblock = 0; + stcb = NULL; + goto out_unlocked; } } - hold_tcblock = 1; /* out with the INIT */ queue_only_for_init = 1; /*- diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index e53e28a..8dc01cd 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -4196,11 +4196,11 @@ try_again: return (0); } /* - * We don't allow assoc id to be 0, this is needed otherwise if the - * id were to wrap we would have issues with some socket options. + * We don't allow assoc id to be one of SCTP_FUTURE_ASSOC, + * SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC. */ - if (inp->sctp_associd_counter == 0) { - inp->sctp_associd_counter++; + if (inp->sctp_associd_counter <= SCTP_ALL_ASSOC) { + inp->sctp_associd_counter = SCTP_ALL_ASSOC + 1; } id = inp->sctp_associd_counter; inp->sctp_associd_counter++; @@ -4793,7 +4793,7 @@ sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfre /* Held for PD-API clear that. */ sq->pdapi_aborted = 1; sq->held_length = 0; - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) { + if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) { /* * Need to add a PD-API * aborted indication. 
diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h index 250b312..0f9bcaf 100644 --- a/sys/netinet/sctp_structs.h +++ b/sys/netinet/sctp_structs.h @@ -647,6 +647,8 @@ struct sctp_nonpad_sndrcvinfo { uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; + uint16_t sinfo_keynumber; + uint16_t sinfo_keynumber_valid; }; /* @@ -1201,6 +1203,7 @@ struct sctp_association { /* JRS 5/21/07 - CMT PF variable */ uint8_t sctp_cmt_pf; uint8_t use_precise_time; + uint32_t sctp_features; /* * The mapping array is used to track out of order sequences above * last_acked_seq. 0 indicates packet missing 1 indicates packet diff --git a/sys/netinet/sctp_uio.h b/sys/netinet/sctp_uio.h index 56aef9d..a798682 100644 --- a/sys/netinet/sctp_uio.h +++ b/sys/netinet/sctp_uio.h @@ -47,6 +47,16 @@ __FBSDID("$FreeBSD$"); typedef uint32_t sctp_assoc_t; +#define SCTP_FUTURE_ASSOC 0 +#define SCTP_CURRENT_ASSOC 1 +#define SCTP_ALL_ASSOC 2 + +struct sctp_event { + sctp_assoc_t se_assoc_id; + uint16_t se_type; + uint8_t se_on; +}; + /* Compatibility to previous define's */ #define sctp_stream_reset_events sctp_stream_reset_event @@ -69,6 +79,14 @@ struct sctp_event_subscribe { #define SCTP_INIT 0x0001 #define SCTP_SNDRCV 0x0002 #define SCTP_EXTRCV 0x0003 +#define SCTP_SNDINFO 0x0004 +#define SCTP_RCVINFO 0x0005 +#define SCTP_NXTINFO 0x0006 +#define SCTP_PRINFO 0x0007 +#define SCTP_AUTHINFO 0x0008 +#define SCTP_DSTADDRV4 0x0009 +#define SCTP_DSTADDRV6 0x000a + /* * ancillary data structures */ @@ -93,8 +111,8 @@ struct sctp_initmsg { */ -#define SCTP_ALIGN_RESV_PAD 96 -#define SCTP_ALIGN_RESV_PAD_SHORT 80 +#define SCTP_ALIGN_RESV_PAD 92 +#define SCTP_ALIGN_RESV_PAD_SHORT 76 struct sctp_sndrcvinfo { uint16_t sinfo_stream; @@ -106,6 +124,8 @@ struct sctp_sndrcvinfo { uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; + uint16_t sinfo_keynumber; + uint16_t sinfo_keynumber_valid; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD]; }; @@ -113,7 +133,6 @@ 
struct sctp_extrcvinfo { uint16_t sinfo_stream; uint16_t sinfo_ssn; uint16_t sinfo_flags; - uint16_t sinfo_pr_policy; uint32_t sinfo_ppid; uint32_t sinfo_context; uint32_t sinfo_timetolive; @@ -125,15 +144,86 @@ struct sctp_extrcvinfo { uint32_t sreinfo_next_aid; uint32_t sreinfo_next_length; uint32_t sreinfo_next_ppid; + uint16_t sinfo_keynumber; + uint16_t sinfo_keynumber_valid; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT]; }; +struct sctp_sndinfo { + uint16_t snd_sid; + uint16_t snd_flags; + uint32_t snd_ppid; + uint32_t snd_context; + sctp_assoc_t snd_assoc_id; +}; + +struct sctp_prinfo { + uint16_t pr_policy; + uint32_t pr_value; +}; + +struct sctp_default_prinfo { + uint16_t pr_policy; + uint32_t pr_value; + sctp_assoc_t pr_assoc_id; +}; + +struct sctp_authinfo { + uint16_t auth_keyid; +}; + +struct sctp_rcvinfo { + uint16_t rcv_sid; + uint16_t rcv_ssn; + uint16_t rcv_flags; + uint32_t rcv_ppid; + uint32_t rcv_tsn; + uint32_t rcv_cumtsn; + uint32_t rcv_context; + sctp_assoc_t rcv_assoc_id; +}; + +struct sctp_nxtinfo { + uint16_t nxt_sid; + uint16_t nxt_flags; + uint32_t nxt_ppid; + uint32_t nxt_length; + sctp_assoc_t nxt_assoc_id; +}; + #define SCTP_NO_NEXT_MSG 0x0000 #define SCTP_NEXT_MSG_AVAIL 0x0001 #define SCTP_NEXT_MSG_ISCOMPLETE 0x0002 #define SCTP_NEXT_MSG_IS_UNORDERED 0x0004 #define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008 +struct sctp_recvv_rn { + struct sctp_rcvinfo recvv_rcvinfo; + struct sctp_nxtinfo recvv_nxtinfo; +}; + +#define SCTP_RECVV_NOINFO 0 +#define SCTP_RECVV_RCVINFO 1 +#define SCTP_RECVV_NXTINFO 2 +#define SCTP_RECVV_RN 3 + +#define SCTP_SENDV_NOINFO 0 +#define SCTP_SENDV_SNDINFO 1 +#define SCTP_SENDV_PRINFO 2 +#define SCTP_SENDV_AUTHINFO 3 +#define SCTP_SENDV_SPA 4 + +struct sctp_sendv_spa { + uint32_t sendv_flags; + struct sctp_sndinfo sendv_sndinfo; + struct sctp_prinfo sendv_prinfo; + struct sctp_authinfo sendv_authinfo; +}; + +#define SCTP_SEND_SNDINFO_VALID 0x00000001 +#define SCTP_SEND_PRINFO_VALID 0x00000002 +#define 
SCTP_SEND_AUTHINFO_VALID 0x00000004 + struct sctp_snd_all_completes { uint16_t sall_stream; uint16_t sall_flags; @@ -144,6 +234,8 @@ struct sctp_snd_all_completes { }; /* Flags that go into the sinfo->sinfo_flags field */ +#define SCTP_NOTIFICATION 0x0010 /* next message is a notification */ +#define SCTP_COMPLETE 0x0020 /* next message is complete */ #define SCTP_EOF 0x0100 /* Start shutdown procedures */ #define SCTP_ABORT 0x0200 /* Send an ABORT to peer */ #define SCTP_UNORDERED 0x0400 /* Message is un-ordered */ @@ -152,7 +244,7 @@ struct sctp_snd_all_completes { #define SCTP_EOR 0x2000 /* end of message signal */ #define SCTP_SACK_IMMEDIATELY 0x4000 /* Set I-Bit */ -#define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \ +#define INVALID_SINFO_FLAG(x) (((x) & 0xfffffff0 \ & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\ SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR |\ SCTP_SACK_IMMEDIATELY)) != 0) @@ -163,7 +255,7 @@ struct sctp_snd_all_completes { #define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */ #define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */ -#define PR_SCTP_POLICY(x) ((x) & 0xff) +#define PR_SCTP_POLICY(x) ((x) & 0x0f) #define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != 0) #define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL) #define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF) @@ -1132,26 +1224,34 @@ int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **)); void sctp_freeladdrs __P((struct sockaddr *)); int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *)); +/* deprecated */ ssize_t sctp_sendmsg -__P((int, const void *, size_t, - const struct sockaddr *, +__P((int, const void *, size_t, const struct sockaddr *, socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); - ssize_t sctp_send __P((int sd, const void *msg, size_t len, - const struct sctp_sndrcvinfo *sinfo, int flags)); +/* deprecated */ + ssize_t sctp_send __P((int, const void *, size_t, + const struct 
sctp_sndrcvinfo *, int)); + +/* deprecated */ + ssize_t sctp_sendx __P((int, const void *, size_t, struct sockaddr *, + int, struct sctp_sndrcvinfo *, int)); + +/* deprecated */ + ssize_t sctp_sendmsgx __P((int sd, const void *, size_t, struct sockaddr *, + int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); - ssize_t sctp_sendx __P((int sd, const void *msg, size_t len, - struct sockaddr *addrs, int addrcnt, - struct sctp_sndrcvinfo *sinfo, int flags)); + sctp_assoc_t sctp_getassocid __P((int, struct sockaddr *)); - ssize_t sctp_sendmsgx __P((int sd, const void *, size_t, - struct sockaddr *, int, - uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); +/* deprecated */ + ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *, socklen_t *, + struct sctp_sndrcvinfo *, int *)); - sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa)); + ssize_t sctp_sendv __P((int, const struct iovec *, int, struct sockaddr *, + int, void *, socklen_t, unsigned int, int)); - ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *, - socklen_t *, struct sctp_sndrcvinfo *, int *)); + ssize_t sctp_recvv __P((int, const struct iovec *, int, struct sockaddr *, + socklen_t *, void *, socklen_t *, unsigned int *, int *)); __END_DECLS diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index b3eb805..4c1d726 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -713,7 +713,7 @@ sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, control = NULL; } error = EDESTADDRREQ; - return EDESTADDRREQ; + return (error); } #endif /* INET6 */ connected_type: @@ -1448,7 +1448,6 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval, struct sctp_tcb *stcb = NULL; struct sockaddr *sa; int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr; - int added = 0; uint32_t vrf_id; int bad_addresses = 0; sctp_assoc_t *a_id; @@ -1560,7 +1559,7 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void 
*optval, } error = 0; - added = sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error); + sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error); /* Fill in the return id */ if (error) { (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6); @@ -1603,7 +1602,7 @@ out_now: SCTP_TCB_LOCK(stcb); \ } \ SCTP_INP_RUNLOCK(inp); \ - } else if (assoc_id != 0) { \ + } else if (assoc_id > SCTP_ALL_ASSOC) { \ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); \ if (stcb == NULL) { \ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \ @@ -1691,10 +1690,6 @@ sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; } /* end switch (sopt->sopt_name) */ - if (optname != SCTP_AUTOCLOSE) { - /* make it an "on/off" value */ - val = (val != 0); - } if (*optsize < sizeof(val)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; @@ -1734,8 +1729,8 @@ flags_out: SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); *value = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_PARTIAL_DELIVERY_POINT: { uint32_t *value; @@ -1743,8 +1738,8 @@ flags_out: SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize); *value = inp->partial_delivery_point; *optsize = sizeof(uint32_t); + break; } - break; case SCTP_FRAGMENT_INTERLEAVE: { uint32_t *value; @@ -1760,8 +1755,8 @@ flags_out: *value = SCTP_FRAG_LEVEL_0; } *optsize = sizeof(uint32_t); + break; } - break; case SCTP_CMT_ON_OFF: { struct sctp_assoc_value *av; @@ -1772,14 +1767,20 @@ flags_out: av->assoc_value = stcb->asoc.sctp_cmt_on_off; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - av->assoc_value = inp->sctp_cmt_on_off; - SCTP_INP_RUNLOCK(inp); + if (av->assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_cmt_on_off; + 
SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(*av); + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; } - break; - /* JRS - Get socket option for pluggable congestion control */ case SCTP_PLUGGABLE_CC: { struct sctp_assoc_value *av; @@ -1790,11 +1791,20 @@ flags_out: av->assoc_value = stcb->asoc.congestion_control_module; SCTP_TCB_UNLOCK(stcb); } else { - av->assoc_value = inp->sctp_ep.sctp_default_cc_module; + if (av->assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_ep.sctp_default_cc_module; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(*av); + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; } - break; case SCTP_CC_OPTION: { struct sctp_cc_option *cc_opt; @@ -1807,15 +1817,13 @@ flags_out: if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) { error = ENOTSUP; } else { - error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 0, - cc_opt); - *optsize = sizeof(*cc_opt); + error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 0, cc_opt); + *optsize = sizeof(struct sctp_cc_option); } SCTP_TCB_UNLOCK(stcb); } + break; } - break; - /* RS - Get socket option for pluggable stream scheduling */ case SCTP_PLUGGABLE_SS: { struct sctp_assoc_value *av; @@ -1826,11 +1834,20 @@ flags_out: av->assoc_value = stcb->asoc.stream_scheduling_module; SCTP_TCB_UNLOCK(stcb); } else { - av->assoc_value = inp->sctp_ep.sctp_default_ss_module; + if (av->assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_ep.sctp_default_ss_module; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); } - 
*optsize = sizeof(*av); + break; } - break; case SCTP_SS_VALUE: { struct sctp_stream_value *av; @@ -1843,7 +1860,7 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } else { - *optsize = sizeof(*av); + *optsize = sizeof(struct sctp_stream_value); } SCTP_TCB_UNLOCK(stcb); } else { @@ -1854,8 +1871,8 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } + break; } - break; case SCTP_GET_ADDR_LEN: { struct sctp_assoc_value *av; @@ -1876,10 +1893,11 @@ flags_out: #endif if (error) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + } else { + *optsize = sizeof(struct sctp_assoc_value); } - *optsize = sizeof(*av); + break; } - break; case SCTP_GET_ASSOC_NUMBER: { uint32_t *value, cnt; @@ -1893,9 +1911,8 @@ flags_out: SCTP_INP_RUNLOCK(inp); *value = cnt; *optsize = sizeof(uint32_t); + break; } - break; - case SCTP_GET_ASSOC_ID_LIST: { struct sctp_assoc_ids *ids; @@ -1915,10 +1932,12 @@ flags_out: } } SCTP_INP_RUNLOCK(inp); - ids->gaids_number_of_ids = at; - *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t)); + if (error == 0) { + ids->gaids_number_of_ids = at; + *optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t)); + } + break; } - break; case SCTP_CONTEXT: { struct sctp_assoc_value *av; @@ -1930,19 +1949,27 @@ flags_out: av->assoc_value = stcb->asoc.context; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - av->assoc_value = inp->sctp_context; - SCTP_INP_RUNLOCK(inp); + if (av->assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_context; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(*av); + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; } - break; case SCTP_VRF_ID: { uint32_t *default_vrfid; SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize); 
*default_vrfid = inp->def_vrf_id; + *optsize = sizeof(uint32_t); break; } case SCTP_GET_ASOC_VRF: @@ -1954,9 +1981,10 @@ flags_out: if (stcb == NULL) { error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); - break; + } else { + id->assoc_value = stcb->asoc.vrf_id; + *optsize = sizeof(struct sctp_assoc_value); } - id->assoc_value = stcb->asoc.vrf_id; break; } case SCTP_GET_VRF_IDS: @@ -1976,13 +2004,13 @@ flags_out: gnv->gn_peers_tag = stcb->asoc.peer_vtag; gnv->gn_local_tag = stcb->asoc.my_vtag; SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_get_nonce_values); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } - *optsize = sizeof(*gnv); + break; } - break; case SCTP_DELAYED_SACK: { struct sctp_sack_info *sack; @@ -1994,15 +2022,21 @@ flags_out: sack->sack_freq = stcb->asoc.sack_freq; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); - sack->sack_freq = inp->sctp_ep.sctp_sack_freq; - SCTP_INP_RUNLOCK(inp); + if (sack->sack_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]); + sack->sack_freq = inp->sctp_ep.sctp_sack_freq; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(*sack); + if (error == 0) { + *optsize = sizeof(struct sctp_sack_info); + } + break; } - break; - case SCTP_GET_SNDBUF_USE: { struct sctp_sockstat *ss; @@ -2015,13 +2049,13 @@ flags_out: ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue + stcb->asoc.size_on_all_streams); SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_sockstat); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } - *optsize = sizeof(struct sctp_sockstat); + break; } - break; case SCTP_MAX_BURST: { struct 
sctp_assoc_value *av; @@ -2033,14 +2067,20 @@ flags_out: av->assoc_value = stcb->asoc.max_burst; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - av->assoc_value = inp->sctp_ep.max_burst; - SCTP_INP_RUNLOCK(inp); + if (av->assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + av->assoc_value = inp->sctp_ep.max_burst; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(struct sctp_assoc_value); - + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; } - break; case SCTP_MAXSEG: { struct sctp_assoc_value *av; @@ -2053,21 +2093,28 @@ flags_out: av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc); SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - ovh = SCTP_MED_OVERHEAD; + if (av->assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + ovh = SCTP_MED_OVERHEAD; + } else { + ovh = SCTP_MED_V4_OVERHEAD; + } + if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT) + av->assoc_value = 0; + else + av->assoc_value = inp->sctp_frag_point - ovh; + SCTP_INP_RUNLOCK(inp); } else { - ovh = SCTP_MED_V4_OVERHEAD; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT) - av->assoc_value = 0; - else - av->assoc_value = inp->sctp_frag_point - ovh; - SCTP_INP_RUNLOCK(inp); } - *optsize = sizeof(struct sctp_assoc_value); + if (error == 0) { + *optsize = sizeof(struct sctp_assoc_value); + } + break; } - break; case SCTP_GET_STAT_LOG: error = sctp_fill_stat_log(optval, optsize); break; @@ -2076,7 +2123,7 @@ flags_out: struct sctp_event_subscribe *events; SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize); - memset(events, 0, sizeof(*events)); + memset(events, 0, sizeof(struct sctp_event_subscribe)); SCTP_INP_RLOCK(inp); if 
(sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) events->sctp_data_io_event = 1; @@ -2112,9 +2159,8 @@ flags_out: events->sctp_stream_reset_event = 1; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(struct sctp_event_subscribe); + break; } - break; - case SCTP_ADAPTATION_LAYER: { uint32_t *value; @@ -2125,8 +2171,8 @@ flags_out: *value = inp->sctp_ep.adaptation_layer_indicator; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_SET_INITIAL_DBG_SEQ: { uint32_t *value; @@ -2136,8 +2182,8 @@ flags_out: *value = inp->sctp_ep.initial_sequence_debug; SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_GET_LOCAL_ADDR_SIZE: { uint32_t *value; @@ -2147,8 +2193,8 @@ flags_out: *value = sctp_count_max_addresses(inp); SCTP_INP_RUNLOCK(inp); *optsize = sizeof(uint32_t); + break; } - break; case SCTP_GET_REMOTE_ADDR_SIZE: { uint32_t *value; @@ -2184,13 +2230,13 @@ flags_out: } SCTP_TCB_UNLOCK(stcb); *value = (uint32_t) size; + *optsize = sizeof(uint32_t); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN); error = ENOTCONN; } - *optsize = sizeof(uint32_t); + break; } - break; case SCTP_GET_PEER_ADDRESSES: /* * Get the address information, an array is passed in to @@ -2260,8 +2306,8 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } + break; } - break; case SCTP_GET_LOCAL_ADDRESSES: { size_t limit, actual; @@ -2278,8 +2324,8 @@ flags_out: SCTP_TCB_UNLOCK(stcb); } *optsize = sizeof(struct sockaddr_storage) + actual; + break; } - break; case SCTP_PEER_ADDR_PARAMS: { struct sctp_paddrparams *paddrp; @@ -2416,38 +2462,45 @@ flags_out: paddrp->spp_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); } else { - /* Use endpoint defaults */ - SCTP_INP_RLOCK(inp); - paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure; - paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); - paddrp->spp_assoc_id = 
(sctp_assoc_t) 0; - /* get inp's default */ + if (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC) { + /* Use endpoint defaults */ + SCTP_INP_RLOCK(inp); + paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure; + paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]); + paddrp->spp_assoc_id = SCTP_FUTURE_ASSOC; + /* get inp's default */ #ifdef INET - paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos; - paddrp->spp_flags |= SPP_IPV4_TOS; + paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos; + paddrp->spp_flags |= SPP_IPV4_TOS; #endif #ifdef INET6 - if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { - paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo; - paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; - } + if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { + paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo; + paddrp->spp_flags |= SPP_IPV6_FLOWLABEL; + } #endif - /* can't return this */ - paddrp->spp_pathmtu = 0; + /* can't return this */ + paddrp->spp_pathmtu = 0; - /* default behavior, no stcb */ - paddrp->spp_flags = SPP_PMTUD_ENABLE; + /* default behavior, no stcb */ + paddrp->spp_flags = SPP_PMTUD_ENABLE; - if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { - paddrp->spp_flags |= SPP_HB_ENABLE; + if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) { + paddrp->spp_flags |= SPP_HB_ENABLE; + } else { + paddrp->spp_flags |= SPP_HB_DISABLE; + } + SCTP_INP_RUNLOCK(inp); } else { - paddrp->spp_flags |= SPP_HB_DISABLE; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_RUNLOCK(inp); } - *optsize = sizeof(struct sctp_paddrparams); + if (error == 0) { + *optsize = sizeof(struct sctp_paddrparams); + } + break; } - break; case SCTP_GET_PEER_ADDR_INFO: { struct sctp_paddrinfo *paddri; @@ -2491,6 +2544,7 @@ flags_out: paddri->spinfo_assoc_id = sctp_get_associd(stcb); paddri->spinfo_mtu = net->mtu; SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct 
sctp_paddrinfo); } else { if (stcb) { SCTP_TCB_UNLOCK(stcb); @@ -2498,9 +2552,8 @@ flags_out: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } - *optsize = sizeof(struct sctp_paddrinfo); + break; } - break; case SCTP_PCB_STATUS: { struct sctp_pcbinfo *spcb; @@ -2508,9 +2561,8 @@ flags_out: SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize); sctp_fill_pcbinfo(spcb); *optsize = sizeof(struct sctp_pcbinfo); + break; } - break; - case SCTP_STATUS: { struct sctp_nets *net; @@ -2520,7 +2572,7 @@ flags_out: SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id); if (stcb == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; break; } @@ -2569,9 +2621,9 @@ flags_out: sstat->sstat_primary.spinfo_mtu = net->mtu; sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb); SCTP_TCB_UNLOCK(stcb); - *optsize = sizeof(*sstat); + *optsize = sizeof(struct sctp_status); + break; } - break; case SCTP_RTOINFO: { struct sctp_rtoinfo *srto; @@ -2585,15 +2637,22 @@ flags_out: srto->srto_min = stcb->asoc.minrto; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - srto->srto_initial = inp->sctp_ep.initial_rto; - srto->srto_max = inp->sctp_ep.sctp_maxrto; - srto->srto_min = inp->sctp_ep.sctp_minrto; - SCTP_INP_RUNLOCK(inp); + if (srto->srto_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + srto->srto_initial = inp->sctp_ep.initial_rto; + srto->srto_max = inp->sctp_ep.sctp_maxrto; + srto->srto_min = inp->sctp_ep.sctp_minrto; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_rtoinfo); } - *optsize = sizeof(*srto); + break; } - break; case SCTP_TIMEOUTS: { struct sctp_timeouts *stimo; @@ -2610,23 +2669,21 @@ flags_out: stimo->stimo_cookie = stcb->asoc.timocookie; stimo->stimo_shutdownack = 
stcb->asoc.timoshutdownack; SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_timeouts); } else { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - *optsize = sizeof(*stimo); + break; } - break; case SCTP_ASSOCINFO: { struct sctp_assocparams *sasoc; - uint32_t oldval; SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize); SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id); if (stcb) { - oldval = sasoc->sasoc_cookie_life; sasoc->sasoc_cookie_life = TICKS_TO_MSEC(stcb->asoc.cookie_life); sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times; sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets; @@ -2634,17 +2691,24 @@ flags_out: sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life); - sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times; - sasoc->sasoc_number_peer_destinations = 0; - sasoc->sasoc_peer_rwnd = 0; - sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv); - SCTP_INP_RUNLOCK(inp); + if (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life); + sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times; + sasoc->sasoc_number_peer_destinations = 0; + sasoc->sasoc_peer_rwnd = 0; + sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv); + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_assocparams); } - *optsize = sizeof(*sasoc); + break; } - break; case SCTP_DEFAULT_SEND_PARAM: { struct sctp_sndrcvinfo *s_info; @@ -2656,13 +2720,20 @@ flags_out: memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send)); SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_RLOCK(inp); - memcpy(s_info, 
&inp->def_send, sizeof(inp->def_send)); - SCTP_INP_RUNLOCK(inp); + if (s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + memcpy(s_info, &inp->def_send, sizeof(inp->def_send)); + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } - *optsize = sizeof(*s_info); + if (error == 0) { + *optsize = sizeof(struct sctp_sndrcvinfo); + } + break; } - break; case SCTP_INITMSG: { struct sctp_initmsg *sinit; @@ -2674,9 +2745,9 @@ flags_out: sinit->sinit_max_attempts = inp->sctp_ep.max_init_times; sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max; SCTP_INP_RUNLOCK(inp); - *optsize = sizeof(*sinit); + *optsize = sizeof(struct sctp_initmsg); + break; } - break; case SCTP_PRIMARY_ADDR: /* we allow a "get" operation on this */ { @@ -2697,14 +2768,13 @@ flags_out: &stcb->asoc.primary_destination->ro._l_addr, len); SCTP_TCB_UNLOCK(stcb); + *optsize = sizeof(struct sctp_setprim); } else { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - *optsize = sizeof(*ssp); + break; } - break; - case SCTP_HMAC_IDENT: { struct sctp_hmacalgo *shmac; @@ -2726,7 +2796,7 @@ flags_out: size = sizeof(*shmac) + (hmaclist->num_algo * sizeof(shmac->shmac_idents[0])); if ((size_t)(*optsize) < size) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; SCTP_INP_RUNLOCK(inp); break; @@ -2752,12 +2822,19 @@ flags_out: scact->scact_keynumber = stcb->asoc.authinfo.active_keyid; SCTP_TCB_UNLOCK(stcb); } else { - /* get the endpoint active key */ - SCTP_INP_RLOCK(inp); - scact->scact_keynumber = inp->sctp_ep.default_keyid; - SCTP_INP_RUNLOCK(inp); + if (scact->scact_assoc_id == SCTP_FUTURE_ASSOC) { + /* get the endpoint active key */ + SCTP_INP_RLOCK(inp); + scact->scact_keynumber = 
inp->sctp_ep.default_keyid; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_authkeyid); } - *optsize = sizeof(*scact); break; } case SCTP_LOCAL_AUTH_CHUNKS: @@ -2780,24 +2857,30 @@ flags_out: } else { /* copy in the chunks */ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + *optsize = sizeof(struct sctp_authchunks) + size; } SCTP_TCB_UNLOCK(stcb); } else { - /* get off the endpoint */ - SCTP_INP_RLOCK(inp); - chklist = inp->sctp_ep.local_auth_chunks; - /* is there enough space? */ - size = sctp_auth_get_chklist_size(chklist); - if (*optsize < (sizeof(struct sctp_authchunks) + size)) { - error = EINVAL; - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + if (sac->gauth_assoc_id == SCTP_FUTURE_ASSOC) { + /* get off the endpoint */ + SCTP_INP_RLOCK(inp); + chklist = inp->sctp_ep.local_auth_chunks; + /* is there enough space? 
*/ + size = sctp_auth_get_chklist_size(chklist); + if (*optsize < (sizeof(struct sctp_authchunks) + size)) { + error = EINVAL; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); + } else { + /* copy in the chunks */ + (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + *optsize = sizeof(struct sctp_authchunks) + size; + } + SCTP_INP_RUNLOCK(inp); } else { - /* copy in the chunks */ - (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_RUNLOCK(inp); } - *optsize = sizeof(struct sctp_authchunks) + size; break; } case SCTP_PEER_AUTH_CHUNKS: @@ -2820,23 +2903,191 @@ flags_out: } else { /* copy in the chunks */ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks); + *optsize = sizeof(struct sctp_authchunks) + size; } SCTP_TCB_UNLOCK(stcb); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); error = ENOENT; } - *optsize = sizeof(struct sctp_authchunks) + size; break; } + case SCTP_EVENT: + { + struct sctp_event *event; + uint32_t event_type; + + SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, *optsize); + SCTP_FIND_STCB(inp, stcb, event->se_assoc_id); + + switch (event->se_type) { + case SCTP_ASSOC_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT; + break; + case SCTP_PEER_ADDR_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVPADDREVNT; + break; + case SCTP_REMOTE_ERROR: + event_type = SCTP_PCB_FLAGS_RECVPEERERR; + break; + case SCTP_SEND_FAILED: + event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT; + break; + case SCTP_SHUTDOWN_EVENT: + event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT; + break; + case SCTP_ADAPTATION_INDICATION: + event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT; + break; + case SCTP_PARTIAL_DELIVERY_EVENT: + event_type = SCTP_PCB_FLAGS_PDAPIEVNT; + break; + case SCTP_AUTHENTICATION_EVENT: + event_type = SCTP_PCB_FLAGS_AUTHEVNT; + break; + case SCTP_STREAM_RESET_EVENT: + event_type = 
SCTP_PCB_FLAGS_STREAM_RESETEVNT; + break; + case SCTP_SENDER_DRY_EVENT: + event_type = SCTP_PCB_FLAGS_DRYEVNT; + break; + case SCTP_NOTIFICATIONS_STOPPED_EVENT: + event_type = 0; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); + error = ENOTSUP; + break; + default: + event_type = 0; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (event_type > 0) { + if (stcb) { + event->se_on = sctp_stcb_is_feature_on(inp, stcb, event_type); + SCTP_TCB_UNLOCK(stcb); + } else { + if (event->se_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + event->se_on = sctp_is_feature_on(inp, event_type); + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_event); + } + break; + } + case SCTP_RECVRCVINFO: + { + int onoff; + + if (*optsize < sizeof(int)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } else { + SCTP_INP_RUNLOCK(inp); + onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO); + SCTP_INP_RUNLOCK(inp); + } + if (error == 0) { + /* return the option value */ + *(int *)optval = onoff; + *optsize = sizeof(int); + } + break; + } + case SCTP_RECVNXTINFO: + { + int onoff; + + if (*optsize < sizeof(int)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } else { + SCTP_INP_RUNLOCK(inp); + onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO); + SCTP_INP_RUNLOCK(inp); + } + if (error == 0) { + /* return the option value */ + *(int *)optval = onoff; + *optsize = sizeof(int); + } + break; + } + case SCTP_DEFAULT_SNDINFO: + { + struct sctp_sndinfo *info; + + SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, *optsize); + SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id); + + if (stcb) { + info->snd_sid = stcb->asoc.def_send.sinfo_stream; + info->snd_flags = 
stcb->asoc.def_send.sinfo_flags; + info->snd_flags &= 0xfff0; + info->snd_ppid = stcb->asoc.def_send.sinfo_ppid; + info->snd_context = stcb->asoc.def_send.sinfo_context; + SCTP_TCB_UNLOCK(stcb); + } else { + if (info->snd_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + info->snd_sid = inp->def_send.sinfo_stream; + info->snd_flags = inp->def_send.sinfo_flags; + info->snd_flags &= 0xfff0; + info->snd_ppid = inp->def_send.sinfo_ppid; + info->snd_context = inp->def_send.sinfo_context; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_sndinfo); + } + break; + } + case SCTP_DEFAULT_PRINFO: + { + struct sctp_default_prinfo *info; + SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, *optsize); + SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id); + if (stcb) { + info->pr_policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); + info->pr_value = stcb->asoc.def_send.sinfo_timetolive; + SCTP_TCB_UNLOCK(stcb); + } else { + if (info->pr_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_RLOCK(inp); + info->pr_policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags); + info->pr_value = inp->def_send.sinfo_timetolive; + SCTP_INP_RUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + } + if (error == 0) { + *optsize = sizeof(struct sctp_default_prinfo); + } + break; + } default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; - *optsize = 0; break; } /* end switch (sopt->sopt_name) */ + if (error) { + *optsize = 0; + } return (error); } @@ -2949,8 +3200,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE); else sctp_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE); + break; } - break; case SCTP_PARTIAL_DELIVERY_POINT: { uint32_t *value; @@ -2962,8 +3213,8 @@ 
sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, break; } inp->partial_delivery_point = *value; + break; } - break; case SCTP_FRAGMENT_INTERLEAVE: /* not yet until we re-write sctp_recvmsg() */ { @@ -2984,83 +3235,95 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } + break; } - break; case SCTP_CMT_ON_OFF: if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + if (av->assoc_value > SCTP_CMT_MAX) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { - if (av->assoc_value > SCTP_CMT_MAX) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - } else { - stcb->asoc.sctp_cmt_on_off = av->assoc_value; - } + stcb->asoc.sctp_cmt_on_off = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { - if (av->assoc_value > SCTP_CMT_MAX) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - } else { + if ((av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { SCTP_INP_WLOCK(inp); inp->sctp_cmt_on_off = av->assoc_value; SCTP_INP_WUNLOCK(inp); } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.sctp_cmt_on_off = av->assoc_value; + SCTP_TCB_UNLOCK(stcb); + SCTP_INP_RUNLOCK(inp); + } + } } } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; } break; - /* JRS - Set socket option for pluggable congestion control */ case SCTP_PLUGGABLE_CC: { struct sctp_assoc_value *av; struct sctp_nets *net; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + if 
((av->assoc_value != SCTP_CC_RFC2581) && + (av->assoc_value != SCTP_CC_HSTCP) && + (av->assoc_value != SCTP_CC_HTCP) && + (av->assoc_value != SCTP_CC_RTCC)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { - switch (av->assoc_value) { - case SCTP_CC_RFC2581: - case SCTP_CC_HSTCP: - case SCTP_CC_HTCP: - case SCTP_CC_RTCC: - stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; - stcb->asoc.congestion_control_module = av->assoc_value; - if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { - TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); - } + stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; + stcb->asoc.congestion_control_module = av->assoc_value; + if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { + TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); } - break; - default: - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - break; } SCTP_TCB_UNLOCK(stcb); } else { - switch (av->assoc_value) { - case SCTP_CC_RFC2581: - case SCTP_CC_HSTCP: - case SCTP_CC_HTCP: - case SCTP_CC_RTCC: + if ((av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { SCTP_INP_WLOCK(inp); inp->sctp_ep.sctp_default_cc_module = av->assoc_value; SCTP_INP_WUNLOCK(inp); - break; - default: - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - break; + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value]; + stcb->asoc.congestion_control_module = av->assoc_value; + if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) { + 
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { + stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); + } + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } } + break; } - break; case SCTP_CC_OPTION: { struct sctp_cc_option *cc_opt; @@ -3068,7 +3331,19 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, optsize); SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id); if (stcb == NULL) { - error = EINVAL; + if (cc_opt->aid_value.assoc_id == SCTP_CURRENT_ASSOC) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (stcb->asoc.cc_functions.sctp_cwnd_socket_option) { + (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1, cc_opt); + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } else { + error = EINVAL; + } } else { if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) { error = ENOTSUP; @@ -3078,54 +3353,54 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } + break; } - break; - /* RS - Set socket option for pluggable stream scheduling */ case SCTP_PLUGGABLE_SS: { struct sctp_assoc_value *av; SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize); + if ((av->assoc_value != SCTP_SS_DEFAULT) && + (av->assoc_value != SCTP_SS_DEFAULT) && + (av->assoc_value != SCTP_SS_ROUND_ROBIN) && + (av->assoc_value != SCTP_SS_ROUND_ROBIN_PACKET) && + (av->assoc_value != SCTP_SS_PRIORITY) && + (av->assoc_value != SCTP_SS_FAIR_BANDWITH) && + (av->assoc_value != SCTP_SS_FIRST_COME)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } SCTP_FIND_STCB(inp, stcb, av->assoc_id); if (stcb) { - switch (av->assoc_value) { - case SCTP_SS_DEFAULT: - case SCTP_SS_ROUND_ROBIN: - case SCTP_SS_ROUND_ROBIN_PACKET: - case SCTP_SS_PRIORITY: - case SCTP_SS_FAIR_BANDWITH: - case SCTP_SS_FIRST_COME: - 
stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1); - stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value]; - stcb->asoc.stream_scheduling_module = av->assoc_value; - stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1); - break; - default: - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - break; - } + stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1); + stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value]; + stcb->asoc.stream_scheduling_module = av->assoc_value; + stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1); SCTP_TCB_UNLOCK(stcb); } else { - switch (av->assoc_value) { - case SCTP_SS_DEFAULT: - case SCTP_SS_ROUND_ROBIN: - case SCTP_SS_ROUND_ROBIN_PACKET: - case SCTP_SS_PRIORITY: - case SCTP_SS_FAIR_BANDWITH: - case SCTP_SS_FIRST_COME: + if ((av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { SCTP_INP_WLOCK(inp); inp->sctp_ep.sctp_default_ss_module = av->assoc_value; SCTP_INP_WUNLOCK(inp); - break; - default: - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = EINVAL; - break; + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1); + stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value]; + stcb->asoc.stream_scheduling_module = av->assoc_value; + stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } } + break; } - break; case SCTP_SS_VALUE: { struct sctp_stream_value *av; @@ -3140,15 +3415,29 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - /* - * Can't set stream value without - * association - */ - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); - error = 
EINVAL; + if (av->assoc_id == SCTP_CURRENT_ASSOC) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.ss_functions.sctp_ss_set_value(stcb, + &stcb->asoc, + &stcb->asoc.strmout[av->stream_id], + av->stream_value); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + + } else { + /* + * Can't set stream value without + * association + */ + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } } + break; } - break; case SCTP_CLR_STAT_LOG: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP); error = EOPNOTSUPP; @@ -3164,12 +3453,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, stcb->asoc.context = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - inp->sctp_context = av->assoc_value; - SCTP_INP_WUNLOCK(inp); + if ((av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_context = av->assoc_value; + SCTP_INP_WUNLOCK(inp); + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.context = av->assoc_value; + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } + break; } - break; case SCTP_VRF_ID: { uint32_t *default_vrfid; @@ -3204,12 +3506,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (sack->sack_delay) { if (sack->sack_delay > SCTP_MAX_SACK_DELAY) sack->sack_delay = SCTP_MAX_SACK_DELAY; + if (MSEC_TO_TICKS(sack->sack_delay) < 1) { + sack->sack_delay = TICKS_TO_MSEC(1); + } } if (stcb) { if (sack->sack_delay) { - if (MSEC_TO_TICKS(sack->sack_delay) < 1) { - sack->sack_delay = TICKS_TO_MSEC(1); - } stcb->asoc.delayed_ack = sack->sack_delay; } if (sack->sack_freq) { @@ -3217,17 +3519,32 @@ sctp_setopt(struct socket *so, int optname, void *optval, 
size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sack->sack_delay) { - if (MSEC_TO_TICKS(sack->sack_delay) < 1) { - sack->sack_delay = TICKS_TO_MSEC(1); + if ((sack->sack_assoc_id == SCTP_FUTURE_ASSOC) || + (sack->sack_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sack->sack_delay) { + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay); + } + if (sack->sack_freq) { + inp->sctp_ep.sctp_sack_freq = sack->sack_freq; } - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay); + SCTP_INP_WUNLOCK(inp); } - if (sack->sack_freq) { - inp->sctp_ep.sctp_sack_freq = sack->sack_freq; + if ((sack->sack_assoc_id == SCTP_CURRENT_ASSOC) || + (sack->sack_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (sack->sack_delay) { + stcb->asoc.delayed_ack = sack->sack_delay; + } + if (sack->sack_freq) { + stcb->asoc.sack_freq = sack->sack_freq; + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } @@ -3255,10 +3572,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_CHECK_AND_CAST(sca, optval, struct sctp_authkey, optsize); SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id); - size = optsize - sizeof(*sca); + size = optsize - sizeof(struct sctp_authkey); if (stcb) { - /* set it on the assoc */ shared_keys = &stcb->asoc.shared_keys; /* clear the cached keys for this key id */ sctp_clear_cachedkeys(stcb, sca->sca_keynumber); @@ -3288,39 +3604,76 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, error = sctp_insert_sharedkey(shared_keys, shared_key); SCTP_TCB_UNLOCK(stcb); } else { - /* set it on the endpoint */ - SCTP_INP_WLOCK(inp); - shared_keys = &inp->sctp_ep.shared_keys; - /* - * clear the cached keys on all assocs for - * this key id - */ - sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber); - /* - * 
create the new shared key and - * insert/replace it - */ - if (size > 0) { - key = sctp_set_key(sca->sca_key, (uint32_t) size); - if (key == NULL) { + if ((sca->sca_assoc_id == SCTP_FUTURE_ASSOC) || + (sca->sca_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + shared_keys = &inp->sctp_ep.shared_keys; + /* + * clear the cached keys on all + * assocs for this key id + */ + sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber); + /* + * create the new shared key and + * insert/replace it + */ + if (size > 0) { + key = sctp_set_key(sca->sca_key, (uint32_t) size); + if (key == NULL) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); + error = ENOMEM; + SCTP_INP_WUNLOCK(inp); + break; + } + } + shared_key = sctp_alloc_sharedkey(); + if (shared_key == NULL) { + sctp_free_key(key); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); error = ENOMEM; SCTP_INP_WUNLOCK(inp); break; } - } - shared_key = sctp_alloc_sharedkey(); - if (shared_key == NULL) { - sctp_free_key(key); - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM); - error = ENOMEM; + shared_key->key = key; + shared_key->keyid = sca->sca_keynumber; + error = sctp_insert_sharedkey(shared_keys, shared_key); SCTP_INP_WUNLOCK(inp); - break; } - shared_key->key = key; - shared_key->keyid = sca->sca_keynumber; - error = sctp_insert_sharedkey(shared_keys, shared_key); - SCTP_INP_WUNLOCK(inp); + if ((sca->sca_assoc_id == SCTP_CURRENT_ASSOC) || + (sca->sca_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + shared_keys = &stcb->asoc.shared_keys; + /* + * clear the cached keys for + * this key id + */ + sctp_clear_cachedkeys(stcb, sca->sca_keynumber); + /* + * create the new shared key + * and insert/replace it + */ + if (size > 0) { + key = sctp_set_key(sca->sca_key, (uint32_t) size); + if (key == NULL) { + SCTP_TCB_UNLOCK(stcb); + continue; + } + } + shared_key = 
sctp_alloc_sharedkey(); + if (shared_key == NULL) { + sctp_free_key(key); + SCTP_TCB_UNLOCK(stcb); + continue; + } + shared_key->key = key; + shared_key->keyid = sca->sca_keynumber; + error = sctp_insert_sharedkey(shared_keys, shared_key); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } break; } @@ -3330,7 +3683,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_hmaclist_t *hmaclist; uint16_t hmacid; uint32_t i; - size_t found; SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize); @@ -3381,8 +3733,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, { struct sctp_authkeyid *scact; - SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, - optsize); + SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id); /* set the active key on the right place */ @@ -3397,16 +3748,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - /* set the active key on the endpoint */ - SCTP_INP_WLOCK(inp); - if (sctp_auth_setactivekey_ep(inp, - scact->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); - error = EINVAL; + if ((scact->scact_assoc_id == SCTP_FUTURE_ASSOC) || + (scact->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sctp_auth_setactivekey_ep(inp, scact->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); + } + if ((scact->scact_assoc_id == SCTP_CURRENT_ASSOC) || + (scact->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + sctp_auth_setactivekey(stcb, scact->scact_keynumber); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } @@ -3414,30 +3774,36 @@ sctp_setopt(struct socket *so, int optname, 
void *optval, size_t optsize, { struct sctp_authkeyid *scdel; - SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, - optsize); + SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id); /* delete the key from the right place */ if (stcb) { - if (sctp_delete_sharedkey(stcb, - scdel->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); + if (sctp_delete_sharedkey(stcb, scdel->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sctp_delete_sharedkey_ep(inp, - scdel->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); - error = EINVAL; + if ((scdel->scact_assoc_id == SCTP_FUTURE_ASSOC) || + (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sctp_delete_sharedkey_ep(inp, scdel->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); + } + if ((scdel->scact_assoc_id == SCTP_CURRENT_ASSOC) || + (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + sctp_delete_sharedkey(stcb, scdel->scact_keynumber); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } @@ -3445,30 +3811,36 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, { struct sctp_authkeyid *keyid; - SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid, - optsize); + SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid, optsize); SCTP_FIND_STCB(inp, stcb, keyid->scact_assoc_id); /* deactivate the key from the right place */ if (stcb) { - if (sctp_deact_sharedkey(stcb, - keyid->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); + if 
(sctp_deact_sharedkey(stcb, keyid->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sctp_deact_sharedkey_ep(inp, - keyid->scact_keynumber)) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, - SCTP_FROM_SCTP_USRREQ, - EINVAL); - error = EINVAL; + if ((keyid->scact_assoc_id == SCTP_FUTURE_ASSOC) || + (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (sctp_deact_sharedkey_ep(inp, keyid->scact_keynumber)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); + } + if ((keyid->scact_assoc_id == SCTP_CURRENT_ASSOC) || + (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + sctp_deact_sharedkey(stcb, keyid->scact_keynumber); + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); } - SCTP_INP_WUNLOCK(inp); } break; } @@ -3632,9 +4004,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED); SCTP_TCB_UNLOCK(stcb); + break; } - break; - case SCTP_CONNECT_X: if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); @@ -3643,7 +4014,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } error = sctp_do_connect_x(so, inp, optval, optsize, p, 0); break; - case SCTP_CONNECT_X_DELAYED: if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); @@ -3652,7 +4022,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } error = sctp_do_connect_x(so, inp, optval, optsize, p, 1); break; - case SCTP_CONNECT_X_COMPLETE: { struct sockaddr *sa; @@ -3706,8 +4075,8 @@ sctp_setopt(struct socket *so, int optname, void 
*optval, size_t optsize, error = EALREADY; } SCTP_TCB_UNLOCK(stcb); + break; } - break; case SCTP_MAX_BURST: { struct sctp_assoc_value *av; @@ -3719,12 +4088,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, stcb->asoc.max_burst = av->assoc_value; SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - inp->sctp_ep.max_burst = av->assoc_value; - SCTP_INP_WUNLOCK(inp); + if ((av->assoc_id == SCTP_FUTURE_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->sctp_ep.max_burst = av->assoc_value; + SCTP_INP_WUNLOCK(inp); + } + if ((av->assoc_id == SCTP_CURRENT_ASSOC) || + (av->assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.max_burst = av->assoc_value; + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } + break; } - break; case SCTP_MAXSEG: { struct sctp_assoc_value *av; @@ -3746,20 +4128,25 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - /* - * FIXME MT: I think this is not in tune - * with the API ID - */ - if (av->assoc_value) { - inp->sctp_frag_point = (av->assoc_value + ovh); + if (av->assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_WLOCK(inp); + /* + * FIXME MT: I think this is not in + * tune with the API ID + */ + if (av->assoc_value) { + inp->sctp_frag_point = (av->assoc_value + ovh); + } else { + inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT; + } + SCTP_INP_WUNLOCK(inp); } else { - inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } - break; case SCTP_EVENTS: { struct sctp_event_subscribe *events; @@ -3823,22 +4210,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (events->sctp_sender_dry_event) { sctp_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT); - if 
((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || - (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { - stcb = LIST_FIRST(&inp->sctp_asoc_list); - if (stcb) { - SCTP_TCB_LOCK(stcb); - } - if (stcb && - TAILQ_EMPTY(&stcb->asoc.send_queue) && - TAILQ_EMPTY(&stcb->asoc.sent_queue) && - (stcb->asoc.stream_queue_cnt == 0)) { - sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); - } - if (stcb) { - SCTP_TCB_UNLOCK(stcb); - } - } } else { sctp_feature_off(inp, SCTP_PCB_FLAGS_DRYEVNT); } @@ -3849,9 +4220,84 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT); } SCTP_INP_WUNLOCK(inp); - } - break; + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (events->sctp_association_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT); + } + if (events->sctp_address_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT); + } + if (events->sctp_send_failure_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT); + } + if (events->sctp_peer_error_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR); + } + if (events->sctp_shutdown_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT); + } + if (events->sctp_partial_delivery_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT); + } + if (events->sctp_adaptation_layer_event) { + sctp_stcb_feature_on(inp, stcb, 
SCTP_PCB_FLAGS_ADAPTATIONEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT); + } + if (events->sctp_authentication_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT); + } + if (events->sctp_sender_dry_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT); + } + if (events->sctp_stream_reset_event) { + sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT); + } else { + sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT); + } + SCTP_TCB_UNLOCK(stcb); + } + /* + * Send up the sender dry event only for 1-to-1 + * style sockets. + */ + if (events->sctp_sender_dry_event) { + if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || + (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { + stcb = LIST_FIRST(&inp->sctp_asoc_list); + if (stcb) { + SCTP_TCB_LOCK(stcb); + if (TAILQ_EMPTY(&stcb->asoc.send_queue) && + TAILQ_EMPTY(&stcb->asoc.sent_queue) && + (stcb->asoc.stream_queue_cnt == 0)) { + sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); + } + SCTP_TCB_UNLOCK(stcb); + } + } + } + SCTP_INP_RUNLOCK(inp); + break; + } case SCTP_ADAPTATION_LAYER: { struct sctp_setadaptation *adap_bits; @@ -3860,8 +4306,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_INP_WLOCK(inp); inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind; SCTP_INP_WUNLOCK(inp); + break; } - break; #ifdef SCTP_DEBUG case SCTP_SET_INITIAL_DBG_SEQ: { @@ -3871,8 +4317,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_INP_WLOCK(inp); inp->sctp_ep.initial_sequence_debug = *vvv; SCTP_INP_WUNLOCK(inp); + break; } - break; #endif case SCTP_DEFAULT_SEND_PARAM: { @@ -3882,7 +4328,7 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_FIND_STCB(inp, stcb, 
s_info->sinfo_assoc_id); if (stcb) { - if (s_info->sinfo_stream <= stcb->asoc.streamoutcnt) { + if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) { memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send))); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); @@ -3890,12 +4336,27 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send))); - SCTP_INP_WUNLOCK(inp); + if ((s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC) || + (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send))); + SCTP_INP_WUNLOCK(inp); + } + if ((s_info->sinfo_assoc_id == SCTP_CURRENT_ASSOC) || + (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) { + memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send))); + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } } + break; } - break; case SCTP_PEER_ADDR_PARAMS: /* Applys to the specific association */ { @@ -4116,31 +4577,37 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_TCB_UNLOCK(stcb); } else { /************************NO TCB, SET TO default stuff ******************/ - SCTP_INP_WLOCK(inp); - /* - * For the TOS/FLOWLABEL stuff you set it - * with the options on the socket - */ - if (paddrp->spp_pathmaxrxt) { - inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt; - } - if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0; - else if (paddrp->spp_hbinterval) { - if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL) - paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL; - inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 
MSEC_TO_TICKS(paddrp->spp_hbinterval); - } - if (paddrp->spp_flags & SPP_HB_ENABLE) { - sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); + if (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_WLOCK(inp); + /* + * For the TOS/FLOWLABEL stuff you + * set it with the options on the + * socket + */ + if (paddrp->spp_pathmaxrxt) { + inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt; + } + if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0; + else if (paddrp->spp_hbinterval) { + if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL) + paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL; + inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval); + } + if (paddrp->spp_flags & SPP_HB_ENABLE) { + sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); - } else if (paddrp->spp_flags & SPP_HB_DISABLE) { - sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); + } else if (paddrp->spp_flags & SPP_HB_DISABLE) { + sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT); + } + SCTP_INP_WUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } - break; case SCTP_RTOINFO: { struct sctp_rtoinfo *srto; @@ -4172,31 +4639,36 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (srto->srto_initial) - new_init = srto->srto_initial; - else - new_init = inp->sctp_ep.initial_rto; - if (srto->srto_max) - new_max = srto->srto_max; - else - new_max = inp->sctp_ep.sctp_maxrto; - if (srto->srto_min) - new_min = srto->srto_min; - else - new_min = inp->sctp_ep.sctp_minrto; - if ((new_min <= new_init) && (new_init <= new_max)) { - inp->sctp_ep.initial_rto = new_init; - inp->sctp_ep.sctp_maxrto = new_max; - inp->sctp_ep.sctp_minrto = new_min; + if (srto->srto_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_WLOCK(inp); + if 
(srto->srto_initial) + new_init = srto->srto_initial; + else + new_init = inp->sctp_ep.initial_rto; + if (srto->srto_max) + new_max = srto->srto_max; + else + new_max = inp->sctp_ep.sctp_maxrto; + if (srto->srto_min) + new_min = srto->srto_min; + else + new_min = inp->sctp_ep.sctp_minrto; + if ((new_min <= new_init) && (new_init <= new_max)) { + inp->sctp_ep.initial_rto = new_init; + inp->sctp_ep.sctp_maxrto = new_max; + inp->sctp_ep.sctp_minrto = new_min; + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_INP_WUNLOCK(inp); } else { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } - break; case SCTP_ASSOCINFO: { struct sctp_assocparams *sasoc; @@ -4214,27 +4686,26 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (stcb) { if (sasoc->sasoc_asocmaxrxt) stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt; - sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets; - sasoc->sasoc_peer_rwnd = 0; - sasoc->sasoc_local_rwnd = 0; if (sasoc->sasoc_cookie_life) { stcb->asoc.cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life); } SCTP_TCB_UNLOCK(stcb); } else { - SCTP_INP_WLOCK(inp); - if (sasoc->sasoc_asocmaxrxt) - inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt; - sasoc->sasoc_number_peer_destinations = 0; - sasoc->sasoc_peer_rwnd = 0; - sasoc->sasoc_local_rwnd = 0; - if (sasoc->sasoc_cookie_life) { - inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life); + if (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC) { + SCTP_INP_WLOCK(inp); + if (sasoc->sasoc_asocmaxrxt) + inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt; + if (sasoc->sasoc_cookie_life) { + inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life); + } + SCTP_INP_WUNLOCK(inp); + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; } - SCTP_INP_WUNLOCK(inp); } + break; } 
- break; case SCTP_INITMSG: { struct sctp_initmsg *sinit; @@ -4253,12 +4724,12 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (sinit->sinit_max_init_timeo) inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo; SCTP_INP_WUNLOCK(inp); + break; } - break; case SCTP_PRIMARY_ADDR: { struct sctp_setprim *spa; - struct sctp_nets *net, *lnet; + struct sctp_nets *net; SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize); SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id); @@ -4287,7 +4758,6 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if ((net != stcb->asoc.primary_destination) && (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) { /* Ok we need to set it */ - lnet = stcb->asoc.primary_destination; if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) { if (net->dest_state & SCTP_ADDR_SWITCH_PRIMARY) { net->dest_state |= SCTP_ADDR_DOUBLE_SWITCH; @@ -4302,8 +4772,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, if (stcb) { SCTP_TCB_UNLOCK(stcb); } + break; } - break; case SCTP_SET_DYNAMIC_PRIMARY: { union sctp_sockstore *ss; @@ -4316,8 +4786,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize); /* SUPER USER CHECK? 
*/ error = sctp_dynamic_set_primary(&ss->sa, vrf_id); + break; } - break; case SCTP_SET_PEER_PRIMARY_ADDR: { struct sctp_setpeerprim *sspp; @@ -4370,9 +4840,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); error = EINVAL; } - + break; } - break; case SCTP_BINDX_ADD_ADDR: { struct sctp_getaddresses *addrs; @@ -4418,8 +4887,8 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_bindx_add_address(so, inp, addrs->addr, addrs->sget_assoc_id, vrf_id, &error, p); + break; } - break; case SCTP_BINDX_REM_ADDR: { struct sctp_getaddresses *addrs; @@ -4465,8 +4934,232 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, sctp_bindx_delete_address(so, inp, addrs->addr, addrs->sget_assoc_id, vrf_id, &error); + break; + } + case SCTP_EVENT: + { + struct sctp_event *event; + uint32_t event_type; + + SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, optsize); + SCTP_FIND_STCB(inp, stcb, event->se_assoc_id); + switch (event->se_type) { + case SCTP_ASSOC_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT; + break; + case SCTP_PEER_ADDR_CHANGE: + event_type = SCTP_PCB_FLAGS_RECVPADDREVNT; + break; + case SCTP_REMOTE_ERROR: + event_type = SCTP_PCB_FLAGS_RECVPEERERR; + break; + case SCTP_SEND_FAILED: + event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT; + break; + case SCTP_SHUTDOWN_EVENT: + event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT; + break; + case SCTP_ADAPTATION_INDICATION: + event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT; + break; + case SCTP_PARTIAL_DELIVERY_EVENT: + event_type = SCTP_PCB_FLAGS_PDAPIEVNT; + break; + case SCTP_AUTHENTICATION_EVENT: + event_type = SCTP_PCB_FLAGS_AUTHEVNT; + break; + case SCTP_STREAM_RESET_EVENT: + event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT; + break; + case SCTP_SENDER_DRY_EVENT: + event_type = SCTP_PCB_FLAGS_DRYEVNT; + break; + case SCTP_NOTIFICATIONS_STOPPED_EVENT: + event_type = 0; + 
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); + error = ENOTSUP; + break; + default: + event_type = 0; + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (event_type > 0) { + if (stcb) { + if (event->se_on) { + sctp_stcb_feature_on(inp, stcb, event_type); + if (event_type == SCTP_PCB_FLAGS_DRYEVNT) { + if (TAILQ_EMPTY(&stcb->asoc.send_queue) && + TAILQ_EMPTY(&stcb->asoc.sent_queue) && + (stcb->asoc.stream_queue_cnt == 0)) { + sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED); + } + } + } else { + sctp_stcb_feature_off(inp, stcb, event_type); + } + SCTP_TCB_UNLOCK(stcb); + } else { + /* + * We don't want to send up a storm + * of events, so return an error for + * sender dry events + */ + if ((event_type == SCTP_PCB_FLAGS_DRYEVNT) && + ((event->se_assoc_id == SCTP_ALL_ASSOC) || + (event->se_assoc_id == SCTP_CURRENT_ASSOC))) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP); + error = ENOTSUP; + break; + } + if ((event->se_assoc_id == SCTP_FUTURE_ASSOC) || + (event->se_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + if (event->se_on) { + sctp_feature_on(inp, event_type); + } else { + sctp_feature_off(inp, event_type); + } + SCTP_INP_WUNLOCK(inp); + } + if ((event->se_assoc_id == SCTP_CURRENT_ASSOC) || + (event->se_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (event->se_on) { + sctp_stcb_feature_on(inp, stcb, event_type); + } else { + sctp_stcb_feature_off(inp, stcb, event_type); + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } + } + } + break; + } + case SCTP_RECVRCVINFO: + { + int *onoff; + + SCTP_CHECK_AND_CAST(onoff, optval, int, optsize); + SCTP_INP_WLOCK(inp); + if (*onoff != 0) { + sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO); + } else { + sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO); + } + SCTP_INP_WUNLOCK(inp); + 
break; + } + case SCTP_RECVNXTINFO: + { + int *onoff; + + SCTP_CHECK_AND_CAST(onoff, optval, int, optsize); + SCTP_INP_WLOCK(inp); + if (*onoff != 0) { + sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO); + } else { + sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO); + } + SCTP_INP_WUNLOCK(inp); + break; + } + case SCTP_DEFAULT_SNDINFO: + { + struct sctp_sndinfo *info; + uint16_t policy; + + SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, optsize); + SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id); + + if (stcb) { + if (info->snd_sid < stcb->asoc.streamoutcnt) { + stcb->asoc.def_send.sinfo_stream = info->snd_sid; + policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); + stcb->asoc.def_send.sinfo_flags = info->snd_flags; + stcb->asoc.def_send.sinfo_flags |= policy; + stcb->asoc.def_send.sinfo_ppid = info->snd_ppid; + stcb->asoc.def_send.sinfo_context = info->snd_context; + } else { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + } + SCTP_TCB_UNLOCK(stcb); + } else { + if ((info->snd_assoc_id == SCTP_FUTURE_ASSOC) || + (info->snd_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->def_send.sinfo_stream = info->snd_sid; + policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags); + inp->def_send.sinfo_flags = info->snd_flags; + inp->def_send.sinfo_flags |= policy; + inp->def_send.sinfo_ppid = info->snd_ppid; + inp->def_send.sinfo_context = info->snd_context; + SCTP_INP_WUNLOCK(inp); + } + if ((info->snd_assoc_id == SCTP_CURRENT_ASSOC) || + (info->snd_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + if (info->snd_sid < stcb->asoc.streamoutcnt) { + stcb->asoc.def_send.sinfo_stream = info->snd_sid; + policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags); + stcb->asoc.def_send.sinfo_flags = info->snd_flags; + stcb->asoc.def_send.sinfo_flags |= policy; + stcb->asoc.def_send.sinfo_ppid = info->snd_ppid; + 
stcb->asoc.def_send.sinfo_context = info->snd_context; + } + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } + } + break; + } + case SCTP_DEFAULT_PRINFO: + { + struct sctp_default_prinfo *info; + + SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize); + SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id); + + if (PR_SCTP_INVALID_POLICY(info->pr_policy)) { + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); + error = EINVAL; + break; + } + if (stcb) { + stcb->asoc.def_send.sinfo_flags &= 0xfff0; + stcb->asoc.def_send.sinfo_flags |= info->pr_policy; + SCTP_TCB_UNLOCK(stcb); + } else { + if ((info->pr_assoc_id == SCTP_FUTURE_ASSOC) || + (info->pr_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_WLOCK(inp); + inp->def_send.sinfo_flags &= 0xfff0; + inp->def_send.sinfo_flags |= info->pr_policy; + SCTP_INP_WUNLOCK(inp); + } + if ((info->pr_assoc_id == SCTP_CURRENT_ASSOC) || + (info->pr_assoc_id == SCTP_ALL_ASSOC)) { + SCTP_INP_RLOCK(inp); + LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { + SCTP_TCB_LOCK(stcb); + stcb->asoc.def_send.sinfo_flags &= 0xfff0; + stcb->asoc.def_send.sinfo_flags |= info->pr_policy; + SCTP_TCB_UNLOCK(stcb); + } + SCTP_INP_RUNLOCK(inp); + } + } + break; } - break; default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT); error = ENOPROTOOPT; diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h index 1e17900..e48dfe4 100644 --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -50,6 +50,30 @@ extern struct pr_usrreqs sctp_usrreqs; #define sctp_is_feature_on(inp, feature) ((inp->sctp_features & feature) == feature) #define sctp_is_feature_off(inp, feature) ((inp->sctp_features & feature) == 0) +#define sctp_stcb_feature_on(inp, stcb, feature) {\ + if (stcb) { \ + stcb->asoc.sctp_features |= feature; \ + } else { \ + inp->sctp_features |= feature; \ + } \ +} +#define sctp_stcb_feature_off(inp, stcb, feature) {\ + if (stcb) { \ + stcb->asoc.sctp_features &= ~feature; \ + } 
else { \ + inp->sctp_features &= ~feature; \ + } \ +} +#define sctp_stcb_is_feature_on(inp, stcb, feature) \ + (((stcb != NULL) && \ + ((stcb->asoc.sctp_features & feature) == feature)) || \ + ((stcb == NULL) && \ + ((inp->sctp_features & feature) == feature))) +#define sctp_stcb_is_feature_off(inp, stcb, feature) \ + (((stcb != NULL) && \ + ((stcb->asoc.sctp_features & feature) == 0)) || \ + ((stcb == NULL) && \ + ((inp->sctp_features & feature) == 0))) /* managing mobility_feature in inpcb (by micchie) */ #define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature) diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index 39df039..9a8bd2e 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -923,6 +923,7 @@ sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb, asoc->sctp_nr_sack_on_off = (uint8_t) SCTP_BASE_SYSCTL(sctp_nr_sack_on_off); asoc->sctp_cmt_pf = (uint8_t) SCTP_BASE_SYSCTL(sctp_cmt_pf); asoc->sctp_frag_point = m->sctp_frag_point; + asoc->sctp_features = m->sctp_features; #ifdef INET asoc->default_tos = m->ip_inp.inp.inp_ip_tos; #else @@ -2760,7 +2761,7 @@ sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb, } #endif } - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVASSOCEVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT)) { /* event not enabled */ return; } @@ -2831,7 +2832,7 @@ sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state, struct sctp_paddr_change *spc; struct sctp_queued_to_read *control; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVPADDREVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT)) { /* event not enabled */ return; } @@ -2914,7 +2915,7 @@ sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error, struct sctp_queued_to_read *control; int length; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) { + if 
(sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) { /* event not enabled */ return; } @@ -2997,7 +2998,7 @@ sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error, struct sctp_queued_to_read *control; int length; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)) { /* event not enabled */ return; } @@ -3067,7 +3068,7 @@ sctp_notify_adaptation_layer(struct sctp_tcb *stcb, struct sctp_adaptation_event *sai; struct sctp_queued_to_read *control; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT)) { /* event not enabled */ return; } @@ -3118,7 +3119,7 @@ sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error, struct sctp_queued_to_read *control; struct sockbuf *sb; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_PDAPIEVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_PDAPIEVNT)) { /* event not enabled */ return; } @@ -3231,7 +3232,7 @@ sctp_notify_shutdown_event(struct sctp_tcb *stcb) SCTP_SOCKET_UNLOCK(so, 1); #endif } - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT)) { /* event not enabled */ return; } @@ -3278,7 +3279,7 @@ sctp_notify_sender_dry_event(struct sctp_tcb *stcb, struct sctp_sender_dry_event *event; struct sctp_queued_to_read *control; - if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_DRYEVNT)) { + if (sctp_stcb_is_feature_off(stcb->sctp_ep, stcb, SCTP_PCB_FLAGS_DRYEVNT)) { /* event not enabled */ return; } @@ -5490,7 +5491,8 @@ found_one: if ((sinfo) && filling_sinfo) { memcpy(sinfo, control, sizeof(struct sctp_nonpad_sndrcvinfo)); nxt = TAILQ_NEXT(control, next); - if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) { + if 
(sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) || + sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) { struct sctp_extrcvinfo *s_extra; s_extra = (struct sctp_extrcvinfo *)sinfo; @@ -5997,7 +5999,8 @@ out: if (((out_flags & MSG_EOR) == 0) && ((in_flags & MSG_PEEK) == 0) && (sinfo) && - (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO))) { + (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO) || + sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO))) { struct sctp_extrcvinfo *s_extra; s_extra = (struct sctp_extrcvinfo *)sinfo; @@ -6147,8 +6150,9 @@ sctp_soreceive(struct socket *so, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); return (EINVAL); } - if ((sctp_is_feature_off(inp, - SCTP_PCB_FLAGS_RECVDATAIOEVNT)) || + if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO) && + sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO)) || (controlp == NULL)) { /* user does not want the sndrcv ctl */ filling_sinfo = 0; @@ -6184,71 +6188,6 @@ sctp_soreceive(struct socket *so, } -int -sctp_l_soreceive(struct socket *so, - struct sockaddr **name, - struct uio *uio, - char **controlp, - int *controllen, - int *flag) -{ - int error, fromlen; - uint8_t sockbuf[256]; - struct sockaddr *from; - struct sctp_extrcvinfo sinfo; - int filling_sinfo = 1; - struct sctp_inpcb *inp; - - inp = (struct sctp_inpcb *)so->so_pcb; - /* pickup the assoc we are reading from */ - if (inp == NULL) { - SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL); - return (EINVAL); - } - if ((sctp_is_feature_off(inp, - SCTP_PCB_FLAGS_RECVDATAIOEVNT)) || - (controlp == NULL)) { - /* user does not want the sndrcv ctl */ - filling_sinfo = 0; - } - if (name) { - from = (struct sockaddr *)sockbuf; - fromlen = sizeof(sockbuf); - from->sa_len = 0; - } else { - from = NULL; - fromlen = 0; - } - - error = sctp_sorecvmsg(so, uio, - (struct mbuf **)NULL, - from, fromlen, flag, - (struct sctp_sndrcvinfo *)&sinfo, - 
filling_sinfo); - if ((controlp) && (filling_sinfo)) { - /* - * copy back the sinfo in a CMSG format note that the caller - * has reponsibility for freeing the memory. - */ - if (filling_sinfo) - *controlp = sctp_build_ctl_cchunk(inp, - controllen, - (struct sctp_sndrcvinfo *)&sinfo); - } - if (name) { - /* copy back the address info */ - if (from && from->sa_len) { - *name = sodupsockaddr(from, M_WAIT); - } else { - *name = NULL; - } - } - return (error); -} - - - - diff --git a/sys/netinet/sctputil.h b/sys/netinet/sctputil.h index 69983e0..460adc7 100644 --- a/sys/netinet/sctputil.h +++ b/sys/netinet/sctputil.h @@ -328,20 +328,6 @@ sctp_soreceive(struct socket *so, struct sockaddr **psa, struct mbuf **controlp, int *flagsp); - -/* For those not passing mbufs, this does the - * translations for you. Caller owns memory - * of size controllen returned in controlp. - */ -int -sctp_l_soreceive(struct socket *so, - struct sockaddr **name, - struct uio *uio, - char **controlp, - int *controllen, - int *flag); - - void sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d); diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 9d11262..6145a54 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -696,17 +696,16 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport, /* We need the tcbinfo lock. */ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); - INP_INFO_RLOCK(&V_tcbinfo); if (dir == PFIL_IN) inp = (ipver == INP_IPV4 ? 
- in_pcblookup_hash(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst, - dport, 0, m->m_pkthdr.rcvif) + in_pcblookup(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst, + dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif) : #ifdef SIFTR_IPV6 - in6_pcblookup_hash(&V_tcbinfo, + in6_pcblookup(&V_tcbinfo, &((struct ip6_hdr *)ip)->ip6_src, sport, - &((struct ip6_hdr *)ip)->ip6_dst, dport, 0, + &((struct ip6_hdr *)ip)->ip6_dst, dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif) #else NULL @@ -715,13 +714,13 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport, else inp = (ipver == INP_IPV4 ? - in_pcblookup_hash(&V_tcbinfo, ip->ip_dst, dport, ip->ip_src, - sport, 0, m->m_pkthdr.rcvif) + in_pcblookup(&V_tcbinfo, ip->ip_dst, dport, ip->ip_src, + sport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif) : #ifdef SIFTR_IPV6 - in6_pcblookup_hash(&V_tcbinfo, + in6_pcblookup(&V_tcbinfo, &((struct ip6_hdr *)ip)->ip6_dst, dport, - &((struct ip6_hdr *)ip)->ip6_src, sport, 0, + &((struct ip6_hdr *)ip)->ip6_src, sport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif) #else NULL @@ -734,12 +733,7 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport, ss->nskip_in_inpcb++; else ss->nskip_out_inpcb++; - } else { - /* Acquire the inpcb lock. */ - INP_UNLOCK_ASSERT(inp); - INP_RLOCK(inp); } - INP_INFO_RUNLOCK(&V_tcbinfo); return (inp); } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 1a94d0a..e3e9aa6 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -5,6 +5,7 @@ * Swinburne University of Technology, Melbourne, Australia. * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> * Copyright (c) 2010 The FreeBSD Foundation + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed at the Centre for Advanced Internet @@ -16,6 +17,9 @@ * Internet Architectures, Swinburne University of Technology, Melbourne, * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 
* + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -197,10 +201,6 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, "Max size of automatic receive buffer"); -int tcp_read_locking = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, read_locking, CTLFLAG_RW, - &tcp_read_locking, 0, "Enable read locking strategy"); - VNET_DEFINE(struct inpcbhead, tcb); #define tcb6 tcb /* for KAME src sync over BSD*'s */ VNET_DEFINE(struct inpcbinfo, tcbinfo); @@ -591,8 +591,7 @@ tcp_input(struct mbuf *m, int off0) char *s = NULL; /* address and port logging */ int ti_locked; #define TI_UNLOCKED 1 -#define TI_RLOCKED 2 -#define TI_WLOCKED 3 +#define TI_WLOCKED 2 #ifdef TCPDEBUG /* @@ -756,30 +755,25 @@ tcp_input(struct mbuf *m, int off0) drop_hdrlen = off0 + off; /* - * Locate pcb for segment, which requires a lock on tcbinfo. - * Optimisticaly acquire a global read lock rather than a write lock - * unless header flags necessarily imply a state change. There are - * two cases where we might discover later we need a write lock - * despite the flags: ACKs moving a connection out of the syncache, - * and ACKs for a connection in TIMEWAIT. + * Locate pcb for segment; if we're likely to add or remove a + * connection then first acquire pcbinfo lock. There are two cases + * where we might discover later we need a write lock despite the + * flags: ACKs moving a connection out of the syncache, and ACKs for + * a connection in TIMEWAIT. 
*/ - if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || - tcp_read_locking == 0) { + if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) { INP_INFO_WLOCK(&V_tcbinfo); ti_locked = TI_WLOCKED; - } else { - INP_INFO_RLOCK(&V_tcbinfo); - ti_locked = TI_RLOCKED; - } + } else + ti_locked = TI_UNLOCKED; findpcb: #ifdef INVARIANTS - if (ti_locked == TI_RLOCKED) - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) { INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - else - panic("%s: findpcb ti_locked %d\n", __func__, ti_locked); + } else { + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + } #endif #ifdef INET @@ -797,20 +791,20 @@ findpcb: * Transparently forwarded. Pretend to be the destination. * already got one like this? */ - inp = in_pcblookup_hash(&V_tcbinfo, - ip->ip_src, th->th_sport, - ip->ip_dst, th->th_dport, - 0, m->m_pkthdr.rcvif); + inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, + ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB, + m->m_pkthdr.rcvif, m); if (!inp) { - /* It's new. Try to find the ambushing socket. */ - inp = in_pcblookup_hash(&V_tcbinfo, - ip->ip_src, th->th_sport, - next_hop->sin_addr, - next_hop->sin_port ? - ntohs(next_hop->sin_port) : - th->th_dport, - INPLOOKUP_WILDCARD, - m->m_pkthdr.rcvif); + /* + * It's new. Try to find the ambushing socket. + * Because we've rewritten the destination address, + * any hardware-generated hash is ignored. + */ + inp = in_pcblookup(&V_tcbinfo, ip->ip_src, + th->th_sport, next_hop->sin_addr, + next_hop->sin_port ? ntohs(next_hop->sin_port) : + th->th_dport, INPLOOKUP_WILDCARD | + INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); } /* Remove the tag from the packet. We don't need it anymore. 
*/ m_tag_delete(m, fwd_tag); @@ -820,21 +814,19 @@ findpcb: { #ifdef INET6 if (isipv6) - inp = in6_pcblookup_hash(&V_tcbinfo, - &ip6->ip6_src, th->th_sport, - &ip6->ip6_dst, th->th_dport, - INPLOOKUP_WILDCARD, - m->m_pkthdr.rcvif); + inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, + th->th_sport, &ip6->ip6_dst, th->th_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, + m->m_pkthdr.rcvif, m); #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET - inp = in_pcblookup_hash(&V_tcbinfo, - ip->ip_src, th->th_sport, - ip->ip_dst, th->th_dport, - INPLOOKUP_WILDCARD, - m->m_pkthdr.rcvif); + inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, + th->th_sport, ip->ip_dst, th->th_dport, + INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, + m->m_pkthdr.rcvif, m); #endif } @@ -865,7 +857,7 @@ findpcb: rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } - INP_WLOCK(inp); + INP_WLOCK_ASSERT(inp); if (!(inp->inp_flags & INP_HW_FLOWID) && (m->m_flags & M_FLOWID) && ((inp->inp_socket == NULL) @@ -906,28 +898,26 @@ findpcb: * legitimate new connection attempt the old INPCB gets removed and * we can try again to find a listening socket. * - * At this point, due to earlier optimism, we may hold a read lock on - * the inpcbinfo, rather than a write lock. If so, we need to - * upgrade, or if that fails, acquire a reference on the inpcb, drop - * all locks, acquire a global write lock, and then re-acquire the - * inpcb lock. We may at that point discover that another thread has - * tried to free the inpcb, in which case we need to loop back and - * try to find a new inpcb to deliver to. + * At this point, due to earlier optimism, we may hold only an inpcb + * lock, and not the inpcbinfo write lock. If so, we need to try to + * acquire it, or if that fails, acquire a reference on the inpcb, + * drop all locks, acquire a global write lock, and then re-acquire + * the inpcb lock. 
We may at that point discover that another thread + * has tried to free the inpcb, in which case we need to loop back + * and try to find a new inpcb to deliver to. + * + * XXXRW: It may be time to rethink timewait locking. */ relocked: if (inp->inp_flags & INP_TIMEWAIT) { - KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, - ("%s: INP_TIMEWAIT ti_locked %d", __func__, ti_locked)); - - if (ti_locked == TI_RLOCKED) { - if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) { + if (ti_locked == TI_UNLOCKED) { + if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_WLOCK(&V_tcbinfo); ti_locked = TI_WLOCKED; INP_WLOCK(inp); - if (in_pcbrele(inp)) { + if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; } @@ -975,26 +965,24 @@ relocked: /* * We've identified a valid inpcb, but it could be that we need an - * inpcbinfo write lock and have only a read lock. In this case, - * attempt to upgrade/relock using the same strategy as the TIMEWAIT - * case above. If we relock, we have to jump back to 'relocked' as - * the connection might now be in TIMEWAIT. + * inpcbinfo write lock but don't hold it. In this case, attempt to + * acquire using the same strategy as the TIMEWAIT case above. If we + * relock, we have to jump back to 'relocked' as the connection might + * now be in TIMEWAIT. 
*/ - if (tp->t_state != TCPS_ESTABLISHED || - (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || - tcp_read_locking == 0) { - KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, - ("%s: upgrade check ti_locked %d", __func__, ti_locked)); - - if (ti_locked == TI_RLOCKED) { - if (INP_INFO_TRY_UPGRADE(&V_tcbinfo) == 0) { +#ifdef INVARIANTS + if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); +#endif + if (tp->t_state != TCPS_ESTABLISHED) { + if (ti_locked == TI_UNLOCKED) { + if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) { in_pcbref(inp); INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); INP_INFO_WLOCK(&V_tcbinfo); ti_locked = TI_WLOCKED; INP_WLOCK(inp); - if (in_pcbrele(inp)) { + if (in_pcbrele_wlocked(inp)) { inp = NULL; goto findpcb; } @@ -1027,13 +1015,16 @@ relocked: /* * When the socket is accepting connections (the INPCB is in LISTEN * state) we look into the SYN cache if this is a new connection - * attempt or the completion of a previous one. + * attempt or the completion of a previous one. Because listen + * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be + * held in this case. 
*/ if (so->so_options & SO_ACCEPTCONN) { struct in_conninfo inc; KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but " "tp not listening", __func__)); + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); bzero(&inc, sizeof(inc)); #ifdef INET6 @@ -1371,13 +1362,17 @@ relocked: return; dropwithreset: - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) { INP_INFO_WUNLOCK(&V_tcbinfo); - else - panic("%s: dropwithreset ti_locked %d", __func__, ti_locked); - ti_locked = TI_UNLOCKED; + ti_locked = TI_UNLOCKED; + } +#ifdef INVARIANTS + else { + KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset " + "ti_locked: %d", __func__, ti_locked)); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + } +#endif if (inp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); @@ -1388,13 +1383,17 @@ dropwithreset: goto drop; dropunlock: - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) { INP_INFO_WUNLOCK(&V_tcbinfo); - else - panic("%s: dropunlock ti_locked %d", __func__, ti_locked); - ti_locked = TI_UNLOCKED; + ti_locked = TI_UNLOCKED; + } +#ifdef INVARIANTS + else { + KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock " + "ti_locked: %d", __func__, ti_locked)); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + } +#endif if (inp != NULL) INP_WUNLOCK(inp); @@ -1449,13 +1448,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, INP_INFO_WLOCK_ASSERT(&V_tcbinfo); } else { #ifdef INVARIANTS - if (ti_locked == TI_RLOCKED) - INP_INFO_RLOCK_ASSERT(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) INP_INFO_WLOCK_ASSERT(&V_tcbinfo); - else - panic("%s: ti_locked %d for EST", __func__, - ti_locked); + else { + KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " + "ti_locked: %d", __func__, ti_locked)); + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + } #endif } INP_WLOCK_ASSERT(tp->t_inpcb); @@ -1601,13 +1600,8 @@ 
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, /* * This is a pure ack for outstanding data. */ - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) INP_INFO_WUNLOCK(&V_tcbinfo); - else - panic("%s: ti_locked %d on pure ACK", - __func__, ti_locked); ti_locked = TI_UNLOCKED; TCPSTAT_INC(tcps_predack); @@ -1708,13 +1702,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * nothing on the reassembly queue and we have enough * buffer space to take it. */ - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) INP_INFO_WUNLOCK(&V_tcbinfo); - else - panic("%s: ti_locked %d on pure data " - "segment", __func__, ti_locked); ti_locked = TI_UNLOCKED; /* Clean receiver SACK report if present */ @@ -2550,9 +2539,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, } process_ACK: - INP_INFO_LOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, - ("tcp_input: process_ACK ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); acked = BYTES_THIS_ACK(tp, th); @@ -2716,9 +2702,6 @@ process_ACK: } step6: - INP_INFO_LOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, - ("tcp_do_segment: step6 ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2804,9 +2787,6 @@ step6: tp->rcv_up = tp->rcv_nxt; } dodata: /* XXX */ - INP_INFO_LOCK_ASSERT(&V_tcbinfo); - KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, - ("tcp_do_segment: dodata ti_locked %d", ti_locked)); INP_WLOCK_ASSERT(tp->t_inpcb); /* @@ -2938,13 +2918,8 @@ dodata: /* XXX */ return; } } - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) INP_INFO_WUNLOCK(&V_tcbinfo); - else - panic("%s: dodata epilogue ti_locked %d", __func__, - ti_locked); ti_locked 
= TI_UNLOCKED; #ifdef TCPDEBUG @@ -2973,9 +2948,6 @@ check_delack: return; dropafterack: - KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, - ("tcp_do_segment: dropafterack ti_locked %d", ti_locked)); - /* * Generate an ACK dropping incoming segment if it occupies * sequence space, where the ACK reflects our state. @@ -3002,13 +2974,8 @@ dropafterack: tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) INP_INFO_WUNLOCK(&V_tcbinfo); - else - panic("%s: dropafterack epilogue ti_locked %d", __func__, - ti_locked); ti_locked = TI_UNLOCKED; tp->t_flags |= TF_ACKNOW; @@ -3018,12 +2985,8 @@ dropafterack: return; dropwithreset: - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) INP_INFO_WUNLOCK(&V_tcbinfo); - else - panic("%s: dropwithreset ti_locked %d", __func__, ti_locked); ti_locked = TI_UNLOCKED; if (tp != NULL) { @@ -3034,15 +2997,14 @@ dropwithreset: return; drop: - if (ti_locked == TI_RLOCKED) - INP_INFO_RUNLOCK(&V_tcbinfo); - else if (ti_locked == TI_WLOCKED) + if (ti_locked == TI_WLOCKED) { INP_INFO_WUNLOCK(&V_tcbinfo); + ti_locked = TI_UNLOCKED; + } #ifdef INVARIANTS else INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); #endif - ti_locked = TI_UNLOCKED; /* * Drop space held by incoming segment and return. 
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 4b5fa10..4542ac5 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1102,8 +1102,15 @@ send: m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; } +#ifdef IPSEC + KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), + ("%s: mbuf chain shorter than expected: %ld + %u + %u - %u != %u", + __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); +#else KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL), - ("%s: mbuf chain shorter than expected", __func__)); + ("%s: mbuf chain shorter than expected: %ld + %u + %u != %u", + __func__, len, hdrlen, ipoptlen, m_length(m, NULL))); +#endif /* * In transmit state, time the transmission and arrange for @@ -1331,7 +1338,7 @@ out: * then remember the size of the advertised window. * Any pending ACK has now been sent. */ - if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) + if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + recwin; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 2c013be..6ed58911 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -300,7 +300,8 @@ tcp_init(void) hashsize = 512; /* safe default */ } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, - "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE); + "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. 
@@ -1184,9 +1185,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) INP_INFO_WLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; - INP_WLOCK(inp); - if (!in_pcbrele(inp)) - INP_WUNLOCK(inp); + INP_RLOCK(inp); + if (!in_pcbrele_rlocked(inp)) + INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); @@ -1228,12 +1229,9 @@ tcp_getcred(SYSCTL_HANDLER_ARGS) error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); - INP_INFO_RLOCK(&V_tcbinfo); - inp = in_pcblookup_hash(&V_tcbinfo, addrs[1].sin_addr, - addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); + inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, + addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { - INP_RLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) @@ -1241,10 +1239,8 @@ tcp_getcred(SYSCTL_HANDLER_ARGS) if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); - } else { - INP_INFO_RUNLOCK(&V_tcbinfo); + } else error = ENOENT; - } if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); @@ -1286,23 +1282,20 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS) return (EINVAL); } - INP_INFO_RLOCK(&V_tcbinfo); #ifdef INET if (mapped == 1) - inp = in_pcblookup_hash(&V_tcbinfo, + inp = in_pcblookup(&V_tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], - addrs[0].sin6_port, - 0, NULL); + addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); else #endif - inp = in6_pcblookup_hash(&V_tcbinfo, + inp = in6_pcblookup(&V_tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, - &addrs[0].sin6_addr, addrs[0].sin6_port, 0, NULL); + &addrs[0].sin6_addr, addrs[0].sin6_port, + INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { - INP_RLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) @@ -1310,10 +1303,8 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS) if (error == 0) 
cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); - } else { - INP_INFO_RUNLOCK(&V_tcbinfo); + } else error = ENOENT; - } if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); @@ -1374,10 +1365,9 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_WLOCK(&V_tcbinfo); - inp = in_pcblookup_hash(&V_tcbinfo, faddr, th->th_dport, - ip->ip_src, th->th_sport, 0, NULL); + inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, + ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL) { - INP_WLOCK(inp); if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { @@ -2154,20 +2144,19 @@ sysctl_drop(SYSCTL_HANDLER_ARGS) switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup_hash(&V_tcbinfo, &fin6->sin6_addr, - fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, 0, - NULL); + inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, + fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, + INPLOOKUP_WLOCKPCB, NULL); break; #endif #ifdef INET case AF_INET: - inp = in_pcblookup_hash(&V_tcbinfo, fin->sin_addr, - fin->sin_port, lin->sin_addr, lin->sin_port, 0, NULL); + inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, + lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif } if (inp != NULL) { - INP_WLOCK(inp); if (inp->inp_flags & INP_TIMEWAIT) { /* * XXXRW: There currently exists a state where an diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 8262f43..66e4732 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_pcbgroup.h" #include <sys/param.h> #include <sys/systm.h> @@ -661,6 +662,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) inp = sotoinpcb(so); inp->inp_inc.inc_fibnum = 
so->so_fibnum; INP_WLOCK(inp); + INP_HASH_WLOCK(&V_tcbinfo); /* Insert new socket into PCB hash list. */ inp->inp_inc.inc_flags = sc->sc_inc.inc_flags; @@ -675,8 +677,14 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) #ifdef INET6 } #endif + + /* + * Install in the reservation hash table for now, but don't yet + * install a connection group since the full 4-tuple isn't yet + * configured. + */ inp->inp_lport = sc->sc_inc.inc_lport; - if ((error = in_pcbinshash(inp)) != 0) { + if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) { /* * Undo the assignments above if we failed to * put the PCB on the hash lists. @@ -694,6 +702,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) s, __func__, error); free(s, M_TCPLOG); } + INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } #ifdef IPSEC @@ -728,8 +737,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) laddr6 = inp->in6p_laddr; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) inp->in6p_laddr = sc->sc_inc.inc6_laddr; - if ((error = in6_pcbconnect(inp, (struct sockaddr *)&sin6, - thread0.td_ucred)) != 0) { + if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6, + thread0.td_ucred, m)) != 0) { inp->in6p_laddr = laddr6; if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed " @@ -737,6 +746,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) s, __func__, error); free(s, M_TCPLOG); } + INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } /* Override flowlabel from in6_pcbconnect. 
*/ @@ -767,8 +777,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) laddr = inp->inp_laddr; if (inp->inp_laddr.s_addr == INADDR_ANY) inp->inp_laddr = sc->sc_inc.inc_laddr; - if ((error = in_pcbconnect(inp, (struct sockaddr *)&sin, - thread0.td_ucred)) != 0) { + if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin, + thread0.td_ucred, m)) != 0) { inp->inp_laddr = laddr; if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: in_pcbconnect failed " @@ -776,10 +786,12 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) s, __func__, error); free(s, M_TCPLOG); } + INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } } #endif /* INET */ + INP_HASH_WUNLOCK(&V_tcbinfo); tp = intotcpcb(inp); tp->t_state = TCPS_SYN_RECEIVED; tp->iss = sc->sc_iss; diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 5c2c5c2..73984c7 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -490,7 +490,7 @@ tcp_timer_rexmt(void * xtp) INP_WUNLOCK(inp); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); - if (in_pcbrele(inp)) { + if (in_pcbrele_wlocked(inp)) { INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 318fe27..96cb1e4 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -2,8 +2,12 @@ * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * Copyright (c) 2006-2007 Robert N. M. Watson + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -251,7 +255,6 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) return (EAFNOSUPPORT); TCPDEBUG0; - INP_INFO_WLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL")); INP_WLOCK(inp); @@ -261,11 +264,12 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) } tp = intotcpcb(inp); TCPDEBUG1(); + INP_HASH_WLOCK(&V_tcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); out: TCPDEBUG2(PRU_BIND); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); return (error); } @@ -292,7 +296,6 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) return (EAFNOSUPPORT); TCPDEBUG0; - INP_INFO_WLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL")); INP_WLOCK(inp); @@ -302,6 +305,7 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) } tp = intotcpcb(inp); TCPDEBUG1(); + INP_HASH_WLOCK(&V_tcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; #ifdef INET @@ -316,15 +320,16 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); goto out; } } #endif error = in6_pcbbind(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); out: TCPDEBUG2(PRU_BIND); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); return (error); } #endif /* INET6 */ @@ -341,7 +346,6 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) struct tcpcb *tp = NULL; TCPDEBUG0; - INP_INFO_WLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); INP_WLOCK(inp); @@ -353,8 +357,10 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) TCPDEBUG1(); SOCK_LOCK(so); 
error = solisten_proto_check(so); + INP_HASH_WLOCK(&V_tcbinfo); if (error == 0 && inp->inp_lport == 0) error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); + INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { tp->t_state = TCPS_LISTEN; solisten_proto(so, backlog); @@ -365,7 +371,6 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) out: TCPDEBUG2(PRU_LISTEN); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); return (error); } #endif /* INET */ @@ -379,7 +384,6 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) struct tcpcb *tp = NULL; TCPDEBUG0; - INP_INFO_WLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); INP_WLOCK(inp); @@ -391,12 +395,14 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) TCPDEBUG1(); SOCK_LOCK(so); error = solisten_proto_check(so); + INP_HASH_WLOCK(&V_tcbinfo); if (error == 0 && inp->inp_lport == 0) { inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); } + INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { tp->t_state = TCPS_LISTEN; solisten_proto(so, backlog); @@ -406,7 +412,6 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) out: TCPDEBUG2(PRU_LISTEN); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); return (error); } #endif /* INET6 */ @@ -440,7 +445,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) return (error); TCPDEBUG0; - INP_INFO_WLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL")); INP_WLOCK(inp); @@ -456,7 +460,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) out: TCPDEBUG2(PRU_CONNECT); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); return (error); } #endif /* INET */ @@ -482,7 +485,6 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) && 
IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) return (EAFNOSUPPORT); - INP_INFO_WLOCK(&V_tcbinfo); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL")); INP_WLOCK(inp); @@ -493,6 +495,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) tp = intotcpcb(inp); TCPDEBUG1(); #ifdef INET + /* + * XXXRW: Some confusion: V4/V6 flags relate to binding, and + * therefore probably require the hash lock, which isn't held here. + * Is this a significant problem? + */ if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { struct sockaddr_in sin; @@ -525,7 +532,6 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) out: TCPDEBUG2(PRU_CONNECT); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_tcbinfo); return (error); } #endif /* INET6 */ @@ -639,6 +645,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam) inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL")); + INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNABORTED; @@ -664,6 +671,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam) out: TCPDEBUG2(PRU_ACCEPT); INP_WUNLOCK(inp); + INP_INFO_RUNLOCK(&V_tcbinfo); if (error == 0) { if (v4) *nam = in6_v4mapsin6_sockaddr(port, &addr); @@ -750,25 +758,17 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, int error = 0; struct inpcb *inp; struct tcpcb *tp = NULL; - int headlocked = 0; #ifdef INET6 int isipv6; #endif TCPDEBUG0; /* - * We require the pcbinfo lock in two cases: - * - * (1) An implied connect is taking place, which can result in - * binding IPs and ports and hence modification of the pcb hash - * chains. - * - * (2) PRUS_EOF is set, resulting in explicit close on the send. + * We require the pcbinfo lock if we will close the socket as part of + * this call. 
*/ - if ((nam != NULL) || (flags & PRUS_EOF)) { + if (flags & PRUS_EOF) INP_INFO_WLOCK(&V_tcbinfo); - headlocked = 1; - } inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); @@ -805,7 +805,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, * initialize maxseg/maxopd using peer's cached * MSS. */ - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); @@ -830,10 +829,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, socantsendmore(so); tcp_usrclosed(tp); } - if (headlocked) { - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; - } if (!(inp->inp_flags & INP_DROPPED)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; @@ -869,7 +864,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, * initialize maxseg/maxopd using peer's cached * MSS. */ - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); @@ -884,11 +878,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, goto out; tp->snd_wnd = TTCP_CLIENT_SND_WND; tcp_mss(tp, -1); - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; - } else if (nam) { - INP_INFO_WUNLOCK(&V_tcbinfo); - headlocked = 0; } tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_flags |= TF_FORCEDATA; @@ -899,7 +888,7 @@ out: TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? 
PRU_SEND_EOF : PRU_SEND)); INP_WUNLOCK(inp); - if (headlocked) + if (flags & PRUS_EOF) INP_INFO_WUNLOCK(&V_tcbinfo); return (error); } @@ -1087,13 +1076,13 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) u_short lport; int error; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK(&V_tcbinfo); if (inp->inp_lport == 0) { error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) - return error; + goto out; } /* @@ -1106,11 +1095,14 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport, &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred); if (error && oinp == NULL) - return error; - if (oinp) - return EADDRINUSE; + goto out; + if (oinp) { + error = EADDRINUSE; + goto out; + } inp->inp_laddr = laddr; in_pcbrehash(inp); + INP_HASH_WUNLOCK(&V_tcbinfo); /* * Compute window scaling to request: @@ -1129,6 +1121,10 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) tcp_sendseqinit(tp); return 0; + +out: + INP_HASH_WUNLOCK(&V_tcbinfo); + return (error); } #endif /* INET */ @@ -1142,13 +1138,13 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) struct in6_addr addr6; int error; - INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK(&V_tcbinfo); if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) - return error; + goto out; } /* @@ -1156,18 +1152,23 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) * earlier incarnation of this same connection still in * TIME_WAIT state, creating an ADDRINUSE error. * in6_pcbladdr() also handles scope zone IDs. + * + * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked() + * outside of in6_pcb.c if there were an in6_pcbconnect_setup(). 
*/ error = in6_pcbladdr(inp, nam, &addr6); if (error) - return error; - oinp = in6_pcblookup_hash(inp->inp_pcbinfo, + goto out; + oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? &addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL); - if (oinp) - return EADDRINUSE; + if (oinp) { + error = EADDRINUSE; + goto out; + } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) inp->in6p_laddr = addr6; inp->in6p_faddr = sin6->sin6_addr; @@ -1178,6 +1179,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) inp->inp_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); in_pcbrehash(inp); + INP_HASH_WUNLOCK(&V_tcbinfo); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && @@ -1192,6 +1194,10 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) tcp_sendseqinit(tp); return 0; + +out: + INP_HASH_WUNLOCK(&V_tcbinfo); + return error; } #endif /* INET6 */ diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index c3503e6..28eb8fd 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -2,8 +2,12 @@ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * Copyright (c) 2008 Robert N. M. Watson + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -182,7 +186,8 @@ udp_init(void) { in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE, - "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE); + "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_2TUPLE); V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(V_udpcb_zone, maxsockets); @@ -253,7 +258,7 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, #endif struct udpcb *up; - INP_RLOCK_ASSERT(inp); + INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. @@ -458,12 +463,12 @@ udp_input(struct mbuf *m, int off) } #endif - INP_INFO_RLOCK(&V_udbinfo); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, ifp)) { struct inpcb *last; struct ip_moptions *imo; + INP_INFO_RLOCK(&V_udbinfo); last = NULL; LIST_FOREACH(inp, &V_udb, inp_list) { if (inp->inp_lport != uh->uh_dport) @@ -485,6 +490,13 @@ udp_input(struct mbuf *m, int off) INP_RLOCK(inp); /* + * XXXRW: Because we weren't holding either the inpcb + * or the hash lock when we checked for a match + * before, we should probably recheck now that the + * inpcb lock is held. + */ + + /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ @@ -542,7 +554,10 @@ udp_input(struct mbuf *m, int off) * or multicast datgram.) */ UDPSTAT_INC(udps_noportbcast); - goto badheadlocked; + if (inp) + INP_RUNLOCK(inp); + INP_INFO_RUNLOCK(&V_udbinfo); + goto badunlocked; } udp_append(last, ip, m, iphlen, &udp_in); INP_RUNLOCK(last); @@ -553,8 +568,9 @@ udp_input(struct mbuf *m, int off) /* * Locate pcb for datagram. 
*/ - inp = in_pcblookup_hash(&V_udbinfo, ip->ip_src, uh->uh_sport, - ip->ip_dst, uh->uh_dport, 1, ifp); + inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport, + ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, + ifp, m); if (inp == NULL) { if (udp_log_in_vain) { char buf[4*sizeof "123"]; @@ -568,36 +584,31 @@ udp_input(struct mbuf *m, int off) UDPSTAT_INC(udps_noport); if (m->m_flags & (M_BCAST | M_MCAST)) { UDPSTAT_INC(udps_noportbcast); - goto badheadlocked; + goto badunlocked; } if (V_udp_blackhole) - goto badheadlocked; + goto badunlocked; if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) - goto badheadlocked; + goto badunlocked; *ip = save_ip; ip->ip_len += iphlen; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); - INP_INFO_RUNLOCK(&V_udbinfo); return; } /* * Check the minimum TTL for socket. */ - INP_RLOCK(inp); - INP_INFO_RUNLOCK(&V_udbinfo); + INP_RLOCK_ASSERT(inp); if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) { INP_RUNLOCK(inp); - goto badunlocked; + m_freem(m); + return; } udp_append(inp, ip, m, iphlen, &udp_in); INP_RUNLOCK(inp); return; -badheadlocked: - if (inp) - INP_RUNLOCK(inp); - INP_INFO_RUNLOCK(&V_udbinfo); badunlocked: m_freem(m); } @@ -656,17 +667,15 @@ udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) return; if (ip != NULL) { uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); - INP_INFO_RLOCK(&V_udbinfo); - inp = in_pcblookup_hash(&V_udbinfo, faddr, uh->uh_dport, - ip->ip_src, uh->uh_sport, 0, NULL); + inp = in_pcblookup(&V_udbinfo, faddr, uh->uh_dport, + ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { - INP_RLOCK(inp); + INP_RLOCK_ASSERT(inp); if (inp->inp_socket != NULL) { udp_notify(inp, inetctlerrmap[cmd]); } INP_RUNLOCK(inp); } - INP_INFO_RUNLOCK(&V_udbinfo); } else in_pcbnotifyall(&V_udbinfo, faddr, inetctlerrmap[cmd], udp_notify); @@ -756,9 +765,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) INP_INFO_WLOCK(&V_udbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; - 
INP_WLOCK(inp); - if (!in_pcbrele(inp)) - INP_WUNLOCK(inp); + INP_RLOCK(inp); + if (!in_pcbrele_rlocked(inp)) + INP_RUNLOCK(inp); } INP_INFO_WUNLOCK(&V_udbinfo); @@ -799,12 +808,11 @@ udp_getcred(SYSCTL_HANDLER_ARGS) error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); - INP_INFO_RLOCK(&V_udbinfo); - inp = in_pcblookup_hash(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, - addrs[0].sin_addr, addrs[0].sin_port, 1, NULL); + inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port, + addrs[0].sin_addr, addrs[0].sin_port, + INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { - INP_RLOCK(inp); - INP_INFO_RUNLOCK(&V_udbinfo); + INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) @@ -812,10 +820,8 @@ udp_getcred(SYSCTL_HANDLER_ARGS) if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); - } else { - INP_INFO_RUNLOCK(&V_udbinfo); + } else error = ENOENT; - } if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); @@ -924,6 +930,9 @@ udp_ctloutput(struct socket *so, struct sockopt *sopt) } #ifdef INET +#define UH_WLOCKED 2 +#define UH_RLOCKED 1 +#define UH_UNLOCKED 0 static int udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) @@ -1016,29 +1025,27 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, * conservative locks than required the second time around, so later * assertions have to accept that. Further analysis of the number of * misses under contention is required. + * + * XXXRW: Check that hash locking update here is correct. 
*/ sin = (struct sockaddr_in *)addr; INP_RLOCK(inp); if (sin != NULL && (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { INP_RUNLOCK(inp); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); - unlock_udbinfo = 2; + INP_HASH_WLOCK(&V_udbinfo); + unlock_udbinfo = UH_WLOCKED; } else if ((sin != NULL && ( (sin->sin_addr.s_addr == INADDR_ANY) || (sin->sin_addr.s_addr == INADDR_BROADCAST) || (inp->inp_laddr.s_addr == INADDR_ANY) || (inp->inp_lport == 0))) || (src.sin_family == AF_INET)) { - if (!INP_INFO_TRY_RLOCK(&V_udbinfo)) { - INP_RUNLOCK(inp); - INP_INFO_RLOCK(&V_udbinfo); - INP_RLOCK(inp); - } - unlock_udbinfo = 1; + INP_HASH_RLOCK(&V_udbinfo); + unlock_udbinfo = UH_RLOCKED; } else - unlock_udbinfo = 0; + unlock_udbinfo = UH_UNLOCKED; /* * If the IP_SENDSRCADDR control message was specified, override the @@ -1048,7 +1055,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, laddr = inp->inp_laddr; lport = inp->inp_lport; if (src.sin_family == AF_INET) { - INP_INFO_LOCK_ASSERT(&V_udbinfo); + INP_HASH_LOCK_ASSERT(&V_udbinfo); if ((lport == 0) || (laddr.s_addr == INADDR_ANY && src.sin_addr.s_addr == INADDR_ANY)) { @@ -1099,7 +1106,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, inp->inp_lport == 0 || sin->sin_addr.s_addr == INADDR_ANY || sin->sin_addr.s_addr == INADDR_BROADCAST) { - INP_INFO_LOCK_ASSERT(&V_udbinfo); + INP_HASH_LOCK_ASSERT(&V_udbinfo); error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport, &faddr.s_addr, &fport, NULL, td->td_ucred); @@ -1113,8 +1120,8 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, /* Commit the local port if newly assigned. */ if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { - INP_INFO_WLOCK_ASSERT(&V_udbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(&V_udbinfo); /* * Remember addr if jailed, to prevent * rebinding. 
@@ -1209,25 +1216,25 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ UDPSTAT_INC(udps_opackets); - if (unlock_udbinfo == 2) - INP_INFO_WUNLOCK(&V_udbinfo); - else if (unlock_udbinfo == 1) - INP_INFO_RUNLOCK(&V_udbinfo); + if (unlock_udbinfo == UH_WLOCKED) + INP_HASH_WUNLOCK(&V_udbinfo); + else if (unlock_udbinfo == UH_RLOCKED) + INP_HASH_RUNLOCK(&V_udbinfo); error = ip_output(m, inp->inp_options, NULL, ipflags, inp->inp_moptions, inp); - if (unlock_udbinfo == 2) + if (unlock_udbinfo == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); return (error); release: - if (unlock_udbinfo == 2) { + if (unlock_udbinfo == UH_WLOCKED) { + INP_HASH_WUNLOCK(&V_udbinfo); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); - } else if (unlock_udbinfo == 1) { + } else if (unlock_udbinfo == UH_RLOCKED) { + INP_HASH_RUNLOCK(&V_udbinfo); INP_RUNLOCK(inp); - INP_INFO_RUNLOCK(&V_udbinfo); } else INP_RUNLOCK(inp); m_freem(m); @@ -1376,15 +1383,15 @@ udp_abort(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_abort: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { + INP_HASH_WLOCK(&V_udbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; + INP_HASH_WUNLOCK(&V_udbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); } static int @@ -1453,11 +1460,11 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_bind: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); + INP_HASH_WLOCK(&V_udbinfo); error = in_pcbbind(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_udbinfo); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (error); } @@ -1468,15 +1475,15 @@ udp_close(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_close: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != 
INADDR_ANY) { + INP_HASH_WLOCK(&V_udbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; + INP_HASH_WUNLOCK(&V_udbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); } static int @@ -1488,25 +1495,23 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_connect: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); if (inp->inp_faddr.s_addr != INADDR_ANY) { INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (EISCONN); } sin = (struct sockaddr_in *)nam; error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error != 0) { INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (error); } + INP_HASH_WLOCK(&V_udbinfo); error = in_pcbconnect(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_udbinfo); if (error == 0) soisconnected(so); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (error); } @@ -1538,21 +1543,19 @@ udp_disconnect(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp_disconnect: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); if (inp->inp_faddr.s_addr == INADDR_ANY) { INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (ENOTCONN); } - + INP_HASH_WLOCK(&V_udbinfo); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; + INP_HASH_WUNLOCK(&V_udbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (0); } diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 9e8e5cd..9558d1b 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -652,8 +652,32 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, * that is, this address might make other addresses detached. 
*/ pfxlist_onlink_check(); - if (error == 0 && ia) + if (error == 0 && ia) { + if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { + /* + * Try to clear the flag when a new + * IPv6 address is added onto an + * IFDISABLED interface and it + * succeeds. + */ + struct in6_ndireq nd; + + memset(&nd, 0, sizeof(nd)); + nd.ndi.flags = ND_IFINFO(ifp)->flags; + nd.ndi.flags &= ~ND6_IFF_IFDISABLED; + if (nd6_ioctl(SIOCSIFINFO_FLAGS, + (caddr_t)&nd, ifp) < 0) + log(LOG_NOTICE, "SIOCAIFADDR_IN6: " + "SIOCSIFINFO_FLAGS for -ifdisabled " + "failed."); + /* + * Ignore failure of clearing the flag + * intentionally. The failure means + * address duplication was detected. + */ + } EVENTHANDLER_INVOKE(ifaddr_event, ifp); + } break; } diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 32759af..ae0da6a 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -611,7 +611,10 @@ struct ip6_mtuinfo { #define IPV6CTL_STEALTH 45 #define ICMPV6CTL_ND6_ONLINKNSRFC4861 47 -#define IPV6CTL_MAXID 48 +#define IPV6CTL_NO_RADR 48 /* No defroute from RA */ +#define IPV6CTL_NORBIT_RAIF 49 /* Disable R-bit in NA on RA + * receiving IF. */ +#define IPV6CTL_MAXID 50 #endif /* __BSD_VISIBLE */ /* diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index eacce8c..d15c605 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1,7 +1,11 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -66,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_pcbgroup.h" #include <sys/param.h> #include <sys/systm.h> @@ -114,8 +119,8 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, int error, lookupflags = 0; int reuseport = (so->so_options & SO_REUSEPORT); - INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); @@ -298,8 +303,8 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, int scope_ambiguous = 0; struct in6_addr in6a; - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ if (nam->sa_len != sizeof (*sin6)) return (EINVAL); @@ -360,15 +365,16 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, * then pick one. */ int -in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam, - struct ucred *cred) +in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam, + struct ucred *cred, struct mbuf *m) { + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_addr addr6; int error; - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); /* * Call inner routine, to assign local interface address. @@ -377,7 +383,7 @@ in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam, if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return (error); - if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, + if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? 
&addr6 : &inp->in6p_laddr, @@ -400,17 +406,24 @@ in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam, inp->inp_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); - in_pcbrehash(inp); + in_pcbrehash_mbuf(inp, m); return (0); } +int +in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) +{ + + return (in6_pcbconnect_mbuf(inp, nam, cred, NULL)); +} + void in6_pcbdisconnect(struct inpcb *inp) { - INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; @@ -649,7 +662,8 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, } /* - * Lookup a PCB based on the local address and port. + * Lookup a PCB based on the local address and port. Caller must hold the + * hash lock. No inpcb locks or references are acquired. */ struct inpcb * in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, @@ -661,7 +675,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - INP_INFO_WLOCK_ASSERT(pcbinfo); + INP_HASH_WLOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; @@ -814,13 +828,148 @@ in6_rtchange(struct inpcb *inp, int errno) return inp; } +#ifdef PCBGROUP +/* + * Lookup PCB in hash list, using pcbgroup tables. + */ +static struct inpcb * +in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, + struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, + u_int lport_arg, int lookupflags, struct ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp, *tmpinp; + u_short fport = fport_arg, lport = lport_arg; + int faith; + + if (faithprefix_p != NULL) + faith = (*faithprefix_p)(laddr); + else + faith = 0; + + /* + * First look for an exact match. 
+ */ + tmpinp = NULL; + INP_GROUP_LOCK(pcbgroup); + head = &pcbgroup->ipg_hashbase[ + INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, + pcbgroup->ipg_hashmask)]; + LIST_FOREACH(inp, head, inp_pcbgrouphash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_fport == fport && + inp->inp_lport == lport) { + /* + * XXX We should be able to directly return + * the inp here, without any checks. + * Well unless both bound with SO_REUSEPORT? + */ + if (prison_flag(inp->inp_cred, PR_IP6)) + goto found; + if (tmpinp == NULL) + tmpinp = inp; + } + } + if (tmpinp != NULL) { + inp = tmpinp; + goto found; + } + + /* + * Then look for a wildcard match, if requested. + */ + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { + struct inpcb *local_wild = NULL, *local_exact = NULL; + struct inpcb *jail_wild = NULL; + int injail; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. 
+ */ + head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, + 0, pcbinfo->ipi_wildmask)]; + LIST_FOREACH(inp, head, inp_pcbgroup_wild) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + inp->inp_lport != lport) { + continue; + } + + /* XXX inp locking */ + if (faith && (inp->inp_flags & INP_FAITH) == 0) + continue; + + injail = prison_flag(inp->inp_cred, PR_IP6); + if (injail) { + if (prison_check_ip6(inp->inp_cred, + laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } + + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { + if (injail) + goto found; + else + local_exact = inp; + } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* LIST_FOREACH */ + + inp = jail_wild; + if (inp == NULL) + inp = jail_wild; + if (inp == NULL) + inp = local_exact; + if (inp == NULL) + inp = local_wild; + if (inp != NULL) + goto found; + } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ + INP_GROUP_UNLOCK(pcbgroup); + return (NULL); + +found: + in_pcbref(inp); + INP_GROUP_UNLOCK(pcbgroup); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("%s: locking buf", __func__); + return (inp); +} +#endif /* PCBGROUP */ + /* * Lookup PCB in hash list. 
*/ struct inpcb * -in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, - u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, - struct ifnet *ifp) +in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, + int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; @@ -830,7 +979,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - INP_INFO_LOCK_ASSERT(pcbinfo); + INP_HASH_LOCK_ASSERT(pcbinfo); if (faithprefix_p != NULL) faith = (*faithprefix_p)(laddr); @@ -934,6 +1083,101 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, return (NULL); } +/* + * Lookup PCB in hash list, using pcbinfo tables. This variation locks the + * hash list lock, and will return the inpcb locked (i.e., requires + * INPLOOKUP_LOCKPCB). + */ +static struct inpcb * +in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, + struct ifnet *ifp) +{ + struct inpcb *inp; + + INP_HASH_RLOCK(pcbinfo); + inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, + (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); + if (inp != NULL) { + in_pcbref(inp); + INP_HASH_RUNLOCK(pcbinfo); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("%s: locking bug", __func__); + } else + INP_HASH_RUNLOCK(pcbinfo); + return (inp); +} + +/* + * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf + * from which a pre-calculated hash value may be extracted. 
+ * + * Possibly more of this logic should be in in6_pcbgroup.c. + */ +struct inpcb * +in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, + struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) +{ +#if defined(PCBGROUP) + struct inpcbgroup *pcbgroup; +#endif + + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, + ("%s: LOCKPCB not set", __func__)); + +#if defined(PCBGROUP) + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif + return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, + lookupflags, ifp)); +} + +struct inpcb * +in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, + struct ifnet *ifp, struct mbuf *m) +{ +#ifdef PCBGROUP + struct inpcbgroup *pcbgroup; +#endif + + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, + ("%s: invalid lookup flags %d", __func__, lookupflags)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, + ("%s: LOCKPCB not set", __func__)); + +#ifdef PCBGROUP + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid); + if (pcbgroup != NULL) + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, + fport, laddr, lport, lookupflags, ifp)); + pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif + return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, + lookupflags, ifp)); +} + void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { diff --git a/sys/netinet6/in6_pcb.h 
b/sys/netinet6/in6_pcb.h index c54a8cf..8398d54 100644 --- a/sys/netinet6/in6_pcb.h +++ b/sys/netinet6/in6_pcb.h @@ -69,10 +69,22 @@ #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) +struct inpcbgroup * + in6_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t); +struct inpcbgroup * + in6_pcbgroup_byinpcb __P((struct inpcb *)); +struct inpcbgroup * + in6_pcbgroup_bymbuf(struct inpcbinfo *, struct mbuf *); +struct inpcbgroup * + in6_pcbgroup_bytuple __P((struct inpcbinfo *, const struct in6_addr *, + u_short, const struct in6_addr *, u_short)); + void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *)); void in6_losing __P((struct inpcb *)); int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *)); int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct ucred *)); +int in6_pcbconnect_mbuf __P((struct inpcb *, struct sockaddr *, + struct ucred *, struct mbuf *)); void in6_pcbdisconnect __P((struct inpcb *)); int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *); struct inpcb * @@ -80,9 +92,17 @@ struct inpcb * struct in6_addr *, u_short, int, struct ucred *)); struct inpcb * - in6_pcblookup_hash __P((struct inpcbinfo *, - struct in6_addr *, u_int, struct in6_addr *, - u_int, int, struct ifnet *)); + in6_pcblookup __P((struct inpcbinfo *, struct in6_addr *, + u_int, struct in6_addr *, u_int, int, + struct ifnet *)); +struct inpcb * + in6_pcblookup_hash_locked __P((struct inpcbinfo *, struct in6_addr *, + u_int, struct in6_addr *, u_int, int, + struct ifnet *)); +struct inpcb * + in6_pcblookup_mbuf __P((struct inpcbinfo *, struct in6_addr *, + u_int, struct in6_addr *, u_int, int, + struct ifnet *ifp, struct mbuf *)); void in6_pcbnotify __P((struct inpcbinfo *, struct sockaddr *, u_int, const struct sockaddr *, u_int, int, void *, struct inpcb *(*)(struct inpcb *, int))); diff --git a/sys/netinet6/in6_pcbgroup.c b/sys/netinet6/in6_pcbgroup.c new file mode 100644 index 
0000000..850d7f4 --- /dev/null +++ b/sys/netinet6/in6_pcbgroup.c @@ -0,0 +1,103 @@ +/*- + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * This software was developed by Robert N. M. Watson under contract + * to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +__FBSDID("$FreeBSD$"); + +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/mbuf.h> + +#include <netinet/in.h> +#include <netinet/in_pcb.h> +#ifdef INET6 +#include <netinet6/in6_pcb.h> +#endif /* INET6 */ + +/* + * Given a hash of whatever the covered tuple might be, return a pcbgroup + * index. 
+ */ +static __inline u_int +in6_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash) +{ + + return (hash % pcbinfo->ipi_npcbgroups); +} + +/* + * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash + * information is insufficient to identify the pcbgroup. + */ +struct inpcbgroup * +in6_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash) +{ + + return (NULL); +} + +struct inpcbgroup * +in6_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m) +{ + + return (in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid)); +} + +struct inpcbgroup * +in6_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, const struct in6_addr *laddrp, + u_short lport, const struct in6_addr *faddrp, u_short fport) +{ + uint32_t hash; + + switch (pcbinfo->ipi_hashfields) { + case IPI_HASHFIELDS_4TUPLE: + hash = faddrp->s6_addr32[3] ^ fport; + break; + + case IPI_HASHFIELDS_2TUPLE: + hash = faddrp->s6_addr32[3] ^ laddrp->s6_addr32[3]; + break; + + default: + hash = 0; + } + return (&pcbinfo->ipi_pcbgroups[in6_pcbgroup_getbucket(pcbinfo, + hash)]); +} + +struct inpcbgroup * +in6_pcbgroup_byinpcb(struct inpcb *inp) +{ + + return (in6_pcbgroup_bytuple(inp->inp_pcbinfo, &inp->in6p_laddr, + inp->inp_lport, &inp->in6p_faddr, inp->inp_fport)); +} diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index acd1569..9e78e9a 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -133,6 +133,7 @@ __FBSDID("$FreeBSD$"); /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. 
*/ +FEATURE(inet6, "Internet Protocol version 6"); extern struct domain inet6domain; static struct pr_usrreqs nousrreqs; @@ -408,6 +409,8 @@ VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS; VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM; VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS; VNET_DEFINE(int, ip6_accept_rtadv) = 0; +VNET_DEFINE(int, ip6_no_radr) = 0; +VNET_DEFINE(int, ip6_norbit_raif) = 0; VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */ VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */ VNET_DEFINE(int, ip6_log_interval) = 5; @@ -536,6 +539,15 @@ SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, "Default value of per-interface flag for accepting ICMPv6 Router" "Advertisement messages"); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr, + CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0, + "Default value of per-interface flag to control whether routers " + "sending ICMPv6 RA messages on that interface are added into the " + "default router list."); +SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_RW, + &VNET_NAME(ip6_norbit_raif), 0, + "Always set 0 to R flag in ICMPv6 NA messages when accepting RA" + " on the interface."); SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW, &VNET_NAME(ip6_keepfaith), 0, ""); SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 5202e09..c802bfc 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -856,8 +856,8 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; #endif - INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(pcbinfo); error = prison_local_ip6(cred, laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); diff --git 
a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 052b8f6..de3a622 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -504,6 +504,13 @@ ip6_input(struct mbuf *m) goto bad; } #endif +#ifdef IPSEC + /* + * Bypass packet filtering for packets previously handled by IPsec. + */ + if (ip6_ipsec_filtertunnel(m)) + goto passin; +#endif /* IPSEC */ /* * Run through list of hooks for input packets. diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c index 8731e12..bbbc9c9 100644 --- a/sys/netinet6/ip6_ipsec.c +++ b/sys/netinet6/ip6_ipsec.c @@ -97,7 +97,7 @@ SYSCTL_VNET_INT(_net_inet6_ipsec6, OID_AUTO, /* * Check if we have to jump over firewall processing for this packet. - * Called from ip_input(). + * Called from ip6_input(). * 1 = jump over firewall, 0 = packet goes through firewall. */ int @@ -106,7 +106,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m) #if defined(IPSEC) /* - * Bypass packet filtering for packets from a tunnel. + * Bypass packet filtering for packets previously handled by IPsec. */ if (!V_ip6_ipsec6_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) @@ -118,7 +118,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m) /* * Check if this packet has an active SA and needs to be dropped instead * of forwarded. - * Called from ip_input(). + * Called from ip6_input(). * 1 = drop packet, 0 = forward packet. */ int @@ -141,7 +141,7 @@ ip6_ipsec_fwd(struct mbuf *m) if (sp == NULL) { /* NB: can happen if error */ splx(s); /*XXX error stat???*/ - DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ + DPRINTF(("%s: no SP for forwarding\n", __func__)); /*XXX*/ return 1; } @@ -163,7 +163,7 @@ ip6_ipsec_fwd(struct mbuf *m) * Check if protocol type doesn't have a further header and do IPSEC * decryption or reject right now. Protocols with further headers get * their IPSEC treatment within the protocol specific processing. - * Called from ip_input(). + * Called from ip6_input(). 
* 1 = drop packet, 0 = continue processing packet. */ int @@ -206,7 +206,7 @@ ip6_ipsec_input(struct mbuf *m, int nxt) } else { /* XXX error stat??? */ error = EINVAL; - DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ + DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/ return 1; } splx(s); diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 4e44d9f..dbfba9a 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -316,6 +316,9 @@ VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly * queue */ VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */ +VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */ +VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA + * receiving IF. */ VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */ VNET_DECLARE(int, ip6_log_interval); VNET_DECLARE(time_t, ip6_log_time); @@ -327,6 +330,8 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ #define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) #define V_ip6_maxfrags VNET(ip6_maxfrags) #define V_ip6_accept_rtadv VNET(ip6_accept_rtadv) +#define V_ip6_no_radr VNET(ip6_no_radr) +#define V_ip6_norbit_raif VNET(ip6_norbit_raif) #define V_ip6_keepfaith VNET(ip6_keepfaith) #define V_ip6_log_interval VNET(ip6_log_interval) #define V_ip6_log_time VNET(ip6_log_time) diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index f1e48ea..2b51e43 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -193,6 +193,8 @@ nd6_ifattach(struct ifnet *ifp) /* A loopback interface does not need to accept RTADV. 
*/ if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_ACCEPT_RTADV; + if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK)) + nd->flags |= ND6_IFF_NO_RADR; /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ nd6_setmtu0(ifp, nd); @@ -825,7 +827,7 @@ nd6_purge(struct ifnet *ifp) if (V_nd6_defifindex == ifp->if_index) nd6_setdefaultiface(0); - if (!V_ip6_forwarding && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { + if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* Refresh default router list. */ defrouter_select(); } @@ -958,10 +960,9 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) /* * If the default router list is empty, all addresses are regarded * as on-link, and thus, as a neighbor. - * XXX: we restrict the condition to hosts, because routers usually do - * not have the "default router list". */ - if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL && + if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && + TAILQ_FIRST(&V_nd_defrouter) == NULL && V_nd6_defifindex == ifp->if_index) { return (1); } @@ -1022,8 +1023,7 @@ nd6_free(struct llentry *ln, int gc) ifp = ln->lle_tbl->llt_ifp; - if (!V_ip6_forwarding) { - + if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); if (dr != NULL && dr->expire && @@ -1322,6 +1322,16 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) struct ifaddr *ifa; struct in6_ifaddr *ia; + /* + * Try to clear ifdisabled flag when enabling + * accept_rtadv or auto_linklocal. 
+ */ + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && + !(ND.flags & ND6_IFF_IFDISABLED) && + (ND.flags & (ND6_IFF_ACCEPT_RTADV | + ND6_IFF_AUTO_LINKLOCAL))) + ND.flags &= ~ND6_IFF_IFDISABLED; + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && !(ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 1->0 transision */ @@ -1340,7 +1350,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && - IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { + IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { duplicated_linklocal = 1; break; } @@ -1379,6 +1389,28 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) /* If no link-local address on ifp, configure */ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; in6_ifattach(ifp, NULL); + } else if ((ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL) && + !(ND.flags & ND6_IFF_IFDISABLED)) { + /* + * When the IF already has + * ND6_IFF_AUTO_LINKLOCAL and no link-local + * address is assigned, try to assign one. + */ + int haslinklocal = 0; + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ia = (struct in6_ifaddr *)ifa; + if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { + haslinklocal = 1; + break; + } + } + IF_ADDR_UNLOCK(ifp); + if (!haslinklocal) + in6_ifattach(ifp, NULL); } } ND_IFINFO(ifp)->flags = ND.flags; @@ -1718,7 +1750,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. 
*/ - if (do_update && router && !V_ip6_forwarding && + if (do_update && router && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * guaranteed recursion diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index abcfcb7..6f63192 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -85,6 +85,7 @@ struct nd_ifinfo { */ #define ND6_IFF_DONT_SET_IFROUTE 0x10 #define ND6_IFF_AUTO_LINKLOCAL 0x20 +#define ND6_IFF_NO_RADR 0x40 #define ND6_CREATE LLE_CREATE #define ND6_EXCLUSIVE LLE_EXCLUSIVE diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index fd5bcf2..fb8e379 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -112,10 +112,14 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; + int rflag; union nd_opts ndopts; struct sockaddr_dl proxydl; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0; + if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif) + rflag = 0; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); @@ -339,8 +343,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) goto bad; nd6_na_output(ifp, &in6_all, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | - (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), - tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); + rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); goto freeit; } @@ -349,8 +352,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) nd6_na_output(ifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | - (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED, - tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); + rflag | ND_NA_FLAG_SOLICITED, tlladdr, + proxy ? 
(struct sockaddr *)&proxydl : NULL); freeit: if (ifa != NULL) ifa_free(ifa); @@ -862,7 +865,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); if (dr) defrtrlist_del(dr); - else if (!V_ip6_forwarding) { + else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags & + ND6_IFF_ACCEPT_RTADV) { /* * Even if the neighbor is not in the default * router list, the neighbor may be used diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 19ec989..e791e2e 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -127,8 +127,11 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len) union nd_opts ndopts; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; - /* If I'm not a router, ignore it. */ - if (!V_ip6_forwarding) + /* + * Accept RS only when V_ip6_forwarding=1 and the interface has + * no ND6_IFF_ACCEPT_RTADV. + */ + if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) goto freeit; /* Sanity checks */ @@ -213,11 +216,10 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* - * We only accept RAs only when - * the node is not a router and - * per-interface variable allows RAs on the receiving interface. + * We only accept RAs only when the per-interface flag + * ND6_IFF_ACCEPT_RTADV is on the receiving interface. */ - if (V_ip6_forwarding || !(ndi->flags & ND6_IFF_ACCEPT_RTADV)) + if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV)) goto freeit; if (ip6->ip6_hlim != 255) { @@ -266,7 +268,15 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) bzero(&dr0, sizeof(dr0)); dr0.rtaddr = saddr6; dr0.flags = nd_ra->nd_ra_flags_reserved; - dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); + /* + * Effectively-disable the route in the RA packet + * when ND6_IFF_NO_RADR on the receiving interface or + * ip6.forwarding=1. 
+ */ + if (ndi->flags & ND6_IFF_NO_RADR || V_ip6_forwarding) + dr0.rtlifetime = 0; + else + dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); dr0.expire = time_second + dr0.rtlifetime; dr0.ifp = ifp; /* unspecified or not? (RFC 2461 6.3.4) */ @@ -557,7 +567,7 @@ defrtrlist_del(struct nd_defrouter *dr) * Flush all the routing table entries that use the router * as a next hop. */ - if (!V_ip6_forwarding) + if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV) rt6_flush(&dr->rtaddr, dr->ifp); if (dr->installed) { @@ -616,20 +626,6 @@ defrouter_select(void) struct llentry *ln = NULL; /* - * This function should be called only when acting as an autoconfigured - * host. Although the remaining part of this function is not effective - * if the node is not an autoconfigured host, we explicitly exclude - * such cases here for safety. - */ - if (V_ip6_forwarding) { - nd6log((LOG_WARNING, - "defrouter_select: called unexpectedly (forwarding=%d)\n", - V_ip6_forwarding)); - splx(s); - return; - } - - /* * Let's handle easy case (3) first: * If default router list is empty, there's nothing to be done. */ diff --git a/sys/netinet6/send.h b/sys/netinet6/send.h index 36ba571..9795d14 100644 --- a/sys/netinet6/send.h +++ b/sys/netinet6/send.h @@ -33,7 +33,7 @@ #define SND_IN 1 /* Incoming traffic. */ struct sockaddr_send { - unsigned char send_len; /* total length */ + uint8_t send_len; /* total length */ sa_family_t send_family; /* address family */ int send_direction; int send_ifidx; diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index d574c19..6723007 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -1,7 +1,11 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -231,11 +235,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto) init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; - INP_INFO_RLOCK(&V_udbinfo); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; struct ip6_moptions *imo; + INP_INFO_RLOCK(&V_udbinfo); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address @@ -273,6 +277,13 @@ udp6_input(struct mbuf **mp, int *offp, int proto) } /* + * XXXRW: Because we weren't holding either the inpcb + * or the hash lock when we checked for a match + * before, we should probably recheck now that the + * inpcb lock is (supposed to be) held. + */ + + /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ @@ -366,8 +377,9 @@ udp6_input(struct mbuf **mp, int *offp, int proto) /* * Locate pcb for datagram. 
*/ - inp = in6_pcblookup_hash(&V_udbinfo, &ip6->ip6_src, uh->uh_sport, - &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); + inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src, uh->uh_sport, + &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | + INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp == NULL) { if (udp_log_in_vain) { char ip6bufs[INET6_ADDRSTRLEN]; @@ -384,9 +396,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto) if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); UDPSTAT_INC(udps_noportmcast); - goto badheadlocked; + goto badunlocked; } - INP_INFO_RUNLOCK(&V_udbinfo); if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) @@ -394,8 +405,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } - INP_RLOCK(inp); - INP_INFO_RUNLOCK(&V_udbinfo); + INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); if (up->u_tun_func == NULL) { udp6_append(inp, m, off, &fromsa); @@ -505,13 +515,11 @@ udp6_getcred(SYSCTL_HANDLER_ARGS) (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } - INP_INFO_RLOCK(&V_udbinfo); - inp = in6_pcblookup_hash(&V_udbinfo, &addrs[1].sin6_addr, - addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1, - NULL); + inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, + addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, + INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { - INP_RLOCK(inp); - INP_INFO_RUNLOCK(&V_udbinfo); + INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) @@ -520,10 +528,8 @@ udp6_getcred(SYSCTL_HANDLER_ARGS) if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); - } else { - INP_INFO_RUNLOCK(&V_udbinfo); + } else error = ENOENT; - } if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); @@ -552,6 +558,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, 
struct sockaddr *addr6, struct sockaddr_in6 tmp; INP_WLOCK_ASSERT(inp); + INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (addr6) { /* addr6 has been validated in udp6_send(). */ @@ -772,15 +779,15 @@ udp6_abort(struct socket *so) } #endif - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + INP_HASH_WLOCK(&V_udbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; + INP_HASH_WUNLOCK(&V_udbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); } static int @@ -838,8 +845,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); + INP_HASH_WLOCK(&V_udbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { @@ -867,8 +874,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) #ifdef INET out: #endif + INP_HASH_WUNLOCK(&V_udbinfo); INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (error); } @@ -889,15 +896,15 @@ udp6_close(struct socket *so) return; } #endif - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { + INP_HASH_WLOCK(&V_udbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; + INP_HASH_WUNLOCK(&V_udbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); } static int @@ -911,7 +918,9 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) sin6 = (struct sockaddr_in6 *)nam; KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); + /* + * XXXRW: Need to clarify locking of v4/v6 flags. 
+ */ INP_WLOCK(inp); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { @@ -931,8 +940,10 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; + INP_HASH_WLOCK(&V_udbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred); + INP_HASH_WUNLOCK(&V_udbinfo); if (error == 0) soisconnected(so); goto out; @@ -947,12 +958,13 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; + INP_HASH_WLOCK(&V_udbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); + INP_HASH_WUNLOCK(&V_udbinfo); if (error == 0) soisconnected(so); out: INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (error); } @@ -984,32 +996,32 @@ udp6_disconnect(struct socket *so) inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); - INP_WLOCK(inp); - #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; - error = (*pru->pru_disconnect)(so); - goto out; + (void)(*pru->pru_disconnect)(so); + return (0); } #endif + INP_WLOCK(inp); + if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto out; } + INP_HASH_WLOCK(&V_udbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; + INP_HASH_WUNLOCK(&V_udbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); out: INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (0); } @@ -1023,7 +1035,6 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_send: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); INP_WLOCK(inp); if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { @@ -1060,7 +1071,6 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, * select the UDPv4 output routine are invalidated? 
*/ INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; @@ -1073,16 +1083,16 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif + INP_HASH_WLOCK(&V_udbinfo); error = udp6_output(inp, m, addr, control, td); + INP_HASH_WUNLOCK(&V_udbinfo); #ifdef INET #endif INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); return (error); bad: INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); m_freem(m); return (error); } diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index 9a777c8..da8a2ec 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -300,7 +300,7 @@ nfs_putpages(struct vop_putpages_args *ap) } for (i = 0; i < npages; i++) - rtvals[i] = VM_PAGER_AGAIN; + rtvals[i] = VM_PAGER_ERROR; /* * When putting pages, do not extend file past EOF. @@ -344,11 +344,7 @@ nfs_putpages(struct vop_putpages_args *ap) relpbuf(bp, &nfs_pbuf_freecnt); if (!error) { - int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; - for (i = 0; i < nwritten; i++) { - rtvals[i] = VM_PAGER_OK; - vm_page_undirty(pages[i]); - } + vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid); if (must_commit) { nfs_clearcommit(vp->v_mount); } diff --git a/sys/nfsclient/nfs_krpc.c b/sys/nfsclient/nfs_krpc.c index 242d425..171f7aa 100644 --- a/sys/nfsclient/nfs_krpc.c +++ b/sys/nfsclient/nfs_krpc.c @@ -306,9 +306,7 @@ nfs_disconnect(struct nfsmount *nmp) client = nmp->nm_client; nmp->nm_client = NULL; mtx_unlock(&nmp->nm_mtx); -#ifdef KGSSAPI - rpc_gss_secpurge(client); -#endif + rpc_gss_secpurge_call(client); CLNT_CLOSE(client); CLNT_RELEASE(client); } else @@ -325,18 +323,15 @@ nfs_safedisconnect(struct nfsmount *nmp) static AUTH * nfs_getauth(struct nfsmount *nmp, struct ucred *cred) { -#ifdef KGSSAPI rpc_gss_service_t svc; AUTH *auth; -#endif switch (nmp->nm_secflavor) { -#ifdef KGSSAPI case RPCSEC_GSS_KRB5: case 
RPCSEC_GSS_KRB5I: case RPCSEC_GSS_KRB5P: if (!nmp->nm_mech_oid) - if (!rpc_gss_mech_to_oid("kerberosv5", + if (!rpc_gss_mech_to_oid_call("kerberosv5", &nmp->nm_mech_oid)) return (NULL); if (nmp->nm_secflavor == RPCSEC_GSS_KRB5) @@ -345,12 +340,11 @@ nfs_getauth(struct nfsmount *nmp, struct ucred *cred) svc = rpc_gss_svc_integrity; else svc = rpc_gss_svc_privacy; - auth = rpc_gss_secfind(nmp->nm_client, cred, + auth = rpc_gss_secfind_call(nmp->nm_client, cred, nmp->nm_principal, nmp->nm_mech_oid, svc); if (auth) return (auth); /* fallthrough */ -#endif case AUTH_SYS: default: return (authunix_create(cred)); diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c index 79659d0..8498ee4 100644 --- a/sys/nfsclient/nfs_vfsops.c +++ b/sys/nfsclient/nfs_vfsops.c @@ -1408,10 +1408,20 @@ nfs_sync(struct mount *mp, int waitfor) td = curthread; + MNT_ILOCK(mp); + /* + * If a forced dismount is in progress, return from here so that + * the umount(2) syscall doesn't get stuck in VFS_SYNC() before + * calling VFS_UNMOUNT(). + */ + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + MNT_IUNLOCK(mp); + return (EBADF); + } + /* * Force stale buffer cache information to be flushed. */ - MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); diff --git a/sys/nfsserver/nfs_srvkrpc.c b/sys/nfsserver/nfs_srvkrpc.c index 512373b..3c60825 100644 --- a/sys/nfsserver/nfs_srvkrpc.c +++ b/sys/nfsserver/nfs_srvkrpc.c @@ -418,12 +418,9 @@ nfssvc_addsock(struct file *fp, struct thread *td) static int nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args) { -#ifdef KGSSAPI char principal[128]; int error; -#endif -#ifdef KGSSAPI if (args) { error = copyinstr(args->principal, principal, sizeof(principal), NULL); @@ -434,7 +431,6 @@ nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args) getcredhostname(td->td_ucred, principal + 4, sizeof(principal) - 4); } -#endif /* * Only the first nfsd actually does any work. 
The RPC code @@ -449,12 +445,10 @@ nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args) NFSD_UNLOCK(); -#ifdef KGSSAPI - rpc_gss_set_svc_name(principal, "kerberosv5", + rpc_gss_set_svc_name_call(principal, "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER2); - rpc_gss_set_svc_name(principal, "kerberosv5", + rpc_gss_set_svc_name_call(principal, "kerberosv5", GSS_C_INDEFINITE, NFS_PROG, NFS_VER3); -#endif if (args) { nfsrv_pool->sp_minthreads = args->minthreads; @@ -466,10 +460,8 @@ nfssvc_nfsd(struct thread *td, struct nfsd_nfsd_args *args) svc_run(nfsrv_pool); -#ifdef KGSSAPI - rpc_gss_clear_svc_name(NFS_PROG, NFS_VER2); - rpc_gss_clear_svc_name(NFS_PROG, NFS_VER3); -#endif + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2); + rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3); NFSD_LOCK(); nfsrv_numnfsd--; diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 2d0fd61..4aaf216 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -554,8 +554,8 @@ void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) ipoib_dma_mb(priv, mb, wc->byte_len); - ++dev->if_opackets; - dev->if_obytes += mb->m_pkthdr.len; + ++dev->if_ipackets; + dev->if_ibytes += mb->m_pkthdr.len; mb->m_pkthdr.rcvif = dev; proto = *mtod(mb, uint16_t *); diff --git a/sys/ofed/include/linux/list.h b/sys/ofed/include/linux/list.h index f6f9404..61b42d2 100644 --- a/sys/ofed/include/linux/list.h +++ b/sys/ofed/include/linux/list.h @@ -38,6 +38,7 @@ #include <sys/param.h> #include <sys/kernel.h> #include <sys/queue.h> +#include <sys/cpuset.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> diff --git a/sys/pc98/conf/GENERIC b/sys/pc98/conf/GENERIC index 0fac1a9..e2bed44 100644 --- a/sys/pc98/conf/GENERIC +++ b/sys/pc98/conf/GENERIC @@ -139,12 +139,6 @@ device sc # Add suspend/resume support for the i8254. 
#device pmtimer -# Audio support -#device sound # Generic sound driver -#device snd_mss # Microsoft Sound System -#device "snd_sb16" # Sound Blaster 16 -#device snd_sbc # Sound Blaster - # PCCARD (PCMCIA) support # PCMCIA and cardbus bridge support device cbb # cardbus (yenta) bridge @@ -288,3 +282,10 @@ device bpf # Berkeley packet filter #device firewire # FireWire bus code #device sbp # SCSI over FireWire (Requires scbus and da) #device fwe # Ethernet over FireWire (non-standard!) + +# Sound support +#device sound # Generic sound driver (required) +#device snd_mss # Microsoft Sound System +#device "snd_sb16" # Sound Blaster 16 +#device snd_sbc # Sound Blaster +#device snd_uaudio # USB Audio diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 853ac69..8bcb618 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" +#include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" @@ -115,6 +116,7 @@ __FBSDID("$FreeBSD$"); #include <machine/intr_machdep.h> #include <x86/mca.h> #include <machine/md_var.h> +#include <machine/mp_watchdog.h> #include <machine/pc/bios.h> #include <machine/pcb.h> #include <machine/pcb_ext.h> @@ -1193,9 +1195,8 @@ cpu_idle(int busy) CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); -#ifdef SMP - if (mp_grab_cpu_hlt()) - return; +#ifdef MP_WATCHDOG + ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy - try to use fast methods. */ if (busy) { diff --git a/sys/powerpc/aim/interrupt.c b/sys/powerpc/aim/interrupt.c index b06fb92..d1e3655 100644 --- a/sys/powerpc/aim/interrupt.c +++ b/sys/powerpc/aim/interrupt.c @@ -100,10 +100,8 @@ powerpc_interrupt(struct trapframe *framep) default: /* Re-enable interrupts if applicable. 
*/ ee = framep->srr1 & PSL_EE; - if (ee != 0) { + if (ee != 0) mtmsr(mfmsr() | ee); - isync(); - } trap(framep); } } diff --git a/sys/powerpc/aim/locore32.S b/sys/powerpc/aim/locore32.S index 64bf81e..35ea99b 100644 --- a/sys/powerpc/aim/locore32.S +++ b/sys/powerpc/aim/locore32.S @@ -87,9 +87,6 @@ GLOBAL(tmpstk) GLOBAL(esym) .long 0 /* end of symbol table */ -GLOBAL(ofmsr) - .long 0, 0, 0, 0, 0 /* msr/sprg0-3 used in Open Firmware */ - #define INTRCNT_COUNT 256 /* max(HROWPIC_IRQMAX,OPENPIC_IRQMAX) */ GLOBAL(intrnames) .space INTRCNT_COUNT * (MAXCOMLEN + 1) * 2 @@ -99,16 +96,6 @@ GLOBAL(intrcnt) .space INTRCNT_COUNT * 4 * 2 GLOBAL(eintrcnt) -/* - * File-scope for locore.S - */ -idle_u: - .long 0 /* fake uarea during idle after exit */ -openfirmware_entry: - .long 0 /* Open Firmware entry point */ -srsave: - .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - .text .globl btext btext: diff --git a/sys/powerpc/aim/locore64.S b/sys/powerpc/aim/locore64.S index acdc8a1..c200b4c 100644 --- a/sys/powerpc/aim/locore64.S +++ b/sys/powerpc/aim/locore64.S @@ -75,8 +75,7 @@ .globl kernbase .set kernbase, KERNBASE -#define TMPSTKSZ 8192 /* 8K temporary stack */ -#define OFWSTKSZ 4096 /* 4K Open Firmware stack */ +#define TMPSTKSZ 16384 /* 16K temporary stack */ /* * Globals @@ -85,14 +84,9 @@ .align 4 GLOBAL(tmpstk) .space TMPSTKSZ -GLOBAL(ofwstk) - .space OFWSTKSZ GLOBAL(esym) .llong 0 /* end of symbol table */ -GLOBAL(ofmsr) - .llong 0, 0, 0, 0, 0 /* msr/sprg0-3 used in Open Firmware */ - #define INTRCNT_COUNT 256 /* max(HROWPIC_IRQMAX,OPENPIC_IRQMAX) */ GLOBAL(intrnames) .space INTRCNT_COUNT * (MAXCOMLEN + 1) * 2 @@ -102,16 +96,6 @@ GLOBAL(intrcnt) .space INTRCNT_COUNT * 4 * 2 GLOBAL(eintrcnt) -/* - * File-scope for locore.S - */ -idle_u: - .llong 0 /* fake uarea during idle after exit */ -openfirmware_entry: - .llong 0 /* Open Firmware entry point */ -srsave: - .llong 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - .text .globl btext btext: @@ -208,122 +192,6 @@ tocbase: .llong .TOC.@tocbase /* - 
* Open Firmware Real-mode Entry Point. This is a huge pain. - */ - -ASENTRY(ofw_32bit_mode_entry) - mflr %r0 - std %r0,16(%r1) - stdu %r1,-208(%r1) - - /* - * We need to save the following, because OF's register save/ - * restore code assumes that the contents of registers are - * at most 32 bits wide: lr, cr, r2, r13-r31, the old MSR. These - * get placed in that order in the stack. - */ - - mfcr %r4 - std %r4,48(%r1) - std %r13,56(%r1) - std %r14,64(%r1) - std %r15,72(%r1) - std %r16,80(%r1) - std %r17,88(%r1) - std %r18,96(%r1) - std %r19,104(%r1) - std %r20,112(%r1) - std %r21,120(%r1) - std %r22,128(%r1) - std %r23,136(%r1) - std %r24,144(%r1) - std %r25,152(%r1) - std %r26,160(%r1) - std %r27,168(%r1) - std %r28,176(%r1) - std %r29,184(%r1) - std %r30,192(%r1) - std %r31,200(%r1) - - /* Record the old MSR */ - mfmsr %r6 - - /* read client interface handler */ - lis %r4,openfirmware_entry@ha - ld %r4,openfirmware_entry@l(%r4) - - /* - * Set the MSR to the OF value. This has the side effect of disabling - * exceptions, which is important for the next few steps. - */ - - lis %r5,ofmsr@ha - ld %r5,ofmsr@l(%r5) - mtmsrd %r5 - isync - - /* - * Set up OF stack. This needs to be accessible in real mode and - * use the 32-bit ABI stack frame format. The pointer to the current - * kernel stack is placed at the very top of the stack along with - * the old MSR so we can get them back later. 
- */ - mr %r5,%r1 - lis %r1,(ofwstk+OFWSTKSZ-32)@ha - addi %r1,%r1,(ofwstk+OFWSTKSZ-32)@l - std %r5,8(%r1) /* Save real stack pointer */ - std %r2,16(%r1) /* Save old TOC */ - std %r6,24(%r1) /* Save old MSR */ - li %r5,0 - stw %r5,4(%r1) - stw %r5,0(%r1) - - /* Finally, branch to OF */ - mtctr %r4 - bctrl - - /* Reload stack pointer and MSR from the OFW stack */ - ld %r6,24(%r1) - ld %r2,16(%r1) - ld %r1,8(%r1) - - /* Now set the real MSR */ - mtmsrd %r6 - isync - - /* Sign-extend the return value from OF */ - extsw %r3,%r3 - - /* Restore all the non-volatile registers */ - ld %r5,48(%r1) - mtcr %r5 - ld %r13,56(%r1) - ld %r14,64(%r1) - ld %r15,72(%r1) - ld %r16,80(%r1) - ld %r17,88(%r1) - ld %r18,96(%r1) - ld %r19,104(%r1) - ld %r20,112(%r1) - ld %r21,120(%r1) - ld %r22,128(%r1) - ld %r23,136(%r1) - ld %r24,144(%r1) - ld %r25,152(%r1) - ld %r26,160(%r1) - ld %r27,168(%r1) - ld %r28,176(%r1) - ld %r29,184(%r1) - ld %r30,192(%r1) - ld %r31,200(%r1) - - /* Restore the stack and link register */ - ld %r1,0(%r1) - ld %r0,16(%r1) - mtlr %r0 - blr - -/* * int setfault() * * Similar to setjmp to setup for handling faults on accesses to user memory. 
diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c index 65c9db1..3ccae91 100644 --- a/sys/powerpc/aim/machdep.c +++ b/sys/powerpc/aim/machdep.c @@ -132,6 +132,7 @@ extern vm_offset_t ksym_start, ksym_end; int cold = 1; #ifdef __powerpc64__ +extern int n_slbs; int cacheline_size = 128; #else int cacheline_size = 32; @@ -251,7 +252,6 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, vm_offset_t basekernel, void *mdp) { struct pcpu *pc; - vm_offset_t end; void *generictrap; size_t trap_offset; void *kmdp; @@ -263,7 +263,6 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, int ppc64; #endif - end = 0; kmdp = NULL; trap_offset = 0; cacheline_warn = 0; @@ -279,7 +278,8 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); - end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); + endkernel = ulmax(endkernel, MD_FETCH(kmdp, + MODINFOMD_KERNEND, vm_offset_t)); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); @@ -338,13 +338,13 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, kdb_init(); - /* - * PowerPC 970 CPUs have a misfeature requested by Apple that makes - * them pretend they have a 32-byte cacheline. Turn this off - * before we measure the cacheline size. - */ - + /* Various very early CPU fix ups */ switch (mfpvr() >> 16) { + /* + * PowerPC 970 CPUs have a misfeature requested by Apple that + * makes them pretend they have a 32-byte cacheline. Turn this + * off before we measure the cacheline size. 
+ */ case IBM970: case IBM970FX: case IBM970MP: @@ -353,6 +353,12 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, scratch &= ~HID5_970_DCBZ_SIZE_HI; mtspr(SPR_HID5, scratch); break; + #ifdef __powerpc64__ + case IBMPOWER7: + /* XXX: get from ibm,slb-size in device tree */ + n_slbs = 32; + break; + #endif } /* @@ -368,7 +374,6 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, msr = mfmsr(); mtmsr((msr & ~(PSL_IR | PSL_DR)) | PSL_RI); - isync(); /* * Measure the cacheline size using dcbz @@ -503,7 +508,6 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, * Restore MSR */ mtmsr(msr); - isync(); /* Warn if cachline size was not determined */ if (cacheline_warn == 1) { @@ -527,8 +531,7 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, pmap_mmu_install(MMU_TYPE_OEA, BUS_PROBE_GENERIC); pmap_bootstrap(startkernel, endkernel); - mtmsr(mfmsr() | PSL_IR|PSL_DR|PSL_ME|PSL_RI); - isync(); + mtmsr(PSL_KERNSET & ~PSL_EE); /* * Initialize params/tunables that are derived from memsize diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 30435f5..be80455 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/kernel.h> +#include <sys/queue.h> +#include <sys/cpuset.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/vmmeter.h> @@ -584,26 +587,9 @@ moea_pte_change(struct pte *pt, struct pte *pvo_pt, vm_offset_t va) /* * Quick sort callout for comparing memory regions. 
*/ -static int mr_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b); static int -mr_cmp(const void *a, const void *b) -{ - const struct mem_region *regiona; - const struct mem_region *regionb; - - regiona = a; - regionb = b; - if (regiona->mr_start < regionb->mr_start) - return (-1); - else if (regiona->mr_start > regionb->mr_start) - return (1); - else - return (0); -} - -static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; @@ -720,7 +706,6 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea_bootstrap: physical memory"); - qsort(pregions, pregions_sz, sizeof(*pregions), mr_cmp); for (i = 0; i < pregions_sz; i++) { vm_offset_t pa; vm_offset_t end; @@ -749,7 +734,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea_bootstrap: phys_avail too small"); - qsort(regions, regions_sz, sizeof(*regions), mr_cmp); + phys_avail_count = 0; physsz = 0; hwphyssz = 0; @@ -838,7 +823,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) PMAP_LOCK_INIT(kernel_pmap); for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); /* * Set up the Open Firmware mappings @@ -960,7 +945,9 @@ moea_activate(mmu_t mmu, struct thread *td) pm = &td->td_proc->p_vmspace->vm_pmap; pmr = pm->pmap_phys; - pm->pm_active |= PCPU_GET(cpumask); + sched_pin(); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, pmr); } @@ -970,7 +957,9 @@ moea_deactivate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active &= ~PCPU_GET(cpumask); + sched_pin(); + CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, NULL); } diff --git a/sys/powerpc/aim/mmu_oea64.c 
b/sys/powerpc/aim/mmu_oea64.c index c33a094..291d89b 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/kernel.h> +#include <sys/queue.h> +#include <sys/cpuset.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/sched.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/vmmeter.h> @@ -162,8 +165,8 @@ __FBSDID("$FreeBSD$"); void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); -#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR); isync() -#define ENABLE_TRANS(msr) mtmsr(msr); isync() +#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) +#define ENABLE_TRANS(msr) mtmsr(msr) #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) @@ -473,26 +476,9 @@ moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma) /* * Quick sort callout for comparing memory regions. 
*/ -static int mr_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b); static int -mr_cmp(const void *a, const void *b) -{ - const struct mem_region *regiona; - const struct mem_region *regionb; - - regiona = a; - regionb = b; - if (regiona->mr_start < regionb->mr_start) - return (-1); - else if (regiona->mr_start > regionb->mr_start) - return (1); - else - return (0); -} - -static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; @@ -707,10 +693,9 @@ moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelen mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); - qsort(pregions, pregions_sz, sizeof(*pregions), mr_cmp); if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea64_bootstrap: phys_avail too small"); - qsort(regions, regions_sz, sizeof(*regions), mr_cmp); + phys_avail_count = 0; physsz = 0; hwphyssz = 0; @@ -845,7 +830,7 @@ moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) #endif kernel_pmap->pmap_phys = kernel_pmap; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); PMAP_LOCK_INIT(kernel_pmap); @@ -895,7 +880,7 @@ moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend * Initialize MMU and remap early physical mappings */ MMU_CPU_BOOTSTRAP(mmup,0); - mtmsr(mfmsr() | PSL_DR | PSL_IR); isync(); + mtmsr(mfmsr() | PSL_DR | PSL_IR); pmap_bootstrapped++; bs_remap_earlyboot(); @@ -1013,7 +998,9 @@ moea64_activate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active |= PCPU_GET(cpumask); + sched_pin(); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); #ifdef __powerpc64__ PCPU_SET(userslb, pm->pm_slb); @@ -1028,7 +1015,9 @@ moea64_deactivate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active &= ~(PCPU_GET(cpumask)); + sched_pin(); + 
CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); #ifdef __powerpc64__ PCPU_SET(userslb, NULL); #else @@ -2580,8 +2569,8 @@ moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) lim = round_page(va); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); - if (pvo != NULL) { - pa = (pvo->pvo_pte.pte.pte_lo & LPTE_RPGN) | + if (pvo != NULL && !(pvo->pvo_pte.lpte.pte_lo & LPTE_I)) { + pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va & ADDR_POFF); moea64_syncicache(mmu, pm, va, pa, len); } diff --git a/sys/powerpc/aim/moea64_native.c b/sys/powerpc/aim/moea64_native.c index bca51ab..9e5174f 100644 --- a/sys/powerpc/aim/moea64_native.c +++ b/sys/powerpc/aim/moea64_native.c @@ -185,8 +185,8 @@ TLBIE(uint64_t vpn) { mtx_unlock_spin(&tlbie_mutex); } -#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR); isync() -#define ENABLE_TRANS(msr) mtmsr(msr); isync() +#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) +#define ENABLE_TRANS(msr) mtmsr(msr) /* * PTEG data. @@ -344,7 +344,7 @@ moea64_cpu_bootstrap_native(mmu_t mmup, int ap) * Initialize segment registers and MMU */ - mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); isync(); + mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Install kernel SLB entries diff --git a/sys/powerpc/aim/mp_cpudep.c b/sys/powerpc/aim/mp_cpudep.c index 3ee22f3..d617fde 100644 --- a/sys/powerpc/aim/mp_cpudep.c +++ b/sys/powerpc/aim/mp_cpudep.c @@ -87,7 +87,6 @@ cpudep_ap_bootstrap(void) msr = PSL_KERNSET & ~PSL_EE; mtmsr(msr); - isync(); pcpup->pc_curthread = pcpup->pc_idlethread; pcpup->pc_curpcb = pcpup->pc_curthread->td_pcb; @@ -344,6 +343,10 @@ cpudep_ap_setup() break; default: +#ifdef __powerpc64__ + if (!(mfmsr() & PSL_HV)) /* Rely on HV to have set things up */ + break; +#endif printf("WARNING: Unknown CPU type. 
Cache performace may be " "suboptimal.\n"); break; diff --git a/sys/powerpc/aim/slb.c b/sys/powerpc/aim/slb.c index 1fafbb4..df493b4 100644 --- a/sys/powerpc/aim/slb.c +++ b/sys/powerpc/aim/slb.c @@ -51,8 +51,9 @@ uintptr_t moea64_get_unique_vsid(void); void moea64_release_vsid(uint64_t vsid); static void slb_zone_init(void *); -uma_zone_t slbt_zone; -uma_zone_t slb_cache_zone; +static uma_zone_t slbt_zone; +static uma_zone_t slb_cache_zone; +int n_slbs = 64; SYSINIT(slb_zone_init, SI_SUB_KMEM, SI_ORDER_ANY, slb_zone_init, NULL); @@ -426,16 +427,18 @@ slb_insert_kernel(uint64_t slbe, uint64_t slbv) /* Check for an unused slot, abusing the user slot as a full flag */ if (slbcache[USER_SLB_SLOT].slbe == 0) { - for (i = 0; i < USER_SLB_SLOT; i++) { + for (i = 0; i < n_slbs; i++) { + if (i == USER_SLB_SLOT) + continue; if (!(slbcache[i].slbe & SLBE_VALID)) goto fillkernslb; } - if (i == USER_SLB_SLOT) + if (i == n_slbs) slbcache[USER_SLB_SLOT].slbe = 1; } - for (i = mftb() % 64, j = 0; j < 64; j++, i = (i+1) % 64) { + for (i = mftb() % n_slbs, j = 0; j < n_slbs; j++, i = (i+1) % n_slbs) { if (i == USER_SLB_SLOT) continue; @@ -443,9 +446,11 @@ slb_insert_kernel(uint64_t slbe, uint64_t slbv) break; } - KASSERT(j < 64, ("All kernel SLB slots locked!")); + KASSERT(j < n_slbs, ("All kernel SLB slots locked!")); fillkernslb: + KASSERT(i != USER_SLB_SLOT, + ("Filling user SLB slot with a kernel mapping")); slbcache[i].slbv = slbv; slbcache[i].slbe = slbe | (uint64_t)i; @@ -466,11 +471,11 @@ slb_insert_user(pmap_t pm, struct slb *slb) PMAP_LOCK_ASSERT(pm, MA_OWNED); - if (pm->pm_slb_len < 64) { + if (pm->pm_slb_len < n_slbs) { i = pm->pm_slb_len; pm->pm_slb_len++; } else { - i = mftb() % 64; + i = mftb() % n_slbs; } /* Note that this replacement is atomic with respect to trap_subr */ @@ -521,8 +526,9 @@ slb_zone_init(void *dummy) slbt_zone = uma_zcreate("SLB tree node", sizeof(struct slbtnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - slb_cache_zone = 
uma_zcreate("SLB cache", 64*sizeof(struct slb *), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); + slb_cache_zone = uma_zcreate("SLB cache", + (n_slbs + 1)*sizeof(struct slb *), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_VM); if (platform_real_maxaddr() != VM_MAX_ADDRESS) { uma_zone_set_allocf(slb_cache_zone, slb_uma_real_alloc); diff --git a/sys/powerpc/aim/trap_subr64.S b/sys/powerpc/aim/trap_subr64.S index 64e4ac1..5d4148a 100644 --- a/sys/powerpc/aim/trap_subr64.S +++ b/sys/powerpc/aim/trap_subr64.S @@ -53,55 +53,53 @@ * User SRs are loaded through a pointer to the current pmap. */ restore_usersrs: - GET_CPUINFO(%r28); - ld %r28,PC_USERSLB(%r28); + GET_CPUINFO(%r28) + ld %r28,PC_USERSLB(%r28) li %r29, 0 /* Set the counter to zero */ slbia slbmfee %r31,%r29 clrrdi %r31,%r31,28 slbie %r31 -instuserslb: - ld %r31, 0(%r28); /* Load SLB entry pointer */ - cmpli 0, %r31, 0; /* If NULL, stop */ - beqlr; +1: ld %r31, 0(%r28) /* Load SLB entry pointer */ + cmpli 0, %r31, 0 /* If NULL, stop */ + beqlr ld %r30, 0(%r31) /* Load SLBV */ ld %r31, 8(%r31) /* Load SLBE */ or %r31, %r31, %r29 /* Set SLBE slot */ - slbmte %r30, %r31; /* Install SLB entry */ + slbmte %r30, %r31 /* Install SLB entry */ - addi %r28, %r28, 8; /* Advance pointer */ - addi %r29, %r29, 1; - cmpli 0, %r29, 64; /* Repeat if we are not at the end */ - blt instuserslb; - blr; + addi %r28, %r28, 8 /* Advance pointer */ + addi %r29, %r29, 1 + b 1b /* Repeat */ /* * Kernel SRs are loaded directly from the PCPU fields */ restore_kernsrs: - GET_CPUINFO(%r28); - addi %r28,%r28,PC_KERNSLB; + GET_CPUINFO(%r28) + addi %r28,%r28,PC_KERNSLB li %r29, 0 /* Set the counter to zero */ slbia slbmfee %r31,%r29 clrrdi %r31,%r31,28 slbie %r31 -instkernslb: - ld %r31, 8(%r28); /* Load SLBE */ +1: cmpli 0, %r29, USER_SLB_SLOT /* Skip the user slot */ + beq- 2f - cmpli 0, %r31, 0; /* If SLBE is not valid, stop */ - beqlr; + ld %r31, 8(%r28) /* Load SLBE */ + cmpli 0, %r31, 0 /* If SLBE is not valid, stop */ + 
beqlr ld %r30, 0(%r28) /* Load SLBV */ - slbmte %r30, %r31; /* Install SLB entry */ + slbmte %r30, %r31 /* Install SLB entry */ - addi %r28, %r28, 16; /* Advance pointer */ - addi %r29, %r29, 1; - cmpli 0, %r29, USER_SLB_SLOT; /* Repeat if we are not at the end */ - blt instkernslb; - blr; +2: addi %r28, %r28, 16 /* Advance pointer */ + addi %r29, %r29, 1 + cmpli 0, %r29, 64 /* Repeat if we are not at the end */ + blt 1b + blr /* * FRAME_SETUP assumes: @@ -519,6 +517,7 @@ CNAME(trapexit): mfmsr %r3 andi. %r3,%r3,~PSL_EE@l mtmsr %r3 + isync /* Test AST pending: */ ld %r5,FRAME_SRR1+48(%r1) mtcr %r5 diff --git a/sys/powerpc/booke/locore.S b/sys/powerpc/booke/locore.S index 3ac4a1a..de7effc 100644 --- a/sys/powerpc/booke/locore.S +++ b/sys/powerpc/booke/locore.S @@ -83,17 +83,18 @@ __start: * locore registers use: * r1 : stack pointer * r2 : trace pointer (AP only, for early diagnostics) - * r3-r27 : scratch registers - * r28 : kernload - * r29 : temp TLB1 entry - * r30 : initial TLB1 entry we started in - * r31 : metadata pointer + * r3-r26 : scratch registers + * r27 : kernload + * r28 : temp TLB1 entry + * r29 : initial TLB1 entry we started in + * r30-r31 : arguments (metadata pointer) */ /* - * Keep metadata ptr in r31 for later use. + * Keep arguments in r30 & r31 for later use. 
*/ - mr %r31, %r3 + mr %r30, %r3 + mr %r31, %r4 /* * Initial cleanup @@ -120,7 +121,7 @@ __start: */ bl 1f 1: mflr %r3 - bl tlb1_find_current /* the entry number found is returned in r30 */ + bl tlb1_find_current /* the entry found is returned in r29 */ bl tlb1_inval_all_but_current /* @@ -140,7 +141,7 @@ __start: /* * Invalidate initial entry */ - mr %r3, %r30 + mr %r3, %r29 bl tlb1_inval_entry /* @@ -148,7 +149,7 @@ __start: */ /* Final kernel mapping, map in 16 MB of RAM */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ - li %r4, 1 /* Entry 1 */ + li %r4, 0 /* Entry 0 */ rlwimi %r3, %r4, 16, 12, 15 mtspr SPR_MAS0, %r3 isync @@ -170,7 +171,7 @@ __start: bl 3f 3: mflr %r4 /* Use current address */ rlwinm %r4, %r4, 0, 0, 7 /* 16MB alignment mask */ - mr %r28, %r4 /* Keep kernel load address */ + mr %r27, %r4 /* Keep kernel load address */ ori %r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr SPR_MAS3, %r4 /* Set RPN and protection */ isync @@ -193,7 +194,7 @@ __start: /* * Invalidate temp mapping */ - mr %r3, %r29 + mr %r3, %r28 bl tlb1_inval_entry /* @@ -201,7 +202,7 @@ __start: */ lis %r3, kernload@ha addi %r3, %r3, kernload@l - stw %r28, 0(%r3) + stw %r27, 0(%r3) #ifdef SMP /* * APs need a separate copy of kernload info within the __boot_page @@ -210,7 +211,7 @@ __start: */ lis %r3, kernload_ap@ha addi %r3, %r3, kernload_ap@l - stw %r28, 0(%r3) + stw %r27, 0(%r3) msync #endif @@ -229,14 +230,11 @@ __start: /* * Set up arguments and jump to system initialization code */ - lis %r3, kernel_text@ha - addi %r3, %r3, kernel_text@l - lis %r4, _end@ha - addi %r4, %r4, _end@l - mr %r5, %r31 /* metadata ptr */ + mr %r3, %r30 + mr %r4, %r31 /* Prepare e500 core */ - bl e500_init + bl booke_init /* Switch to thread0.td_kstack now */ mr %r1, %r3 @@ -290,7 +288,7 @@ kernload_ap: */ bl 2f 2: mflr %r3 - bl tlb1_find_current /* the entry number found is in r30 */ + bl tlb1_find_current /* the entry number found is in r29 */ bl tlb1_inval_all_but_current /* @@ -310,7 +308,7 @@ 
kernload_ap: /* * Invalidate initial entry */ - mr %r3, %r30 + mr %r3, %r29 bl tlb1_inval_entry /* @@ -318,7 +316,7 @@ kernload_ap: */ /* Final kernel mapping, map in 16 MB of RAM */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ - li %r4, 1 /* Entry 1 */ + li %r4, 0 /* Entry 0 */ rlwimi %r3, %r4, 16, 4, 15 mtspr SPR_MAS0, %r3 isync @@ -373,7 +371,7 @@ kernload_ap: /* * Invalidate temp mapping */ - mr %r3, %r29 + mr %r3, %r28 bl tlb1_inval_entry /* @@ -425,7 +423,7 @@ tlb_inval_all: blr /* - * expects address to look up in r3, returns entry number in r30 + * expects address to look up in r3, returns entry number in r29 * * FIXME: the hidden assumption is we are now running in AS=0, but we should * retrieve actual AS from MSR[IS|DS] and put it in MAS6[SAS] @@ -437,7 +435,7 @@ tlb1_find_current: isync tlbsx 0, %r3 mfspr %r17, SPR_MAS0 - rlwinm %r30, %r17, 16, 20, 31 /* MAS0[ESEL] -> r30 */ + rlwinm %r29, %r17, 16, 20, 31 /* MAS0[ESEL] -> r29 */ /* Make sure we have IPROT set on the entry */ mfspr %r17, SPR_MAS1 @@ -470,14 +468,14 @@ tlb1_inval_entry: blr /* - * r30 current entry number - * r29 returned temp entry + * r29 current entry number + * r28 returned temp entry * r3-r5 scratched */ tlb1_temp_mapping_as1: /* Read our current translation */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ - rlwimi %r3, %r30, 16, 12, 15 /* Select our current entry */ + rlwimi %r3, %r29, 16, 12, 15 /* Select our current entry */ mtspr SPR_MAS0, %r3 isync tlbre @@ -489,12 +487,8 @@ tlb1_temp_mapping_as1: * entry is the last in TLB1 */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ - addi %r29, %r30, 1 /* Use next entry. */ - li %r4, 1 - cmpw %r4, %r29 - bne 1f - addi %r29, %r29, 1 -1: rlwimi %r3, %r29, 16, 12, 15 /* Select temp entry */ + addi %r28, %r29, 1 /* Use next entry. 
*/ + rlwimi %r3, %r28, 16, 12, 15 /* Select temp entry */ mtspr SPR_MAS0, %r3 isync mfspr %r5, SPR_MAS1 @@ -514,7 +508,7 @@ tlb1_temp_mapping_as1: * Loops over TLB1, invalidates all entries skipping the one which currently * maps this code. * - * r30 current entry + * r29 current entry * r3-r5 scratched */ tlb1_inval_all_but_current: @@ -528,7 +522,7 @@ tlb1_inval_all_but_current: isync tlbre mfspr %r5, SPR_MAS1 - cmpw %r4, %r30 /* our current entry? */ + cmpw %r4, %r29 /* our current entry? */ beq 2f rlwinm %r5, %r5, 0, 2, 31 /* clear VALID and IPROT bits */ mtspr SPR_MAS1, %r5 diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c index f2dbacf..c2b5e6f 100644 --- a/sys/powerpc/booke/machdep.c +++ b/sys/powerpc/booke/machdep.c @@ -190,7 +190,7 @@ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_e500_startup, NULL); void print_kernel_section_addr(void); void print_kenv(void); -u_int e500_init(u_int32_t, u_int32_t, void *); +u_int booke_init(uint32_t, uint32_t); static void cpu_e500_startup(void *dummy) @@ -276,19 +276,41 @@ print_kernel_section_addr(void) } u_int -e500_init(u_int32_t startkernel, u_int32_t endkernel, void *mdp) +booke_init(uint32_t arg1, uint32_t arg2) { struct pcpu *pc; - void *kmdp; + void *kmdp, *mdp; vm_offset_t dtbp, end; uint32_t csr; kmdp = NULL; - end = endkernel; + end = (uintptr_t)_end; dtbp = (vm_offset_t)NULL; /* + * Handle the various ways we can get loaded and started: + * - FreeBSD's loader passes the pointer to the metadata + * in arg1, with arg2 undefined. arg1 has a value that's + * relative to the kernel's link address (i.e. larger + * than 0xc0000000). + * - Juniper's loader passes the metadata pointer in arg2 + * and sets arg1 to zero. This is to signal that the + * loader maps the kernel and starts it at its link + * address (unlike the FreeBSD loader). + * - U-Boot passes the standard argc and argv parameters + * in arg1 and arg2 (resp). arg1 is between 1 and some + * relatively small number, such as 64K. 
arg2 is the + * physical address of the argv vector. + */ + if (arg1 > (uintptr_t)kernel_text) /* FreeBSD loader */ + mdp = (void *)arg1; + else if (arg1 == 0) /* Juniper loader */ + mdp = (void *)arg2; + else /* U-Boot */ + mdp = NULL; + + /* * Parse metadata and fetch parameters. */ if (mdp != NULL) { @@ -309,17 +331,8 @@ e500_init(u_int32_t startkernel, u_int32_t endkernel, void *mdp) #endif } } else { - /* - * We should scream but how? Cannot even output anything... - */ - - /* - * FIXME add return value and handle in the locore so we can - * return to the loader maybe? (this seems not very easy to - * restore everything as the TLB have all been reprogrammed - * in the locore etc...) - */ - while (1); + bzero(__sbss_start, __sbss_end - __sbss_start); + bzero(__bss_start, _end - __bss_start); } #if defined(FDT_DTB_STATIC) @@ -368,9 +381,7 @@ e500_init(u_int32_t startkernel, u_int32_t endkernel, void *mdp) cninit(); /* Print out some debug info... */ - debugf("e500_init: console initialized\n"); - debugf(" arg1 startkernel = 0x%08x\n", startkernel); - debugf(" arg2 endkernel = 0x%08x\n", endkernel); + debugf("%s: console initialized\n", __func__); debugf(" arg3 mdp = 0x%08x\n", (u_int32_t)mdp); debugf(" end = 0x%08x\n", (u_int32_t)end); debugf(" boothowto = 0x%08x\n", boothowto); @@ -403,7 +414,7 @@ e500_init(u_int32_t startkernel, u_int32_t endkernel, void *mdp) /* Initialise virtual memory. */ pmap_mmu_install(MMU_TYPE_BOOKE, 0); - pmap_bootstrap(startkernel, end); + pmap_bootstrap((uintptr_t)kernel_text, end); debugf("MSR = 0x%08x\n", mfmsr()); //tlb1_print_entries(); //tlb1_print_tlbentries(); @@ -449,8 +460,8 @@ e500_init(u_int32_t startkernel, u_int32_t endkernel, void *mdp) printf("L1 I-cache %sabled\n", (csr & L1CSR1_ICE) ? 
"en" : "dis"); - debugf("e500_init: SP = 0x%08x\n", ((uintptr_t)thread0.td_pcb - 16) & ~15); - debugf("e500_init: e\n"); + debugf("%s: SP = 0x%08x\n", __func__, + ((uintptr_t)thread0.td_pcb - 16) & ~15); return (((uintptr_t)thread0.td_pcb - 16) & ~15); } diff --git a/sys/powerpc/booke/platform_bare.c b/sys/powerpc/booke/platform_bare.c index 8e03bd3..d76664e 100644 --- a/sys/powerpc/booke/platform_bare.c +++ b/sys/powerpc/booke/platform_bare.c @@ -104,10 +104,22 @@ bare_probe(platform_t plat) int i, law_max, tgt; ver = SVR_VER(mfspr(SPR_SVR)); - if (ver == SVR_MPC8572E || ver == SVR_MPC8572) + switch (ver & ~0x0008) { /* Mask Security Enabled bit */ + case SVR_P4080: + maxcpu = 8; + break; + case SVR_P4040: + maxcpu = 4; + break; + case SVR_MPC8572: + case SVR_P1020: + case SVR_P2020: maxcpu = 2; - else + break; + default: maxcpu = 1; + break; + } /* * Clear local access windows. Skip DRAM entries, so we don't shoot @@ -166,8 +178,11 @@ bare_timebase_freq(platform_t plat, struct cpuref *cpuref) phandle_t cpus, child; pcell_t freq; - /* Backward compatibility. See 8-STABLE. */ - ticks = bootinfo[3] >> 3; + if (bootinfo != NULL) { + /* Backward compatibility. See 8-STABLE. 
*/ + ticks = bootinfo[3] >> 3; + } else + ticks = 0; if ((cpus = OF_finddevice("/cpus")) == 0) goto out; @@ -241,7 +256,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc) int timeout; eebpcr = ccsr_read4(OCP85XX_EEBPCR); - if ((eebpcr & (pc->pc_cpumask << 24)) != 0) { + if ((eebpcr & (1 << (pc->pc_cpuid + 24))) != 0) { printf("%s: CPU=%d already out of hold-off state!\n", __func__, pc->pc_cpuid); return (ENXIO); @@ -259,7 +274,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc) /* * Release AP from hold-off state */ - eebpcr |= (pc->pc_cpumask << 24); + eebpcr |= (1 << (pc->pc_cpuid + 24)); ccsr_write4(OCP85XX_EEBPCR, eebpcr); __asm __volatile("isync; msync"); @@ -277,24 +292,23 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc) static void e500_reset(platform_t plat) { - uint32_t ver = SVR_VER(mfspr(SPR_SVR)); - - if (ver == SVR_MPC8572E || ver == SVR_MPC8572 || - ver == SVR_MPC8548E || ver == SVR_MPC8548) - /* Systems with dedicated reset register */ - ccsr_write4(OCP85XX_RSTCR, 2); - else { - /* Clear DBCR0, disables debug interrupts and events. */ - mtspr(SPR_DBCR0, 0); - __asm __volatile("isync"); - - /* Enable Debug Interrupts in MSR. */ - mtmsr(mfmsr() | PSL_DE); - - /* Enable debug interrupts and issue reset. */ - mtspr(SPR_DBCR0, mfspr(SPR_DBCR0) | DBCR0_IDM | - DBCR0_RST_SYSTEM); - } + + /* + * Try the dedicated reset register first. + * If the SoC doesn't have one, we'll fall + * back to using the debug control register. + */ + ccsr_write4(OCP85XX_RSTCR, 2); + + /* Clear DBCR0, disables debug interrupts and events. */ + mtspr(SPR_DBCR0, 0); + __asm __volatile("isync"); + + /* Enable Debug Interrupts in MSR. */ + mtmsr(mfmsr() | PSL_DE); + + /* Enable debug interrupts and issue reset. 
*/ + mtspr(SPR_DBCR0, mfspr(SPR_DBCR0) | DBCR0_IDM | DBCR0_RST_SYSTEM); printf("Reset failed...\n"); while (1); diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 2fffa3f..e1cd071 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include <sys/msgbuf.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/sched.h> #include <sys/smp.h> #include <sys/vmmeter.h> @@ -91,9 +92,6 @@ __FBSDID("$FreeBSD$"); #include "mmu_if.h" -#define DEBUG -#undef DEBUG - #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else @@ -393,7 +391,7 @@ tlb_miss_lock(void) if (!smp_started) return; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) { CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " @@ -419,7 +417,7 @@ tlb_miss_unlock(void) if (!smp_started) return; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) { CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", __func__, pc->pc_cpuid); @@ -946,7 +944,7 @@ pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) /**************************************************************************/ /* - * This is called during e500_init, before the system is really initialized. + * This is called during booke_init, before the system is really initialized. 
*/ static void mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) @@ -1228,7 +1226,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) PTE_VALID; } /* Mark kernel_pmap active on all CPUs */ - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); /*******************************************************/ /* Final setup */ @@ -1483,7 +1481,7 @@ mmu_booke_pinit(mmu_t mmu, pmap_t pmap) PMAP_LOCK_INIT(pmap); for (i = 0; i < MAXCPU; i++) pmap->pm_tid[i] = TID_NONE; - pmap->pm_active = 0; + CPU_ZERO(&kernel_pmap->pm_active); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); TAILQ_INIT(&pmap->pm_ptbl_list); @@ -1838,7 +1836,7 @@ mmu_booke_activate(mmu_t mmu, struct thread *td) mtx_lock_spin(&sched_lock); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); PCPU_SET(curpmap, pmap); if (pmap->pm_tid[PCPU_GET(cpuid)] == TID_NONE) @@ -1867,7 +1865,9 @@ mmu_booke_deactivate(mmu_t mmu, struct thread *td) CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x", __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); - atomic_clear_int(&pmap->pm_active, PCPU_GET(cpumask)); + sched_pin(); + CPU_NAND_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, NULL); } @@ -3019,24 +3019,18 @@ tlb1_init(vm_offset_t ccsrbar) { uint32_t mas0; - /* TLB1[1] is used to map the kernel. Save that entry. */ - mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(1); + /* TLB1[0] is used to map the kernel. Save that entry. 
*/ + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0); mtspr(SPR_MAS0, mas0); __asm __volatile("isync; tlbre"); - tlb1[1].mas1 = mfspr(SPR_MAS1); - tlb1[1].mas2 = mfspr(SPR_MAS2); - tlb1[1].mas3 = mfspr(SPR_MAS3); + tlb1[0].mas1 = mfspr(SPR_MAS1); + tlb1[0].mas2 = mfspr(SPR_MAS2); + tlb1[0].mas3 = mfspr(SPR_MAS3); - /* Map in CCSRBAR in TLB1[0] */ - tlb1_idx = 0; + /* Map in CCSRBAR in TLB1[1] */ + tlb1_idx = 1; tlb1_set_entry(CCSRBAR_VA, ccsrbar, CCSRBAR_SIZE, _TLB_ENTRY_IO); - /* - * Set the next available TLB1 entry index. Note TLB[1] is reserved - * for initial mapping of kernel text+data, which was set early in - * locore, we need to skip this [busy] entry. - */ - tlb1_idx = 2; /* Setup TLB miss defaults */ set_mas4_defaults(); diff --git a/sys/powerpc/conf/GENERIC b/sys/powerpc/conf/GENERIC index d221a97..2950a49 100644 --- a/sys/powerpc/conf/GENERIC +++ b/sys/powerpc/conf/GENERIC @@ -174,18 +174,24 @@ device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) 
# Misc +device iicbus # I2C bus code +device kiic # Keywest I2C +device ad7417 # PowerMac7,2 temperature sensor device ds1775 # PowerMac7,2 temperature sensor device fcu # Apple Fan Control Unit device max6690 # PowerMac7,2 temperature sensor device powermac_nvram # Open Firmware configuration NVRAM device smu # Apple System Management Unit +device windtunnel # Apple G4 MDD fan controller # ADB support device adb device cuda device pmu -# Powermac I2C support -device iicbus # I2C bus code -device kiic # Keywest I2C +# Sound support +device sound # Generic sound driver (required) +device snd_ai2s # Apple I2S audio +device snd_davbus # Apple DAVBUS audio +device snd_uaudio # USB Audio diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index d7526b3..7e385a1 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -175,6 +175,9 @@ device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) # Misc +device iicbus # I2C bus code +device kiic # Keywest I2C +device ad7417 # PowerMac7,2 temperature sensor device ds1775 # PowerMac7,2 temperature sensor device fcu # Apple Fan Control Unit device max6690 # PowerMac7,2 temperature sensor @@ -185,7 +188,8 @@ device smu # Apple System Management Unit device adb device pmu -# Powermac I2C support -device iicbus # I2C bus code -device kiic # Keywest I2C +# Sound support +device sound # Generic sound driver (required) +device snd_ai2s # Apple I2S audio +device snd_uaudio # USB Audio diff --git a/sys/powerpc/conf/NOTES b/sys/powerpc/conf/NOTES index b80a817..0045763 100644 --- a/sys/powerpc/conf/NOTES +++ b/sys/powerpc/conf/NOTES @@ -39,6 +39,7 @@ device kiic # Apple Keywest I2C Controller device ofwd # Open Firmware disks device adb # Apple Desktop Bus device cuda # VIA-CUDA ADB interface +device ad7417 # PowerMac7,2 temperature sensor device ds1775 # PowerMac7,2 temperature sensor device fcu # Apple Fan Control Unit device max6690 # PowerMac7,2 
temperature sensor @@ -46,6 +47,7 @@ device pmu # Apple Power Management Unit device smu # Apple System Management Unit device snd_ai2s # Apple I2S Audio device snd_davbus # Apple Davbus Audio +device windtunnel # Apple G4 MDD fan controller ##################################################################### diff --git a/sys/powerpc/include/_types.h b/sys/powerpc/include/_types.h index fae2416..b0b582e 100644 --- a/sys/powerpc/include/_types.h +++ b/sys/powerpc/include/_types.h @@ -72,7 +72,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __uint32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef double __double_t; typedef double __float_t; #ifdef __LP64__ diff --git a/sys/powerpc/include/openpicvar.h b/sys/powerpc/include/openpicvar.h index 4fb9aa7..605dc0f 100644 --- a/sys/powerpc/include/openpicvar.h +++ b/sys/powerpc/include/openpicvar.h @@ -57,7 +57,7 @@ int openpic_common_attach(device_t, uint32_t); /* * PIC interface. */ -void openpic_bind(device_t dev, u_int irq, cpumask_t cpumask); +void openpic_bind(device_t dev, u_int irq, cpuset_t cpumask); void openpic_config(device_t, u_int, enum intr_trigger, enum intr_polarity); void openpic_dispatch(device_t, struct trapframe *); void openpic_enable(device_t, u_int, u_int); diff --git a/sys/powerpc/include/param.h b/sys/powerpc/include/param.h index d71d048..06b131c 100644 --- a/sys/powerpc/include/param.h +++ b/sys/powerpc/include/param.h @@ -68,7 +68,7 @@ #endif #if defined(SMP) || defined(KLD_MODULE) -#define MAXCPU 4 +#define MAXCPU 8 #else #define MAXCPU 1 #endif /* SMP || KLD_MODULE */ diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h index 369ca9d..9166d04 100644 --- a/sys/powerpc/include/pmap.h +++ b/sys/powerpc/include/pmap.h @@ -66,6 +66,7 @@ #include <sys/queue.h> #include <sys/tree.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> #include <machine/sr.h> @@ -98,7 +99,7 @@ struct pmap { #else 
register_t pm_sr[16]; #endif - cpumask_t pm_active; + cpuset_t pm_active; struct pmap *pmap_phys; struct pmap_statistics pm_stats; @@ -175,7 +176,7 @@ void slb_free_user_cache(struct slb **); struct pmap { struct mtx pm_mtx; /* pmap mutex */ tlbtid_t pm_tid[MAXCPU]; /* TID to identify this pmap entries in TLB */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ /* Page table directory, array of pointers to page tables. */ diff --git a/sys/powerpc/include/rtas.h b/sys/powerpc/include/rtas.h new file mode 100644 index 0000000..5b18632 --- /dev/null +++ b/sys/powerpc/include/rtas.h @@ -0,0 +1,61 @@ +/*- + * Copyright (c) 2011 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_RTAS_H_ +#define _MACHINE_RTAS_H_ + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <dev/ofw/openfirm.h> + +/* + * RTAS functions are defined by 32-bit integer tokens. These vary from + * system to system, and can be looked up from their standardized names + * using rtas_token_lookup(). If RTAS is not available, rtas_token_lookup() + * and rtas_call_method() return -1; this can be checked in advance using + * rtas_exists(). Otherwise, rtas_call_method() returns one of the RTAS + * status codes from the bottom of this file. 
+ */ + +int rtas_exists(void); +int rtas_call_method(cell_t token, int nargs, int nreturns, ...); +cell_t rtas_token_lookup(const char *method); + +/* RTAS Status Codes: see CHRP or PAPR specification */ +#define RTAS_OK 0 +#define RTAS_HW_ERROR -1 +#define RTAS_BUSY -2 +#define RTAS_PARAM_ERROR -3 +#define RTAS_STATE_CHANGE -7 +#define RTAS_VENDOR_BEGIN 9000 +#define RTAS_EXTENDED_DELAY 9900 +#define RTAS_ISOLATION_ERROR -9000 +#define RTAS_VENDOR_ERROR_BEGIN -9004 + +#endif /* _MACHINE_RTAS_H_ */ + diff --git a/sys/powerpc/include/slb.h b/sys/powerpc/include/slb.h index f675e15..637110c 100644 --- a/sys/powerpc/include/slb.h +++ b/sys/powerpc/include/slb.h @@ -65,7 +65,7 @@ /* * User segment for copyin/out */ -#define USER_SLB_SLOT 63 +#define USER_SLB_SLOT 0 #define USER_SLB_SLBE (((USER_ADDR >> ADDR_SR_SHFT) << SLBE_ESID_SHIFT) | \ SLBE_VALID | USER_SLB_SLOT) diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h index cf95278..32fcfb4 100644 --- a/sys/powerpc/include/smp.h +++ b/sys/powerpc/include/smp.h @@ -40,9 +40,11 @@ #ifndef LOCORE +#include <sys/_cpuset.h> + void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); struct cpuref { uintptr_t cr_hwref; diff --git a/sys/powerpc/include/spr.h b/sys/powerpc/include/spr.h index 7d45136..4f675c3 100644 --- a/sys/powerpc/include/spr.h +++ b/sys/powerpc/include/spr.h @@ -644,8 +644,8 @@ #define SPR_MCSRR1 0x23b /* ..8 571 Machine check SRR1 */ #define SPR_SVR 0x3ff /* ..8 1023 System Version Register */ -#define SVR_MPC8533 0x803c -#define SVR_MPC8533E 0x8034 +#define SVR_MPC8533 0x8034 +#define SVR_MPC8533E 0x803c #define SVR_MPC8541 0x8072 #define SVR_MPC8541E 0x807a #define SVR_MPC8548 0x8031 @@ -654,6 +654,18 @@ #define SVR_MPC8555E 0x8079 #define SVR_MPC8572 0x80e0 #define SVR_MPC8572E 0x80e8 +#define SVR_P1011 0x80e5 +#define SVR_P1011E 0x80ed +#define SVR_P1020 0x80e4 +#define SVR_P1020E 0x80ec 
+#define SVR_P2010 0x80e3 +#define SVR_P2010E 0x80eb +#define SVR_P2020 0x80e2 +#define SVR_P2020E 0x80ea +#define SVR_P4040 0x8200 +#define SVR_P4040E 0x8208 +#define SVR_P4080 0x8201 +#define SVR_P4080E 0x8209 #define SVR_VER(svr) (((svr) >> 16) & 0xffff) #define SPR_PID0 0x030 /* ..8 Process ID Register 0 */ diff --git a/sys/powerpc/mpc85xx/mpc85xx.c b/sys/powerpc/mpc85xx/mpc85xx.c index 564bf84..f383a1b 100644 --- a/sys/powerpc/mpc85xx/mpc85xx.c +++ b/sys/powerpc/mpc85xx/mpc85xx.c @@ -69,12 +69,13 @@ law_getmax(void) uint32_t ver; ver = SVR_VER(mfspr(SPR_SVR)); - if (ver == SVR_MPC8572E || ver == SVR_MPC8572) - return (12); - else if (ver == SVR_MPC8548E || ver == SVR_MPC8548) - return (10); - else + if (ver == SVR_MPC8555E || ver == SVR_MPC8555) return (8); + if (ver == SVR_MPC8548E || ver == SVR_MPC8548 || + ver == SVR_MPC8533E || ver == SVR_MPC8533) + return (10); + + return (12); } #define _LAW_SR(trgt,size) (0x80000000 | (trgt << 20) | (ffsl(size) - 2)) @@ -152,10 +153,16 @@ law_pci_target(struct resource *res, int *trgt_mem, int *trgt_io) trgt = 1; break; case 0xa000: - if (ver == SVR_MPC8572E || ver == SVR_MPC8572) - trgt = 2; + if (ver == SVR_MPC8548E || ver == SVR_MPC8548) + trgt = 3; else + trgt = 2; + break; + case 0xb000: + if (ver == SVR_MPC8548E || ver == SVR_MPC8548) rv = EINVAL; + else + trgt = 3; break; default: rv = ENXIO; diff --git a/sys/powerpc/mpc85xx/mpc85xx.h b/sys/powerpc/mpc85xx/mpc85xx.h index 7621f2c..fa3bde3 100644 --- a/sys/powerpc/mpc85xx/mpc85xx.h +++ b/sys/powerpc/mpc85xx/mpc85xx.h @@ -67,11 +67,6 @@ #define OCP85XX_PORDEVSR2 (CCSRBAR_VA + 0xe0014) -#define OCP85XX_DEVDISR (CCSRBAR_VA + 0xe0070) -#define OCP85XX_DEVDISR_PCIE0 0x20000000 -#define OCP85XX_DEVDISR_PCIE1 0x04000000 -#define OCP85XX_DEVDISR_PCIE2 0x02000000 - /* * Status Registers. 
*/ diff --git a/sys/powerpc/mpc85xx/openpic_fdt.c b/sys/powerpc/mpc85xx/openpic_fdt.c index 7cf18ea..1cd9369 100644 --- a/sys/powerpc/mpc85xx/openpic_fdt.c +++ b/sys/powerpc/mpc85xx/openpic_fdt.c @@ -37,11 +37,12 @@ __FBSDID("$FreeBSD$"); #include <machine/bus.h> #include <machine/intr_machdep.h> -#include <machine/openpicvar.h> #include <dev/ofw/ofw_bus.h> #include <dev/ofw/ofw_bus_subr.h> +#include <machine/openpicvar.h> + #include "pic_if.h" static int openpic_fdt_probe(device_t); diff --git a/sys/powerpc/ofw/ofw_machdep.c b/sys/powerpc/ofw/ofw_machdep.c index 9af4051..fcdc953 100644 --- a/sys/powerpc/ofw/ofw_machdep.c +++ b/sys/powerpc/ofw/ofw_machdep.c @@ -60,17 +60,15 @@ __FBSDID("$FreeBSD$"); #include <machine/platform.h> #include <machine/ofw_machdep.h> -#define OFMEM_REGIONS 32 -static struct mem_region OFmem[OFMEM_REGIONS + 1], OFavail[OFMEM_REGIONS + 3]; -static struct mem_region OFfree[OFMEM_REGIONS + 3]; -static int nOFmem; +static struct mem_region OFmem[PHYS_AVAIL_SZ], OFavail[PHYS_AVAIL_SZ]; +static struct mem_region OFfree[PHYS_AVAIL_SZ]; extern register_t ofmsr[5]; -static int (*ofwcall)(void *); +extern void *openfirmware_entry; static void *fdt; int ofw_real_mode; -int ofw_32bit_mode_entry(void *); +int ofwcall(void *); static void ofw_quiesce(void); static int openfirmware(void *args); @@ -134,11 +132,32 @@ memr_merge(struct mem_region *from, struct mem_region *to) to->mr_size = end - to->mr_start; } +/* + * Quick sort callout for comparing memory regions. 
+ */ +static int mr_cmp(const void *a, const void *b); + +static int +mr_cmp(const void *a, const void *b) +{ + const struct mem_region *regiona; + const struct mem_region *regionb; + + regiona = a; + regionb = b; + if (regiona->mr_start < regionb->mr_start) + return (-1); + else if (regiona->mr_start > regionb->mr_start) + return (1); + else + return (0); +} + static int parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output) { cell_t address_cells, size_cells; - cell_t OFmem[4*(OFMEM_REGIONS + 1)]; + cell_t OFmem[4 * PHYS_AVAIL_SZ]; int sz, i, j; int apple_hack_mode; phandle_t phandle; @@ -174,8 +193,8 @@ parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output) /* * Get memory. */ - if ((node == -1) || (sz = OF_getprop(node, prop, - OFmem, sizeof(OFmem[0]) * 4 * OFMEM_REGIONS)) <= 0) + if (node == -1 || (sz = OF_getprop(node, prop, + OFmem, sizeof(OFmem))) <= 0) panic("Physical memory map not found"); i = 0; @@ -225,7 +244,7 @@ parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output) #ifdef __powerpc64__ if (apple_hack_mode) { /* Add in regions above 4 GB to the available list */ - struct mem_region himem[OFMEM_REGIONS]; + struct mem_region himem[16]; int hisz; hisz = parse_ofw_memory(node, "reg", himem); @@ -243,6 +262,81 @@ parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output) return (sz); } +static int +parse_drconf_memory(int *msz, int *asz, struct mem_region *ofmem, + struct mem_region *ofavail) +{ + phandle_t phandle; + vm_offset_t base; + int i, idx, len, lasz, lmsz, res; + uint32_t lmb_size[2]; + unsigned long *dmem, flags; + + lmsz = *msz; + lasz = *asz; + + phandle = OF_finddevice("/ibm,dynamic-reconfiguration-memory"); + if (phandle == -1) + /* No drconf node, return. */ + return (0); + + res = OF_getprop(phandle, "ibm,lmb-size", lmb_size, sizeof(lmb_size)); + if (res == -1) + return (0); + + /* Parse the /ibm,dynamic-memory. 
+ The first position gives the # of entries. The next two words + reflect the address of the memory block. The next four words are + the DRC index, reserved, list index and flags. + (see PAPR C.6.6.2 ibm,dynamic-reconfiguration-memory) + + #el Addr DRC-idx res list-idx flags + ------------------------------------------------- + | 4 | 8 | 4 | 4 | 4 | 4 |.... + ------------------------------------------------- + */ + + len = OF_getproplen(phandle, "ibm,dynamic-memory"); + if (len > 0) { + + /* We have to use a variable length array on the stack + since we have very limited stack space. + */ + cell_t arr[len/sizeof(cell_t)]; + + res = OF_getprop(phandle, "ibm,dynamic-memory", &arr, + sizeof(arr)); + if (res == -1) + return (0); + + /* Number of elements */ + idx = arr[0]; + + /* First address. */ + dmem = (void*)&arr[1]; + + for (i = 0; i < idx; i++) { + base = *dmem; + dmem += 2; + flags = *dmem; + /* Use region only if available and not reserved. */ + if ((flags & 0x8) && !(flags & 0x80)) { + ofmem[lmsz].mr_start = base; + ofmem[lmsz].mr_size = (vm_size_t)lmb_size[1]; + ofavail[lasz].mr_start = base; + ofavail[lasz].mr_size = (vm_size_t)lmb_size[1]; + lmsz++; + lasz++; + } + dmem++; + } + } + + *msz = lmsz; + *asz = lasz; + + return (1); +} /* * This is called during powerpc_init, before the system is really initialized. * It shall provide the total and the available regions of RAM. @@ -255,31 +349,62 @@ ofw_mem_regions(struct mem_region **memp, int *memsz, struct mem_region **availp, int *availsz) { phandle_t phandle; + vm_offset_t maxphysaddr; int asz, msz, fsz; - int i, j; + int i, j, res; int still_merging; + char name[31]; asz = msz = 0; /* - * Get memory. + * Get memory from all the /memory nodes. 
*/ - phandle = OF_finddevice("/memory"); - if (phandle == -1) - phandle = OF_finddevice("/memory@0"); + for (phandle = OF_child(OF_peer(0)); phandle != 0; + phandle = OF_peer(phandle)) { + if (OF_getprop(phandle, "name", name, sizeof(name)) <= 0) + continue; + if (strncmp(name, "memory", sizeof(name)) != 0) + continue; + + res = parse_ofw_memory(phandle, "reg", &OFmem[msz]); + msz += res/sizeof(struct mem_region); + if (OF_getproplen(phandle, "available") >= 0) + res = parse_ofw_memory(phandle, "available", + &OFavail[asz]); + else + res = parse_ofw_memory(phandle, "reg", &OFavail[asz]); + asz += res/sizeof(struct mem_region); + } + + /* Check for memory in ibm,dynamic-reconfiguration-memory */ + parse_drconf_memory(&msz, &asz, OFmem, OFavail); - msz = parse_ofw_memory(phandle, "reg", OFmem); - nOFmem = msz / sizeof(struct mem_region); - asz = parse_ofw_memory(phandle, "available", OFavail); + qsort(OFmem, msz, sizeof(*OFmem), mr_cmp); + qsort(OFavail, asz, sizeof(*OFavail), mr_cmp); *memp = OFmem; - *memsz = nOFmem; - + *memsz = msz; + + /* + * On some firmwares (SLOF), some memory may be marked available that + * doesn't actually exist. This manifests as an extension of the last + * available segment past the end of physical memory, so truncate that + * one. 
+ */ + maxphysaddr = 0; + for (i = 0; i < msz; i++) + if (OFmem[i].mr_start + OFmem[i].mr_size > maxphysaddr) + maxphysaddr = OFmem[i].mr_start + OFmem[i].mr_size; + + if (OFavail[asz - 1].mr_start + OFavail[asz - 1].mr_size > maxphysaddr) + OFavail[asz - 1].mr_size = maxphysaddr - + OFavail[asz - 1].mr_start; + /* * OFavail may have overlapping regions - collapse these * and copy out remaining regions to OFfree */ - asz /= sizeof(struct mem_region); do { still_merging = FALSE; for (i = 0; i < asz; i++) { @@ -318,19 +443,6 @@ OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *)) else ofw_real_mode = 1; - ofwcall = NULL; - - #ifdef __powerpc64__ - /* - * For PPC64, we need to use some hand-written - * asm trampolines to get to OF. - */ - if (openfirm != NULL) - ofwcall = ofw_32bit_mode_entry; - #else - ofwcall = openfirm; - #endif - fdt = fdt_ptr; #ifdef FDT_DTB_STATIC @@ -345,7 +457,7 @@ OF_bootstrap() { boolean_t status = FALSE; - if (ofwcall != NULL) { + if (openfirmware_entry != NULL) { if (ofw_real_mode) { status = OF_install(OFW_STD_REAL, 0); } else { @@ -481,12 +593,7 @@ openfirmware(void *args) int result; #ifdef SMP struct ofw_rv_args rv_args; - #endif - - if (pmap_bootstrapped && ofw_real_mode) - args = (void *)pmap_kextract((vm_offset_t)args); - #ifdef SMP rv_args.args = args; rv_args.in_progress = 1; smp_rendezvous(smp_no_rendevous_barrier, ofw_rendezvous_dispatch, diff --git a/sys/powerpc/ofw/ofw_real.c b/sys/powerpc/ofw/ofw_real.c index 617f9be..1fc2ed1 100644 --- a/sys/powerpc/ofw/ofw_real.c +++ b/sys/powerpc/ofw/ofw_real.c @@ -205,13 +205,14 @@ ofw_real_bounce_alloc(void *junk) /* * Allocate a page of contiguous, wired physical memory that can - * fit into a 32-bit address space. + * fit into a 32-bit address space and accessed from real mode. 
*/ mtx_lock(&of_bounce_mtx); - of_bounce_virt = contigmalloc(PAGE_SIZE, M_OFWREAL, 0, - 0, BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, PAGE_SIZE); + of_bounce_virt = contigmalloc(PAGE_SIZE, M_OFWREAL, 0, 0, + ulmin(platform_real_maxaddr(), BUS_SPACE_MAXADDR_32BIT), PAGE_SIZE, + PAGE_SIZE); of_bounce_phys = vtophys(of_bounce_virt); of_bounce_size = PAGE_SIZE; diff --git a/sys/powerpc/ofw/ofwcall32.S b/sys/powerpc/ofw/ofwcall32.S new file mode 100644 index 0000000..06cc105 --- /dev/null +++ b/sys/powerpc/ofw/ofwcall32.S @@ -0,0 +1,154 @@ +/*- + * Copyright (C) 2009-2011 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include <sys/syscall.h> + +#include <machine/trap.h> +#include <machine/param.h> +#include <machine/spr.h> +#include <machine/asm.h> + +#define OFWSTKSZ 4096 /* 4K Open Firmware stack */ + +/* + * Globals + */ + .data +GLOBAL(ofmsr) + .long 0, 0, 0, 0, 0 /* msr/sprg0-3 used in Open Firmware */ +GLOBAL(rtasmsr) + .long 0 +GLOBAL(openfirmware_entry) + .long 0 /* Open Firmware entry point */ +GLOBAL(rtas_entry) + .long 0 /* RTAS entry point */ + + .align 4 +ofwstk: + .space OFWSTKSZ +rtas_regsave: + .space 4 + +/* + * Open Firmware Entry Point. May need to enter real mode. + * + * C prototype: int ofwcall(void *callbuffer); + */ + +ASENTRY(ofwcall) + mflr %r0 + stw %r0,4(%r1) + + /* Record the old MSR */ + mfmsr %r6 + + /* read client interface handler */ + lis %r4,openfirmware_entry@ha + lwz %r4,openfirmware_entry@l(%r4) + + /* + * Set the MSR to the OF value. This has the side effect of disabling + * exceptions, which prevents preemption later. + */ + + lis %r5,ofmsr@ha + lwz %r5,ofmsr@l(%r5) + mtmsr %r5 + isync + + /* + * Set up OF stack. This needs to be potentially accessible in real mode + * The pointer to the current kernel stack is placed at the very + * top of the stack along with the old MSR so we can get them back + * later. + */ + mr %r5,%r1 + lis %r1,(ofwstk+OFWSTKSZ-16)@ha + addi %r1,%r1,(ofwstk+OFWSTKSZ-16)@l + stw %r5,8(%r1) /* Save real stack pointer */ + stw %r6,12(%r1) /* Save old MSR */ + li %r5,0 + stw %r5,4(%r1) + stw %r5,0(%r1) + + /* Finally, branch to OF */ + mtctr %r4 + bctrl + + /* Reload stack pointer and MSR from the OFW stack */ + lwz %r6,12(%r1) + lwz %r1,8(%r1) + + /* Now set the real MSR */ + mtmsr %r6 + isync + + /* Return */ + lwz %r0,4(%r1) + mtlr %r0 + blr + +/* + * RTAS Entry Point. 
Similar to the OF one, but simpler (no separate stack) + * + * C prototype: int rtascall(void *callbuffer, void *rtas_privdat); + */ + +ASENTRY(rtascall) + mflr %r0 + stw %r0,4(%r1) + + /* Record the old MSR to real-mode-accessible area */ + mfmsr %r0 + lis %r5,rtas_regsave@ha + stw %r0,rtas_regsave@l(%r5) + + /* read client interface handler */ + lis %r5,rtas_entry@ha + lwz %r5,rtas_entry@l(%r5) + + /* Set the MSR to the RTAS value */ + lis %r6,rtasmsr@ha + lwz %r6,rtasmsr@l(%r6) + mtmsr %r6 + isync + + /* Branch to RTAS */ + mtctr %r5 + bctrl + + /* Now set the MSR back */ + lis %r6,rtas_regsave@ha + lwz %r6,rtas_regsave@l(%r6) + mtmsr %r6 + isync + + /* And return */ + lwz %r0,4(%r1) + mtlr %r0 + blr + diff --git a/sys/powerpc/ofw/ofwcall64.S b/sys/powerpc/ofw/ofwcall64.S new file mode 100644 index 0000000..1fb78e8 --- /dev/null +++ b/sys/powerpc/ofw/ofwcall64.S @@ -0,0 +1,290 @@ +/*- + * Copyright (C) 2009-2011 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/syscall.h> + +#include <machine/trap.h> +#include <machine/param.h> +#include <machine/spr.h> +#include <machine/asm.h> + +#define OFWSTKSZ 4096 /* 4K Open Firmware stack */ + +/* + * Globals + */ + .data + .align 4 +ofwstk: + .space OFWSTKSZ +rtas_regsave: + .space 24 /* 3 * sizeof(register_t) */ +GLOBAL(ofmsr) + .llong 0, 0, 0, 0, 0 /* msr/sprg0-3 used in Open Firmware */ +GLOBAL(rtasmsr) + .llong 0 +GLOBAL(openfirmware_entry) + .llong 0 /* Open Firmware entry point */ +GLOBAL(rtas_entry) + .llong 0 /* RTAS entry point */ + +/* + * Open Firmware Real-mode Entry Point. This is a huge pain. + */ + +ASENTRY(ofwcall) + mflr %r0 + std %r0,16(%r1) + stdu %r1,-208(%r1) + + /* + * We need to save the following, because OF's register save/ + * restore code assumes that the contents of registers are + * at most 32 bits wide: lr, cr, r2, r13-r31, the old MSR. These + * get placed in that order in the stack. 
+ */ + + mfcr %r4 + std %r4,48(%r1) + std %r13,56(%r1) + std %r14,64(%r1) + std %r15,72(%r1) + std %r16,80(%r1) + std %r17,88(%r1) + std %r18,96(%r1) + std %r19,104(%r1) + std %r20,112(%r1) + std %r21,120(%r1) + std %r22,128(%r1) + std %r23,136(%r1) + std %r24,144(%r1) + std %r25,152(%r1) + std %r26,160(%r1) + std %r27,168(%r1) + std %r28,176(%r1) + std %r29,184(%r1) + std %r30,192(%r1) + std %r31,200(%r1) + + /* Record the old MSR */ + mfmsr %r6 + + /* read client interface handler */ + lis %r4,openfirmware_entry@ha + ld %r4,openfirmware_entry@l(%r4) + + /* + * Set the MSR to the OF value. This has the side effect of disabling + * exceptions, which is important for the next few steps. + */ + + lis %r5,ofmsr@ha + ld %r5,ofmsr@l(%r5) + mtmsrd %r5 + isync + + /* + * Set up OF stack. This needs to be accessible in real mode and + * use the 32-bit ABI stack frame format. The pointer to the current + * kernel stack is placed at the very top of the stack along with + * the old MSR so we can get them back later. 
+ */ + mr %r5,%r1 + lis %r1,(ofwstk+OFWSTKSZ-32)@ha + addi %r1,%r1,(ofwstk+OFWSTKSZ-32)@l + std %r5,8(%r1) /* Save real stack pointer */ + std %r2,16(%r1) /* Save old TOC */ + std %r6,24(%r1) /* Save old MSR */ + li %r5,0 + stw %r5,4(%r1) + stw %r5,0(%r1) + + /* Finally, branch to OF */ + mtctr %r4 + bctrl + + /* Reload stack pointer and MSR from the OFW stack */ + ld %r6,24(%r1) + ld %r2,16(%r1) + ld %r1,8(%r1) + + /* Now set the real MSR */ + mtmsrd %r6 + isync + + /* Sign-extend the return value from OF */ + extsw %r3,%r3 + + /* Restore all the non-volatile registers */ + ld %r5,48(%r1) + mtcr %r5 + ld %r13,56(%r1) + ld %r14,64(%r1) + ld %r15,72(%r1) + ld %r16,80(%r1) + ld %r17,88(%r1) + ld %r18,96(%r1) + ld %r19,104(%r1) + ld %r20,112(%r1) + ld %r21,120(%r1) + ld %r22,128(%r1) + ld %r23,136(%r1) + ld %r24,144(%r1) + ld %r25,152(%r1) + ld %r26,160(%r1) + ld %r27,168(%r1) + ld %r28,176(%r1) + ld %r29,184(%r1) + ld %r30,192(%r1) + ld %r31,200(%r1) + + /* Restore the stack and link register */ + ld %r1,0(%r1) + ld %r0,16(%r1) + mtlr %r0 + blr + +/* + * RTAS 32-bit Entry Point. Similar to the OF one, but simpler (no separate + * stack) + * + * C prototype: int rtascall(void *callbuffer, void *rtas_privdat); + */ + +ASENTRY(rtascall) + mflr %r0 + std %r0,16(%r1) + stdu %r1,-208(%r1) + + /* + * We need to save the following, because RTAS's register save/ + * restore code assumes that the contents of registers are + * at most 32 bits wide: lr, cr, r2, r13-r31, the old MSR. These + * get placed in that order in the stack. 
+ */ + + mfcr %r5 + std %r5,48(%r1) + std %r13,56(%r1) + std %r14,64(%r1) + std %r15,72(%r1) + std %r16,80(%r1) + std %r17,88(%r1) + std %r18,96(%r1) + std %r19,104(%r1) + std %r20,112(%r1) + std %r21,120(%r1) + std %r22,128(%r1) + std %r23,136(%r1) + std %r24,144(%r1) + std %r25,152(%r1) + std %r26,160(%r1) + std %r27,168(%r1) + std %r28,176(%r1) + std %r29,184(%r1) + std %r30,192(%r1) + std %r31,200(%r1) + + /* Record the old MSR */ + mfmsr %r6 + + /* read client interface handler */ + lis %r5,rtas_entry@ha + ld %r5,rtas_entry@l(%r5) + + /* + * Set the MSR to the RTAS value. This has the side effect of disabling + * exceptions, which is important for the next few steps. + */ + + lis %r7,rtasmsr@ha + ld %r7,rtasmsr@l(%r7) + mtmsrd %r7 + isync + + /* + * Set up RTAS register save area, so that we can get back all of + * our 64-bit pointers. Save our stack pointer, the TOC, and the MSR. + * Put this in r1, since RTAS is obliged to save it. Kernel globals + * are below 4 GB, so this is safe. + */ + mr %r7,%r1 + lis %r1,rtas_regsave@ha + addi %r1,%r1,rtas_regsave@l + std %r7,0(%r1) /* Save 64-bit stack pointer */ + std %r2,8(%r1) /* Save TOC */ + std %r6,16(%r1) /* Save MSR */ + + /* Finally, branch to RTAS */ + mtctr %r5 + bctrl + + /* + * Reload stack pointer and MSR from the reg save area in r1. We are + * running in 32-bit mode at this point, so it doesn't matter if r1 + * has become sign-extended. 
+ */ + ld %r6,16(%r1) + ld %r2,8(%r1) + ld %r1,0(%r1) + + /* Now set the real MSR */ + mtmsrd %r6 + isync + + /* Sign-extend the return value from RTAS */ + extsw %r3,%r3 + + /* Restore all the non-volatile registers */ + ld %r5,48(%r1) + mtcr %r5 + ld %r13,56(%r1) + ld %r14,64(%r1) + ld %r15,72(%r1) + ld %r16,80(%r1) + ld %r17,88(%r1) + ld %r18,96(%r1) + ld %r19,104(%r1) + ld %r20,112(%r1) + ld %r21,120(%r1) + ld %r22,128(%r1) + ld %r23,136(%r1) + ld %r24,144(%r1) + ld %r25,152(%r1) + ld %r26,160(%r1) + ld %r27,168(%r1) + ld %r28,176(%r1) + ld %r29,184(%r1) + ld %r30,192(%r1) + ld %r31,200(%r1) + + /* Restore the stack and link register */ + ld %r1,0(%r1) + ld %r0,16(%r1) + mtlr %r0 + blr + diff --git a/sys/powerpc/aim/ofwmagic.S b/sys/powerpc/ofw/ofwmagic.S index f44f1e5..f44f1e5 100644 --- a/sys/powerpc/aim/ofwmagic.S +++ b/sys/powerpc/ofw/ofwmagic.S diff --git a/sys/powerpc/ofw/rtas.c b/sys/powerpc/ofw/rtas.c new file mode 100644 index 0000000..59692c9 --- /dev/null +++ b/sys/powerpc/ofw/rtas.c @@ -0,0 +1,243 @@ +/*- + * Copyright (c) 2011 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/systm.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/pmap.h> + +#include <machine/bus.h> +#include <machine/md_var.h> +#include <machine/pmap.h> +#include <machine/rtas.h> +#include <machine/stdarg.h> + +#include <dev/ofw/openfirm.h> + +MALLOC_DEFINE(M_RTAS, "rtas", "Run Time Abstraction Service"); + +static vm_offset_t rtas_bounce_phys; +static caddr_t rtas_bounce_virt; +static off_t rtas_bounce_offset; +static size_t rtas_bounce_size; +static uintptr_t rtas_private_data; +static struct mtx rtas_mtx; +static phandle_t rtas; + +/* From ofwcall.S */ +int rtascall(vm_offset_t callbuffer, uintptr_t rtas_privdat); +extern uintptr_t rtas_entry; +extern register_t rtasmsr; + +/* + * After the VM is up, allocate RTAS memory and instantiate it + */ + +static void rtas_setup(void *); + +SYSINIT(rtas_setup, SI_SUB_KMEM, SI_ORDER_ANY, rtas_setup, NULL); + +static void +rtas_setup(void *junk) +{ + ihandle_t rtasi; + cell_t rtas_size = 0, rtas_ptr; + char path[31]; + int result; + + rtas = OF_finddevice("/rtas"); + if (rtas == -1) { + rtas = 0; + return; + } + OF_package_to_path(rtas, path, sizeof(path)); + rtasi = OF_open(path); + if (rtasi == 0) { + rtas = 0; + printf("Error initializing RTAS: could not open node\n"); + return; + } + + mtx_init(&rtas_mtx, "RTAS", MTX_DEF, 
0); + + /* RTAS must be called with everything turned off in MSR */ + rtasmsr = mfmsr(); + rtasmsr &= ~(PSL_IR | PSL_DR | PSL_EE | PSL_SE); + #ifdef __powerpc64__ + rtasmsr &= ~PSL_SF; + #endif + + /* + * Allocate rtas_size + one page of contiguous, wired physical memory + * that can fit into a 32-bit address space and accessed from real mode. + * This is used both to bounce arguments and for RTAS private data. + * + * It must be 4KB-aligned and not cross a 256 MB boundary. + */ + + OF_getprop(rtas, "rtas-size", &rtas_size, sizeof(rtas_size)); + rtas_size = round_page(rtas_size); + rtas_bounce_virt = contigmalloc(rtas_size + PAGE_SIZE, M_RTAS, 0, 0, + ulmin(platform_real_maxaddr(), BUS_SPACE_MAXADDR_32BIT), + 4096, 256*1024*1024); + + rtas_private_data = vtophys(rtas_bounce_virt); + rtas_bounce_virt += rtas_size; /* Actual bounce area */ + rtas_bounce_phys = vtophys(rtas_bounce_virt); + rtas_bounce_size = PAGE_SIZE; + + /* + * Instantiate RTAS. We always use the 32-bit version. + */ + + result = OF_call_method("instantiate-rtas", rtasi, 1, 1, + (cell_t)rtas_private_data, &rtas_ptr); + OF_close(rtasi); + + if (result != 0) { + rtas = 0; + rtas_ptr = 0; + printf("Error initializing RTAS (%d)\n", result); + return; + } + + rtas_entry = (uintptr_t)(rtas_ptr); +} + +static cell_t +rtas_real_map(const void *buf, size_t len) +{ + cell_t phys; + + mtx_assert(&rtas_mtx, MA_OWNED); + + /* + * Make sure the bounce page offset satisfies any reasonable + * alignment constraint. 
+ */ + rtas_bounce_offset += sizeof(register_t) - + (rtas_bounce_offset % sizeof(register_t)); + + if (rtas_bounce_offset + len > rtas_bounce_size) { + panic("Oversize RTAS call!"); + return 0; + } + + if (buf != NULL) + memcpy(rtas_bounce_virt + rtas_bounce_offset, buf, len); + else + return (0); + + phys = rtas_bounce_phys + rtas_bounce_offset; + rtas_bounce_offset += len; + + return (phys); +} + +static void +rtas_real_unmap(cell_t physaddr, void *buf, size_t len) +{ + mtx_assert(&rtas_mtx, MA_OWNED); + + if (physaddr == 0) + return; + + memcpy(buf, rtas_bounce_virt + (physaddr - rtas_bounce_phys), len); +} + +/* Check if we have RTAS */ +int +rtas_exists(void) +{ + return (rtas != 0); +} + +/* Call an RTAS method by token */ +int +rtas_call_method(cell_t token, int nargs, int nreturns, ...) +{ + vm_offset_t argsptr; + va_list ap; + struct { + cell_t token; + cell_t nargs; + cell_t nreturns; + cell_t args_n_results[12]; + } args; + int n, result; + + if (!rtas_exists() || nargs + nreturns > 12) + return (-1); + + args.token = token; + va_start(ap, nreturns); + + mtx_lock(&rtas_mtx); + rtas_bounce_offset = 0; + + args.nargs = nargs; + args.nreturns = nreturns; + + for (n = 0; n < nargs; n++) + args.args_n_results[n] = va_arg(ap, cell_t); + + argsptr = rtas_real_map(&args, sizeof(args)); + result = rtascall(argsptr, rtas_private_data); + rtas_real_unmap(argsptr, &args, sizeof(args)); + mtx_unlock(&rtas_mtx); + + if (result < 0) + return (result); + + for (n = nargs; n < nargs + nreturns; n++) + *va_arg(ap, cell_t *) = args.args_n_results[n]; + return (result); +} + +/* Look up an RTAS token */ +cell_t +rtas_token_lookup(const char *method) +{ + cell_t token; + + if (!rtas_exists()) + return (-1); + + if (OF_getprop(rtas, method, &token, sizeof(token)) == -1) + return (-1); + + return (token); +} + + diff --git a/sys/powerpc/powermac/fcu.c b/sys/powerpc/powermac/fcu.c index 004b4db..7ac9b1b 100644 --- a/sys/powerpc/powermac/fcu.c +++ b/sys/powerpc/powermac/fcu.c @@ 
-49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <dev/ofw/openfirm.h> #include <dev/ofw/ofw_bus.h> +#include <powerpc/powermac/powermac_thermal.h> /* FCU registers * /u3@0,f8000000/i2c@f8001000/fan@15e @@ -66,10 +67,10 @@ __FBSDID("$FreeBSD$"); #define FCU_PWM_SGET(x) 0x30 + (x) * 2 /* Set or get PWM. */ struct fcu_fan { + struct pmac_fan fan; + device_t dev; + int id; - cell_t min; - cell_t max; - char location[32]; enum { FCU_FAN_RPM, FCU_FAN_PWM @@ -103,9 +104,9 @@ static int fcu_attach(device_t); /* Utility functions */ static void fcu_attach_fans(device_t dev); static int fcu_fill_fan_prop(device_t dev); -static int fcu_fan_set_rpm(device_t dev, struct fcu_fan *fan, int rpm); -static int fcu_fan_get_rpm(device_t dev, struct fcu_fan *fan, int *rpm); -static int fcu_fan_set_pwm(device_t dev, struct fcu_fan *fan, int pwm); +static int fcu_fan_set_rpm(struct fcu_fan *fan, int rpm); +static int fcu_fan_get_rpm(struct fcu_fan *fan); +static int fcu_fan_set_pwm(struct fcu_fan *fan, int pwm); static int fcu_fan_get_pwm(device_t dev, struct fcu_fan *fan, int *pwm, int *rpm); static int fcu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS); @@ -137,6 +138,8 @@ fcu_write(device_t dev, uint32_t addr, uint8_t reg, uint8_t *buff, int len) { unsigned char buf[4]; + int try = 0; + struct iic_msg msg[] = { { addr, IIC_M_WR, 0, buf } }; @@ -144,33 +147,46 @@ fcu_write(device_t dev, uint32_t addr, uint8_t reg, uint8_t *buff, msg[0].len = len + 1; buf[0] = reg; memcpy(buf + 1, buff, len); - if (iicbus_transfer(dev, msg, 1) != 0) { - device_printf(dev, "iicbus write failed\n"); - return (EIO); - } - return (0); + for (;;) + { + if (iicbus_transfer(dev, msg, 1) == 0) + return (0); + if (++try > 5) { + device_printf(dev, "iicbus write failed\n"); + return (-1); + } + pause("fcu_write", hz); + } } static int fcu_read_1(device_t dev, uint32_t addr, uint8_t reg, uint8_t *data) { uint8_t buf[4]; + int err, try = 0; struct iic_msg msg[2] = { { addr, IIC_M_WR | IIC_M_NOSTOP, 1, ® }, { addr, IIC_M_RD, 1, 
buf }, }; - if (iicbus_transfer(dev, msg, 2) != 0) { - device_printf(dev, "iicbus read failed\n"); - return (EIO); + for (;;) + { + err = iicbus_transfer(dev, msg, 2); + if (err != 0) + goto retry; + + *data = *((uint8_t*)buf); + return (0); + retry: + if (++try > 5) { + device_printf(dev, "iicbus read failed\n"); + return (-1); + } + pause("fcu_read_1", hz); } - - *data = *((uint8_t*)buf); - - return (0); } static int @@ -249,95 +265,102 @@ fcu_start(void *xdev) } static int -fcu_fan_set_rpm(device_t dev, struct fcu_fan *fan, int rpm) +fcu_fan_set_rpm(struct fcu_fan *fan, int rpm) { uint8_t reg; struct fcu_softc *sc; unsigned char buf[2]; - sc = device_get_softc(dev); + sc = device_get_softc(fan->dev); /* Clamp to allowed range */ - rpm = max(fan->min, rpm); - rpm = min(fan->max, rpm); + rpm = max(fan->fan.min_rpm, rpm); + rpm = min(fan->fan.max_rpm, rpm); if (fan->type == FCU_FAN_RPM) { reg = FCU_RPM_SET(fan->id); fan->setpoint = rpm; } else { - device_printf(dev, "Unknown fan type: %d\n", fan->type); - return (EIO); + device_printf(fan->dev, "Unknown fan type: %d\n", fan->type); + return (-1); } buf[0] = rpm >> (8 - fcu_rpm_shift); buf[1] = rpm << fcu_rpm_shift; - fcu_write(sc->sc_dev, sc->sc_addr, reg, buf, 2); + if (fcu_write(sc->sc_dev, sc->sc_addr, reg, buf, 2) < 0) + return (-1); return (0); } static int -fcu_fan_get_rpm(device_t dev, struct fcu_fan *fan, int *rpm) +fcu_fan_get_rpm(struct fcu_fan *fan) { uint8_t reg; struct fcu_softc *sc; uint8_t buff[2] = { 0, 0 }; uint8_t active = 0, avail = 0, fail = 0; + int rpm; - sc = device_get_softc(dev); + sc = device_get_softc(fan->dev); if (fan->type == FCU_FAN_RPM) { /* Check if the fan is available. 
*/ reg = FCU_RPM_AVAILABLE; - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &avail); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &avail) < 0) + return (-1); if ((avail & (1 << fan->id)) == 0) { - device_printf(dev, "RPM Fan not available ID: %d\n", - fan->id); - return (EIO); + device_printf(fan->dev, + "RPM Fan not available ID: %d\n", fan->id); + return (-1); } /* Check if we have a failed fan. */ reg = FCU_RPM_FAIL; - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &fail); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &fail) < 0) + return (-1); if ((fail & (1 << fan->id)) != 0) { - device_printf(dev, "RPM Fan failed ID: %d\n", fan->id); - return (EIO); + device_printf(fan->dev, + "RPM Fan failed ID: %d\n", fan->id); + return (-1); } /* Check if fan is active. */ reg = FCU_RPM_ACTIVE; - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &active); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &active) < 0) + return (-1); if ((active & (1 << fan->id)) == 0) { - device_printf(dev, "RPM Fan not active ID: %d\n", + device_printf(fan->dev, "RPM Fan not active ID: %d\n", fan->id); - return (ENXIO); + return (-1); } reg = FCU_RPM_READ(fan->id); } else { - device_printf(dev, "Unknown fan type: %d\n", fan->type); - return (EIO); + device_printf(fan->dev, "Unknown fan type: %d\n", fan->type); + return (-1); } /* It seems that we can read the fans rpm. 
*/ - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buff); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buff) < 0) + return (-1); - *rpm = (buff[0] << (8 - fcu_rpm_shift)) | buff[1] >> fcu_rpm_shift; + rpm = (buff[0] << (8 - fcu_rpm_shift)) | buff[1] >> fcu_rpm_shift; - return (0); + return (rpm); } static int -fcu_fan_set_pwm(device_t dev, struct fcu_fan *fan, int pwm) +fcu_fan_set_pwm(struct fcu_fan *fan, int pwm) { uint8_t reg; struct fcu_softc *sc; uint8_t buf[2]; - sc = device_get_softc(dev); + sc = device_get_softc(fan->dev); /* Clamp to allowed range */ - pwm = max(fan->min, pwm); - pwm = min(fan->max, pwm); + pwm = max(fan->fan.min_rpm, pwm); + pwm = min(fan->fan.max_rpm, pwm); if (fan->type == FCU_FAN_PWM) { reg = FCU_PWM_SGET(fan->id); @@ -347,14 +370,14 @@ fcu_fan_set_pwm(device_t dev, struct fcu_fan *fan, int pwm) pwm = 30; fan->setpoint = pwm; } else { - device_printf(dev, "Unknown fan type: %d\n", fan->type); + device_printf(fan->dev, "Unknown fan type: %d\n", fan->type); return (EIO); } buf[0] = (pwm * 2550) / 1000; - fcu_write(sc->sc_dev, sc->sc_addr, reg, buf, 1); - + if (fcu_write(sc->sc_dev, sc->sc_addr, reg, buf, 1) < 0) + return (-1); return (0); } @@ -371,26 +394,29 @@ fcu_fan_get_pwm(device_t dev, struct fcu_fan *fan, int *pwm, int *rpm) if (fan->type == FCU_FAN_PWM) { /* Check if the fan is available. */ reg = FCU_PWM_AVAILABLE; - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &avail); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &avail) < 0) + return (-1); if ((avail & (1 << fan->id)) == 0) { device_printf(dev, "PWM Fan not available ID: %d\n", fan->id); - return (EIO); + return (-1); } /* Check if we have a failed fan. */ reg = FCU_PWM_FAIL; - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &fail); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &fail) < 0) + return (-1); if ((fail & (1 << fan->id)) != 0) { device_printf(dev, "PWM Fan failed ID: %d\n", fan->id); - return (EIO); + return (-1); } /* Check if fan is active. 
*/ reg = FCU_PWM_ACTIVE; - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &active); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, &active) < 0) + return (-1); if ((active & (1 << fan->id)) == 0) { device_printf(dev, "PWM Fan not active ID: %d\n", fan->id); - return (ENXIO); + return (-1); } reg = FCU_PWM_SGET(fan->id); } else { @@ -399,13 +425,16 @@ fcu_fan_get_pwm(device_t dev, struct fcu_fan *fan, int *pwm, int *rpm) } /* It seems that we can read the fans pwm. */ - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buf); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buf) < 0) + return (-1); *pwm = (buf[0] * 1000) / 2550; /* Now read the rpm. */ reg = FCU_PWM_RPM(fan->id); - fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buf); + if (fcu_read_1(sc->sc_dev, sc->sc_addr, reg, buf) < 0) + return (-1); + *rpm = (buf[0] << (8 - fcu_rpm_shift)) | buf[1] >> fcu_rpm_shift; return (0); @@ -434,7 +463,7 @@ fcu_fill_fan_prop(device_t dev) sizeof(location)); while (len < prop_len) { if (sc->sc_fans != NULL) { - strcpy(sc->sc_fans[i].location, location + len); + strcpy(sc->sc_fans[i].fan.name, location + len); } prev_len = strlen(location + len) + 1; len += prev_len; @@ -463,6 +492,33 @@ fcu_fill_fan_prop(device_t dev) for (j = 0; j < i; j++) sc->sc_fans[j].id = ((id[j] >> 8) & 0x0f) % 8; + /* Fill the fan zone property. 
*/ + prop_len = OF_getprop(child, "hwctrl-zone", id, sizeof(id)); + for (j = 0; j < i; j++) + sc->sc_fans[j].fan.zone = id[j]; + + /* Finish setting up fan properties */ + for (j = 0; j < i; j++) { + sc->sc_fans[j].dev = sc->sc_dev; + if (sc->sc_fans[j].type == FCU_FAN_RPM) { + sc->sc_fans[j].fan.min_rpm = 4800 >> fcu_rpm_shift; + sc->sc_fans[j].fan.max_rpm = 56000 >> fcu_rpm_shift; + sc->sc_fans[j].setpoint = + fcu_fan_get_rpm(&sc->sc_fans[j]); + sc->sc_fans[j].fan.read = + (int (*)(struct pmac_fan *))(fcu_fan_get_rpm); + sc->sc_fans[j].fan.set = + (int (*)(struct pmac_fan *, int))(fcu_fan_set_rpm); + } else { + sc->sc_fans[j].fan.min_rpm = 40; /* Percent */ + sc->sc_fans[j].fan.max_rpm = 100; + sc->sc_fans[j].fan.read = NULL; + sc->sc_fans[j].fan.set = + (int (*)(struct pmac_fan *, int))(fcu_fan_set_pwm); + } + sc->sc_fans[j].fan.default_rpm = sc->sc_fans[j].fan.max_rpm; + } + return (i); } @@ -472,16 +528,20 @@ fcu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS) device_t fcu; struct fcu_softc *sc; struct fcu_fan *fan; - int rpm = 0, pwm = 0, error; + int rpm = 0, pwm = 0, error = 0; fcu = arg1; sc = device_get_softc(fcu); fan = &sc->sc_fans[arg2 & 0x00ff]; if (fan->type == FCU_FAN_RPM) { - fcu_fan_get_rpm(fcu, fan, &rpm); + rpm = fcu_fan_get_rpm(fan); + if (rpm < 0) + return (-1); error = sysctl_handle_int(oidp, &rpm, 0, req); } else { - fcu_fan_get_pwm(fcu, fan, &pwm, &rpm); + error = fcu_fan_get_pwm(fcu, fan, &pwm, &rpm); + if (error < 0) + return (-1); switch (arg2 & 0xff00) { case FCU_PWM_SYSCTL_PWM: @@ -504,9 +564,9 @@ fcu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS) return (error); if (fan->type == FCU_FAN_RPM) - return (fcu_fan_set_rpm(fcu, fan, rpm)); + return (fcu_fan_set_rpm(fan, rpm)); else - return (fcu_fan_set_pwm(fcu, fan, pwm)); + return (fcu_fan_set_pwm(fan, pwm)); } static void @@ -543,39 +603,36 @@ fcu_attach_fans(device_t dev) /* Now we can fill the properties into the allocated struct. 
*/ sc->sc_nfans = fcu_fill_fan_prop(dev); + /* Register fans with pmac_thermal */ + for (i = 0; i < sc->sc_nfans; i++) + pmac_thermal_fan_register(&sc->sc_fans[i].fan); + /* Add sysctls for the fans. */ for (i = 0; i < sc->sc_nfans; i++) { - for (j = 0; j < strlen(sc->sc_fans[i].location); j++) { - sysctl_name[j] = tolower(sc->sc_fans[i].location[j]); + for (j = 0; j < strlen(sc->sc_fans[i].fan.name); j++) { + sysctl_name[j] = tolower(sc->sc_fans[i].fan.name[j]); if (isspace(sysctl_name[j])) sysctl_name[j] = '_'; } sysctl_name[j] = 0; if (sc->sc_fans[i].type == FCU_FAN_RPM) { - sc->sc_fans[i].min = 2400 >> fcu_rpm_shift; - sc->sc_fans[i].max = 56000 >> fcu_rpm_shift; - fcu_fan_get_rpm(dev, &sc->sc_fans[i], - &sc->sc_fans[i].setpoint); - oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(fanroot_oid), OID_AUTO, sysctl_name, CTLFLAG_RD, 0, "Fan Information"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "minrpm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].min), sizeof(cell_t), - "Minimum allowed RPM"); + &(sc->sc_fans[i].fan.min_rpm), + sizeof(int), "Minimum allowed RPM"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "maxrpm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].max), sizeof(cell_t), - "Maximum allowed RPM"); + &(sc->sc_fans[i].fan.max_rpm), + sizeof(int), "Maximum allowed RPM"); /* I use i to pass the fan id. 
*/ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "rpm", CTLTYPE_INT | CTLFLAG_RW, dev, i, fcu_fanrpm_sysctl, "I", "Fan RPM"); } else { - sc->sc_fans[i].min = 30; - sc->sc_fans[i].max = 100; fcu_fan_get_pwm(dev, &sc->sc_fans[i], &sc->sc_fans[i].setpoint, &sc->sc_fans[i].rpm); @@ -585,12 +642,12 @@ fcu_attach_fans(device_t dev) CTLFLAG_RD, 0, "Fan Information"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "minpwm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].min), sizeof(cell_t), - "Minimum allowed PWM in %"); + &(sc->sc_fans[i].fan.min_rpm), + sizeof(int), "Minimum allowed PWM in %"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "maxpwm", CTLTYPE_INT | CTLFLAG_RD, - &(sc->sc_fans[i].max), sizeof(cell_t), - "Maximum allowed PWM in %"); + &(sc->sc_fans[i].fan.max_rpm), + sizeof(int), "Maximum allowed PWM in %"); /* I use i to pass the fan id or'ed with the type * of info I want to display/modify. */ @@ -610,7 +667,7 @@ fcu_attach_fans(device_t dev) device_printf(dev, "Fans\n"); for (i = 0; i < sc->sc_nfans; i++) { device_printf(dev, "Location: %s type: %d ID: %d " - "RPM: %d\n", sc->sc_fans[i].location, + "RPM: %d\n", sc->sc_fans[i].fan.name, sc->sc_fans[i].type, sc->sc_fans[i].id, (sc->sc_fans[i].type == FCU_FAN_RPM) ? sc->sc_fans[i].setpoint : diff --git a/sys/powerpc/powermac/powermac_thermal.c b/sys/powerpc/powermac/powermac_thermal.c new file mode 100644 index 0000000..9c1f59d --- /dev/null +++ b/sys/powerpc/powermac/powermac_thermal.c @@ -0,0 +1,183 @@ +/*- + * Copyright (c) 2009-2011 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/systm.h> + +#include <sys/types.h> +#include <sys/kthread.h> +#include <sys/malloc.h> +#include <sys/reboot.h> +#include <sys/sysctl.h> +#include <sys/queue.h> + +#include "powermac_thermal.h" + +static void fan_management_proc(void); +static void pmac_therm_manage_fans(void); + +static struct proc *pmac_them_proc; +static int enable_pmac_thermal = 1; + +static struct kproc_desc pmac_therm_kp = { + "pmac_thermal", + fan_management_proc, + &pmac_them_proc +}; + +SYSINIT(pmac_therm_setup, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, + &pmac_therm_kp); +SYSCTL_INT(_machdep, OID_AUTO, manage_fans, CTLFLAG_RW | CTLFLAG_TUN, + &enable_pmac_thermal, 1, "Enable automatic fan management"); +MALLOC_DEFINE(M_PMACTHERM, "pmactherm", "Powermac Thermal Management"); + +struct pmac_fan_le { + struct pmac_fan *fan; + int last_val; + 
SLIST_ENTRY(pmac_fan_le) entries; +}; +struct pmac_sens_le { + struct pmac_therm *sensor; + int last_val; + SLIST_ENTRY(pmac_sens_le) entries; +}; +static SLIST_HEAD(pmac_fans, pmac_fan_le) fans = SLIST_HEAD_INITIALIZER(fans); +static SLIST_HEAD(pmac_sensors, pmac_sens_le) sensors = + SLIST_HEAD_INITIALIZER(sensors); + +static void +fan_management_proc(void) +{ + /* Nothing to manage? */ + if (SLIST_EMPTY(&fans)) + kproc_exit(0); + + while (1) { + pmac_therm_manage_fans(); + pause("pmac_therm", hz); + } +} + +static void +pmac_therm_manage_fans(void) +{ + struct pmac_sens_le *sensor; + struct pmac_fan_le *fan; + int average_excess, max_excess_zone, frac_excess; + int nsens, nsens_zone; + int temp; + + if (!enable_pmac_thermal) + return; + + /* Read all the sensors */ + SLIST_FOREACH(sensor, &sensors, entries) { + temp = sensor->sensor->read(sensor->sensor); + if (temp > 0) /* Use the previous temp in case of error */ + sensor->last_val = temp; + + if (sensor->last_val > sensor->sensor->max_temp) { + printf("WARNING: Current temperature (%s: %d.%d C) " + "exceeds critical temperature (%d.%d C)! 
" + "Shutting down!\n", sensor->sensor->name, + (sensor->last_val - ZERO_C_TO_K) / 10, + (sensor->last_val - ZERO_C_TO_K) % 10, + (sensor->sensor->max_temp - ZERO_C_TO_K) / 10, + (sensor->sensor->max_temp - ZERO_C_TO_K) % 10); + shutdown_nice(RB_POWEROFF); + } + } + + /* Set all the fans */ + SLIST_FOREACH(fan, &fans, entries) { + nsens = nsens_zone = 0; + average_excess = max_excess_zone = 0; + SLIST_FOREACH(sensor, &sensors, entries) { + frac_excess = (sensor->last_val - + sensor->sensor->target_temp)*100 / + (sensor->sensor->max_temp - + sensor->sensor->target_temp); + if (frac_excess < 0) + frac_excess = 0; + if (sensor->sensor->zone == fan->fan->zone) { + max_excess_zone = imax(max_excess_zone, + frac_excess); + nsens_zone++; + } + average_excess += frac_excess; + nsens++; + } + average_excess /= nsens; + + /* If there are no sensors in this zone, use the average */ + if (nsens_zone == 0) + max_excess_zone = average_excess; + /* No sensors at all? Use default */ + if (nsens == 0) { + fan->fan->set(fan->fan, fan->fan->default_rpm); + continue; + } + + /* + * Scale the fan linearly in the max temperature in its + * thermal zone. 
+ */ + fan->fan->set(fan->fan, max_excess_zone * + (fan->fan->max_rpm - fan->fan->min_rpm)/100 + + fan->fan->min_rpm); + } +} + +void +pmac_thermal_fan_register(struct pmac_fan *fan) +{ + struct pmac_fan_le *list_entry; + + list_entry = malloc(sizeof(struct pmac_fan_le), M_PMACTHERM, + M_ZERO | M_WAITOK); + list_entry->fan = fan; + + SLIST_INSERT_HEAD(&fans, list_entry, entries); +} + +void +pmac_thermal_sensor_register(struct pmac_therm *sensor) +{ + struct pmac_sens_le *list_entry; + + list_entry = malloc(sizeof(struct pmac_sens_le), M_PMACTHERM, + M_ZERO | M_WAITOK); + list_entry->sensor = sensor; + + SLIST_INSERT_HEAD(&sensors, list_entry, entries); +} + diff --git a/sys/powerpc/powermac/powermac_thermal.h b/sys/powerpc/powermac/powermac_thermal.h new file mode 100644 index 0000000..424c612 --- /dev/null +++ b/sys/powerpc/powermac/powermac_thermal.h @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2009-2011 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _POWERPC_POWERMAC_POWERMAC_THERMAL_H +#define _POWERPC_POWERMAC_POWERMAC_THERMAL_H + +#define ZERO_C_TO_K 2732 + +struct pmac_fan { + int min_rpm, max_rpm, default_rpm; + + char name[32]; + int zone; + + int (*read)(struct pmac_fan *); + int (*set)(struct pmac_fan *, int value); +}; + +struct pmac_therm { + int target_temp, max_temp; /* Tenths of a degree K */ + + char name[32]; + int zone; + + int (*read)(struct pmac_therm *); +}; + +void pmac_thermal_fan_register(struct pmac_fan *); +void pmac_thermal_sensor_register(struct pmac_therm *); + +#endif diff --git a/sys/powerpc/powermac/smu.c b/sys/powerpc/powermac/smu.c index 928472c..ede97a1 100644 --- a/sys/powerpc/powermac/smu.c +++ b/sys/powerpc/powermac/smu.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include <dev/ofw/ofw_bus.h> #include <dev/ofw/ofw_bus_subr.h> #include <powerpc/powermac/macgpiovar.h> +#include <powerpc/powermac/powermac_thermal.h> #include "clock_if.h" #include "iicbus_if.h" @@ -69,19 +70,19 @@ struct smu_cmd { STAILQ_HEAD(smu_cmdq, smu_cmd); struct smu_fan { + struct pmac_fan fan; + device_t dev; cell_t reg; - cell_t min_rpm; - cell_t max_rpm; - cell_t unmanaged_rpm; - char location[32]; int old_style; int setpoint; }; struct smu_sensor { + struct pmac_therm therm; + device_t dev; + cell_t reg; - char location[32]; enum { SMU_CURRENT_SENSOR, SMU_VOLTAGE_SENSOR, @@ -131,10 +132,6 @@ struct smu_softc { uint16_t sc_slots_pow_scale; int16_t 
sc_slots_pow_offset; - /* Thermal management parameters */ - int sc_target_temp; /* Default 55 C */ - int sc_critical_temp; /* Default 90 C */ - struct cdev *sc_leddev; }; @@ -161,8 +158,6 @@ static int smu_get_datablock(device_t dev, int8_t id, uint8_t *buf, static void smu_attach_i2c(device_t dev, phandle_t i2croot); static void smu_attach_fans(device_t dev, phandle_t fanroot); static void smu_attach_sensors(device_t dev, phandle_t sensroot); -static void smu_fan_management_proc(void *xdev); -static void smu_manage_fans(device_t smu); static void smu_set_sleepled(void *xdev, int onoff); static int smu_server_mode(SYSCTL_HANDLER_ARGS); static void smu_doorbell_intr(void *xdev); @@ -349,24 +344,6 @@ smu_attach(device_t dev) sc->sc_slots_pow_offset = (data[6] << 8) + data[7]; /* - * Set up simple-minded thermal management. - */ - sc->sc_target_temp = 55; - sc->sc_critical_temp = 90; - - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, - "target_temp", CTLTYPE_INT | CTLFLAG_RW, &sc->sc_target_temp, - sizeof(int), "Target temperature (C)"); - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, - "critical_temp", CTLTYPE_INT | CTLFLAG_RW, - &sc->sc_critical_temp, sizeof(int), "Critical temperature (C)"); - - kproc_create(smu_fan_management_proc, dev, &sc->sc_fanmgt_proc, - RFHIGHPID, 0, "smu_thermal"); - - /* * Set up LED interface */ sc->sc_leddev = led_create(smu_set_sleepled, dev, "sleepled"); @@ -658,8 +635,9 @@ doorbell_attach(device_t dev) */ static int -smu_fan_set_rpm(device_t smu, struct smu_fan *fan, int rpm) +smu_fan_set_rpm(struct smu_fan *fan, int rpm) { + device_t smu = fan->dev; struct smu_cmd cmd; int error; @@ -667,8 +645,8 @@ smu_fan_set_rpm(device_t smu, struct smu_fan *fan, int rpm) error = EIO; /* Clamp to allowed range */ - rpm = max(fan->min_rpm, rpm); - rpm = min(fan->max_rpm, rpm); + rpm = max(fan->fan.min_rpm, rpm); + rpm = 
min(fan->fan.max_rpm, rpm); /* * Apple has two fan control mechanisms. We can't distinguish @@ -684,7 +662,7 @@ smu_fan_set_rpm(device_t smu, struct smu_fan *fan, int rpm) cmd.data[3] = rpm & 0xff; error = smu_run_cmd(smu, &cmd, 1); - if (error) + if (error && error != EWOULDBLOCK) fan->old_style = 1; } @@ -704,8 +682,9 @@ smu_fan_set_rpm(device_t smu, struct smu_fan *fan, int rpm) } static int -smu_fan_read_rpm(device_t smu, struct smu_fan *fan) +smu_fan_read_rpm(struct smu_fan *fan) { + device_t smu = fan->dev; struct smu_cmd cmd; int rpm, error; @@ -716,7 +695,7 @@ smu_fan_read_rpm(device_t smu, struct smu_fan *fan) cmd.data[1] = fan->reg; error = smu_run_cmd(smu, &cmd, 1); - if (error) + if (error && error != EWOULDBLOCK) fan->old_style = 1; rpm = (cmd.data[0] << 8) | cmd.data[1]; @@ -749,7 +728,7 @@ smu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS) sc = device_get_softc(smu); fan = &sc->sc_fans[arg2]; - rpm = smu_fan_read_rpm(smu, fan); + rpm = smu_fan_read_rpm(fan); if (rpm < 0) return (rpm); @@ -760,7 +739,7 @@ smu_fanrpm_sysctl(SYSCTL_HANDLER_ARGS) sc->sc_lastuserchange = time_uptime; - return (smu_fan_set_rpm(smu, fan, rpm)); + return (smu_fan_set_rpm(fan, rpm)); } static void @@ -801,23 +780,25 @@ smu_attach_fans(device_t dev, phandle_t fanroot) if (strcmp(type, "fan-rpm-control") != 0) continue; + fan->dev = dev; fan->old_style = 0; OF_getprop(child, "reg", &fan->reg, sizeof(cell_t)); - OF_getprop(child, "min-value", &fan->min_rpm, sizeof(cell_t)); - OF_getprop(child, "max-value", &fan->max_rpm, sizeof(cell_t)); + OF_getprop(child, "min-value", &fan->fan.min_rpm, sizeof(int)); + OF_getprop(child, "max-value", &fan->fan.max_rpm, sizeof(int)); + OF_getprop(child, "zone", &fan->fan.zone, sizeof(int)); - if (OF_getprop(child, "unmanaged-value", &fan->unmanaged_rpm, - sizeof(cell_t)) != sizeof(cell_t)) - fan->unmanaged_rpm = fan->max_rpm; + if (OF_getprop(child, "unmanaged-value", &fan->fan.default_rpm, + sizeof(int)) != sizeof(int)) + fan->fan.default_rpm = 
fan->fan.max_rpm; - fan->setpoint = smu_fan_read_rpm(dev, fan); + fan->setpoint = smu_fan_read_rpm(fan); - OF_getprop(child, "location", fan->location, - sizeof(fan->location)); + OF_getprop(child, "location", fan->fan.name, + sizeof(fan->fan.name)); /* Add sysctls */ - for (i = 0; i < strlen(fan->location); i++) { - sysctl_name[i] = tolower(fan->location[i]); + for (i = 0; i < strlen(fan->fan.name); i++) { + sysctl_name[i] = tolower(fan->fan.name[i]); if (isspace(sysctl_name[i])) sysctl_name[i] = '_'; } @@ -826,23 +807,28 @@ smu_attach_fans(device_t dev, phandle_t fanroot) oid = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(fanroot_oid), OID_AUTO, sysctl_name, CTLFLAG_RD, 0, "Fan Information"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "minrpm", - CTLTYPE_INT | CTLFLAG_RD, &fan->min_rpm, sizeof(cell_t), + CTLTYPE_INT | CTLFLAG_RD, &fan->fan.min_rpm, sizeof(int), "Minimum allowed RPM"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "maxrpm", - CTLTYPE_INT | CTLFLAG_RD, &fan->max_rpm, sizeof(cell_t), + CTLTYPE_INT | CTLFLAG_RD, &fan->fan.max_rpm, sizeof(int), "Maximum allowed RPM"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "rpm", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, sc->sc_nfans, smu_fanrpm_sysctl, "I", "Fan RPM"); + fan->fan.read = (int (*)(struct pmac_fan *))smu_fan_read_rpm; + fan->fan.set = (int (*)(struct pmac_fan *, int))smu_fan_set_rpm; + pmac_thermal_fan_register(&fan->fan); + fan++; sc->sc_nfans++; } } static int -smu_sensor_read(device_t smu, struct smu_sensor *sens, int *val) +smu_sensor_read(struct smu_sensor *sens) { + device_t smu = sens->dev; struct smu_cmd cmd; struct smu_softc *sc; int64_t value; @@ -855,7 +841,7 @@ smu_sensor_read(device_t smu, struct smu_sensor *sens, int *val) error = smu_run_cmd(smu, &cmd, 1); if (error != 0) - return (error); + return (-1); sc = device_get_softc(smu); value = (cmd.data[0] << 8) | cmd.data[1]; @@ -867,8 +853,8 @@ smu_sensor_read(device_t smu, struct smu_sensor *sens, int *val) value 
+= ((int64_t)sc->sc_cpu_diode_offset) << 9; value <<= 1; - /* Convert from 16.16 fixed point degC into integer C. */ - value >>= 16; + /* Convert from 16.16 fixed point degC into integer 0.1 K. */ + value = 10*(value >> 16) + ((10*(value & 0xffff)) >> 16) + 2732; break; case SMU_VOLTAGE_SENSOR: value *= sc->sc_cpu_volt_scale; @@ -902,8 +888,7 @@ smu_sensor_read(device_t smu, struct smu_sensor *sens, int *val) break; } - *val = value; - return (0); + return (value); } static int @@ -918,9 +903,9 @@ smu_sensor_sysctl(SYSCTL_HANDLER_ARGS) sc = device_get_softc(smu); sens = &sc->sc_sensors[arg2]; - error = smu_sensor_read(smu, sens, &value); - if (error != 0) - return (error); + value = smu_sensor_read(sens); + if (value < 0) + return (EBUSY); error = sysctl_handle_int(oidp, &value, 0, req); @@ -964,6 +949,7 @@ smu_attach_sensors(device_t dev, phandle_t sensroot) char sysctl_name[40], sysctl_desc[40]; const char *units; + sens->dev = dev; OF_getprop(child, "device_type", type, sizeof(type)); if (strcmp(type, "current-sensor") == 0) { @@ -983,98 +969,37 @@ smu_attach_sensors(device_t dev, phandle_t sensroot) } OF_getprop(child, "reg", &sens->reg, sizeof(cell_t)); - OF_getprop(child, "location", sens->location, - sizeof(sens->location)); + OF_getprop(child, "zone", &sens->therm.zone, sizeof(int)); + OF_getprop(child, "location", sens->therm.name, + sizeof(sens->therm.name)); - for (i = 0; i < strlen(sens->location); i++) { - sysctl_name[i] = tolower(sens->location[i]); + for (i = 0; i < strlen(sens->therm.name); i++) { + sysctl_name[i] = tolower(sens->therm.name[i]); if (isspace(sysctl_name[i])) sysctl_name[i] = '_'; } sysctl_name[i] = 0; - sprintf(sysctl_desc,"%s (%s)", sens->location, units); + sprintf(sysctl_desc,"%s (%s)", sens->therm.name, units); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(sensroot_oid), OID_AUTO, sysctl_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, - dev, sc->sc_nsensors, smu_sensor_sysctl, "I", sysctl_desc); - - sens++; - sc->sc_nsensors++; - } -} - 
-static void -smu_fan_management_proc(void *xdev) -{ - device_t smu = xdev; - - while(1) { - smu_manage_fans(smu); - pause("smu", SMU_FANMGT_INTERVAL * hz / 1000); - } -} - -static void -smu_manage_fans(device_t smu) -{ - struct smu_softc *sc; - int i, maxtemp, temp, factor, error; - - sc = device_get_softc(smu); - - maxtemp = 0; - for (i = 0; i < sc->sc_nsensors; i++) { - if (sc->sc_sensors[i].type != SMU_TEMP_SENSOR) - continue; - - error = smu_sensor_read(smu, &sc->sc_sensors[i], &temp); - if (error == 0 && temp > maxtemp) - maxtemp = temp; - } + dev, sc->sc_nsensors, smu_sensor_sysctl, + (sens->type == SMU_TEMP_SENSOR) ? "IK" : "I", sysctl_desc); - if (maxtemp > sc->sc_critical_temp) { - device_printf(smu, "WARNING: Current system temperature (%d C) " - "exceeds critical temperature (%d C)! Shutting down!\n", - maxtemp, sc->sc_critical_temp); - shutdown_nice(RB_POWEROFF); - } - - if (maxtemp - sc->sc_target_temp > 20) - device_printf(smu, "WARNING: Current system temperature (%d C) " - "more than 20 degrees over target temperature (%d C)!\n", - maxtemp, sc->sc_target_temp); - - if (time_uptime - sc->sc_lastuserchange < 3) { - /* - * If we have heard from a user process in the last 3 seconds, - * go away. 
- */ + if (sens->type == SMU_TEMP_SENSOR) { + /* Make up some numbers */ + sens->therm.target_temp = 500 + 2732; /* 50 C */ + sens->therm.max_temp = 900 + 2732; /* 90 C */ - return; - } + sens->therm.read = + (int (*)(struct pmac_therm *))smu_sensor_read; + pmac_thermal_sensor_register(&sens->therm); + } - if (maxtemp < 10) { /* Bail if no good sensors */ - for (i = 0; i < sc->sc_nfans; i++) - smu_fan_set_rpm(smu, &sc->sc_fans[i], - sc->sc_fans[i].unmanaged_rpm); - return; + sens++; + sc->sc_nsensors++; } - - if (maxtemp - sc->sc_target_temp > 4) - factor = 110; - else if (maxtemp - sc->sc_target_temp > 1) - factor = 105; - else if (sc->sc_target_temp - maxtemp > 4) - factor = 90; - else if (sc->sc_target_temp - maxtemp > 1) - factor = 95; - else - factor = 100; - - for (i = 0; i < sc->sc_nfans; i++) - smu_fan_set_rpm(smu, &sc->sc_fans[i], - (sc->sc_fans[i].setpoint * factor) / 100); } static void diff --git a/sys/powerpc/powermac/smusat.c b/sys/powerpc/powermac/smusat.c index 42f023a..fcaa9ed 100644 --- a/sys/powerpc/powermac/smusat.c +++ b/sys/powerpc/powermac/smusat.c @@ -43,9 +43,13 @@ __FBSDID("$FreeBSD$"); #include <dev/ofw/ofw_bus.h> #include <dev/ofw/openfirm.h> +#include <powerpc/powermac/powermac_thermal.h> + struct smu_sensor { + struct pmac_therm therm; + device_t dev; + cell_t reg; - char location[32]; enum { SMU_CURRENT_SENSOR, SMU_VOLTAGE_SENSOR, @@ -57,6 +61,7 @@ struct smu_sensor { static int smusat_probe(device_t); static int smusat_attach(device_t); static int smusat_sensor_sysctl(SYSCTL_HANDLER_ARGS); +static int smusat_sensor_read(struct smu_sensor *sens); MALLOC_DEFINE(M_SMUSAT, "smusat", "SMU Sattelite Sensors"); @@ -135,14 +140,16 @@ smusat_attach(device_t dev) char sysctl_name[40], sysctl_desc[40]; const char *units; + sens->dev = dev; sens->reg = 0; OF_getprop(child, "reg", &sens->reg, sizeof(sens->reg)); if (sens->reg < 0x30) continue; - sens->reg -= 0x30; - OF_getprop(child, "location", sens->location, - sizeof(sens->location)); + + 
OF_getprop(child, "zone", &sens->therm.zone, sizeof(int)); + OF_getprop(child, "location", sens->therm.name, + sizeof(sens->therm.name)); OF_getprop(child, "device_type", type, sizeof(type)); @@ -162,17 +169,27 @@ smusat_attach(device_t dev) continue; } - for (i = 0; i < strlen(sens->location); i++) { - sysctl_name[i] = tolower(sens->location[i]); + for (i = 0; i < strlen(sens->therm.name); i++) { + sysctl_name[i] = tolower(sens->therm.name[i]); if (isspace(sysctl_name[i])) sysctl_name[i] = '_'; } sysctl_name[i] = 0; - sprintf(sysctl_desc,"%s (%s)", sens->location, units); + sprintf(sysctl_desc,"%s (%s)", sens->therm.name, units); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(sensroot_oid), OID_AUTO, sysctl_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, - sc->sc_nsensors, smusat_sensor_sysctl, "I", sysctl_desc); + sc->sc_nsensors, smusat_sensor_sysctl, + (sens->type == SMU_TEMP_SENSOR) ? "IK" : "I", sysctl_desc); + + if (sens->type == SMU_TEMP_SENSOR) { + /* Make up some numbers */ + sens->therm.target_temp = 500 + 2732; /* 50 C */ + sens->therm.max_temp = 900 + 2732; /* 90 C */ + sens->therm.read = + (int (*)(struct pmac_therm *))smusat_sensor_read; + pmac_thermal_sensor_register(&sens->therm); + } sens++; sc->sc_nsensors++; @@ -198,11 +215,13 @@ smusat_updatecache(device_t dev) } static int -smusat_sensor_read(device_t dev, struct smu_sensor *sens, int *val) +smusat_sensor_read(struct smu_sensor *sens) { int value; + device_t dev; struct smusat_softc *sc; + dev = sens->dev; sc = device_get_softc(dev); if (time_uptime - sc->sc_last_update > 1) @@ -215,8 +234,8 @@ smusat_sensor_read(device_t dev, struct smu_sensor *sens, int *val) case SMU_TEMP_SENSOR: /* 16.16 */ value <<= 10; - /* Kill the .16 */ - value >>= 16; + /* From 16.16 to 0.1 C */ + value = 10*(value >> 16) + ((10*(value & 0xffff)) >> 16) + 2732; break; case SMU_VOLTAGE_SENSOR: /* 16.16 */ @@ -235,8 +254,7 @@ smusat_sensor_read(device_t dev, struct smu_sensor *sens, int *val) break; } - *val = value; - 
return (0); + return (value); } static int @@ -251,9 +269,9 @@ smusat_sensor_sysctl(SYSCTL_HANDLER_ARGS) sc = device_get_softc(dev); sens = &sc->sc_sensors[arg2]; - error = smusat_sensor_read(dev, sens, &value); - if (error != 0) - return (error); + value = smusat_sensor_read(sens); + if (value < 0) + return (EBUSY); error = sysctl_handle_int(oidp, &value, 0, req); diff --git a/sys/powerpc/powermac/windtunnel.c b/sys/powerpc/powermac/windtunnel.c new file mode 100644 index 0000000..b4aeca3 --- /dev/null +++ b/sys/powerpc/powermac/windtunnel.c @@ -0,0 +1,216 @@ +/*- + * Copyright (c) 2011 Justin Hibbits + * Copyright (c) 2010 Andreas Tobler + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/systm.h> +#include <sys/module.h> +#include <sys/callout.h> +#include <sys/conf.h> +#include <sys/cpu.h> +#include <sys/ctype.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/limits.h> +#include <sys/reboot.h> +#include <sys/rman.h> +#include <sys/sysctl.h> +#include <sys/unistd.h> + +#include <machine/bus.h> +#include <machine/md_var.h> + +#include <dev/iicbus/iicbus.h> +#include <dev/iicbus/iiconf.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <powerpc/powermac/powermac_thermal.h> + +struct adm1030_softc { + struct pmac_fan fan; + device_t sc_dev; + struct intr_config_hook enum_hook; + uint32_t sc_addr; + phandle_t sc_thermostat_phandle; + device_t sc_thermostat_dev; +}; + +/* Regular bus attachment functions */ +static int adm1030_probe(device_t); +static int adm1030_attach(device_t); + +/* Utility functions */ +static void adm1030_start(void *xdev); +static int adm1030_write_byte(device_t dev, uint32_t addr, uint8_t reg, uint8_t buf); +static int adm1030_set(struct adm1030_softc *fan, int pwm); + +static device_method_t adm1030_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, adm1030_probe), + DEVMETHOD(device_attach, adm1030_attach), + {0, 0}, +}; + +static driver_t adm1030_driver = { + "adm1030", + adm1030_methods, + sizeof(struct adm1030_softc) +}; + +static devclass_t adm1030_devclass; + +DRIVER_MODULE(adm1030, iicbus, adm1030_driver, adm1030_devclass, 0, 0); + +static int +adm1030_write_byte(device_t dev, uint32_t addr, uint8_t reg, uint8_t byte) +{ + unsigned char buf[4]; + int try = 0; + + struct iic_msg msg[] = { + {addr, IIC_M_WR, 0, buf} + }; + + msg[0].len = 2; + buf[0] = reg; + buf[1] = byte; + + for (;;) + { + if (iicbus_transfer(dev, msg, 1) == 0) + return (0); + + if (++try > 5) { + device_printf(dev, "iicbus write failed\n"); + return (-1); + } + 
pause("adm1030_write_byte", hz); + } +} + +static int +adm1030_probe(device_t dev) +{ + const char *name, *compatible; + struct adm1030_softc *sc; + phandle_t handle; + phandle_t thermostat; + + name = ofw_bus_get_name(dev); + compatible = ofw_bus_get_compat(dev); + handle = ofw_bus_get_node(dev); + + if (!name) + return (ENXIO); + + if (strcmp(name, "fan") != 0 || strcmp(compatible, "adm1030") != 0) + return (ENXIO); + + /* This driver can only be used if there's an associated temp sensor. */ + if (OF_getprop(handle, "platform-getTemp", &thermostat, sizeof(thermostat)) < 0) + return (ENXIO); + + sc = device_get_softc(dev); + sc->sc_dev = dev; + sc->sc_addr = iicbus_get_addr(dev); + + device_set_desc(dev, "G4 MDD Fan driver"); + + return (0); +} + +static int +adm1030_attach(device_t dev) +{ + struct adm1030_softc *sc; + + sc = device_get_softc(dev); + + sc->enum_hook.ich_func = adm1030_start; + sc->enum_hook.ich_arg = dev; + + /* + * We have to wait until interrupts are enabled. I2C read and write + * only works if the interrupts are available. The unin/i2c is + * controlled by the htpic on unin. But this is not the master. The + * openpic on mac-io is controlling the htpic. This one gets attached + * after the mac-io probing and then the interrupts will be + * available. + */ + + if (config_intrhook_establish(&sc->enum_hook) != 0) + return (ENOMEM); + + return (0); +} + +static void +adm1030_start(void *xdev) +{ + struct adm1030_softc *sc; + + device_t dev = (device_t) xdev; + + sc = device_get_softc(dev); + + /* Start the adm1030 device. */ + adm1030_write_byte(sc->sc_dev, sc->sc_addr, 0x1, 0x1); + adm1030_write_byte(sc->sc_dev, sc->sc_addr, 0x0, 0x95); + adm1030_write_byte(sc->sc_dev, sc->sc_addr, 0x23, 0x91); + + /* Use the RPM fields as PWM duty cycles. 
*/ + sc->fan.min_rpm = 0; + sc->fan.max_rpm = 15; + sc->fan.default_rpm = 2; + + strcpy(sc->fan.name, "MDD Case fan"); + sc->fan.zone = 0; + sc->fan.read = NULL; + sc->fan.set = (int (*)(struct pmac_fan *, int))adm1030_set; + config_intrhook_disestablish(&sc->enum_hook); + + pmac_thermal_fan_register(&sc->fan); +} + +static int adm1030_set(struct adm1030_softc *fan, int pwm) +{ + /* Clamp the PWM to 0-0xF, one nibble. */ + if (pwm > 0xF) + pwm = 0xF; + if (pwm < 0) + pwm = 0; + + if (adm1030_write_byte(fan->sc_dev, fan->sc_addr, 0x22, pwm) < 0) + return (-1); + + return (0); +} + diff --git a/sys/powerpc/powerpc/intr_machdep.c b/sys/powerpc/powerpc/intr_machdep.c index f2bfa33..1e6342c 100644 --- a/sys/powerpc/powerpc/intr_machdep.c +++ b/sys/powerpc/powerpc/intr_machdep.c @@ -67,6 +67,7 @@ #include <sys/kernel.h> #include <sys/queue.h> #include <sys/bus.h> +#include <sys/cpuset.h> #include <sys/interrupt.h> #include <sys/ktr.h> #include <sys/lock.h> @@ -98,7 +99,7 @@ struct powerpc_intr { u_int intline; u_int vector; u_int cntindex; - cpumask_t cpu; + cpuset_t cpu; enum intr_trigger trig; enum intr_polarity pol; }; @@ -205,7 +206,7 @@ intr_lookup(u_int irq) #ifdef SMP i->cpu = all_cpus; #else - i->cpu = 1; + CPU_SETOF(0, &i->cpu); #endif for (vector = 0; vector < INTR_VECTORS && vector <= nvectors; @@ -296,7 +297,7 @@ powerpc_assign_intr_cpu(void *arg, u_char cpu) if (cpu == NOCPU) i->cpu = all_cpus; else - i->cpu = 1 << cpu; + CPU_SETOF(cpu, &i->cpu); if (!cold && i->pic != NULL && i->pic == root_pic) PIC_BIND(i->pic, i->intline, i->cpu); diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c index 02920da..62a97e9 100644 --- a/sys/powerpc/powerpc/mp_machdep.c +++ b/sys/powerpc/powerpc/mp_machdep.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/bus.h> +#include <sys/cpuset.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/pcpu.h> @@ -157,7 +158,7 @@ cpu_mp_start(void) 
cpu.cr_cpuid); goto next; } - if (all_cpus & (1 << cpu.cr_cpuid)) { + if (CPU_ISSET(cpu.cr_cpuid, &all_cpus)) { printf("SMP: cpu%d: skipped - duplicate ID\n", cpu.cr_cpuid); goto next; @@ -174,9 +175,9 @@ cpu_mp_start(void) pc->pc_cpuid = bsp.cr_cpuid; pc->pc_bsp = 1; } - pc->pc_cpumask = 1 << pc->pc_cpuid; + CPU_SETOF(pc->pc_cpuid, &pc->pc_cpumask); pc->pc_hwref = cpu.cr_hwref; - all_cpus |= pc->pc_cpumask; + CPU_OR(&all_cpus, &pc->pc_cpumask); next: error = platform_smp_next_cpu(&cpu); } @@ -212,9 +213,10 @@ cpu_mp_unleash(void *dummy) cpus = 0; smp_cpus = 0; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { cpus++; - pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask; + pc->pc_other_cpus = all_cpus; + CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask); if (!pc->pc_bsp) { if (bootverbose) printf("Waking up CPU %d (dev=%x)\n", @@ -236,7 +238,7 @@ cpu_mp_unleash(void *dummy) pc->pc_cpuid, pc->pc_pir, pc->pc_awake); smp_cpus++; } else - stopped_cpus |= (1 << pc->pc_cpuid); + CPU_SET(pc->pc_cpuid, &stopped_cpus); } ap_awake = 1; @@ -276,7 +278,7 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL); int powerpc_ipi_handler(void *arg) { - cpumask_t self; + cpuset_t self; uint32_t ipimask; int msg; @@ -311,11 +313,11 @@ powerpc_ipi_handler(void *arg) savectx(&stoppcbs[PCPU_GET(cpuid)]); self = PCPU_GET(cpumask); savectx(PCPU_GET(curpcb)); - atomic_set_int(&stopped_cpus, self); - while ((started_cpus & self) == 0) + CPU_OR_ATOMIC(&stopped_cpus, &self); + while (!CPU_OVERLAP(&started_cpus, &self)) cpu_spinwait(); - atomic_clear_int(&started_cpus, self); - atomic_clear_int(&stopped_cpus, self); + CPU_NAND_ATOMIC(&started_cpus, &self); + CPU_NAND_ATOMIC(&stopped_cpus, &self); CTR1(KTR_SMP, "%s: IPI_STOP (restart)", __func__); break; case IPI_HARDCLOCK: @@ -343,12 +345,12 @@ ipi_send(struct pcpu *pc, int ipi) /* Send an IPI to a set of cpus. 
*/ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { - if (cpus & pc->pc_cpumask) + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) ipi_send(pc, ipi); } } @@ -367,7 +369,7 @@ ipi_all_but_self(int ipi) { struct pcpu *pc; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) ipi_send(pc, ipi); } diff --git a/sys/powerpc/powerpc/openpic.c b/sys/powerpc/powerpc/openpic.c index 042f8b8..347dc3f 100644 --- a/sys/powerpc/powerpc/openpic.c +++ b/sys/powerpc/powerpc/openpic.c @@ -231,7 +231,7 @@ openpic_common_attach(device_t dev, uint32_t node) */ void -openpic_bind(device_t dev, u_int irq, cpumask_t cpumask) +openpic_bind(device_t dev, u_int irq, cpuset_t cpumask) { struct openpic_softc *sc; @@ -240,7 +240,12 @@ openpic_bind(device_t dev, u_int irq, cpumask_t cpumask) return; sc = device_get_softc(dev); - openpic_write(sc, OPENPIC_IDEST(irq), cpumask); + + /* + * XXX: openpic_write() is very special and just needs a 32 bits mask. + * For the moment, just play dirty and get the first half word. 
+ */ + openpic_write(sc, OPENPIC_IDEST(irq), cpumask.__bits[0] & 0xffffffff); } void diff --git a/sys/powerpc/powerpc/pic_if.m b/sys/powerpc/powerpc/pic_if.m index 185cc08..e429d31 100644 --- a/sys/powerpc/powerpc/pic_if.m +++ b/sys/powerpc/powerpc/pic_if.m @@ -28,6 +28,7 @@ # #include <sys/bus.h> +#include <sys/cpuset.h> #include <machine/frame.h> INTERFACE pic; @@ -35,7 +36,7 @@ INTERFACE pic; METHOD void bind { device_t dev; u_int irq; - cpumask_t cpumask; + cpuset_t cpumask; }; METHOD void config { diff --git a/sys/powerpc/ps3/if_glc.c b/sys/powerpc/ps3/if_glc.c index 6901f44..d87383d 100644 --- a/sys/powerpc/ps3/if_glc.c +++ b/sys/powerpc/ps3/if_glc.c @@ -135,6 +135,7 @@ glc_attach(device_t dev) callout_init_mtx(&sc->sc_tick_ch, &sc->sc_mtx, 0); sc->next_txdma_slot = 0; sc->bsy_txdma_slots = 0; + sc->sc_next_rxdma_slot = 0; sc->first_used_txdma_slot = -1; /* @@ -375,6 +376,14 @@ glc_tick(void *xsc) mtx_assert(&sc->sc_mtx, MA_OWNED); + /* + * XXX: Sometimes the RX queue gets stuck. Poke it periodically until + * we figure out why. This will fail harmlessly if the RX queue is + * already running. 
+ */ + lv1_net_start_rx_dma(sc->sc_bus, sc->sc_dev, + sc->sc_rxsoft[sc->sc_next_rxdma_slot].rxs_desc, 0); + if (sc->sc_wdog_timer == 0 || --sc->sc_wdog_timer != 0) { callout_reset(&sc->sc_tick_ch, hz, glc_tick, sc); return; @@ -707,12 +716,19 @@ glc_rxintr(struct glc_softc *sc) struct ifnet *ifp = sc->sc_ifp; bus_dmamap_sync(sc->sc_dmadesc_tag, sc->sc_rxdmadesc_map, - BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_POSTREAD); restart_rxdma = 0; while ((sc->sc_rxdmadesc[sc->sc_next_rxdma_slot].cmd_stat & GELIC_DESCR_OWNED) == 0) { i = sc->sc_next_rxdma_slot; + sc->sc_next_rxdma_slot++; + if (sc->sc_next_rxdma_slot >= GLC_MAX_RX_PACKETS) + sc->sc_next_rxdma_slot = 0; + + if (sc->sc_rxdmadesc[i].cmd_stat & GELIC_CMDSTAT_CHAIN_END) + restart_rxdma = 1; + if (sc->sc_rxdmadesc[i].rxerror & GELIC_RXERRORS) { ifp->if_ierrors++; goto requeue; @@ -738,9 +754,6 @@ glc_rxintr(struct glc_softc *sc) m->m_pkthdr.rcvif = ifp; m->m_len = sc->sc_rxdmadesc[i].valid_size; m->m_pkthdr.len = m->m_len; - sc->sc_next_rxdma_slot++; - if (sc->sc_next_rxdma_slot >= GLC_MAX_RX_PACKETS) - sc->sc_next_rxdma_slot = 0; if (sc->sc_rx_vlan >= 0) m_adj(m, 2); @@ -750,16 +763,18 @@ glc_rxintr(struct glc_softc *sc) mtx_lock(&sc->sc_mtx); requeue: - if (sc->sc_rxdmadesc[i].cmd_stat & GELIC_CMDSTAT_CHAIN_END) - restart_rxdma = 1; glc_add_rxbuf_dma(sc, i); - if (restart_rxdma) { - error = lv1_net_start_rx_dma(sc->sc_bus, sc->sc_dev, - sc->sc_rxsoft[i].rxs_desc, 0); - if (error != 0) - device_printf(sc->sc_self, - "lv1_net_start_rx_dma error: %d\n", error); - } + } + + bus_dmamap_sync(sc->sc_dmadesc_tag, sc->sc_rxdmadesc_map, + BUS_DMASYNC_PREWRITE); + + if (restart_rxdma) { + error = lv1_net_start_rx_dma(sc->sc_bus, sc->sc_dev, + sc->sc_rxsoft[sc->sc_next_rxdma_slot].rxs_desc, 0); + if (error != 0) + device_printf(sc->sc_self, + "lv1_net_start_rx_dma error: %d\n", error); } } @@ -770,6 +785,9 @@ glc_txintr(struct glc_softc *sc) struct glc_txsoft *txs; int progress = 0, kickstart = 0, error; + 
bus_dmamap_sync(sc->sc_dmadesc_tag, sc->sc_txdmadesc_map, + BUS_DMASYNC_POSTREAD); + while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) { if (sc->sc_txdmadesc[txs->txs_lastdesc].cmd_stat & GELIC_DESCR_OWNED) @@ -805,7 +823,8 @@ glc_txintr(struct glc_softc *sc) else sc->first_used_txdma_slot = -1; - if (kickstart && txs != NULL) { + if (kickstart || txs != NULL) { + /* Speculatively (or necessarily) start the TX queue again */ error = lv1_net_start_tx_dma(sc->sc_bus, sc->sc_dev, sc->sc_txdmadesc_phys + txs->txs_firstdesc*sizeof(struct glc_dmadesc), 0); diff --git a/sys/powerpc/ps3/ohci_ps3.c b/sys/powerpc/ps3/ohci_ps3.c new file mode 100644 index 0000000..e13435b --- /dev/null +++ b/sys/powerpc/ps3/ohci_ps3.c @@ -0,0 +1,170 @@ +/*- + * Copyright (C) 2010 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/stdint.h> +#include <sys/stddef.h> +#include <sys/param.h> +#include <sys/queue.h> +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/linker_set.h> +#include <sys/module.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/condvar.h> +#include <sys/sysctl.h> +#include <sys/sx.h> +#include <sys/unistd.h> +#include <sys/callout.h> +#include <sys/malloc.h> +#include <sys/priv.h> + +#include <sys/rman.h> + +#include <dev/usb/usb.h> +#include <dev/usb/usbdi.h> + +#include <dev/usb/usb_core.h> +#include <dev/usb/usb_busdma.h> +#include <dev/usb/usb_process.h> +#include <dev/usb/usb_util.h> + +#include <dev/usb/usb_controller.h> +#include <dev/usb/usb_bus.h> +#include <dev/usb/controller/ohci.h> +#include <dev/usb/controller/ohcireg.h> + +#include "ps3bus.h" + +static int +ohci_ps3_probe(device_t dev) +{ + if (ps3bus_get_bustype(dev) != PS3_BUSTYPE_SYSBUS || + ps3bus_get_devtype(dev) != PS3_DEVTYPE_USB) + return (ENXIO); + + device_set_desc(dev, "Playstation 3 USB 2.0 controller"); + return (BUS_PROBE_SPECIFIC); +} + +static int +ohci_ps3_attach(device_t dev) +{ + ohci_softc_t *sc = device_get_softc(dev); + int rid, err; + + sc->sc_bus.parent = dev; + sc->sc_bus.devices = sc->sc_devices; + sc->sc_bus.devices_max = OHCI_MAX_DEVICES; + + if (usb_bus_mem_alloc_all(&sc->sc_bus, + USB_GET_DMA_TAG(dev), 
&ohci_iterate_hw_softc)) + return (ENOMEM); + + rid = 0; + sc->sc_io_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + + if (!sc->sc_io_res) { + device_printf(dev, "Could not map memory\n"); + goto error; + } + + sc->sc_io_tag = rman_get_bustag(sc->sc_io_res); + sc->sc_io_hdl = rman_get_bushandle(sc->sc_io_res); + sc->sc_io_size = rman_get_size(sc->sc_io_res); + + rid = 0; + sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_SHAREABLE | RF_ACTIVE); + + if (sc->sc_irq_res == NULL) { + device_printf(dev, "Could not allocate irq\n"); + return (ENXIO); + } + + sc->sc_bus.bdev = device_add_child(dev, "usbus", -1); + if (!sc->sc_bus.bdev) { + device_printf(dev, "Could not add USB device\n"); + return (ENXIO); + } + + device_set_ivars(sc->sc_bus.bdev, &sc->sc_bus); + + sprintf(sc->sc_vendor, "Sony"); + + err = bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_BIO | INTR_MPSAFE, + NULL, (driver_intr_t *)ohci_interrupt, sc, &sc->sc_intr_hdl); + if (err) { + device_printf(dev, "Could not setup error irq, %d\n", err); + goto error; + } + + //sc->sc_flags |= EHCI_SCFLG_BIGEMMIO; + bus_space_write_4(sc->sc_io_tag, sc->sc_io_hdl, + OHCI_CONTROL, 0); + err = ohci_init(sc); + if (err) { + device_printf(dev, "USB init failed err=%d\n", err); + goto error; + } + + err = device_probe_and_attach(sc->sc_bus.bdev); + if (err == 0) + return (0); + +error: + return (ENXIO); +} + +static device_method_t ohci_ps3_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ohci_ps3_probe), + DEVMETHOD(device_attach, ohci_ps3_attach), + + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), + + {0, 0} +}; + +static driver_t ohci_ps3_driver = { + "ohci", + ohci_ps3_methods, + sizeof(ohci_softc_t), +}; + +static devclass_t ohci_ps3_devclass; + +DRIVER_MODULE(ohci_ps3, ps3bus, ohci_ps3_driver, ohci_ps3_devclass, 0, 0); +MODULE_DEPEND(ohci_ps3, usb, 1, 1, 1); + diff --git a/sys/powerpc/ps3/ps3bus.c b/sys/powerpc/ps3/ps3bus.c index 
6a5120a..2fe303d 100644 --- a/sys/powerpc/ps3/ps3bus.c +++ b/sys/powerpc/ps3/ps3bus.c @@ -1,5 +1,6 @@ /*- * Copyright (C) 2010 Nathan Whitehorn + * Copyright (C) 2011 glevand (geoffrey.levand@mail.ru) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -73,6 +74,8 @@ struct ps3bus_devinfo { int dev; uint64_t bustype; uint64_t devtype; + int busidx; + int devidx; struct resource_list resources; bus_dma_tag_t dma_tag; @@ -89,6 +92,11 @@ enum ps3bus_irq_type { EHCI_IRQ = 4, }; +enum ps3bus_reg_type { + OHCI_REG = 3, + EHCI_REG = 4, +}; + static device_method_t ps3bus_methods[] = { /* Device interface */ DEVMETHOD(device_identify, ps3bus_identify), @@ -235,6 +243,77 @@ ps3bus_resources_init(struct rman *rm, int bus_index, int dev_index, } } +static void +ps3bus_resources_init_by_type(struct rman *rm, int bus_index, int dev_index, + uint64_t irq_type, uint64_t reg_type, struct ps3bus_devinfo *dinfo) +{ + uint64_t _irq_type, irq, outlet; + uint64_t _reg_type, paddr, len; + uint64_t ppe, junk; + int i, result; + int thread; + + resource_list_init(&dinfo->resources); + + lv1_get_logical_ppe_id(&ppe); + thread = 32 - fls(mfctrl()); + + /* Scan for interrupts */ + for (i = 0; i < 10; i++) { + result = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("intr") | i, 0, &_irq_type, &irq); + + if (result != 0) + break; + + if (_irq_type != irq_type) + continue; + + lv1_construct_io_irq_outlet(irq, &outlet); + lv1_connect_irq_plug_ext(ppe, thread, outlet, outlet, + 0); + resource_list_add(&dinfo->resources, SYS_RES_IRQ, i, + outlet, outlet, 1); + } + + /* Scan for registers */ + for (i = 0; i < 10; i++) { + result = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("reg") | i, + 
lv1_repository_string("type"), &_reg_type, &junk); + + if (result != 0) + break; + + if (_reg_type != reg_type) + continue; + + result = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("reg") | i, + lv1_repository_string("data"), &paddr, &len); + + result = lv1_map_device_mmio_region(dinfo->bus, dinfo->dev, + paddr, len, 12 /* log_2(4 KB) */, &paddr); + + if (result != 0) { + printf("Mapping registers failed for device " + "%d.%d (%ld.%ld): %d\n", dinfo->bus, dinfo->dev, + dinfo->bustype, dinfo->devtype, result); + break; + } + + rman_manage_region(rm, paddr, paddr + len - 1); + resource_list_add(&dinfo->resources, SYS_RES_MEMORY, i, + paddr, paddr + len, len); + } +} + static int ps3bus_attach(device_t self) { @@ -294,30 +373,93 @@ ps3bus_attach(device_t self) if (result != 0) continue; - dinfo = malloc(sizeof(*dinfo), M_PS3BUS, - M_WAITOK | M_ZERO); - - dinfo->bus = bus; - dinfo->dev = dev; - dinfo->bustype = bustype; - dinfo->devtype = devtype; + switch (devtype) { + case PS3_DEVTYPE_USB: + /* USB device has OHCI and EHCI USB host controllers */ - if (dinfo->bustype == PS3_BUSTYPE_SYSBUS) lv1_open_device(bus, dev, 0); - ps3bus_resources_init(&sc->sc_mem_rman, bus_index, - dev_index, dinfo); - - cdev = device_add_child(self, NULL, -1); - if (cdev == NULL) { - device_printf(self, - "device_add_child failed\n"); - free(dinfo, M_PS3BUS); - continue; + /* OHCI host controller */ + + dinfo = malloc(sizeof(*dinfo), M_PS3BUS, + M_WAITOK | M_ZERO); + + dinfo->bus = bus; + dinfo->dev = dev; + dinfo->bustype = bustype; + dinfo->devtype = devtype; + dinfo->busidx = bus_index; + dinfo->devidx = dev_index; + + ps3bus_resources_init_by_type(&sc->sc_mem_rman, bus_index, + dev_index, OHCI_IRQ, OHCI_REG, dinfo); + + cdev = device_add_child(self, "ohci", -1); + if (cdev == NULL) { + device_printf(self, + "device_add_child failed\n"); + free(dinfo, M_PS3BUS); + 
continue; + } + + mtx_init(&dinfo->iommu_mtx, "iommu", NULL, MTX_DEF); + device_set_ivars(cdev, dinfo); + + /* EHCI host controller */ + + dinfo = malloc(sizeof(*dinfo), M_PS3BUS, + M_WAITOK | M_ZERO); + + dinfo->bus = bus; + dinfo->dev = dev; + dinfo->bustype = bustype; + dinfo->devtype = devtype; + dinfo->busidx = bus_index; + dinfo->devidx = dev_index; + + ps3bus_resources_init_by_type(&sc->sc_mem_rman, bus_index, + dev_index, EHCI_IRQ, EHCI_REG, dinfo); + + cdev = device_add_child(self, "ehci", -1); + if (cdev == NULL) { + device_printf(self, + "device_add_child failed\n"); + free(dinfo, M_PS3BUS); + continue; + } + + mtx_init(&dinfo->iommu_mtx, "iommu", NULL, MTX_DEF); + device_set_ivars(cdev, dinfo); + break; + default: + dinfo = malloc(sizeof(*dinfo), M_PS3BUS, + M_WAITOK | M_ZERO); + + dinfo->bus = bus; + dinfo->dev = dev; + dinfo->bustype = bustype; + dinfo->devtype = devtype; + dinfo->busidx = bus_index; + dinfo->devidx = dev_index; + + if (dinfo->bustype == PS3_BUSTYPE_SYSBUS || + dinfo->bustype == PS3_BUSTYPE_STORAGE) + lv1_open_device(bus, dev, 0); + + ps3bus_resources_init(&sc->sc_mem_rman, bus_index, + dev_index, dinfo); + + cdev = device_add_child(self, NULL, -1); + if (cdev == NULL) { + device_printf(self, + "device_add_child failed\n"); + free(dinfo, M_PS3BUS); + continue; + } + + mtx_init(&dinfo->iommu_mtx, "iommu", NULL, MTX_DEF); + device_set_ivars(cdev, dinfo); } - - mtx_init(&dinfo->iommu_mtx, "iommu", NULL, MTX_DEF); - device_set_ivars(cdev, dinfo); } } @@ -361,6 +503,12 @@ ps3bus_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) case PS3BUS_IVAR_DEVTYPE: *result = dinfo->devtype; break; + case PS3BUS_IVAR_BUSIDX: + *result = dinfo->busidx; + break; + case PS3BUS_IVAR_DEVIDX: + *result = dinfo->devidx; + break; default: return (EINVAL); } @@ -483,7 +631,8 @@ ps3bus_get_dma_tag(device_t dev, device_t child) struct ps3bus_softc *sc = device_get_softc(dev); int i, err, flags; - if (dinfo->bustype != PS3_BUSTYPE_SYSBUS) + if 
(dinfo->bustype != PS3_BUSTYPE_SYSBUS && + dinfo->bustype != PS3_BUSTYPE_STORAGE) return (bus_get_dma_tag(dev)); mtx_lock(&dinfo->iommu_mtx); diff --git a/sys/powerpc/ps3/ps3bus.h b/sys/powerpc/ps3/ps3bus.h index b11ff8a..6725625 100644 --- a/sys/powerpc/ps3/ps3bus.h +++ b/sys/powerpc/ps3/ps3bus.h @@ -32,7 +32,9 @@ enum { PS3BUS_IVAR_BUS, PS3BUS_IVAR_DEVICE, PS3BUS_IVAR_BUSTYPE, - PS3BUS_IVAR_DEVTYPE + PS3BUS_IVAR_DEVTYPE, + PS3BUS_IVAR_BUSIDX, + PS3BUS_IVAR_DEVIDX, }; #define PS3BUS_ACCESSOR(A, B, T) \ @@ -42,6 +44,8 @@ PS3BUS_ACCESSOR(bus, BUS, int) PS3BUS_ACCESSOR(device, DEVICE, int) PS3BUS_ACCESSOR(bustype, BUSTYPE, uint64_t) PS3BUS_ACCESSOR(devtype, DEVTYPE, uint64_t) +PS3BUS_ACCESSOR(busidx, BUSIDX, int) +PS3BUS_ACCESSOR(devidx, DEVIDX, int) /* Bus types */ enum { diff --git a/sys/powerpc/ps3/ps3disk.c b/sys/powerpc/ps3/ps3disk.c new file mode 100644 index 0000000..5390f50 --- /dev/null +++ b/sys/powerpc/ps3/ps3disk.c @@ -0,0 +1,901 @@ +/*- + * Copyright (C) 2011 glevand (geoffrey.levand@mail.ru) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <sys/disk.h> +#include <sys/bio.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <machine/pio.h> +#include <machine/bus.h> +#include <machine/platform.h> +#include <machine/pmap.h> +#include <machine/resource.h> +#include <sys/bus.h> +#include <sys/rman.h> + +#include <geom/geom_disk.h> + +#include "ps3bus.h" +#include "ps3-hvcall.h" + +#define PS3DISK_LOCK_INIT(_sc) \ + mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->sc_dev), "ps3disk", MTX_DEF) +#define PS3DISK_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); +#define PS3DISK_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) +#define PS3DISK_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) +#define PS3DISK_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); +#define PS3DISK_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); + +#define LV1_STORAGE_ATA_HDDOUT 0x23 + +SYSCTL_NODE(_hw, OID_AUTO, ps3disk, CTLFLAG_RD, 0, "PS3 Disk driver parameters"); + +#ifdef PS3DISK_DEBUG +static int ps3disk_debug = 0; +SYSCTL_INT(_hw_ps3disk, OID_AUTO, debug, CTLFLAG_RW, &ps3disk_debug, + 0, "control debugging printfs"); +TUNABLE_INT("hw.ps3disk.debug", &ps3disk_debug); +enum { + PS3DISK_DEBUG_INTR = 
0x00000001, + PS3DISK_DEBUG_TASK = 0x00000002, + PS3DISK_DEBUG_READ = 0x00000004, + PS3DISK_DEBUG_WRITE = 0x00000008, + PS3DISK_DEBUG_FLUSH = 0x00000010, + PS3DISK_DEBUG_ANY = 0xffffffff +}; +#define DPRINTF(sc, m, fmt, ...) \ +do { \ + if (sc->sc_debug & (m)) \ + printf(fmt, __VA_ARGS__); \ +} while (0) +#else +#define DPRINTF(sc, m, fmt, ...) +#endif + +struct ps3disk_region { + uint64_t r_id; + uint64_t r_start; + uint64_t r_size; + uint64_t r_flags; +}; + +struct ps3disk_softc { + device_t sc_dev; + + struct mtx sc_mtx; + + uint64_t sc_blksize; + uint64_t sc_nblocks; + + uint64_t sc_nregs; + struct ps3disk_region *sc_reg; + + int sc_irqid; + struct resource *sc_irq; + void *sc_irqctx; + + struct disk **sc_disk; + + struct bio_queue_head sc_bioq; + + struct proc *sc_task; + + int sc_bounce_maxblocks; + bus_dma_tag_t sc_bounce_dmatag; + bus_dmamap_t sc_bounce_dmamap; + bus_addr_t sc_bounce_dmaphys; + char *sc_bounce; + uint64_t sc_bounce_lpar; + int sc_bounce_busy; + uint64_t sc_bounce_tag; + uint64_t sc_bounce_status; + + int sc_running; + + int sc_debug; +}; + +static int ps3disk_open(struct disk *dp); +static int ps3disk_close(struct disk *dp); +static void ps3disk_strategy(struct bio *bp); +static void ps3disk_task(void *arg); + +static int ps3disk_intr_filter(void *arg); +static void ps3disk_intr(void *arg); +static void ps3disk_getphys(void *arg, bus_dma_segment_t *segs, int nsegs, int error); +static int ps3disk_get_disk_geometry(struct ps3disk_softc *sc); +static int ps3disk_enum_regions(struct ps3disk_softc *sc); +static int ps3disk_read(struct ps3disk_softc *sc, int regidx, + uint64_t start_sector, uint64_t sector_count, char *data); +static int ps3disk_write(struct ps3disk_softc *sc, int regidx, + uint64_t start_sector, uint64_t sector_count, char *data); +static int ps3disk_flush(struct ps3disk_softc *sc); + +static void ps3disk_sysctlattach(struct ps3disk_softc *sc); + +static MALLOC_DEFINE(M_PS3DISK, "ps3disk", "PS3 Disk"); + +static int 
+ps3disk_probe(device_t dev) +{ + if (ps3bus_get_bustype(dev) != PS3_BUSTYPE_STORAGE || + ps3bus_get_devtype(dev) != PS3_DEVTYPE_DISK) + return (ENXIO); + + device_set_desc(dev, "Playstation 3 Disk"); + + return (BUS_PROBE_SPECIFIC); +} + +static int +ps3disk_attach(device_t dev) +{ + struct ps3disk_softc *sc; + struct disk *d; + intmax_t mb; + char unit; + int i, err; + + sc = device_get_softc(dev); + sc->sc_dev = dev; + + PS3DISK_LOCK_INIT(sc); + + err = ps3disk_get_disk_geometry(sc); + if (err) { + device_printf(dev, "Could not get disk geometry\n"); + err = ENXIO; + goto fail_destroy_lock; + } + + device_printf(dev, "block size %lu total blocks %lu\n", + sc->sc_blksize, sc->sc_nblocks); + + err = ps3disk_enum_regions(sc); + if (err) { + device_printf(dev, "Could not enumerate disk regions\n"); + err = ENXIO; + goto fail_destroy_lock; + } + + device_printf(dev, "Found %lu regions\n", sc->sc_nregs); + + if (!sc->sc_nregs) { + err = ENXIO; + goto fail_destroy_lock; + } + + /* Setup interrupt handler */ + + sc->sc_irqid = 0; + sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->sc_irqid, + RF_ACTIVE); + if (!sc->sc_irq) { + device_printf(dev, "Could not allocate IRQ\n"); + err = ENXIO; + goto fail_free_regions; + } + + err = bus_setup_intr(dev, sc->sc_irq, + INTR_TYPE_BIO | INTR_MPSAFE | INTR_ENTROPY, + ps3disk_intr_filter, ps3disk_intr, sc, &sc->sc_irqctx); + if (err) { + device_printf(dev, "Could not setup IRQ\n"); + err = ENXIO; + goto fail_release_intr; + } + + /* Setup DMA bounce buffer */ + + sc->sc_bounce_maxblocks = DFLTPHYS / sc->sc_blksize; + + err = bus_dma_tag_create(bus_get_dma_tag(dev), 4096, 0, + BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, + sc->sc_bounce_maxblocks * sc->sc_blksize, 1, + sc->sc_bounce_maxblocks * sc->sc_blksize, + 0, NULL, NULL, &sc->sc_bounce_dmatag); + if (err) { + device_printf(dev, "Could not create DMA tag for bounce buffer\n"); + err = ENXIO; + goto fail_teardown_intr; + } + + err = 
bus_dmamem_alloc(sc->sc_bounce_dmatag, (void **) &sc->sc_bounce, + BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, + &sc->sc_bounce_dmamap); + if (err) { + device_printf(dev, "Could not allocate DMA memory for bounce buffer\n"); + err = ENXIO; + goto fail_destroy_dmatag; + } + + err = bus_dmamap_load(sc->sc_bounce_dmatag, sc->sc_bounce_dmamap, + sc->sc_bounce, sc->sc_bounce_maxblocks * sc->sc_blksize, + ps3disk_getphys, &sc->sc_bounce_dmaphys, 0); + if (err) { + device_printf(dev, "Could not load DMA map for bounce buffer\n"); + err = ENXIO; + goto fail_free_dmamem; + } + + sc->sc_bounce_lpar = vtophys(sc->sc_bounce); + + if (bootverbose) + device_printf(dev, "bounce buffer lpar address 0x%016lx\n", + sc->sc_bounce_lpar); + + /* Setup disks */ + + sc->sc_disk = malloc(sc->sc_nregs * sizeof(struct disk *), + M_PS3DISK, M_ZERO | M_WAITOK); + if (!sc->sc_disk) { + device_printf(dev, "Could not allocate disk(s)\n"); + err = ENOMEM; + goto fail_unload_dmamem; + } + + for (i = 0; i < sc->sc_nregs; i++) { + struct ps3disk_region *rp = &sc->sc_reg[i]; + + d = sc->sc_disk[i] = disk_alloc(); + d->d_open = ps3disk_open; + d->d_close = ps3disk_close; + d->d_strategy = ps3disk_strategy; + d->d_name = "ps3disk"; + d->d_drv1 = sc; + d->d_maxsize = DFLTPHYS; + d->d_sectorsize = sc->sc_blksize; + d->d_unit = i; + d->d_mediasize = sc->sc_reg[i].r_size * sc->sc_blksize; + d->d_flags |= DISKFLAG_CANFLUSHCACHE; + + mb = d->d_mediasize >> 20; + unit = 'M'; + if (mb >= 10240) { + unit = 'G'; + mb /= 1024; + } + + /* Test to see if we can read this region */ + err = lv1_storage_read(ps3bus_get_device(dev), d->d_unit, + 0, 1, rp->r_flags, sc->sc_bounce_lpar, &sc->sc_bounce_tag); + device_printf(dev, "region %d %ju%cB%s\n", i, mb, unit, + (err == 0) ? 
"" : " (hypervisor protected)"); + + if (err == 0) + disk_create(d, DISK_VERSION); + } + err = 0; + + bioq_init(&sc->sc_bioq); + + ps3disk_sysctlattach(sc); + + sc->sc_running = 1; + + kproc_create(&ps3disk_task, sc, &sc->sc_task, 0, 0, "task: ps3disk"); + + return (0); + +fail_unload_dmamem: + + bus_dmamap_unload(sc->sc_bounce_dmatag, sc->sc_bounce_dmamap); + +fail_free_dmamem: + + bus_dmamem_free(sc->sc_bounce_dmatag, sc->sc_bounce, sc->sc_bounce_dmamap); + +fail_destroy_dmatag: + + bus_dma_tag_destroy(sc->sc_bounce_dmatag); + +fail_teardown_intr: + + bus_teardown_intr(dev, sc->sc_irq, sc->sc_irqctx); + +fail_release_intr: + + bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irqid, sc->sc_irq); + +fail_free_regions: + + free(sc->sc_reg, M_PS3DISK); + +fail_destroy_lock: + + PS3DISK_LOCK_DESTROY(sc); + + return (err); +} + +static int +ps3disk_detach(device_t dev) +{ + struct ps3disk_softc *sc = device_get_softc(dev); + int i; + + PS3DISK_LOCK(sc); + sc->sc_running = 0; + wakeup(sc); + PS3DISK_UNLOCK(sc); + + PS3DISK_LOCK(sc); + while (sc->sc_running != -1) + msleep(sc, &sc->sc_mtx, PRIBIO, "detach", 0); + PS3DISK_UNLOCK(sc); + + for (i = 0; i < sc->sc_nregs; i++) + disk_destroy(sc->sc_disk[i]); + + bus_dmamap_unload(sc->sc_bounce_dmatag, sc->sc_bounce_dmamap); + bus_dmamem_free(sc->sc_bounce_dmatag, sc->sc_bounce, sc->sc_bounce_dmamap); + bus_dma_tag_destroy(sc->sc_bounce_dmatag); + + bus_teardown_intr(dev, sc->sc_irq, sc->sc_irqctx); + bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irqid, sc->sc_irq); + + free(sc->sc_disk, M_PS3DISK); + + free(sc->sc_reg, M_PS3DISK); + + PS3DISK_LOCK_DESTROY(sc); + + return (0); +} + +static int +ps3disk_open(struct disk *dp) +{ + return (0); +} + +static int +ps3disk_close(struct disk *dp) +{ + return (0); +} + +static void +ps3disk_strategy(struct bio *bp) +{ + struct ps3disk_softc *sc = (struct ps3disk_softc *) bp->bio_disk->d_drv1; + + if (!sc) { + bp->bio_flags |= BIO_ERROR; + bp->bio_error = EINVAL; + biodone(bp); + return; + 
} + + PS3DISK_LOCK(sc); + bioq_disksort(&sc->sc_bioq, bp); + if (!sc->sc_bounce_busy) + wakeup(sc); + PS3DISK_UNLOCK(sc); +} + +static void +ps3disk_task(void *arg) +{ + struct ps3disk_softc *sc = (struct ps3disk_softc *) arg; + struct bio *bp; + daddr_t block, end; + u_long nblocks; + char *data; + int err; + + while (sc->sc_running) { + PS3DISK_LOCK(sc); + do { + bp = bioq_first(&sc->sc_bioq); + if (bp == NULL) + msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0); + } while (bp == NULL && sc->sc_running); + if (bp) + bioq_remove(&sc->sc_bioq, bp); + PS3DISK_UNLOCK(sc); + + if (!sc->sc_running) + break; + + DPRINTF(sc, PS3DISK_DEBUG_TASK, "%s: bio_cmd 0x%02x\n", + __func__, bp->bio_cmd); + + if (bp->bio_cmd == BIO_FLUSH) { + err = ps3disk_flush(sc); + + if (err) { + bp->bio_error = EIO; + bp->bio_flags |= BIO_ERROR; + } else { + bp->bio_error = 0; + bp->bio_flags |= BIO_DONE; + } + } else if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { + end = bp->bio_pblkno + (bp->bio_bcount / sc->sc_blksize); + + DPRINTF(sc, PS3DISK_DEBUG_TASK, "%s: bio_pblkno %ld bio_bcount %ld\n", + __func__, bp->bio_pblkno, bp->bio_bcount); + + for (block = bp->bio_pblkno; block < end;) { + data = bp->bio_data + + (block - bp->bio_pblkno) * sc->sc_blksize; + + nblocks = end - block; + if (nblocks > sc->sc_bounce_maxblocks) + nblocks = sc->sc_bounce_maxblocks; + + DPRINTF(sc, PS3DISK_DEBUG_TASK, "%s: nblocks %lu\n", + __func__, nblocks); + + if (bp->bio_cmd == BIO_READ) { + err = ps3disk_read(sc, bp->bio_disk->d_unit, + block, nblocks, data); + } else { + err = ps3disk_write(sc, bp->bio_disk->d_unit, + block, nblocks, data); + } + + if (err) + break; + + block += nblocks; + } + + bp->bio_resid = (end - block) * sc->sc_blksize; + if (bp->bio_resid) { + bp->bio_error = EIO; + bp->bio_flags |= BIO_ERROR; + } else { + bp->bio_error = 0; + bp->bio_flags |= BIO_DONE; + } + + DPRINTF(sc, PS3DISK_DEBUG_TASK, "%s: bio_resid %ld\n", + __func__, bp->bio_resid); + } else { + bp->bio_error = EINVAL; 
+ bp->bio_flags |= BIO_ERROR; + } + + if (bp->bio_flags & BIO_ERROR) + disk_err(bp, "hard error", -1, 1); + + biodone(bp); + } + + PS3DISK_LOCK(sc); + sc->sc_running = -1; + wakeup(sc); + PS3DISK_UNLOCK(sc); + + kproc_exit(0); +} + +static int +ps3disk_intr_filter(void *arg) +{ + return (FILTER_SCHEDULE_THREAD); +} + +static void +ps3disk_intr(void *arg) +{ + struct ps3disk_softc *sc = (struct ps3disk_softc *) arg; + device_t dev = sc->sc_dev; + uint64_t devid = ps3bus_get_device(dev); + uint64_t tag, status; + int err; + + PS3DISK_LOCK(sc); + + err = lv1_storage_get_async_status(devid, &tag, &status); + + DPRINTF(sc, PS3DISK_DEBUG_INTR, "%s: err %d tag 0x%016lx status 0x%016lx\n", + __func__, err, tag, status); + + if (err) + goto out; + + if (!sc->sc_bounce_busy) { + device_printf(dev, "Got interrupt while no request pending\n"); + goto out; + } + + if (tag != sc->sc_bounce_tag) + device_printf(dev, "Tag mismatch, got 0x%016lx expected 0x%016lx\n", + tag, sc->sc_bounce_tag); + + if (status) + device_printf(dev, "Request completed with status 0x%016lx\n", status); + + sc->sc_bounce_status = status; + sc->sc_bounce_busy = 0; + + wakeup(sc); + +out: + + PS3DISK_UNLOCK(sc); +} + +static void +ps3disk_getphys(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +{ + if (error != 0) + return; + + *(bus_addr_t *) arg = segs[0].ds_addr; +} + +static int +ps3disk_get_disk_geometry(struct ps3disk_softc *sc) +{ + device_t dev = sc->sc_dev; + uint64_t bus_index = ps3bus_get_busidx(dev); + uint64_t dev_index = ps3bus_get_devidx(dev); + uint64_t junk; + int err; + + err = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("blk_size"), 0, &sc->sc_blksize, &junk); + if (err) { + device_printf(dev, "Could not get block size (0x%08x)\n", err); + err = ENXIO; + goto out; + } + + err = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") 
>> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("n_blocks"), 0, &sc->sc_nblocks, &junk); + if (err) { + device_printf(dev, "Could not get total number of blocks (0x%08x)\n", + err); + err = ENXIO; + goto out; + } + + err = 0; + +out: + + return (err); +} + +static int +ps3disk_enum_regions(struct ps3disk_softc *sc) +{ + device_t dev = sc->sc_dev; + uint64_t bus_index = ps3bus_get_busidx(dev); + uint64_t dev_index = ps3bus_get_devidx(dev); + uint64_t junk; + int i, err; + + /* Read number of regions */ + + err = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("n_regs"), 0, &sc->sc_nregs, &junk); + if (err) { + device_printf(dev, "Could not get number of regions (0x%08x)\n", + err); + err = ENXIO; + goto fail; + } + + if (!sc->sc_nregs) + return 0; + + sc->sc_reg = malloc(sc->sc_nregs * sizeof(struct ps3disk_region), + M_PS3DISK, M_ZERO | M_WAITOK); + if (!sc->sc_reg) { + err = ENOMEM; + goto fail; + } + + /* Setup regions */ + + for (i = 0; i < sc->sc_nregs; i++) { + err = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("region") | i, + lv1_repository_string("id"), &sc->sc_reg[i].r_id, &junk); + if (err) { + device_printf(dev, "Could not get region id (0x%08x)\n", + err); + err = ENXIO; + goto fail; + } + + err = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + lv1_repository_string("dev") | dev_index, + lv1_repository_string("region") | i, + lv1_repository_string("start"), &sc->sc_reg[i].r_start, &junk); + if (err) { + device_printf(dev, "Could not get region start (0x%08x)\n", + err); + err = ENXIO; + goto fail; + } + + err = lv1_get_repository_node_value(PS3_LPAR_ID_PME, + (lv1_repository_string("bus") >> 32) | bus_index, + 
lv1_repository_string("dev") | dev_index, + lv1_repository_string("region") | i, + lv1_repository_string("size"), &sc->sc_reg[i].r_size, &junk); + if (err) { + device_printf(dev, "Could not get region size (0x%08x)\n", + err); + err = ENXIO; + goto fail; + } + + if (i == 0) + /* disables HV access control and grants access to whole disk */ + sc->sc_reg[i].r_flags = 0x2; + else + sc->sc_reg[i].r_flags = 0; + } + + return (0); + +fail: + + sc->sc_nregs = 0; + if (sc->sc_reg) + free(sc->sc_reg, M_PS3DISK); + + return (err); +} + +static int +ps3disk_read(struct ps3disk_softc *sc, int regidx, + uint64_t start_sector, uint64_t sector_count, char *data) +{ + device_t dev = sc->sc_dev; + struct ps3disk_region *rp = &sc->sc_reg[regidx]; + uint64_t devid = ps3bus_get_device(dev); + int err; + + PS3DISK_LOCK(sc); + + if (sc->sc_bounce_busy) { + device_printf(dev, "busy\n"); + PS3DISK_UNLOCK(sc); + return EIO; + } + + sc->sc_bounce_busy = 1; + + err = lv1_storage_read(devid, rp->r_id, + start_sector, sector_count, rp->r_flags, + sc->sc_bounce_lpar, &sc->sc_bounce_tag); + if (err) { + device_printf(dev, "Could not read sectors (0x%08x)\n", err); + err = EIO; + goto out; + } + + DPRINTF(sc, PS3DISK_DEBUG_READ, "%s: tag 0x%016lx\n", + __func__, sc->sc_bounce_tag); + + err = msleep(sc, &sc->sc_mtx, PRIBIO, "read", hz); + if (err) { + device_printf(dev, "Read request timed out\n"); + err = EIO; + goto out; + } + + if (sc->sc_bounce_busy || sc->sc_bounce_status) { + err = EIO; + } else { + bus_dmamap_sync(sc->sc_bounce_dmatag, sc->sc_bounce_dmamap, + BUS_DMASYNC_POSTREAD); + memcpy(data, sc->sc_bounce, sector_count * sc->sc_blksize); + err = 0; + } + +out: + + sc->sc_bounce_busy = 0; + + PS3DISK_UNLOCK(sc); + + return (err); +} + +static int +ps3disk_write(struct ps3disk_softc *sc, int regidx, + uint64_t start_sector, uint64_t sector_count, char *data) +{ + device_t dev = sc->sc_dev; + struct ps3disk_region *rp = &sc->sc_reg[regidx]; + uint64_t devid = ps3bus_get_device(dev); + int 
err; + + PS3DISK_LOCK(sc); + + if (sc->sc_bounce_busy) { + device_printf(dev, "busy\n"); + PS3DISK_UNLOCK(sc); + return EIO; + } + + memcpy(sc->sc_bounce, data, sector_count * sc->sc_blksize); + + bus_dmamap_sync(sc->sc_bounce_dmatag, sc->sc_bounce_dmamap, + BUS_DMASYNC_PREWRITE); + + sc->sc_bounce_busy = 1; + + err = lv1_storage_write(devid, rp->r_id, + start_sector, sector_count, rp->r_flags, + sc->sc_bounce_lpar, &sc->sc_bounce_tag); + if (err) { + device_printf(dev, "Could not write sectors (0x%08x)\n", err); + err = EIO; + goto out; + } + + DPRINTF(sc, PS3DISK_DEBUG_WRITE, "%s: tag 0x%016lx\n", + __func__, sc->sc_bounce_tag); + + err = msleep(sc, &sc->sc_mtx, PRIBIO, "write", hz); + if (err) { + device_printf(dev, "Write request timed out\n"); + err = EIO; + goto out; + } + + err = (sc->sc_bounce_busy || sc->sc_bounce_status) ? EIO : 0; + +out: + + sc->sc_bounce_busy = 0; + + PS3DISK_UNLOCK(sc); + + return (err); +} + +static int +ps3disk_flush(struct ps3disk_softc *sc) +{ + device_t dev = sc->sc_dev; + uint64_t devid = ps3bus_get_device(dev); + int err; + + PS3DISK_LOCK(sc); + + if (sc->sc_bounce_busy) { + device_printf(dev, "busy\n"); + PS3DISK_UNLOCK(sc); + return EIO; + } + + sc->sc_bounce_busy = 1; + + err = lv1_storage_send_device_command(devid, LV1_STORAGE_ATA_HDDOUT, + 0, 0, 0, 0, &sc->sc_bounce_tag); + if (err) { + device_printf(dev, "Could not flush (0x%08x)\n", err); + err = EIO; + goto out; + } + + DPRINTF(sc, PS3DISK_DEBUG_FLUSH, "%s: tag 0x%016lx\n", + __func__, sc->sc_bounce_tag); + + err = msleep(sc, &sc->sc_mtx, PRIBIO, "flush", hz); + if (err) { + device_printf(dev, "Flush request timed out\n"); + err = EIO; + goto out; + } + + err = (sc->sc_bounce_busy || sc->sc_bounce_status) ? 
EIO : 0; + +out: + + sc->sc_bounce_busy = 0; + + PS3DISK_UNLOCK(sc); + + return (err); +} + +#ifdef PS3DISK_DEBUG +static int +ps3disk_sysctl_debug(SYSCTL_HANDLER_ARGS) +{ + struct ps3disk_softc *sc = arg1; + int debug, error; + + debug = sc->sc_debug; + + error = sysctl_handle_int(oidp, &debug, 0, req); + if (error || !req->newptr) + return error; + + sc->sc_debug = debug; + + return 0; +} +#endif + +static void +ps3disk_sysctlattach(struct ps3disk_softc *sc) +{ +#ifdef PS3DISK_DEBUG + struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); + struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); + + sc->sc_debug = ps3disk_debug; + + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, + "debug", CTLTYPE_INT | CTLFLAG_RW, sc, 0, + ps3disk_sysctl_debug, "I", "control debugging printfs"); +#endif +} + +static device_method_t ps3disk_methods[] = { + DEVMETHOD(device_probe, ps3disk_probe), + DEVMETHOD(device_attach, ps3disk_attach), + DEVMETHOD(device_detach, ps3disk_detach), + {0, 0}, +}; + +static driver_t ps3disk_driver = { + "ps3disk", + ps3disk_methods, + sizeof(struct ps3disk_softc), +}; + +static devclass_t ps3disk_devclass; + +DRIVER_MODULE(ps3disk, ps3bus, ps3disk_driver, ps3disk_devclass, 0, 0); diff --git a/sys/rpc/rpc_generic.c b/sys/rpc/rpc_generic.c index fd39350..6adae38 100644 --- a/sys/rpc/rpc_generic.c +++ b/sys/rpc/rpc_generic.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <rpc/rpc.h> #include <rpc/nettype.h> +#include <rpc/rpcsec_gss.h> #include <rpc/rpc_com.h> @@ -69,6 +70,9 @@ extern u_long sb_max_adj; /* not defined in socketvar.h */ #define strrchr rindex #endif +/* Provide an entry point hook for the rpcsec_gss module. 
*/ +struct rpc_gss_entries rpc_gss_entries; + struct handle { NCONF_HANDLE *nhandle; int nflag; /* Whether NETPATH or NETCONFIG */ diff --git a/sys/rpc/rpcsec_gss.h b/sys/rpc/rpcsec_gss.h index 563205c..94696f3 100644 --- a/sys/rpc/rpcsec_gss.h +++ b/sys/rpc/rpcsec_gss.h @@ -141,6 +141,271 @@ typedef struct { __BEGIN_DECLS #ifdef _KERNEL +/* + * Set up a structure of entry points for the kgssapi module and inline + * functions named rpc_gss_XXX_call() to use them, so that the kgssapi + * module doesn't need to be loaded for the NFS modules to work using + * AUTH_SYS. The kgssapi modules will be loaded by the gssd(8) daemon + * when it is started up and the entry points will then be filled in. + */ +typedef AUTH *rpc_gss_secfind_ftype(CLIENT *clnt, struct ucred *cred, + const char *principal, gss_OID mech_oid, + rpc_gss_service_t service); +typedef void rpc_gss_secpurge_ftype(CLIENT *clnt); +typedef AUTH *rpc_gss_seccreate_ftype(CLIENT *clnt, struct ucred *cred, + const char *principal, const char *mechanism, + rpc_gss_service_t service, const char *qop, + rpc_gss_options_req_t *options_req, + rpc_gss_options_ret_t *options_ret); +typedef bool_t rpc_gss_set_defaults_ftype(AUTH *auth, + rpc_gss_service_t service, const char *qop); +typedef int rpc_gss_max_data_length_ftype(AUTH *handle, + int max_tp_unit_len); +typedef void rpc_gss_get_error_ftype(rpc_gss_error_t *error); +typedef bool_t rpc_gss_mech_to_oid_ftype(const char *mech, gss_OID *oid_ret); +typedef bool_t rpc_gss_oid_to_mech_ftype(gss_OID oid, const char **mech_ret); +typedef bool_t rpc_gss_qop_to_num_ftype(const char *qop, const char *mech, + u_int *num_ret); +typedef const char **rpc_gss_get_mechanisms_ftype(void); +typedef bool_t rpc_gss_get_versions_ftype(u_int *vers_hi, u_int *vers_lo); +typedef bool_t rpc_gss_is_installed_ftype(const char *mech); +typedef bool_t rpc_gss_set_svc_name_ftype(const char *principal, + const char *mechanism, u_int req_time, u_int program, + u_int version); +typedef void 
rpc_gss_clear_svc_name_ftype(u_int program, u_int version); +typedef bool_t rpc_gss_getcred_ftype(struct svc_req *req, + rpc_gss_rawcred_t **rcred, + rpc_gss_ucred_t **ucred, void **cookie); +typedef bool_t rpc_gss_set_callback_ftype(rpc_gss_callback_t *cb); +typedef void rpc_gss_clear_callback_ftype(rpc_gss_callback_t *cb); +typedef bool_t rpc_gss_get_principal_name_ftype(rpc_gss_principal_t *principal, + const char *mech, const char *name, const char *node, + const char *domain); +typedef int rpc_gss_svc_max_data_length_ftype(struct svc_req *req, + int max_tp_unit_len); + +struct rpc_gss_entries { + rpc_gss_secfind_ftype *rpc_gss_secfind; + rpc_gss_secpurge_ftype *rpc_gss_secpurge; + rpc_gss_seccreate_ftype *rpc_gss_seccreate; + rpc_gss_set_defaults_ftype *rpc_gss_set_defaults; + rpc_gss_max_data_length_ftype *rpc_gss_max_data_length; + rpc_gss_get_error_ftype *rpc_gss_get_error; + rpc_gss_mech_to_oid_ftype *rpc_gss_mech_to_oid; + rpc_gss_oid_to_mech_ftype *rpc_gss_oid_to_mech; + rpc_gss_qop_to_num_ftype *rpc_gss_qop_to_num; + rpc_gss_get_mechanisms_ftype *rpc_gss_get_mechanisms; + rpc_gss_get_versions_ftype *rpc_gss_get_versions; + rpc_gss_is_installed_ftype *rpc_gss_is_installed; + rpc_gss_set_svc_name_ftype *rpc_gss_set_svc_name; + rpc_gss_clear_svc_name_ftype *rpc_gss_clear_svc_name; + rpc_gss_getcred_ftype *rpc_gss_getcred; + rpc_gss_set_callback_ftype *rpc_gss_set_callback; + rpc_gss_clear_callback_ftype *rpc_gss_clear_callback; + rpc_gss_get_principal_name_ftype *rpc_gss_get_principal_name; + rpc_gss_svc_max_data_length_ftype *rpc_gss_svc_max_data_length; +}; +extern struct rpc_gss_entries rpc_gss_entries; + +/* Functions to access the entry points. 
*/ +static __inline AUTH * +rpc_gss_secfind_call(CLIENT *clnt, struct ucred *cred, const char *principal, + gss_OID mech_oid, rpc_gss_service_t service) +{ + AUTH *ret = NULL; + + if (rpc_gss_entries.rpc_gss_secfind != NULL) + ret = (*rpc_gss_entries.rpc_gss_secfind)(clnt, cred, principal, + mech_oid, service); + return (ret); +} + +static __inline void +rpc_gss_secpurge_call(CLIENT *clnt) +{ + + if (rpc_gss_entries.rpc_gss_secpurge != NULL) + (*rpc_gss_entries.rpc_gss_secpurge)(clnt); +} + +static __inline AUTH * +rpc_gss_seccreate_call(CLIENT *clnt, struct ucred *cred, const char *principal, + const char *mechanism, rpc_gss_service_t service, const char *qop, + rpc_gss_options_req_t *options_req, rpc_gss_options_ret_t *options_ret) +{ + AUTH *ret = NULL; + + if (rpc_gss_entries.rpc_gss_seccreate != NULL) + ret = (*rpc_gss_entries.rpc_gss_seccreate)(clnt, cred, + principal, mechanism, service, qop, options_req, + options_ret); + return (ret); +} + +static __inline bool_t +rpc_gss_set_defaults_call(AUTH *auth, rpc_gss_service_t service, + const char *qop) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_set_defaults != NULL) + ret = (*rpc_gss_entries.rpc_gss_set_defaults)(auth, service, + qop); + return (ret); +} + +static __inline int +rpc_gss_max_data_length_call(AUTH *handle, int max_tp_unit_len) +{ + int ret = 0; + + if (rpc_gss_entries.rpc_gss_max_data_length != NULL) + ret = (*rpc_gss_entries.rpc_gss_max_data_length)(handle, + max_tp_unit_len); + return (ret); +} + +static __inline void +rpc_gss_get_error_call(rpc_gss_error_t *error) +{ + + if (rpc_gss_entries.rpc_gss_get_error != NULL) + (*rpc_gss_entries.rpc_gss_get_error)(error); +} + +static __inline bool_t +rpc_gss_mech_to_oid_call(const char *mech, gss_OID *oid_ret) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_mech_to_oid != NULL) + ret = (*rpc_gss_entries.rpc_gss_mech_to_oid)(mech, oid_ret); + return (ret); +} + +static __inline bool_t +rpc_gss_oid_to_mech_call(gss_OID oid, const char 
**mech_ret) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_oid_to_mech != NULL) + ret = (*rpc_gss_entries.rpc_gss_oid_to_mech)(oid, mech_ret); + return (ret); +} + +static __inline bool_t +rpc_gss_qop_to_num_call(const char *qop, const char *mech, u_int *num_ret) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_qop_to_num != NULL) + ret = (*rpc_gss_entries.rpc_gss_qop_to_num)(qop, mech, num_ret); + return (ret); +} + +static __inline const char ** +rpc_gss_get_mechanisms_call(void) +{ + const char **ret = NULL; + + if (rpc_gss_entries.rpc_gss_get_mechanisms != NULL) + ret = (*rpc_gss_entries.rpc_gss_get_mechanisms)(); + return (ret); +} + +static __inline bool_t +rpc_gss_get_versions_call(u_int *vers_hi, u_int *vers_lo) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_get_versions != NULL) + ret = (*rpc_gss_entries.rpc_gss_get_versions)(vers_hi, vers_lo); + return (ret); +} + +static __inline bool_t +rpc_gss_is_installed_call(const char *mech) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_is_installed != NULL) + ret = (*rpc_gss_entries.rpc_gss_is_installed)(mech); + return (ret); +} + +static __inline bool_t +rpc_gss_set_svc_name_call(const char *principal, const char *mechanism, + u_int req_time, u_int program, u_int version) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_set_svc_name != NULL) + ret = (*rpc_gss_entries.rpc_gss_set_svc_name)(principal, + mechanism, req_time, program, version); + return (ret); +} + +static __inline void +rpc_gss_clear_svc_name_call(u_int program, u_int version) +{ + + if (rpc_gss_entries.rpc_gss_clear_svc_name != NULL) + (*rpc_gss_entries.rpc_gss_clear_svc_name)(program, version); +} + +static __inline bool_t +rpc_gss_getcred_call(struct svc_req *req, rpc_gss_rawcred_t **rcred, + rpc_gss_ucred_t **ucred, void **cookie) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_getcred != NULL) + ret = (*rpc_gss_entries.rpc_gss_getcred)(req, rcred, ucred, + cookie); + return (ret); +} + +static __inline bool_t 
+rpc_gss_set_callback_call(rpc_gss_callback_t *cb) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_set_callback != NULL) + ret = (*rpc_gss_entries.rpc_gss_set_callback)(cb); + return (ret); +} + +static __inline void +rpc_gss_clear_callback_call(rpc_gss_callback_t *cb) +{ + + if (rpc_gss_entries.rpc_gss_clear_callback != NULL) + (*rpc_gss_entries.rpc_gss_clear_callback)(cb); +} + +static __inline bool_t +rpc_gss_get_principal_name_call(rpc_gss_principal_t *principal, + const char *mech, const char *name, const char *node, const char *domain) +{ + bool_t ret = 1; + + if (rpc_gss_entries.rpc_gss_get_principal_name != NULL) + ret = (*rpc_gss_entries.rpc_gss_get_principal_name)(principal, + mech, name, node, domain); + return (ret); +} + +static __inline int +rpc_gss_svc_max_data_length_call(struct svc_req *req, int max_tp_unit_len) +{ + int ret = 0; + + if (rpc_gss_entries.rpc_gss_svc_max_data_length != NULL) + ret = (*rpc_gss_entries.rpc_gss_svc_max_data_length)(req, + max_tp_unit_len); + return (ret); +} + AUTH *rpc_gss_secfind(CLIENT *clnt, struct ucred *cred, const char *principal, gss_OID mech_oid, rpc_gss_service_t service); void rpc_gss_secpurge(CLIENT *clnt); diff --git a/sys/sparc64/conf/GENERIC b/sys/sparc64/conf/GENERIC index 406dcf6..11c5d35 100644 --- a/sys/sparc64/conf/GENERIC +++ b/sys/sparc64/conf/GENERIC @@ -61,13 +61,13 @@ options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options MAC # TrustedBSD MAC Framework -options INCLUDE_CONFIG_FILE # Include this file in kernel +options INCLUDE_CONFIG_FILE # Include this file in kernel # Debugging for use in -current options KDB # Enable kernel debugger support. options DDB # Support DDB. options GDB # Support remote GDB. 
-options DEADLKRES # Enable the deadlock resolver +options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles @@ -266,3 +266,10 @@ device fwe # Ethernet over FireWire (non-standard!) device fwip # IP over FireWire (RFC 2734,3146) device dcons # Dumb console driver device dcons_crom # Configuration ROM for dcons + +# Sound support +device sound # Generic sound driver (required) +device snd_audiocs # Crystal Semiconductor CS4231 +device snd_es137x # Ensoniq AudioPCI ES137x +device snd_t4dwave # Acer Labs M5451 +device snd_uaudio # USB Audio diff --git a/sys/sparc64/include/_types.h b/sys/sparc64/include/_types.h index f810c15..7e993c4 100644 --- a/sys/sparc64/include/_types.h +++ b/sys/sparc64/include/_types.h @@ -55,7 +55,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/sparc64/include/ktr.h b/sys/sparc64/include/ktr.h index 5948ba2..f13865f 100644 --- a/sys/sparc64/include/ktr.h +++ b/sys/sparc64/include/ktr.h @@ -40,16 +40,6 @@ #else -#define AND(var, mask, r1, r2) \ - SET(var, r2, r1) ; \ - lduw [r1], r2 ; \ - and r2, mask, r1 - -#define TEST(var, mask, r1, r2, l1) \ - AND(var, mask, r1, r2) ; \ - brz r1, l1 ## f ; \ - nop - /* * XXX could really use another register... */ @@ -79,13 +69,37 @@ l2: add r2, 1, r3 ; \ SET(l1 ## b, r3, r2) ; \ stx r2, [r1 + KTR_DESC] +/* + * NB: this clobbers %y. 
+ */ #define CATR(mask, desc, r1, r2, r3, l1, l2, l3) \ set mask, r1 ; \ - TEST(ktr_mask, r1, r2, r2, l3) ; \ - lduw [PCPU(MID)], r1 ; \ + SET(ktr_mask, r3, r2) ; \ + lduw [r2], r2 ; \ + and r2, r1, r1 ; \ + brz r1, l3 ## f ; \ + nop ; \ + lduw [PCPU(CPUID)], r2 ; \ + mov _NCPUBITS, r3 ; \ + mov %g0, %y ; \ + udiv r2, r3, r2 ; \ + srl r2, 0, r2 ; \ + sllx r2, PTR_SHIFT, r2 ; \ + SET(ktr_cpumask, r3, r1) ; \ + ldx [r1 + r2], r1 ; \ + lduw [PCPU(CPUID)], r2 ; \ + mov _NCPUBITS, r3 ; \ + mov %g0, %y ; \ + udiv r2, r3, r2 ; \ + srl r2, 0, r2 ; \ + smul r2, r3, r3 ; \ + lduw [PCPU(CPUID)], r2 ; \ + sub r2, r3, r3 ; \ mov 1, r2 ; \ - sllx r2, r1, r1 ; \ - TEST(ktr_cpumask, r1, r2, r3, l3) ; \ + sllx r2, r3, r2 ; \ + andn r1, r2, r1 ; \ + brz r1, l3 ## f ; \ + nop ; \ ATR(desc, r1, r2, r3, l1, l2) #endif /* LOCORE */ diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h index e16ea97..adad257 100644 --- a/sys/sparc64/include/pmap.h +++ b/sys/sparc64/include/pmap.h @@ -40,6 +40,7 @@ #define _MACHINE_PMAP_H_ #include <sys/queue.h> +#include <sys/_cpuset.h> #include <sys/_lock.h> #include <sys/_mutex.h> #include <machine/cache.h> @@ -61,7 +62,7 @@ struct pmap { struct mtx pm_mtx; struct tte *pm_tsb; vm_object_t pm_tsb_obj; - cpumask_t pm_active; + cpuset_t pm_active; u_int pm_context[MAXCPU]; struct pmap_statistics pm_stats; }; diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h index 3ca8e03..a519e01 100644 --- a/sys/sparc64/include/smp.h +++ b/sys/sparc64/include/smp.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 2001 Jake Burkholder. + * Copyright (c) 2007 - 2011 Marius Strobl <marius@FreeBSD.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -38,6 +39,7 @@ #ifndef LOCORE +#include <sys/cpuset.h> #include <sys/proc.h> #include <sys/sched.h> @@ -76,17 +78,17 @@ struct cpu_start_args { }; struct ipi_cache_args { - cpumask_t ica_mask; + cpuset_t ica_mask; vm_paddr_t ica_pa; }; struct ipi_rd_args { - cpumask_t ira_mask; + cpuset_t ira_mask; register_t *ira_val; }; struct ipi_tlb_args { - cpumask_t ita_mask; + cpuset_t ita_mask; struct pmap *ita_pmap; u_long ita_start; u_long ita_end; @@ -100,7 +102,7 @@ extern struct pcb stoppcbs[]; void cpu_mp_bootstrap(struct pcpu *pc); void cpu_mp_shutdown(void); -typedef void cpu_ipi_selected_t(u_int, u_long, u_long, u_long); +typedef void cpu_ipi_selected_t(cpuset_t, u_long, u_long, u_long); extern cpu_ipi_selected_t *cpu_ipi_selected; typedef void cpu_ipi_single_t(u_int, u_long, u_long, u_long); extern cpu_ipi_single_t *cpu_ipi_single; @@ -135,12 +137,15 @@ extern char tl_ipi_tlb_range_demap[]; static __inline void ipi_all_but_self(u_int ipi) { + cpuset_t cpus; - cpu_ipi_selected(PCPU_GET(other_cpus), 0, (u_long)tl_ipi_level, ipi); + cpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &cpus); + cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_level, ipi); } static __inline void -ipi_selected(u_int cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_level, ipi); @@ -166,8 +171,9 @@ ipi_dcache_page_inval(void *func, vm_paddr_t pa) ica = &ipi_cache_args; mtx_lock_spin(&ipi_mtx); ica->ica_mask = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ica->ica_mask); ica->ica_pa = pa; - cpu_ipi_selected(PCPU_GET(other_cpus), 0, (u_long)func, (u_long)ica); + cpu_ipi_selected(ica->ica_mask, 0, (u_long)func, (u_long)ica); return (&ica->ica_mask); } @@ -182,8 +188,9 @@ ipi_icache_page_inval(void *func, vm_paddr_t pa) ica = &ipi_cache_args; mtx_lock_spin(&ipi_mtx); ica->ica_mask = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ica->ica_mask); ica->ica_pa = pa; - cpu_ipi_selected(PCPU_GET(other_cpus), 
0, (u_long)func, (u_long)ica); + cpu_ipi_selected(ica->ica_mask, 0, (u_long)func, (u_long)ica); return (&ica->ica_mask); } @@ -197,7 +204,7 @@ ipi_rd(u_int cpu, void *func, u_long *val) sched_pin(); ira = &ipi_rd_args; mtx_lock_spin(&ipi_mtx); - ira->ira_mask = 1 << cpu | PCPU_GET(cpumask); + CPU_SETOF(cpu, &ira->ira_mask); ira->ira_val = val; cpu_ipi_single(cpu, 0, (u_long)func, (u_long)ira); return (&ira->ira_mask); @@ -207,18 +214,21 @@ static __inline void * ipi_tlb_context_demap(struct pmap *pm) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, &all_cpus); + CPU_CLR(PCPU_GET(cpuid), &cpus); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; ita->ita_pmap = pm; cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_context_demap, (u_long)ita); @@ -229,18 +239,21 @@ static __inline void * ipi_tlb_page_demap(struct pmap *pm, vm_offset_t va) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, &all_cpus); + CPU_CLR(PCPU_GET(cpuid), &cpus); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; ita->ita_pmap = pm; ita->ita_va = va; cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_page_demap, (u_long)ita); @@ -251,18 +264,21 @@ static __inline void * ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + 
CPU_AND(&cpus, &all_cpus); + CPU_CLR(PCPU_GET(cpuid), &cpus); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; ita->ita_pmap = pm; ita->ita_start = start; ita->ita_end = end; @@ -274,11 +290,10 @@ ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) static __inline void ipi_wait(void *cookie) { - volatile cpumask_t *mask; + volatile cpuset_t *mask; if ((mask = cookie) != NULL) { - atomic_clear_int(mask, PCPU_GET(cpumask)); - while (*mask != 0) + while (!CPU_EMPTY(mask)) ; mtx_unlock_spin(&ipi_mtx); sched_unpin(); diff --git a/sys/sparc64/sparc64/exception.S b/sys/sparc64/sparc64/exception.S index ed0e381..0b8a0fa 100644 --- a/sys/sparc64/sparc64/exception.S +++ b/sys/sparc64/sparc64/exception.S @@ -1280,6 +1280,7 @@ ENTRY(tl1_data_excptn_trap) END(tl1_data_excptn_trap) .macro tl1_align + wrpr %g0, PSTATE_ALT, %pstate ba,a %xcc, tl1_align_trap nop .align 32 @@ -1289,7 +1290,7 @@ ENTRY(tl1_align_trap) RESUME_SPILLFILL_ALIGN ba %xcc, tl1_sfsr_trap mov T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL, %g2 -END(tl1_data_excptn_trap) +END(tl1_align_trap) ENTRY(tl1_sfsr_trap) wr %g0, ASI_DMMU, %asi @@ -2615,9 +2616,9 @@ ENTRY(tl0_ret) andn %l4, TSTATE_CWP_MASK, %g2 /* - * Restore %y. Could also be below if we had more alternate globals. + * Save %y in an alternate global. */ - wr %l5, 0, %y + mov %l5, %g4 /* * Setup %wstate for return. We need to restore the user window state @@ -2662,8 +2663,8 @@ tl0_ret_fill: * Fixup %tstate so the saved %cwp points to the current window and * restore it. */ - rdpr %cwp, %g4 - wrpr %g2, %g4, %tstate + rdpr %cwp, %g1 + wrpr %g2, %g1, %tstate /* * Restore the user window state. 
The transition bit was set above @@ -2673,20 +2674,25 @@ tl0_ret_fill: #if KTR_COMPILE & KTR_TRAP CATR(KTR_TRAP, "tl0_ret: td=%#lx pil=%#lx pc=%#lx npc=%#lx sp=%#lx" - , %g2, %g3, %g4, 7, 8, 9) - ldx [PCPU(CURTHREAD)], %g3 - stx %g3, [%g2 + KTR_PARM1] - rdpr %pil, %g3 - stx %g3, [%g2 + KTR_PARM2] - rdpr %tpc, %g3 - stx %g3, [%g2 + KTR_PARM3] - rdpr %tnpc, %g3 - stx %g3, [%g2 + KTR_PARM4] - stx %sp, [%g2 + KTR_PARM5] + , %g1, %g2, %g3, 7, 8, 9) + ldx [PCPU(CURTHREAD)], %g2 + stx %g2, [%g1 + KTR_PARM1] + rdpr %pil, %g2 + stx %g2, [%g1 + KTR_PARM2] + rdpr %tpc, %g2 + stx %g2, [%g1 + KTR_PARM3] + rdpr %tnpc, %g2 + stx %g2, [%g1 + KTR_PARM4] + stx %sp, [%g1 + KTR_PARM5] 9: #endif /* + * Restore %y. Note that the CATR above clobbered it. + */ + wr %g4, 0, %y + + /* * Return to usermode. */ retry @@ -2697,9 +2703,14 @@ tl0_ret_fill_end: , %l0, %l1, %l2, 7, 8, 9) rdpr %pstate, %l1 stx %l1, [%l0 + KTR_PARM1] - stx %l5, [%l0 + KTR_PARM2] + stx %l6, [%l0 + KTR_PARM2] stx %sp, [%l0 + KTR_PARM3] 9: + + /* + * Restore %y clobbered by the CATR. This was saved in %l5 above. 
+ */ + wr %l5, 0, %y #endif /* @@ -2867,34 +2878,36 @@ ENTRY(tl1_ret) andn %l0, TSTATE_CWP_MASK, %g1 mov %l1, %g2 mov %l2, %g3 + mov %l4, %g4 wrpr %l3, 0, %pil - wr %l4, 0, %y restore wrpr %g0, 2, %tl - rdpr %cwp, %g4 - wrpr %g1, %g4, %tstate wrpr %g2, 0, %tpc wrpr %g3, 0, %tnpc + rdpr %cwp, %g2 + wrpr %g1, %g2, %tstate #if KTR_COMPILE & KTR_TRAP CATR(KTR_TRAP, "tl1_ret: td=%#lx pil=%#lx ts=%#lx pc=%#lx sp=%#lx" - , %g2, %g3, %g4, 7, 8, 9) - ldx [PCPU(CURTHREAD)], %g3 - stx %g3, [%g2 + KTR_PARM1] - rdpr %pil, %g3 - stx %g3, [%g2 + KTR_PARM2] - rdpr %tstate, %g3 - stx %g3, [%g2 + KTR_PARM3] - rdpr %tpc, %g3 - stx %g3, [%g2 + KTR_PARM4] - stx %sp, [%g2 + KTR_PARM5] + , %g1, %g2, %g3, 7, 8, 9) + ldx [PCPU(CURTHREAD)], %g2 + stx %g2, [%g1 + KTR_PARM1] + rdpr %pil, %g2 + stx %g2, [%g1 + KTR_PARM2] + rdpr %tstate, %g2 + stx %g2, [%g1 + KTR_PARM3] + rdpr %tpc, %g2 + stx %g2, [%g1 + KTR_PARM4] + stx %sp, [%g1 + KTR_PARM5] 9: #endif + wr %g4, 0, %y + retry END(tl1_ret) @@ -2995,33 +3008,35 @@ ENTRY(tl1_intr) andn %l0, TSTATE_CWP_MASK, %g1 mov %l1, %g2 mov %l2, %g3 + mov %l4, %g4 wrpr %l3, 0, %pil - wr %l4, 0, %y restore wrpr %g0, 2, %tl - rdpr %cwp, %g4 - wrpr %g1, %g4, %tstate wrpr %g2, 0, %tpc wrpr %g3, 0, %tnpc + rdpr %cwp, %g2 + wrpr %g1, %g2, %tstate #if KTR_COMPILE & KTR_INTR CATR(KTR_INTR, "tl1_intr: td=%#x pil=%#lx ts=%#lx pc=%#lx sp=%#lx" - , %g2, %g3, %g4, 7, 8, 9) - ldx [PCPU(CURTHREAD)], %g3 - stx %g3, [%g2 + KTR_PARM1] - rdpr %pil, %g3 - stx %g3, [%g2 + KTR_PARM2] - rdpr %tstate, %g3 - stx %g3, [%g2 + KTR_PARM3] - rdpr %tpc, %g3 - stx %g3, [%g2 + KTR_PARM4] - stx %sp, [%g2 + KTR_PARM5] + , %g1, %g2, %g3, 7, 8, 9) + ldx [PCPU(CURTHREAD)], %g2 + stx %g2, [%g1 + KTR_PARM1] + rdpr %pil, %g2 + stx %g2, [%g1 + KTR_PARM2] + rdpr %tstate, %g2 + stx %g2, [%g1 + KTR_PARM3] + rdpr %tpc, %g2 + stx %g2, [%g1 + KTR_PARM4] + stx %sp, [%g1 + KTR_PARM5] 9: #endif + wr %g4, 0, %y + retry END(tl1_intr) diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c 
index e33e581..89ec718 100644 --- a/sys/sparc64/sparc64/genassym.c +++ b/sys/sparc64/sparc64/genassym.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/smp.h> #include <sys/vmmeter.h> +#include <sys/_cpuset.h> #include <vm/vm.h> #include <vm/vm_page.h> @@ -59,6 +60,8 @@ ASSYM(PCPU_PAGES, PCPU_PAGES); ASSYM(TAR_VPN_SHIFT, TAR_VPN_SHIFT); +ASSYM(_NCPUBITS, _NCPUBITS); + #ifdef SUN4U ASSYM(TLB_DEMAP_ALL, TLB_DEMAP_ALL); #endif @@ -137,7 +140,6 @@ ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); -ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask)); ASSYM(PC_IRHEAD, offsetof(struct pcpu, pc_irhead)); ASSYM(PC_IRTAIL, offsetof(struct pcpu, pc_irtail)); ASSYM(PC_IRFREE, offsetof(struct pcpu, pc_irfree)); diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c index f6ef9a7..112ddab 100644 --- a/sys/sparc64/sparc64/intr_machdep.c +++ b/sys/sparc64/sparc64/intr_machdep.c @@ -83,10 +83,11 @@ CTASSERT((1 << IV_SHIFT) == sizeof(struct intr_vector)); ih_func_t *intr_handlers[PIL_MAX]; uint16_t pil_countp[PIL_MAX]; +static uint16_t pil_stray_count[PIL_MAX]; struct intr_vector intr_vectors[IV_MAX]; uint16_t intr_countp[IV_MAX]; -static u_long intr_stray_count[IV_MAX]; +static uint16_t intr_stray_count[IV_MAX]; static const char *const pil_names[] = { "stray", @@ -199,22 +200,32 @@ intr_setup(int pri, ih_func_t *ihf, int vec, iv_func_t *ivf, void *iva) static void intr_stray_level(struct trapframe *tf) { - - printf("stray level interrupt %ld\n", tf->tf_level); + uint64_t level; + + level = tf->tf_level; + if (pil_stray_count[level] < MAX_STRAY_LOG) { + printf("stray level interrupt %ld\n", level); + pil_stray_count[level]++; + if (pil_stray_count[level] >= MAX_STRAY_LOG) + printf("got %d stray level interrupt %ld's: not " + "logging anymore\n", MAX_STRAY_LOG, level); + } } static void 
intr_stray_vector(void *cookie) { struct intr_vector *iv; + u_int vec; iv = cookie; - if (intr_stray_count[iv->iv_vec] < MAX_STRAY_LOG) { - printf("stray vector interrupt %d\n", iv->iv_vec); - intr_stray_count[iv->iv_vec]++; - if (intr_stray_count[iv->iv_vec] >= MAX_STRAY_LOG) - printf("got %d stray interrupt %d's: not logging " - "anymore\n", MAX_STRAY_LOG, iv->iv_vec); + vec = iv->iv_vec; + if (intr_stray_count[vec] < MAX_STRAY_LOG) { + printf("stray vector interrupt %d\n", vec); + intr_stray_count[vec]++; + if (intr_stray_count[vec] >= MAX_STRAY_LOG) + printf("got %d stray vector interrupt %d's: not " + "logging anymore\n", MAX_STRAY_LOG, vec); } } @@ -445,8 +456,7 @@ intr_describe(int vec, void *ih, const char *descr) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; static void @@ -468,7 +478,7 @@ intr_assign_next_cpu(struct intr_vector *iv) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); } /* Attempt to bind the specified IRQ to the specified CPU. */ @@ -504,7 +514,7 @@ intr_add_cpu(u_int cpu) if (bootverbose) printf("INTR: Adding CPU %d as a target\n", cpu); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -518,6 +528,9 @@ intr_shuffle_irqs(void *arg __unused) struct intr_vector *iv; int i; + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. 
*/ if (mp_ncpus == 1) return; diff --git a/sys/sparc64/sparc64/mp_exception.S b/sys/sparc64/sparc64/mp_exception.S index 5a8a105..f1b323a 100644 --- a/sys/sparc64/sparc64/mp_exception.S +++ b/sys/sparc64/sparc64/mp_exception.S @@ -38,9 +38,21 @@ __FBSDID("$FreeBSD$"); .register %g2, #ignore .register %g3, #ignore -#define IPI_DONE(r1, r2, r3, r4) \ - lduw [PCPU(CPUMASK)], r4 ; \ - ATOMIC_CLEAR_INT(r1, r2, r3, r4) +#define IPI_DONE(r1, r2, r3, r4, r5, r6) \ + rd %y, r6 ; \ + lduw [PCPU(CPUID)], r2 ; \ + mov _NCPUBITS, r3 ; \ + mov %g0, %y ; \ + udiv r2, r3, r4 ; \ + srl r4, 0, r5 ; \ + sllx r5, PTR_SHIFT, r5 ; \ + add r1, r5, r1 ; \ + smul r4, r3, r3 ; \ + sub r2, r3, r3 ; \ + mov 1, r4 ; \ + sllx r4, r3, r4 ; \ + wr r6, %y ; \ + ATOMIC_CLEAR_LONG(r1, r2, r3, r4) /* * Invalidate a physical page in the data cache. For UltraSPARC I and II. @@ -77,7 +89,7 @@ ENTRY(tl_ipi_spitfire_dcache_page_inval) 2: brgz,pt %g2, 1b sub %g2, %g4, %g2 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_spitfire_dcache_page_inval) @@ -117,7 +129,7 @@ ENTRY(tl_ipi_spitfire_icache_page_inval) 2: brgz,pt %g2, 1b sub %g2, %g4, %g2 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_spitfire_icache_page_inval) @@ -148,7 +160,7 @@ ENTRY(tl_ipi_cheetah_dcache_page_inval) blt,a,pt %xcc, 1b nop - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_cheetah_dcache_page_inval) @@ -204,7 +216,7 @@ ENTRY(tl_ipi_tlb_page_demap) stxa %g0, [%g2] ASI_IMMU_DEMAP flush %g3 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_tlb_page_demap) @@ -247,7 +259,7 @@ ENTRY(tl_ipi_tlb_range_demap) blt,a,pt %xcc, 1b nop - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_tlb_range_demap) @@ -271,7 +283,7 @@ ENTRY(tl_ipi_tlb_context_demap) stxa %g0, [%g1] ASI_IMMU_DEMAP flush %g3 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, 
%g3, %g4, %g6) retry END(tl_ipi_tlb_context_demap) @@ -283,7 +295,7 @@ ENTRY(tl_ipi_stick_rd) rd %asr24, %g2 stx %g2, [%g1] - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_stick_rd) @@ -295,6 +307,6 @@ ENTRY(tl_ipi_tick_rd) rd %tick, %g2 stx %g2, [%g1] - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_tick_rd) diff --git a/sys/sparc64/sparc64/mp_locore.S b/sys/sparc64/sparc64/mp_locore.S index fbcb767..fd4357e 100644 --- a/sys/sparc64/sparc64/mp_locore.S +++ b/sys/sparc64/sparc64/mp_locore.S @@ -269,13 +269,17 @@ ENTRY(mp_startup) add %l1, %l2, %l1 sub %l1, SPOFF + CCFSZ, %sp + /* Initialize global registers. */ + call cpu_setregs + mov %l1, %o0 + #if KTR_COMPILE & KTR_SMP CATR(KTR_SMP, "mp_startup: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx" , %g1, %g2, %g3, 7, 8, 9) - lduw [%l1 + PC_CPUID], %g2 + lduw [PCPU(CPUID)], %g2 stx %g2, [%g1 + KTR_PARM1] - lduw [%l1 + PC_MID], %g2 + lduw [PCPU(MID)], %g2 stx %g2, [%g1 + KTR_PARM2] stx %l1, [%g1 + KTR_PARM3] stx %sp, [%g1 + KTR_PARM5] diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c index 8700f89..83d8e9f 100644 --- a/sys/sparc64/sparc64/mp_machdep.c +++ b/sys/sparc64/sparc64/mp_machdep.c @@ -121,7 +121,7 @@ cpu_ipi_single_t *cpu_ipi_single; static vm_offset_t mp_tramp; static u_int cpuid_to_mid[MAXCPU]; static int isjbus; -static volatile cpumask_t shutdown_cpus; +static volatile cpuset_t shutdown_cpus; static void ap_count(phandle_t node, u_int mid, u_int cpu_impl); static void ap_start(phandle_t node, u_int mid, u_int cpu_impl); @@ -140,8 +140,6 @@ static cpu_ipi_single_t spitfire_ipi_single; SYSINIT(cpu_mp_unleash, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL); CTASSERT(MAXCPU <= IDR_CHEETAH_MAX_BN_PAIRS); -CTASSERT(MAXCPU <= sizeof(u_int) * NBBY); -CTASSERT(MAXCPU <= sizeof(int) * NBBY); void mp_init(u_int cpu_impl) @@ -228,7 +226,7 @@ void cpu_mp_setmaxid() { - all_cpus = 1 << curcpu; + 
CPU_SETOF(curcpu, &all_cpus); mp_ncpus = 1; mp_maxid = 0; @@ -283,6 +281,7 @@ sun4u_startcpu(phandle_t cpu, void *func, u_long arg) void cpu_mp_start(void) { + cpuset_t ocpus; mtx_init(&ipi_mtx, "ipi", NULL, MTX_SPIN); @@ -299,7 +298,9 @@ cpu_mp_start(void) KASSERT(!isjbus || mp_ncpus <= IDR_JALAPENO_MAX_BN_PAIRS, ("%s: can only IPI a maximum of %d JBus-CPUs", __func__, IDR_JALAPENO_MAX_BN_PAIRS)); - PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu)); + ocpus = all_cpus; + CPU_CLR(curcpu, &ocpus); + PCPU_SET(other_cpus, ocpus); smp_active = 1; } @@ -357,7 +358,7 @@ ap_start(phandle_t node, u_int mid, u_int cpu_impl) cache_init(pc); - all_cpus |= 1 << cpuid; + CPU_SET(cpuid, &all_cpus); intr_add_cpu(cpuid); } @@ -383,7 +384,7 @@ cpu_mp_unleash(void *v) ctx_inc = (TLB_CTX_USER_MAX - 1) / mp_ncpus; csa = &cpu_start_args; csa->csa_count = mp_ncpus; - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { pc->pc_tlb_ctx = ctx_min; pc->pc_tlb_ctx_min = ctx_min; pc->pc_tlb_ctx_max = ctx_min + ctx_inc; @@ -421,6 +422,7 @@ cpu_mp_unleash(void *v) void cpu_mp_bootstrap(struct pcpu *pc) { + cpuset_t ocpus; volatile struct cpu_start_args *csa; csa = &cpu_start_args; @@ -453,9 +455,6 @@ cpu_mp_bootstrap(struct pcpu *pc) */ tlb_flush_nonlocked(); - /* Initialize global registers. */ - cpu_setregs(pc); - /* * Enable interrupts. 
* Note that the PIL we be lowered indirectly via sched_throw(NULL) @@ -465,7 +464,9 @@ cpu_mp_bootstrap(struct pcpu *pc) smp_cpus++; KASSERT(curthread != NULL, ("%s: curthread", __func__)); - PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu)); + ocpus = all_cpus; + CPU_CLR(curcpu, &ocpus); + PCPU_SET(other_cpus, ocpus); printf("SMP: AP CPU #%d Launched!\n", curcpu); csa->csa_count--; @@ -484,14 +485,23 @@ cpu_mp_bootstrap(struct pcpu *pc) void cpu_mp_shutdown(void) { + cpuset_t cpus; int i; critical_enter(); - shutdown_cpus = PCPU_GET(other_cpus); - if (stopped_cpus != PCPU_GET(other_cpus)) /* XXX */ - stop_cpus(stopped_cpus ^ PCPU_GET(other_cpus)); + shutdown_cpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &shutdown_cpus); + cpus = shutdown_cpus; + + /* XXX: Stop all the CPUs which aren't already. */ + if (CPU_CMP(&stopped_cpus, &cpus)) { + + /* cpus is just a flat "on" mask without curcpu. */ + CPU_NAND(&cpus, &stopped_cpus); + stop_cpus(cpus); + } i = 0; - while (shutdown_cpus != 0) { + while (!CPU_EMPTY(&shutdown_cpus)) { if (i++ > 100000) { printf("timeout shutting down CPUs.\n"); break; @@ -509,20 +519,24 @@ cpu_ipi_ast(struct trapframe *tf) static void cpu_ipi_stop(struct trapframe *tf) { + u_int cpuid; CTR2(KTR_SMP, "%s: stopped %d", __func__, curcpu); + sched_pin(); savectx(&stoppcbs[curcpu]); - atomic_set_acq_int(&stopped_cpus, PCPU_GET(cpumask)); - while ((started_cpus & PCPU_GET(cpumask)) == 0) { - if ((shutdown_cpus & PCPU_GET(cpumask)) != 0) { - atomic_clear_int(&shutdown_cpus, PCPU_GET(cpumask)); + cpuid = PCPU_GET(cpuid); + CPU_SET_ATOMIC(cpuid, &stopped_cpus); + while (!CPU_ISSET(cpuid, &started_cpus)) { + if (CPU_ISSET(cpuid, &shutdown_cpus)) { + CPU_CLR_ATOMIC(cpuid, &shutdown_cpus); (void)intr_disable(); for (;;) ; } } - atomic_clear_rel_int(&started_cpus, PCPU_GET(cpumask)); - atomic_clear_rel_int(&stopped_cpus, PCPU_GET(cpumask)); + CPU_CLR_ATOMIC(cpuid, &started_cpus); + CPU_CLR_ATOMIC(cpuid, &stopped_cpus); + sched_unpin(); CTR2(KTR_SMP, "%s: 
restarted %d", __func__, curcpu); } @@ -551,13 +565,13 @@ cpu_ipi_hardclock(struct trapframe *tf) } static void -spitfire_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +spitfire_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { u_int cpu; - while (cpus) { - cpu = ffs(cpus) - 1; - cpus &= ~(1 << cpu); + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + cpu--; + CPU_CLR(cpu, &cpus); spitfire_ipi_single(cpu, d0, d1, d2); } } @@ -657,20 +671,21 @@ cheetah_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2) } static void -cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +cheetah_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { + char pbuf[CPUSETBUFSIZ]; register_t s; u_long ids; u_int bnp; u_int cpu; int i; - KASSERT((cpus & (1 << curcpu)) == 0, - ("%s: CPU can't IPI itself", __func__)); + KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself", + __func__)); KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_CHEETAH_ALL_BUSY) == 0, ("%s: outstanding dispatch", __func__)); - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; ids = 0; for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) { @@ -681,7 +696,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) membar(Sync); bnp = 0; for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] << IDC_ITID_SHIFT) | bnp << IDC_BN_SHIFT, ASI_SDB_INTR_W, 0); @@ -698,9 +713,9 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) return; bnp = 0; for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { if ((ids & (IDR_NACK << (2 * bnp))) == 0) - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); bnp++; } } @@ -709,7 +724,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) * CPUs we actually haven't tried to send an IPI to, * but which apparently can be safely ignored. 
*/ - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; /* * Leave interrupts enabled for a bit before retrying @@ -719,11 +734,11 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) DELAY(2 * mp_ncpus); } if (kdb_active != 0 || panicstr != NULL) - printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n", - __func__, cpus, ids); + printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); else - panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)", - __func__, cpus, ids); + panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); } static void @@ -772,19 +787,20 @@ jalapeno_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2) } static void -jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +jalapeno_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { + char pbuf[CPUSETBUFSIZ]; register_t s; u_long ids; u_int cpu; int i; - KASSERT((cpus & (1 << curcpu)) == 0, - ("%s: CPU can't IPI itself", __func__)); + KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself", + __func__)); KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_CHEETAH_ALL_BUSY) == 0, ("%s: outstanding dispatch", __func__)); - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; ids = 0; for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) { @@ -794,7 +810,7 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2); membar(Sync); for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] << IDC_ITID_SHIFT), ASI_SDB_INTR_W, 0); membar(Sync); @@ -808,10 +824,10 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) (IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0) return; for (cpu = 0; cpu < mp_ncpus; cpu++) - if ((cpus & (1 << cpu)) != 0) + if (CPU_ISSET(cpu, &cpus)) if ((ids & (IDR_NACK << (2 * cpuid_to_mid[cpu]))) == 0) - cpus &= 
~(1 << cpu); + CPU_CLR(cpu, &cpus); /* * Leave interrupts enabled for a bit before retrying * in order to avoid deadlocks if the other CPUs are @@ -820,9 +836,9 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) DELAY(2 * mp_ncpus); } if (kdb_active != 0 || panicstr != NULL) - printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n", - __func__, cpus, ids); + printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); else - panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)", - __func__, cpus, ids); + panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); } diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 09482b9..876dce3 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -664,7 +664,7 @@ pmap_bootstrap(u_int cpu_impl) pm = kernel_pmap; for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = TLB_CTX_KERNEL; - pm->pm_active = ~0; + CPU_FILL(&pm->pm_active); /* * Flush all non-locked TLB entries possibly left over by the @@ -1189,7 +1189,7 @@ pmap_pinit0(pmap_t pm) PMAP_LOCK_INIT(pm); for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = TLB_CTX_KERNEL; - pm->pm_active = 0; + CPU_ZERO(&pm->pm_active); pm->pm_tsb = NULL; pm->pm_tsb_obj = NULL; bzero(&pm->pm_stats, sizeof(pm->pm_stats)); @@ -1229,7 +1229,7 @@ pmap_pinit(pmap_t pm) mtx_lock_spin(&sched_lock); for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = -1; - pm->pm_active = 0; + CPU_ZERO(&pm->pm_active); mtx_unlock_spin(&sched_lock); VM_OBJECT_LOCK(pm->pm_tsb_obj); @@ -1278,7 +1278,7 @@ pmap_release(pmap_t pm) * to a kernel thread, leaving the pmap pointer unchanged. 
*/ mtx_lock_spin(&sched_lock); - SLIST_FOREACH(pc, &cpuhead, pc_allcpu) + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) if (pc->pc_pmap == pm) pc->pc_pmap = NULL; mtx_unlock_spin(&sched_lock); @@ -2217,11 +2217,10 @@ pmap_activate(struct thread *td) struct pmap *pm; int context; + critical_enter(); vm = td->td_proc->p_vmspace; pm = vmspace_pmap(vm); - mtx_lock_spin(&sched_lock); - context = PCPU_GET(tlb_ctx); if (context == PCPU_GET(tlb_ctx_max)) { tlb_flush_user(); @@ -2229,17 +2228,18 @@ pmap_activate(struct thread *td) } PCPU_SET(tlb_ctx, context + 1); + mtx_lock_spin(&sched_lock); pm->pm_context[curcpu] = context; - pm->pm_active |= PCPU_GET(cpumask); + CPU_SET(PCPU_GET(cpuid), &pm->pm_active); PCPU_SET(pmap, pm); + mtx_unlock_spin(&sched_lock); stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb); stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb); stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) & TLB_CXR_PGSZ_MASK) | context); flush(KERNBASE); - - mtx_unlock_spin(&sched_lock); + critical_exit(); } void diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S index ea13779..7515734 100644 --- a/sys/sparc64/sparc64/swtch.S +++ b/sys/sparc64/sparc64/swtch.S @@ -164,20 +164,29 @@ ENTRY(cpu_switch) * If there was no non-kernel pmap, don't try to deactivate it. */ brz,pn %l2, 3f - lduw [PCPU(CPUMASK)], %l4 + lduw [PCPU(CPUID)], %l3 /* * Mark the pmap of the last non-kernel vmspace to run as no longer * active on this CPU. */ - lduw [%l2 + PM_ACTIVE], %l3 - andn %l3, %l4, %l3 - stw %l3, [%l2 + PM_ACTIVE] + mov _NCPUBITS, %l5 + mov %g0, %y + udiv %l3, %l5, %l6 + srl %l6, 0, %l4 + sllx %l4, PTR_SHIFT, %l4 + add %l4, PM_ACTIVE, %l4 + smul %l6, %l5, %l5 + sub %l3, %l5, %l5 + mov 1, %l6 + sllx %l6, %l5, %l5 + ldx [%l2 + %l4], %l6 + andn %l6, %l5, %l6 + stx %l6, [%l2 + %l4] /* * Take away its context number. 
*/ - lduw [PCPU(CPUID)], %l3 sllx %l3, INT_SHIFT, %l3 add %l2, PM_CONTEXT, %l4 mov -1, %l5 @@ -210,18 +219,27 @@ ENTRY(cpu_switch) /* * Set the new context number in the pmap. */ - lduw [PCPU(CPUID)], %i4 - sllx %i4, INT_SHIFT, %i4 + lduw [PCPU(CPUID)], %l3 + sllx %l3, INT_SHIFT, %i4 add %l1, PM_CONTEXT, %i5 stw %i3, [%i4 + %i5] /* * Mark the pmap as active on this CPU. */ - lduw [%l1 + PM_ACTIVE], %i4 - lduw [PCPU(CPUMASK)], %i5 - or %i4, %i5, %i4 - stw %i4, [%l1 + PM_ACTIVE] + mov _NCPUBITS, %l5 + mov %g0, %y + udiv %l3, %l5, %l6 + srl %l6, 0, %l4 + sllx %l4, PTR_SHIFT, %l4 + add %l4, PM_ACTIVE, %l4 + smul %l6, %l5, %l5 + sub %l3, %l5, %l5 + mov 1, %l6 + sllx %l6, %l5, %l5 + ldx [%l1 + %l4], %l6 + or %l6, %l5, %l6 + stx %l6, [%l1 + %l4] /* * Make note of the change in pmap. diff --git a/sys/sparc64/sparc64/tlb.c b/sys/sparc64/sparc64/tlb.c index 990c777..70b2823 100644 --- a/sys/sparc64/sparc64/tlb.c +++ b/sys/sparc64/sparc64/tlb.c @@ -80,7 +80,7 @@ tlb_context_demap(struct pmap *pm) PMAP_STATS_INC(tlb_ncontext_demap); cookie = ipi_tlb_context_demap(pm); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_ISSET(PCPU_GET(cpuid), &pm->pm_active)) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_context_demap: inactive pmap?")); stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0); @@ -101,7 +101,7 @@ tlb_page_demap(struct pmap *pm, vm_offset_t va) PMAP_STATS_INC(tlb_npage_demap); cookie = ipi_tlb_page_demap(pm, va); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_ISSET(PCPU_GET(cpuid), &pm->pm_active)) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_page_demap: inactive pmap?")); if (pm == kernel_pmap) @@ -128,7 +128,7 @@ tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) PMAP_STATS_INC(tlb_nrange_demap); cookie = ipi_tlb_range_demap(pm, start, end); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_ISSET(PCPU_GET(cpuid), &pm->pm_active)) { KASSERT(pm->pm_context[curcpu] != 
-1, ("tlb_range_demap: inactive pmap?")); if (pm == kernel_pmap) diff --git a/sys/sys/_cpuset.h b/sys/sys/_cpuset.h new file mode 100644 index 0000000..42a0a6a --- /dev/null +++ b/sys/sys/_cpuset.h @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org> + * All rights reserved. + * + * Copyright (c) 2008 Nokia Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _SYS__CPUSET_H_ +#define _SYS__CPUSET_H_ + +#ifdef _KERNEL +#define CPU_SETSIZE MAXCPU +#endif + +#define CPU_MAXSIZE 128 + +#ifndef CPU_SETSIZE +#define CPU_SETSIZE CPU_MAXSIZE +#endif + +#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ +#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) + +typedef struct _cpuset { + long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; +} cpuset_t; + +#endif /* !_SYS__CPUSET_H_ */ diff --git a/sys/sys/_rmlock.h b/sys/sys/_rmlock.h index 75a159c..15d6c49 100644 --- a/sys/sys/_rmlock.h +++ b/sys/sys/_rmlock.h @@ -45,7 +45,7 @@ LIST_HEAD(rmpriolist,rm_priotracker); struct rmlock { struct lock_object lock_object; - volatile cpumask_t rm_writecpus; + volatile cpuset_t rm_writecpus; LIST_HEAD(,rm_priotracker) rm_activeReaders; union { struct mtx _rm_lock_mtx; diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 31fd34d..08e1582 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -280,6 +280,9 @@ struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...) __printflike(2, 3); int make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev, const char *_fmt, ...) 
__printflike(4, 5); +int make_dev_physpath_alias(int _flags, struct cdev **_cdev, + struct cdev *_pdev, struct cdev *_old_alias, + const char *_physpath); void dev_lock(void); void dev_unlock(void); void setconf(void); @@ -332,6 +335,7 @@ struct dumperinfo { int set_dumper(struct dumperinfo *); int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t); void dumpsys(struct dumperinfo *); +int doadump(boolean_t); extern int dumping; /* system is dumping */ #endif /* _KERNEL */ diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h index 854fa29..030a874 100644 --- a/sys/sys/cpuset.h +++ b/sys/sys/cpuset.h @@ -32,22 +32,9 @@ #ifndef _SYS_CPUSET_H_ #define _SYS_CPUSET_H_ -#ifdef _KERNEL -#define CPU_SETSIZE MAXCPU -#endif +#include <sys/_cpuset.h> -#define CPU_MAXSIZE 128 - -#ifndef CPU_SETSIZE -#define CPU_SETSIZE CPU_MAXSIZE -#endif - -#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ -#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) - -typedef struct _cpuset { - long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; -} cpuset_t; +#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS) #define __cpuset_mask(n) ((long)1 << ((n) % _NCPUBITS)) #define CPU_CLR(n, p) ((p)->__bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n)) @@ -66,6 +53,11 @@ typedef struct _cpuset { (p)->__bits[__i] = -1; \ } while (0) +#define CPU_SETOF(n, p) do { \ + CPU_ZERO(p); \ + ((p)->__bits[(n)/_NCPUBITS] = __cpuset_mask(n)); \ +} while (0) + /* Is p empty. */ #define CPU_EMPTY(p) __extension__ ({ \ __size_t __i; \ @@ -75,6 +67,15 @@ typedef struct _cpuset { __i == _NCPUWORDS; \ }) +/* Is p full set. */ +#define CPU_ISFULLSET(p) __extension__ ({ \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + if ((p)->__bits[__i] != (long)-1) \ + break; \ + __i == _NCPUWORDS; \ +}) + /* Is c a subset of p. 
*/ #define CPU_SUBSET(p, c) __extension__ ({ \ __size_t __i; \ @@ -124,6 +125,33 @@ typedef struct _cpuset { (d)->__bits[__i] &= ~(s)->__bits[__i]; \ } while (0) +#define CPU_CLR_ATOMIC(n, p) \ + atomic_clear_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_SET_ATOMIC(n, p) \ + atomic_set_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_OR_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_set_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_NAND_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_clear_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_COPY_STORE_REL(f, t) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_store_rel_long(&(t)->__bits[__i], \ + (f)->__bits[__i]); \ +} while (0) + /* * Valid cpulevel_t values. */ @@ -184,6 +212,9 @@ void cpuset_rel(struct cpuset *); int cpuset_setthread(lwpid_t id, cpuset_t *); int cpuset_create_root(struct prison *, struct cpuset **); int cpuset_setproc_update_set(struct proc *, struct cpuset *); +int cpusetobj_ffs(const cpuset_t *); +char *cpusetobj_strprint(char *, const cpuset_t *); +int cpusetobj_strscan(cpuset_t *, const char *); #else __BEGIN_DECLS diff --git a/sys/sys/disk.h b/sys/sys/disk.h index ba25c89..112eed0 100644 --- a/sys/sys/disk.h +++ b/sys/sys/disk.h @@ -116,4 +116,12 @@ void disk_err(struct bio *bp, const char *what, int blkdone, int nl); * This should be a multiple of the sector size. */ +#define DIOCGPHYSPATH _IOR('d', 141, char[MAXPATHLEN]) + /* + * Get a string defining the physical path for a given provider. + * This has similar rules to ident, but is intended to uniquely + * identify the physical location of the device, not the current + * occupant of that location. 
+ */ + #endif /* _SYS_DISK_H_ */ diff --git a/sys/sys/diskpc98.h b/sys/sys/diskpc98.h index c20ca6c..66bda90 100644 --- a/sys/sys/diskpc98.h +++ b/sys/sys/diskpc98.h @@ -36,8 +36,11 @@ #include <sys/ioccom.h> #define DOSBBSECTOR 0 /* DOS boot block relative sector number */ +#undef DOSPARTOFF #define DOSPARTOFF 0 +#undef DOSPARTSIZE #define DOSPARTSIZE 32 +#undef NDOSPART #define NDOSPART 16 #define DOSMAGICOFFSET 510 #define DOSMAGIC 0xAA55 @@ -52,6 +55,7 @@ #define DOSMID_386BSD (PC98_MID_386BSD | PC98_MID_BOOTABLE) #define DOSSID_386BSD (PC98_SID_386BSD | PC98_SID_ACTIVE) +#undef DOSPTYP_386BSD #define DOSPTYP_386BSD (DOSSID_386BSD << 8 | DOSMID_386BSD) struct pc98_partition { diff --git a/sys/sys/dtrace_bsd.h b/sys/sys/dtrace_bsd.h index 15e1be9..eb348b2 100644 --- a/sys/sys/dtrace_bsd.h +++ b/sys/sys/dtrace_bsd.h @@ -100,54 +100,73 @@ typedef void (*dtrace_malloc_probe_func_t)(u_int32_t, uintptr_t arg0, extern dtrace_malloc_probe_func_t dtrace_malloc_probe; -/* dtnfsclient NFSv3 access cache provider hooks. */ +/* dtnfsclient NFSv[34] access cache provider hooks. 
*/ typedef void (*dtrace_nfsclient_accesscache_flush_probe_func_t)(uint32_t, struct vnode *); extern dtrace_nfsclient_accesscache_flush_probe_func_t dtrace_nfsclient_accesscache_flush_done_probe; +extern dtrace_nfsclient_accesscache_flush_probe_func_t + dtrace_nfscl_accesscache_flush_done_probe; typedef void (*dtrace_nfsclient_accesscache_get_probe_func_t)(uint32_t, struct vnode *, uid_t, uint32_t); extern dtrace_nfsclient_accesscache_get_probe_func_t dtrace_nfsclient_accesscache_get_hit_probe, dtrace_nfsclient_accesscache_get_miss_probe; +extern dtrace_nfsclient_accesscache_get_probe_func_t + dtrace_nfscl_accesscache_get_hit_probe, + dtrace_nfscl_accesscache_get_miss_probe; typedef void (*dtrace_nfsclient_accesscache_load_probe_func_t)(uint32_t, struct vnode *, uid_t, uint32_t, int); extern dtrace_nfsclient_accesscache_load_probe_func_t dtrace_nfsclient_accesscache_load_done_probe; +extern dtrace_nfsclient_accesscache_load_probe_func_t + dtrace_nfscl_accesscache_load_done_probe; -/* dtnfsclient NFSv[23] attribute cache provider hooks. */ +/* dtnfsclient NFSv[234] attribute cache provider hooks. 
*/ typedef void (*dtrace_nfsclient_attrcache_flush_probe_func_t)(uint32_t, struct vnode *); extern dtrace_nfsclient_attrcache_flush_probe_func_t dtrace_nfsclient_attrcache_flush_done_probe; +extern dtrace_nfsclient_attrcache_flush_probe_func_t + dtrace_nfscl_attrcache_flush_done_probe; typedef void (*dtrace_nfsclient_attrcache_get_hit_probe_func_t)(uint32_t, struct vnode *, struct vattr *); extern dtrace_nfsclient_attrcache_get_hit_probe_func_t dtrace_nfsclient_attrcache_get_hit_probe; +extern dtrace_nfsclient_attrcache_get_hit_probe_func_t + dtrace_nfscl_attrcache_get_hit_probe; typedef void (*dtrace_nfsclient_attrcache_get_miss_probe_func_t)(uint32_t, struct vnode *); extern dtrace_nfsclient_attrcache_get_miss_probe_func_t dtrace_nfsclient_attrcache_get_miss_probe; +extern dtrace_nfsclient_attrcache_get_miss_probe_func_t + dtrace_nfscl_attrcache_get_miss_probe; typedef void (*dtrace_nfsclient_attrcache_load_probe_func_t)(uint32_t, struct vnode *, struct vattr *, int); extern dtrace_nfsclient_attrcache_load_probe_func_t dtrace_nfsclient_attrcache_load_done_probe; +extern dtrace_nfsclient_attrcache_load_probe_func_t + dtrace_nfscl_attrcache_load_done_probe; -/* dtnfsclient NFSv[23] RPC provider hooks. */ +/* dtnfsclient NFSv[234] RPC provider hooks. 
*/ typedef void (*dtrace_nfsclient_nfs23_start_probe_func_t)(uint32_t, struct vnode *, struct mbuf *, struct ucred *, int); extern dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfsclient_nfs23_start_probe; +extern dtrace_nfsclient_nfs23_start_probe_func_t + dtrace_nfscl_nfs234_start_probe; typedef void (*dtrace_nfsclient_nfs23_done_probe_func_t)(uint32_t, struct vnode *, struct mbuf *, struct ucred *, int, int); extern dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfsclient_nfs23_done_probe; +extern dtrace_nfsclient_nfs23_done_probe_func_t + dtrace_nfscl_nfs234_done_probe; /* * Functions which allow the dtrace module to check that the kernel diff --git a/sys/sys/ktr.h b/sys/sys/ktr.h index 3b78101..7885b22 100644 --- a/sys/sys/ktr.h +++ b/sys/sys/ktr.h @@ -97,6 +97,9 @@ #ifndef LOCORE +#include <sys/param.h> +#include <sys/_cpuset.h> + struct ktr_entry { u_int64_t ktr_timestamp; int ktr_cpu; @@ -107,7 +110,7 @@ struct ktr_entry { u_long ktr_parms[KTR_PARMS]; }; -extern int ktr_cpumask; +extern cpuset_t ktr_cpumask; extern int ktr_mask; extern int ktr_entries; extern int ktr_verbose; diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 654f145..c2b7081 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -199,7 +199,9 @@ struct mbuf { #define M_PROTO6 0x00080000 /* protocol-specific */ #define M_PROTO7 0x00100000 /* protocol-specific */ #define M_PROTO8 0x00200000 /* protocol-specific */ -#define M_FLOWID 0x00400000 /* flowid is valid */ +#define M_FLOWID 0x00400000 /* deprecated: flowid is valid */ +#define M_HASHTYPEBITS 0x0F000000 /* mask of bits holding flowid hash type */ + /* * For RELENG_{6,7} steal these flags for limited multiple routing table * support. In RELENG_8 and beyond, use just one flag and a tag. @@ -215,11 +217,45 @@ struct mbuf { (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8) /* + * Network interface cards are able to hash protocol fields (such as IPv4 + * addresses and TCP port numbers) to classify packets into flows.
These flows + * can then be used to maintain ordering while delivering packets to the OS + * via parallel input queues, as well as to provide a stateless affinity + * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set + * m_flag fields to indicate how the hash should be interpreted by the + * network stack. + * + * Most NICs support RSS, which provides ordering and explicit affinity, and + * use the hash m_flag bits to indicate what header fields were covered by + * the hash. M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations + * that provide an opaque flow identifier, allowing for ordering and + * distribution without explicit affinity. + */ +#define M_HASHTYPE_SHIFT 24 +#define M_HASHTYPE_NONE 0x0 +#define M_HASHTYPE_RSS_IPV4 0x1 /* IPv4 2-tuple */ +#define M_HASHTYPE_RSS_TCP_IPV4 0x2 /* TCPv4 4-tuple */ +#define M_HASHTYPE_RSS_IPV6 0x3 /* IPv6 2-tuple */ +#define M_HASHTYPE_RSS_TCP_IPV6 0x4 /* TCPv6 4-tuple */ +#define M_HASHTYPE_RSS_IPV6_EX 0x5 /* IPv6 2-tuple + ext hdrs */ +#define M_HASHTYPE_RSS_TCP_IPV6_EX 0x6 /* TCPv6 4-tuple + ext hdrs */ +#define M_HASHTYPE_OPAQUE 0xf /* ordering, not affinity */ + +#define M_HASHTYPE_CLEAR(m) (m)->m_flags &= ~(M_HASHTYPEBITS) +#define M_HASHTYPE_GET(m) (((m)->m_flags & M_HASHTYPEBITS) >> \ + M_HASHTYPE_SHIFT) +#define M_HASHTYPE_SET(m, v) do { \ + (m)->m_flags &= ~M_HASHTYPEBITS; \ + (m)->m_flags |= ((v) << M_HASHTYPE_SHIFT); \ +} while (0) +#define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v)) + +/* * Flags preserved when copying m_pkthdr. */ #define M_COPYFLAGS \ (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\ - M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB) + M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB|M_HASHTYPEBITS) /* * External buffer types: identify ext_buf type.
diff --git a/sys/sys/msgbuf.h b/sys/sys/msgbuf.h index 8435c68..67f80a5 100644 --- a/sys/sys/msgbuf.h +++ b/sys/sys/msgbuf.h @@ -33,15 +33,21 @@ #ifndef _SYS_MSGBUF_H_ #define _SYS_MSGBUF_H_ +#include <sys/lock.h> +#include <sys/mutex.h> + struct msgbuf { - char *msg_ptr; /* pointer to buffer */ + char *msg_ptr; /* pointer to buffer */ #define MSG_MAGIC 0x063062 - u_int msg_magic; - u_int msg_size; /* size of buffer area */ - u_int msg_wseq; /* write sequence number */ - u_int msg_rseq; /* read sequence number */ - u_int msg_cksum; /* checksum of contents */ - u_int msg_seqmod; /* range for sequence numbers */ + u_int msg_magic; + u_int msg_size; /* size of buffer area */ + u_int msg_wseq; /* write sequence number */ + u_int msg_rseq; /* read sequence number */ + u_int msg_cksum; /* checksum of contents */ + u_int msg_seqmod; /* range for sequence numbers */ + int msg_lastpri; /* saved priority value */ + int msg_needsnl; /* set when newline needed */ + struct mtx msg_lock; /* mutex to protect the buffer */ }; /* Normalise a sequence number or a difference between sequence numbers. */ @@ -59,6 +65,7 @@ extern struct mtx msgbuf_lock; void msgbufinit(void *ptr, int size); void msgbuf_addchar(struct msgbuf *mbp, int c); +void msgbuf_addstr(struct msgbuf *mbp, int pri, char *str, int filter_cr); void msgbuf_clear(struct msgbuf *mbp); void msgbuf_copy(struct msgbuf *src, struct msgbuf *dst); int msgbuf_getbytes(struct msgbuf *mbp, char *buf, int buflen); diff --git a/sys/sys/param.h b/sys/sys/param.h index 838769f..589a0f7 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -319,4 +319,10 @@ __END_DECLS #define member2struct(s, m, x) \ ((struct s *)(void *)((char *)(x) - offsetof(struct s, m))) +/* + * Access a variable length array that has been declared as a fixed + * length array. 
+ */ +#define __PAST_END(array, offset) (((typeof(*(array)) *)(array))[offset]) + #endif /* _SYS_PARAM_H_ */ diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h index ad1cf33..e6044a7 100644 --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -37,6 +37,7 @@ #error "no assembler-serviceable parts inside" #endif +#include <sys/_cpuset.h> #include <sys/queue.h> #include <sys/vmmeter.h> #include <sys/resource.h> @@ -162,9 +163,7 @@ struct pcpu { uint64_t pc_switchtime; /* cpu_ticks() at last csw */ int pc_switchticks; /* `ticks' at last csw */ u_int pc_cpuid; /* This cpu number */ - cpumask_t pc_cpumask; /* This cpu mask */ - cpumask_t pc_other_cpus; /* Mask of all other cpus */ - SLIST_ENTRY(pcpu) pc_allcpu; + STAILQ_ENTRY(pcpu) pc_allcpu; struct lock_list_entry *pc_spinlocks; #ifdef KTR char pc_name[PCPU_NAME_LEN]; /* String name for KTR */ @@ -197,11 +196,23 @@ struct pcpu { * if only to make kernel debugging easier. */ PCPU_MD_FIELDS; + + /* + * XXX + * For the time being, keep the cpuset_t objects as the very last + * members of the structure. + * They are actually tagged to be removed soon, but as long as this + * does not happen, it is necessary to find a way to implement + * easily interfaces to userland and leaving them last makes that + * possible.
+ */ + cpuset_t pc_cpumask; /* This cpu mask */ + cpuset_t pc_other_cpus; /* Mask of all other cpus */ } __aligned(CACHE_LINE_SIZE); #ifdef _KERNEL -SLIST_HEAD(cpuhead, pcpu); +STAILQ_HEAD(cpuhead, pcpu); extern struct cpuhead cpuhead; extern struct pcpu *cpuid_to_pcpu[MAXCPU]; diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h index 3e8c1ef..796c4ca 100644 --- a/sys/sys/pmckern.h +++ b/sys/sys/pmckern.h @@ -76,7 +76,7 @@ extern int (*pmc_intr)(int _cpu, struct trapframe *_frame); extern struct sx pmc_sx; /* Per-cpu flags indicating availability of sampling data */ -extern volatile cpumask_t pmc_cpumask; +extern volatile cpuset_t pmc_cpumask; /* Count of system-wide sampling PMCs in existence */ extern volatile int pmc_ss_count; @@ -122,7 +122,7 @@ do { \ #define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0) /* Check if a CPU has recorded samples. */ -#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C)))) +#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(CPU_ISSET(C, &pmc_cpumask))) /* * Helper functions. diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 4d7b540..c54a956 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -503,6 +503,8 @@ struct proc { /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid pid_t p_oppid; /* (c + e) Save ppid in ptrace. XXX */ + int p_dbg_child; /* (c + e) # of debugged children in + ptrace. */ struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ struct itimerval p_realtimer; /* (c) Alarm timer. 
*/ diff --git a/sys/sys/racct.h b/sys/sys/racct.h index cbd96a9..222dbce 100644 --- a/sys/sys/racct.h +++ b/sys/sys/racct.h @@ -49,27 +49,25 @@ struct ucred; */ #define RACCT_UNDEFINED -1 #define RACCT_CPU 0 -#define RACCT_FSIZE 1 -#define RACCT_DATA 2 -#define RACCT_STACK 3 -#define RACCT_CORE 4 -#define RACCT_RSS 5 -#define RACCT_MEMLOCK 6 -#define RACCT_NPROC 7 -#define RACCT_NOFILE 8 -#define RACCT_SBSIZE 9 -#define RACCT_VMEM 10 -#define RACCT_NPTS 11 -#define RACCT_SWAP 12 -#define RACCT_NTHR 13 -#define RACCT_MSGQQUEUED 14 -#define RACCT_MSGQSIZE 15 -#define RACCT_NMSGQ 16 -#define RACCT_NSEM 17 -#define RACCT_NSEMOP 18 -#define RACCT_NSHM 19 -#define RACCT_SHMSIZE 20 -#define RACCT_WALLCLOCK 21 +#define RACCT_DATA 1 +#define RACCT_STACK 2 +#define RACCT_CORE 3 +#define RACCT_RSS 4 +#define RACCT_MEMLOCK 5 +#define RACCT_NPROC 6 +#define RACCT_NOFILE 7 +#define RACCT_VMEM 8 +#define RACCT_NPTS 9 +#define RACCT_SWAP 10 +#define RACCT_NTHR 11 +#define RACCT_MSGQQUEUED 12 +#define RACCT_MSGQSIZE 13 +#define RACCT_NMSGQ 14 +#define RACCT_NSEM 15 +#define RACCT_NSEMOP 16 +#define RACCT_NSHM 17 +#define RACCT_SHMSIZE 18 +#define RACCT_WALLCLOCK 19 #define RACCT_MAX RACCT_WALLCLOCK /* diff --git a/sys/sys/smp.h b/sys/sys/smp.h index f8cce5f..66e8008 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -16,6 +16,8 @@ #ifndef LOCORE +#include <sys/cpuset.h> + /* * Topology of a NUMA or HTT system. * @@ -32,7 +34,7 @@ struct cpu_group { struct cpu_group *cg_parent; /* Our parent group. */ struct cpu_group *cg_child; /* Optional children groups. */ - cpumask_t cg_mask; /* Mask of cpus in this group. */ + cpuset_t cg_mask; /* Mask of cpus in this group. */ int32_t cg_count; /* Count of cpus in this group. */ int16_t cg_children; /* Number of children groups. */ int8_t cg_level; /* Shared cache level. 
*/ @@ -71,10 +73,10 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu); extern void (*cpustop_restartfunc)(void); extern int smp_active; extern int smp_cpus; -extern volatile cpumask_t started_cpus; -extern volatile cpumask_t stopped_cpus; -extern cpumask_t hlt_cpus_mask; -extern cpumask_t logical_cpus_mask; +extern volatile cpuset_t started_cpus; +extern volatile cpuset_t stopped_cpus; +extern cpuset_t hlt_cpus_mask; +extern cpuset_t logical_cpus_mask; #endif /* SMP */ extern u_int mp_maxid; @@ -82,14 +84,14 @@ extern int mp_maxcpus; extern int mp_ncpus; extern volatile int smp_started; -extern cpumask_t all_cpus; +extern cpuset_t all_cpus; /* * Macro allowing us to determine whether a CPU is absent at any given * time, thus permitting us to configure sparse maps of cpuid-dependent * (per-CPU) structures. */ -#define CPU_ABSENT(x_cpu) ((all_cpus & (1 << (x_cpu))) == 0) +#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus)) /* * Macros to iterate over non-absent CPUs. 
CPU_FOREACH() takes an @@ -158,11 +160,11 @@ void cpu_mp_setmaxid(void); void cpu_mp_start(void); void forward_signal(struct thread *); -int restart_cpus(cpumask_t); -int stop_cpus(cpumask_t); -int stop_cpus_hard(cpumask_t); +int restart_cpus(cpuset_t); +int stop_cpus(cpuset_t); +int stop_cpus_hard(cpuset_t); #if defined(__amd64__) -int suspend_cpus(cpumask_t); +int suspend_cpus(cpuset_t); #endif void smp_rendezvous_action(void); extern struct mtx smp_ipi_mtx; @@ -173,7 +175,7 @@ void smp_rendezvous(void (*)(void *), void (*)(void *), void (*)(void *), void *arg); -void smp_rendezvous_cpus(cpumask_t, +void smp_rendezvous_cpus(cpuset_t, void (*)(void *), void (*)(void *), void (*)(void *), diff --git a/sys/sys/soundcard.h b/sys/sys/soundcard.h index c4cfc27..a6817df 100644 --- a/sys/sys/soundcard.h +++ b/sys/sys/soundcard.h @@ -311,7 +311,8 @@ typedef struct _snd_capabilities { * IOCTL Commands for /dev/sequencer */ -#define SNDCTL_SEQ_RESET _IO ('Q', 0) +#define SNDCTL_SEQ_HALT _IO ('Q', 0) +#define SNDCTL_SEQ_RESET SNDCTL_SEQ_HALT /* Historic interface */ #define SNDCTL_SEQ_SYNC _IO ('Q', 1) #define SNDCTL_SYNTH_INFO _IOWR('Q', 2, struct synth_info) #define SNDCTL_SEQ_CTRLRATE _IOWR('Q', 3, int) /* Set/get timer res.(hz) */ diff --git a/sys/sys/types.h b/sys/sys/types.h index 4bc1a8d..cb513af 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -99,7 +99,6 @@ typedef __clockid_t clockid_t; #define _CLOCKID_T_DECLARED #endif -typedef __cpumask_t cpumask_t; typedef __critical_t critical_t; /* Critical section value */ typedef __int64_t daddr_t; /* disk address */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index bfe94fb..40f9a6a 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -302,6 +302,7 @@ struct vattr { #define IO_EXT 0x0400 /* operate on external attributes */ #define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ +#define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */ #define 
IO_SEQMAX 0x7F /* seq heuristic max value */ #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */ diff --git a/sys/teken/libteken/teken.3 b/sys/teken/libteken/teken.3 index bcc0db0..2a9b291 100644 --- a/sys/teken/libteken/teken.3 +++ b/sys/teken/libteken/teken.3 @@ -185,22 +185,6 @@ function switches terminal emulation to which is used by versions of .Fx prior to 9.0. -.Sh SECURITY CONSIDERATIONS -The -.Fn tf_respond -callback is used to respond to device status requests commands generated -by an application. -In the past, there have been various security issues, where a malicious -application sends a device status request before termination, causing -the generated response to be interpreted by applications such as -.Xr sh 1 . -.Pp -.Nm -only implements a small subset of responses which are unlikely to cause -any harm. -Still, it is advised to leave -.Fn tf_respond -unimplemented. .Sh SEE ALSO .Xr ncurses 3 , .Xr termcap 3 , @@ -218,3 +202,19 @@ the library appeared in userspace. .Sh AUTHORS .An Ed Schouten Aq ed@FreeBSD.org +.Sh SECURITY CONSIDERATIONS +The +.Fn tf_respond +callback is used to respond to device status requests commands generated +by an application. +In the past, there have been various security issues, where a malicious +application sends a device status request before termination, causing +the generated response to be interpreted by applications such as +.Xr sh 1 . +.Pp +.Nm +only implements a small subset of responses which are unlikely to cause +any harm. +Still, it is advised to leave +.Fn tf_respond +unimplemented. 
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index e60514d..6d27ace 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -116,7 +116,6 @@ static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int, static ino_t ffs_dirpref(struct inode *); static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t, int, int); -static void ffs_fserr(struct fs *, ino_t, char *); static ufs2_daddr_t ffs_hashalloc (struct inode *, u_int, ufs2_daddr_t, int, int, allocfcn_t *); static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int, @@ -217,13 +216,13 @@ nospace: (void) chkdq(ip, -btodb(size), cred, FORCE); UFS_LOCK(ump); #endif - if (reclaimed == 0) { + if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) { reclaimed = 1; softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT); goto retry; } UFS_UNLOCK(ump); - if (ppsratecheck(&lastfail, &curfail, 1)) { + if (reclaimed > 0 && ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); @@ -391,7 +390,7 @@ retry: bp->b_blkno = fsbtodb(fs, bno); if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); delta = btodb(nsize - osize); DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) @@ -418,21 +417,21 @@ nospace: /* * no space available */ - if (reclaimed == 0) { + if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) { reclaimed = 1; - softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); if (bp) { brelse(bp); bp = NULL; } UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); goto retry; } UFS_UNLOCK(ump); if (bp) brelse(bp); - if (ppsratecheck(&lastfail, &curfail, 1)) { + if (reclaimed > 0 && ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem is 
full\n", fs->fs_fsmnt); @@ -671,7 +670,7 @@ ffs_reallocblks_ufs1(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number, NULL); + fs->fs_bsize, ip->i_number, vp->v_type, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -879,7 +878,7 @@ ffs_reallocblks_ufs2(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number, NULL); + fs->fs_bsize, ip->i_number, vp->v_type, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -1022,7 +1021,7 @@ dup_alloc: (*vpp)->v_op = &ffs_vnodeops1; return (0); noinodes: - if (fs->fs_pendinginodes > 0 && reclaimed == 0) { + if (reclaimed == 0) { reclaimed = 1; softdep_request_cleanup(fs, pvp, cred, FLUSH_INODES_WAIT); goto retry; @@ -1830,7 +1829,7 @@ gotit: } UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode); bdwrite(bp); if (ibp != NULL) bawrite(ibp); @@ -1873,10 +1872,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd) /* devvp is a normal disk device */ dev = devvp->v_rdev; cgblkno = fsbtodb(fs, cgtod(fs, cg)); - ASSERT_VOP_LOCKED(devvp, "ffs_blkfree"); - if ((devvp->v_vflag & VV_COPYONWRITE) && - ffs_snapblkfree(fs, devvp, bno, size, inum)) - return; + ASSERT_VOP_LOCKED(devvp, "ffs_blkfree_cg"); } #ifdef INVARIANTS if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || @@ -1884,7 +1880,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd) printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, size, fs->fs_fsmnt); - panic("ffs_blkfree: bad size"); + panic("ffs_blkfree_cg: bad size"); } #endif if ((u_int)bno >= fs->fs_size) { @@ -1918,7 +1914,7 @@ ffs_blkfree_cg(ump, fs, devvp, 
bno, size, inum, dephd) } printf("dev = %s, block = %jd, fs = %s\n", devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); - panic("ffs_blkfree: freeing free block"); + panic("ffs_blkfree_cg: freeing free block"); } ffs_setblock(fs, blksfree, fragno); ffs_clusteracct(fs, cgp, fragno, 1); @@ -1941,7 +1937,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd) printf("dev = %s, block = %jd, fs = %s\n", devtoname(dev), (intmax_t)(bno + i), fs->fs_fsmnt); - panic("ffs_blkfree: freeing free frag"); + panic("ffs_blkfree_cg: freeing free frag"); } setbit(blksfree, cgbno + i); } @@ -2017,19 +2013,31 @@ ffs_blkfree_trim_completed(bip) } void -ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd) +ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + enum vtype vtype; struct workhead *dephd; { struct mount *mp; struct bio *bip; struct ffs_blkfree_trim_params *tp; + /* + * Check to see if a snapshot wants to claim the block. + * Check that devvp is a normal disk device, not a snapshot, + * it has a snapshot(s) associated with it, and one of the + * snapshots wants to claim the block. + */ + if (devvp->v_type != VREG && + (devvp->v_vflag & VV_COPYONWRITE) && + ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd)) { + return; + } if (!ump->um_candelete) { ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd); return; @@ -2327,7 +2335,7 @@ ffs_mapsearch(fs, cgp, bpref, allocsiz) * The form of the error message is: * fs: error message */ -static void +void ffs_fserr(fs, inum, cp) struct fs *fs; ino_t inum; @@ -2348,8 +2356,8 @@ ffs_fserr(fs, inum, cp) * specified inode by the specified amount. Under normal * operation the count should always go down. Decrementing * the count to zero will cause the inode to be freed. - * adjblkcnt(inode, amt) - adjust the number of blocks used to - * by the specifed amount. 
+ * adjblkcnt(inode, amt) - adjust the number of blocks used by the + * inode by the specified amount. * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) - * adjust the superblock summary. * freedirs(inode, count) - directory inodes [inode..inode + count - 1] @@ -2564,7 +2572,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if (blksize > blkcnt) blksize = blkcnt; ffs_blkfree(ump, fs, ump->um_devvp, blkno, - blksize * fs->fs_fsize, ROOTINO, NULL); + blksize * fs->fs_fsize, ROOTINO, VDIR, NULL); blkno += blksize; blkcnt -= blksize; blksize = fs->fs_frag; diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 6d5f27c..63a4eba 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -105,6 +105,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; int unwindidx = -1; int saved_inbdflush; + static struct timeval lastfail; + static int curfail; + int reclaimed; ip = VTOI(vp); dp = ip->i_din1; @@ -112,6 +115,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, ump = ip->i_ump; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; + reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs1: blk too big"); *bpp = NULL; @@ -276,6 +280,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, /* * Fetch through the indirect blocks, allocating as necessary. 
*/ +retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); @@ -296,8 +301,20 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - flags, cred, &newb)) != 0) { + flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } goto fail; } nb = newb; @@ -349,10 +366,22 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); - error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } goto fail; } nb = newb; @@ -477,7 +506,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); } return (error); } @@ -506,6 +535,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, int deallocated, osize, nsize, num, i, error; int unwindidx = -1; int saved_inbdflush; + static struct timeval lastfail; + static int curfail; + int reclaimed; ip = VTOI(vp); dp = ip->i_din2; @@ -513,6 +545,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t 
startoffset, int size, ump = ip->i_ump; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; + reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs2: blk too big"); *bpp = NULL; @@ -787,6 +820,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, /* * Fetch through the indirect blocks, allocating as necessary. */ +retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); @@ -807,8 +841,20 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, if (pref == 0) pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - flags, cred, &newb)) != 0) { + flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } goto fail; } nb = newb; @@ -860,10 +906,22 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); - error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); + error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); + if (++reclaimed == 1) { + UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, + FLUSH_BLOCKS_WAIT); + UFS_UNLOCK(ump); + goto retry; + } + if (ppsratecheck(&lastfail, &curfail, 1)) { + ffs_fserr(fs, ip->i_number, "filesystem full"); + uprintf("\n%s: write failed, filesystem " + "is full\n", fs->fs_fsmnt); + } goto fail; } nb = newb; @@ -994,7 +1052,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); } 
return (error); } diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index d819c8a..70bcf1d 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -33,6 +33,10 @@ #ifndef _UFS_FFS_EXTERN_H #define _UFS_FFS_EXTERN_H +#ifndef _KERNEL +#error "No user-serving parts inside" +#else + struct buf; struct cg; struct fid; @@ -57,7 +61,7 @@ int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size, struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, - ufs2_daddr_t, long, ino_t, struct workhead *); + ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *); ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); @@ -69,22 +73,26 @@ int ffs_flushfiles(struct mount *, int, struct thread *); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, int, struct workhead *); +void ffs_fserr(struct fs *, ino_t, char *); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); int ffs_mountroot(void); void ffs_oldfscompat_write(struct fs *, struct ufsmount *); +void ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end); int ffs_reallocblks(struct vop_reallocblks_args *); int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); int ffs_sbupdate(struct ufsmount *, int, int); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); -int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t); +int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, 
ino_t, + enum vtype, struct workhead *); void ffs_snapremove(struct vnode *vp); int ffs_snapshot(struct mount *mp, char *snapfile); void ffs_snapshot_mount(struct mount *mp); void ffs_snapshot_unmount(struct mount *mp); void process_deferred_inactive(struct mount *mp); +void ffs_sync_snap(struct mount *, int); int ffs_syncvnode(struct vnode *vp, int waitfor); int ffs_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *); int ffs_update(struct vnode *, int); @@ -107,7 +115,6 @@ extern struct vop_vector ffs_fifoops2; int softdep_check_suspend(struct mount *, struct vnode *, int, int, int, int); -int softdep_complete_trunc(struct vnode *, void *); void softdep_get_depcounts(struct mount *, int *, int *); void softdep_initialize(void); void softdep_uninitialize(void); @@ -123,7 +130,7 @@ void softdep_freefile(struct vnode *, ino_t, int); int softdep_request_cleanup(struct fs *, struct vnode *, struct ucred *, int); void softdep_setup_freeblocks(struct inode *, off_t, int); -void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); +void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int); void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, int, int); void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, @@ -139,14 +146,20 @@ void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int, void softdep_setup_inofree(struct mount *, struct buf *, ino_t, struct workhead *); void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); -void *softdep_setup_trunc(struct vnode *vp, off_t length, int flags); void softdep_fsync_mountdev(struct vnode *); int softdep_sync_metadata(struct vnode *); +int softdep_sync_buf(struct vnode *, struct buf *, int); int softdep_process_worklist(struct mount *, int); int softdep_fsync(struct vnode *); int softdep_waitidle(struct mount *); int softdep_prealloc(struct vnode *, int); int softdep_journal_lookup(struct mount *, struct vnode **); 
+void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int); +void softdep_journal_fsync(struct inode *); +void softdep_buf_append(struct buf *, struct workhead *); +void softdep_inode_append(struct inode *, struct ucred *, struct workhead *); +void softdep_freework(struct workhead *); + /* * Things to request flushing in softdep_request_cleanup() @@ -158,4 +171,16 @@ int softdep_journal_lookup(struct mount *, struct vnode **); int ffs_rdonly(struct inode *); +TAILQ_HEAD(snaphead, inode); + +struct snapdata { + LIST_ENTRY(snapdata) sn_link; + struct snaphead sn_head; + daddr_t sn_listsize; + daddr_t *sn_blklist; + struct lock sn_lock; +}; + +#endif /* _KERNEL */ + #endif /* !_UFS_FFS_EXTERN_H */ diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index ba2813d..a7b43e2 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -120,7 +120,7 @@ ffs_update(vp, waitfor) } } -static void +void ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) { vm_object_t object; @@ -151,12 +151,12 @@ ffs_truncate(vp, length, flags, cred, td) ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; ufs2_daddr_t count, blocksreleased = 0, datablocks; - void *cookie; struct bufobj *bo; struct fs *fs; struct buf *bp; struct ufsmount *ump; - int needextclean, softdepslowdown, extblocks; + int softdeptrunc, journaltrunc; + int needextclean, extblocks; int offset, size, level, nblocks; int i, error, allerror; off_t osize; @@ -165,7 +165,6 @@ ffs_truncate(vp, length, flags, cred, td) fs = ip->i_fs; ump = ip->i_ump; bo = &vp->v_bufobj; - cookie = NULL; ASSERT_VOP_LOCKED(vp, "ffs_truncate"); @@ -173,6 +172,11 @@ ffs_truncate(vp, length, flags, cred, td) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); +#ifdef QUOTA + error = getinoquota(ip); + if (error) + return (error); +#endif /* * Historically clients did not have to specify which data 
* they were truncating. So, if not specified, we assume @@ -191,7 +195,10 @@ ffs_truncate(vp, length, flags, cred, td) */ allerror = 0; needextclean = 0; - softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp); + softdeptrunc = 0; + journaltrunc = DOINGSUJ(vp); + if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) + softdeptrunc = !softdep_slowdown(vp); extblocks = 0; datablocks = DIP(ip, i_blocks); if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { @@ -199,27 +206,23 @@ ffs_truncate(vp, length, flags, cred, td) datablocks -= extblocks; } if ((flags & IO_EXT) && extblocks > 0) { - if (DOINGSOFTDEP(vp) && softdepslowdown == 0 && length == 0) { - if ((flags & IO_NORMAL) == 0) { - softdep_setup_freeblocks(ip, length, IO_EXT); - return (0); - } + if (length != 0) + panic("ffs_truncate: partial trunc of extdata"); + if (softdeptrunc || journaltrunc) { + if ((flags & IO_NORMAL) == 0) + goto extclean; needextclean = 1; } else { - if (length != 0) - panic("ffs_truncate: partial trunc of extdata"); if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) return (error); - if (DOINGSUJ(vp)) - cookie = softdep_setup_trunc(vp, length, flags); - osize = ip->i_din2->di_extsize; - ip->i_din2->di_blocks -= extblocks; #ifdef QUOTA (void) chkdq(ip, -extblocks, NOCRED, 0); #endif vinvalbuf(vp, V_ALT, 0, 0); ffs_pages_remove(vp, OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); + osize = ip->i_din2->di_extsize; + ip->i_din2->di_blocks -= extblocks; ip->i_din2->di_extsize = 0; for (i = 0; i < NXADDR; i++) { oldblks[i] = ip->i_din2->di_extb[i]; @@ -227,19 +230,18 @@ ffs_truncate(vp, length, flags, cred, td) } ip->i_flag |= IN_CHANGE; if ((error = ffs_update(vp, 1))) - goto out; + return (error); for (i = 0; i < NXADDR; i++) { if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i], - sblksize(fs, osize, i), ip->i_number, NULL); + sblksize(fs, osize, i), ip->i_number, + vp->v_type, NULL); } } } - if ((flags & IO_NORMAL) == 0) { - error = 0; - goto out; - } 
+ if ((flags & IO_NORMAL) == 0) + return (0); if (vp->v_type == VLNK && (ip->i_size < vp->v_mount->mnt_maxsymlinklen || datablocks == 0)) { @@ -252,24 +254,17 @@ ffs_truncate(vp, length, flags, cred, td) DIP_SET(ip, i_size, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) - softdep_setup_freeblocks(ip, length, IO_EXT); - error = ffs_update(vp, 1); - goto out; + goto extclean; + return ffs_update(vp, 1); } if (ip->i_size == length) { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) - softdep_setup_freeblocks(ip, length, IO_EXT); - error = ffs_update(vp, 0); - goto out; + goto extclean; + return ffs_update(vp, 0); } if (fs->fs_ronly) panic("ffs_truncate: read-only filesystem"); -#ifdef QUOTA - error = getinoquota(ip); - if (error) - goto out; -#endif if ((ip->i_flags & SF_SNAPSHOT) != 0) ffs_snapremove(vp); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; @@ -285,7 +280,7 @@ ffs_truncate(vp, length, flags, cred, td) error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) { vnode_pager_setsize(vp, osize); - goto out; + return (error); } ip->i_size = length; DIP_SET(ip, i_size, length); @@ -296,11 +291,10 @@ ffs_truncate(vp, length, flags, cred, td) else bawrite(bp); ip->i_flag |= IN_CHANGE | IN_UPDATE; - error = ffs_update(vp, 1); - goto out; + return ffs_update(vp, 1); } if (DOINGSOFTDEP(vp)) { - if (length > 0 || softdepslowdown) { + if (softdeptrunc == 0 && journaltrunc == 0) { /* * If a file is only partially truncated, then * we have to clean up the data structures @@ -311,29 +305,20 @@ ffs_truncate(vp, length, flags, cred, td) * so that it will have no data structures left. */ if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) - goto out; - /* - * We have to journal the truncation before we change - * any blocks so we don't leave the file partially - * truncated. 
- */ - if (DOINGSUJ(vp) && cookie == NULL) - cookie = softdep_setup_trunc(vp, length, flags); + return (error); } else { -#ifdef QUOTA - (void) chkdq(ip, -datablocks, NOCRED, 0); -#endif - softdep_setup_freeblocks(ip, length, needextclean ? - IO_EXT | IO_NORMAL : IO_NORMAL); + flags = IO_NORMAL | (needextclean ? IO_EXT: 0); + if (journaltrunc) + softdep_journal_freeblocks(ip, cred, length, + flags); + else + softdep_setup_freeblocks(ip, length, flags); ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); - vinvalbuf(vp, needextclean ? 0 : V_NORMAL, 0, 0); - if (!needextclean) - ffs_pages_remove(vp, 0, - OFF_TO_IDX(lblktosize(fs, -extblocks))); - vnode_pager_setsize(vp, 0); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - error = ffs_update(vp, 0); - goto out; + if (journaltrunc == 0) { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = ffs_update(vp, 0); + } + return (error); } } /* @@ -353,7 +338,7 @@ ffs_truncate(vp, length, flags, cred, td) flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) - goto out; + return (error); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized @@ -365,7 +350,7 @@ ffs_truncate(vp, length, flags, cred, td) if (DOINGSOFTDEP(vp) && lbn < NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = ffs_syncvnode(vp, MNT_WAIT)) != 0) - goto out; + return (error); ip->i_size = length; DIP_SET(ip, i_size, length); size = blksize(fs, ip, lbn); @@ -411,13 +396,7 @@ ffs_truncate(vp, length, flags, cred, td) DIP_SET(ip, i_db[i], 0); } ip->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * When doing softupdate journaling we must preserve the size along - * with the old pointers until they are freed or we might not - * know how many fragments remain. 
- */ - if (!DOINGSUJ(vp)) - allerror = ffs_update(vp, 1); + allerror = ffs_update(vp, 1); /* * Having written the new inode to disk, save its new configuration @@ -457,7 +436,8 @@ ffs_truncate(vp, length, flags, cred, td) if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ip->i_devvp, bn, - fs->fs_bsize, ip->i_number, NULL); + fs->fs_bsize, ip->i_number, + vp->v_type, NULL); blocksreleased += nblocks; } } @@ -477,7 +457,7 @@ ffs_truncate(vp, length, flags, cred, td) DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number, - NULL); + vp->v_type, NULL); blocksreleased += btodb(bsize); } if (lastblock < 0) @@ -509,7 +489,7 @@ ffs_truncate(vp, length, flags, cred, td) */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ip->i_devvp, bn, - oldspace - newspace, ip->i_number, NULL); + oldspace - newspace, ip->i_number, vp->v_type, NULL); blocksreleased += btodb(oldspace - newspace); } } @@ -541,14 +521,14 @@ done: #ifdef QUOTA (void) chkdq(ip, -blocksreleased, NOCRED, 0); #endif - error = allerror; -out: - if (cookie) { - allerror = softdep_complete_trunc(vp, cookie); - if (allerror != 0 && error == 0) - error = allerror; - } - return (error); + return (allerror); + +extclean: + if (journaltrunc) + softdep_journal_freeblocks(ip, cred, length, IO_EXT); + else + softdep_setup_freeblocks(ip, length, IO_EXT); + return ffs_update(vp, MNT_WAIT); } /* @@ -656,7 +636,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) blocksreleased += blkcount; } ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); blocksreleased += nblocks; } diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 968be8a..c8dd4c6 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -81,12 +81,14 @@ ffs_snapshot(mp, snapfile) } int -ffs_snapblkfree(fs, devvp, bno, size, inum) +ffs_snapblkfree(fs, devvp, bno, size, inum, 
vtype, wkhd) struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + enum vtype vtype; + struct workhead *wkhd; { return (EINVAL); } @@ -123,19 +125,16 @@ ffs_copyonwrite(devvp, bp) return (EINVAL); } +void +ffs_sync_snap(mp, waitfor) + struct mount *mp; + int waitfor; +{ +} + #else FEATURE(ffs_snapshot, "FFS snapshot support"); -TAILQ_HEAD(snaphead, inode); - -struct snapdata { - LIST_ENTRY(snapdata) sn_link; - struct snaphead sn_head; - daddr_t sn_listsize; - daddr_t *sn_blklist; - struct lock sn_lock; -}; - LIST_HEAD(, snapdata) snapfree; static struct mtx snapfree_lock; MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF); @@ -176,8 +175,10 @@ static int ffs_bp_snapblk(struct vnode *, struct buf *); * To ensure the consistency of snapshots across crashes, we must * synchronously write out copied blocks before allowing the * originals to be modified. Because of the rather severe speed - * penalty that this imposes, the following flag allows this - * crash persistence to be disabled. + * penalty that this imposes, the code normally only ensures + * persistence for the filesystem metadata contained within a + * snapshot. Setting the following flag allows this crash + * persistence to be enabled for file contents. 
*/ int dopersistence = 0; @@ -584,7 +585,7 @@ loop: if (len != 0 && len < fs->fs_bsize) { ffs_blkfree(ump, copy_fs, vp, DIP(xp, i_db[loc]), len, xp->i_number, - NULL); + xvp->v_type, NULL); blkno = DIP(xp, i_db[loc]); DIP_SET(xp, i_db[loc], 0); } @@ -1247,7 +1248,8 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, lblkno, expungetype) *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, + vp->v_type, NULL); } return (0); } @@ -1530,7 +1532,8 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, lblkno, expungetype) *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, + vp->v_type, NULL); } return (0); } @@ -1635,7 +1638,7 @@ ffs_snapremove(vp) DIP_SET(ip, i_db[blkno], 0); else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, - ip->i_number))) { + ip->i_number, vp->v_type, NULL))) { DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - btodb(fs->fs_bsize)); DIP_SET(ip, i_db[blkno], 0); @@ -1660,7 +1663,8 @@ ffs_snapremove(vp) ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number))) { + fs->fs_bsize, ip->i_number, vp->v_type, + NULL))) { ip->i_din1->di_blocks -= btodb(fs->fs_bsize); ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; @@ -1674,7 +1678,7 @@ ffs_snapremove(vp) ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number))) { + fs->fs_bsize, ip->i_number, vp->v_type, NULL))) { ip->i_din2->di_blocks -= btodb(fs->fs_bsize); ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; } @@ -1722,12 +1726,14 @@ ffs_snapremove(vp) * must always have been allocated from a 
BLK_NOCOPY location. */ int -ffs_snapblkfree(fs, devvp, bno, size, inum) +ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + enum vtype vtype; + struct workhead *wkhd; { struct buf *ibp, *cbp, *savedcbp = 0; struct thread *td = curthread; @@ -1825,6 +1831,17 @@ retry: "Grabonremove: snapino", ip->i_number, (intmax_t)lbn, inum); #endif + /* + * If journaling is tracking this write we must add + * the work to the inode or indirect being written. + */ + if (wkhd != NULL) { + if (lbn < NDADDR) + softdep_inode_append(ip, + curthread->td_ucred, wkhd); + else + softdep_buf_append(ibp, wkhd); + } if (lbn < NDADDR) { DIP_SET(ip, i_db[lbn], bno); } else if (ip->i_ump->um_fstype == UFS1) { @@ -1864,12 +1881,16 @@ retry: * simply copy them to the new block. Note that we need * to synchronously write snapshots that have not been * unlinked, and hence will be visible after a crash, - * to ensure their integrity. + * to ensure their integrity. At a minimum we ensure the + * integrity of the filesystem metadata, but use the + * dopersistence sysctl-setable flag to decide on the + * persistence needed for file content data. */ if (savedcbp != 0) { bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((vtype == VDIR || dopersistence) && + ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); continue; } @@ -1879,7 +1900,8 @@ retry: if ((error = readblock(vp, cbp, lbn)) != 0) { bzero(cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((vtype == VDIR || dopersistence) && + ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); break; } @@ -1888,12 +1910,16 @@ retry: /* * Note that we need to synchronously write snapshots that * have not been unlinked, and hence will be visible after - * a crash, to ensure their integrity. + * a crash, to ensure their integrity. 
At a minimum we + * ensure the integrity of the filesystem metadata, but + * use the dopersistence sysctl-setable flag to decide on + * the persistence needed for file content data. */ if (savedcbp) { vp = savedcbp->b_vp; bawrite(savedcbp); - if (dopersistence && VTOI(vp)->i_effnlink > 0) + if ((vtype == VDIR || dopersistence) && + VTOI(vp)->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); } /* @@ -1902,6 +1928,8 @@ retry: * not be freed. Although space will be lost, the snapshot * will stay consistent. */ + if (error != 0 && wkhd != NULL) + softdep_freework(wkhd); lockmgr(vp->v_vnlock, LK_RELEASE, NULL); return (error); } @@ -2346,12 +2374,16 @@ ffs_copyonwrite(devvp, bp) * simply copy them to the new block. Note that we need * to synchronously write snapshots that have not been * unlinked, and hence will be visible after a crash, - * to ensure their integrity. + * to ensure their integrity. At a minimum we ensure the + * integrity of the filesystem metadata, but use the + * dopersistence sysctl-setable flag to decide on the + * persistence needed for file content data. */ if (savedcbp != 0) { bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); else launched_async_io = 1; @@ -2363,7 +2395,8 @@ ffs_copyonwrite(devvp, bp) if ((error = readblock(vp, cbp, lbn)) != 0) { bzero(cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); else launched_async_io = 1; @@ -2374,12 +2407,16 @@ ffs_copyonwrite(devvp, bp) /* * Note that we need to synchronously write snapshots that * have not been unlinked, and hence will be visible after - * a crash, to ensure their integrity. + * a crash, to ensure their integrity. 
At a minimum we + * ensure the integrity of the filesystem metadata, but + * use the dopersistence sysctl-setable flag to decide on + * the persistence needed for file content data. */ if (savedcbp) { vp = savedcbp->b_vp; bawrite(savedcbp); - if (dopersistence && VTOI(vp)->i_effnlink > 0) + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && VTOI(vp)->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); else launched_async_io = 1; @@ -2400,6 +2437,42 @@ ffs_copyonwrite(devvp, bp) } /* + * sync snapshots to force freework records waiting on snapshots to claim + * blocks to free. + */ +void +ffs_sync_snap(mp, waitfor) + struct mount *mp; + int waitfor; +{ + struct snapdata *sn; + struct vnode *devvp; + struct vnode *vp; + struct inode *ip; + + devvp = VFSTOUFS(mp)->um_devvp; + if ((devvp->v_vflag & VV_COPYONWRITE) == 0) + return; + for (;;) { + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL) { + VI_UNLOCK(devvp); + return; + } + if (lockmgr(&sn->sn_lock, + LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, + VI_MTX(devvp)) == 0) + break; + } + TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { + vp = ITOV(ip); + ffs_syncvnode(vp, waitfor); + } + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); +} + +/* * Read the specified block into the given buffer. * Much of this boiler-plate comes from bwrite(). 
*/ diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index a7ae484..a10104d 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <sys/buf.h> #include <sys/kdb.h> #include <sys/kthread.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mount.h> @@ -71,6 +72,7 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <sys/vnode.h> #include <sys/conf.h> + #include <ufs/ufs/dir.h> #include <ufs/ufs/extattr.h> #include <ufs/ufs/quota.h> @@ -82,6 +84,8 @@ __FBSDID("$FreeBSD$"); #include <ufs/ufs/ufs_extern.h> #include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> #include <ddb/ddb.h> @@ -138,10 +142,11 @@ softdep_setup_sbupdate(ump, fs, bp) } void -softdep_setup_inomapdep(bp, ip, newinum) +softdep_setup_inomapdep(bp, ip, newinum, mode) struct buf *bp; struct inode *ip; ino_t newinum; + int mode; { panic("softdep_setup_inomapdep called"); @@ -214,6 +219,25 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) } void +softdep_journal_freeblocks(ip, cred, length, flags) + struct inode *ip; + struct ucred *cred; + off_t length; + int flags; +{ + + panic("softdep_journal_freeblocks called"); +} + +void +softdep_journal_fsync(ip) + struct inode *ip; +{ + + panic("softdep_journal_fsync called"); +} + +void softdep_setup_freeblocks(ip, length, flags) struct inode *ip; off_t length; @@ -282,29 +306,6 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) panic("softdep_setup_directory_change called"); } -void * -softdep_setup_trunc(vp, length, flags) - struct vnode *vp; - off_t length; - int flags; -{ - - panic("%s called", __FUNCTION__); - - return (NULL); -} - -int -softdep_complete_trunc(vp, cookie) - struct vnode *vp; - void *cookie; -{ - - panic("%s called", __FUNCTION__); - - return (0); -} - void softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) struct mount *mp; @@ -499,6 +500,13 @@ softdep_sync_metadata(struct 
vnode *vp) } int +softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor) +{ + + return (0); +} + +int softdep_slowdown(vp) struct vnode *vp; { @@ -577,6 +585,33 @@ softdep_get_depcounts(struct mount *mp, *softdepactiveaccp = 0; } +void +softdep_buf_append(bp, wkhd) + struct buf *bp; + struct workhead *wkhd; +{ + + panic("softdep_buf_appendwork called"); +} + +void +softdep_inode_append(ip, cred, wkhd) + struct inode *ip; + struct ucred *cred; + struct workhead *wkhd; +{ + + panic("softdep_inode_appendwork called"); +} + +void +softdep_freework(wkhd) + struct workhead *wkhd; +{ + + panic("softdep_freework called"); +} + #else FEATURE(softupdates, "FFS soft-updates support"); @@ -614,10 +649,13 @@ FEATURE(softupdates, "FFS soft-updates support"); #define D_JSEGDEP 23 #define D_SBDEP 24 #define D_JTRUNC 25 -#define D_LAST D_JTRUNC +#define D_JFSYNC 26 +#define D_SENTINAL 27 +#define D_LAST D_SENTINAL unsigned long dep_current[D_LAST + 1]; unsigned long dep_total[D_LAST + 1]; +unsigned long dep_write[D_LAST + 1]; SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats"); @@ -625,13 +663,17 @@ SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0, "total dependencies allocated"); SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0, "current dependencies allocated"); +SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0, + "current dependencies written"); #define SOFTDEP_TYPE(type, str, long) \ static MALLOC_DEFINE(M_ ## type, #str, long); \ SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \ &dep_total[D_ ## type], 0, ""); \ SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \ - &dep_current[D_ ## type], 0, ""); + &dep_current[D_ ## type], 0, ""); \ + SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, \ + &dep_write[D_ ## type], 0, ""); SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies"); @@ -660,6 +702,7 @@ SOFTDEP_TYPE(JSEG, 
jseg, "Journal segment"); SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete"); SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency"); SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation"); +SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete"); static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes"); static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations"); @@ -694,7 +737,8 @@ static struct malloc_type *memtype[] = { M_JSEG, M_JSEGDEP, M_SBDEP, - M_JTRUNC + M_JTRUNC, + M_JFSYNC }; static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd; @@ -734,10 +778,11 @@ static void clear_unlinked_inodedep(struct inodedep *); static struct inodedep *first_unlinked_inodedep(struct ufsmount *); static int flush_pagedep_deps(struct vnode *, struct mount *, struct diraddhd *); -static void free_pagedep(struct pagedep *); +static int free_pagedep(struct pagedep *); static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t); -static int flush_inodedep_deps(struct mount *, ino_t); +static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t); static int flush_deplist(struct allocdirectlst *, int, int *); +static int sync_cgs(struct mount *, int); static int handle_written_filepage(struct pagedep *, struct buf *); static int handle_written_sbdep(struct sbdep *, struct buf *); static void initiate_write_sbdep(struct sbdep *); @@ -745,12 +790,14 @@ static void diradd_inode_written(struct diradd *, struct inodedep *); static int handle_written_indirdep(struct indirdep *, struct buf *, struct buf**); static int handle_written_inodeblock(struct inodedep *, struct buf *); +static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); static void handle_written_jaddref(struct jaddref *); static void handle_written_jremref(struct jremref *); static void handle_written_jseg(struct jseg *, struct buf *); static void handle_written_jnewblk(struct jnewblk 
*); -static void handle_written_jfreeblk(struct jfreeblk *); +static void handle_written_jblkdep(struct jblkdep *); static void handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); @@ -758,6 +805,7 @@ static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); static void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *); +static void jfsync_write(struct jfsync *, struct jseg *, uint8_t *data); static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *); static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *); static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *); @@ -768,19 +816,25 @@ static void handle_allocdirect_partdone(struct allocdirect *, static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, struct workhead *); static void indirdep_complete(struct indirdep *); -static int indirblk_inseg(struct mount *, ufs2_daddr_t); +static int indirblk_lookup(struct mount *, ufs2_daddr_t); +static void indirblk_insert(struct freework *); +static void indirblk_remove(struct freework *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); static void handle_written_mkdir(struct mkdir *, int); +static int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *); static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); static void handle_workitem_freefile(struct freefile *); -static void handle_workitem_remove(struct dirrem *, 
struct vnode *); +static int handle_workitem_remove(struct dirrem *, int); static struct dirrem *newdirrem(struct buf *, struct inode *, struct inode *, int, struct dirrem **); -static void cancel_indirdep(struct indirdep *, struct buf *, struct inodedep *, +static struct indirdep *indirdep_lookup(struct mount *, struct inode *, + struct buf *); +static void cancel_indirdep(struct indirdep *, struct buf *, struct freeblks *); static void free_indirdep(struct indirdep *); static void free_diradd(struct diradd *, struct workhead *); @@ -795,8 +849,13 @@ static void cancel_diradd(struct diradd *, struct dirrem *, struct jremref *, struct jremref *, struct jremref *); static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *, struct jremref *); -static void cancel_allocindir(struct allocindir *, struct inodedep *, - struct freeblks *); +static void cancel_allocindir(struct allocindir *, struct buf *bp, + struct freeblks *, int); +static int setup_trunc_indir(struct freeblks *, struct inode *, + ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t); +static void complete_trunc_indir(struct freework *); +static void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *, + int); static void complete_mkdir(struct mkdir *); static void free_newdirblk(struct newdirblk *); static void free_jremref(struct jremref *); @@ -806,7 +865,7 @@ static void free_jsegs(struct jblocks *); static void rele_jseg(struct jseg *); static void free_jseg(struct jseg *, struct jblocks *); static void free_jnewblk(struct jnewblk *); -static void free_jfreeblk(struct jfreeblk *); +static void free_jblkdep(struct jblkdep *); static void free_jfreefrag(struct jfreefrag *); static void free_freedep(struct freedep *); static void journal_jremref(struct dirrem *, struct jremref *, @@ -818,30 +877,33 @@ static void cancel_jfreefrag(struct jfreefrag *); static inline void setup_freedirect(struct freeblks *, struct inode *, int, int); static inline void setup_freeext(struct freeblks *, 
struct inode *, int, int); -static inline void setup_freeindir(struct freeblks *, struct inode *, int i, +static inline void setup_freeindir(struct freeblks *, struct inode *, int, ufs_lbn_t, int); static inline struct freeblks *newfreeblks(struct mount *, struct inode *); static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); -static void softdep_trunc_deps(struct vnode *, struct freeblks *, ufs_lbn_t, +ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t); +static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int); +static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t, int, int); -static int cancel_pagedep(struct pagedep *, struct inodedep *, - struct freeblks *); -static int deallocate_dependencies(struct buf *, struct inodedep *, - struct freeblks *, int off); +static void trunc_pages(struct inode *, off_t, ufs2_daddr_t, int); +static int cancel_pagedep(struct pagedep *, struct freeblks *, int); +static int deallocate_dependencies(struct buf *, struct freeblks *, int); +static void newblk_freefrag(struct newblk*); static void free_newblk(struct newblk *); static void cancel_allocdirect(struct allocdirectlst *, - struct allocdirect *, struct freeblks *, int); + struct allocdirect *, struct freeblks *); static int check_inode_unwritten(struct inodedep *); static int free_inodedep(struct inodedep *); static void freework_freeblock(struct freework *); -static void handle_workitem_freeblocks(struct freeblks *, int); -static void handle_complete_freeblocks(struct freeblks *); +static void freework_enqueue(struct freework *); +static int handle_workitem_freeblocks(struct freeblks *, int); +static int handle_complete_freeblocks(struct freeblks *, int); static void handle_workitem_indirblk(struct freework *); static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); static struct worklist *jnewblk_merge(struct worklist *, struct worklist *, 
struct workhead *); -static void setup_allocindir_phase2(struct buf *, struct inode *, +static struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *, struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, ufs2_daddr_t, ufs_lbn_t); @@ -862,18 +924,23 @@ static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **); static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t, struct inodedep **); static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **); -static int pagedep_lookup(struct mount *, ino_t, ufs_lbn_t, int, - struct pagedep **); +static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t, + int, struct pagedep **); static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t, struct mount *mp, int, struct pagedep **); static void pause_timer(void *); static int request_cleanup(struct mount *, int); -static int process_worklist_item(struct mount *, int); +static int process_worklist_item(struct mount *, int, int); static void process_removes(struct vnode *); +static void process_truncates(struct vnode *); static void jwork_move(struct workhead *, struct workhead *); +static void jwork_insert(struct workhead *, struct jsegdep *); static void add_to_worklist(struct worklist *, int); +static void wake_worklist(struct worklist *); +static void wait_worklist(struct worklist *, char *); static void remove_from_worklist(struct worklist *); static void softdep_flush(void); +static void softdep_flushjournal(struct mount *); static int softdep_speedup(void); static void worklist_speedup(void); static int journal_mount(struct mount *, struct fs *, struct ucred *); @@ -889,17 +956,20 @@ static struct jremref *newjremref(struct dirrem *, struct inode *, struct inode *ip, off_t, nlink_t); static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, uint16_t); -static inline void newinoref(struct inoref *, 
ino_t, ino_t, off_t, nlink_t, +static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t, uint16_t); -static inline struct jsegdep *inoref_jseg(struct inoref *); +static inline struct jsegdep *inoref_jseg(struct inoref *); static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t); static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t, ufs2_daddr_t, int); +static struct jtrunc *newjtrunc(struct freeblks *, off_t, int); +static void move_newblock_dep(struct jaddref *, struct inodedep *); +static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t); static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *, ufs2_daddr_t, long, ufs_lbn_t); static struct freework *newfreework(struct ufsmount *, struct freeblks *, - struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int); -static void jwait(struct worklist *wk); + struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int); +static int jwait(struct worklist *, int); static struct inodedep *inodedep_lookup_ip(struct inode *); static int bmsafemap_rollbacks(struct bmsafemap *); static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); @@ -1064,6 +1134,30 @@ jwork_move(dst, src) } } +static void +jwork_insert(dst, jsegdep) + struct workhead *dst; + struct jsegdep *jsegdep; +{ + struct jsegdep *jsegdepn; + struct worklist *wk; + + LIST_FOREACH(wk, dst, wk_list) + if (wk->wk_type == D_JSEGDEP) + break; + if (wk == NULL) { + WORKLIST_INSERT(dst, &jsegdep->jd_list); + return; + } + jsegdepn = WK_JSEGDEP(wk); + if (jsegdep->jd_seg->js_seq < jsegdepn->jd_seg->js_seq) { + WORKLIST_REMOVE(wk); + free_jsegdep(jsegdepn); + WORKLIST_INSERT(dst, &jsegdep->jd_list); + } else + free_jsegdep(jsegdep); +} + /* * Routines for tracking and managing workitems. 
*/ @@ -1088,6 +1182,8 @@ workitem_free(item, type) panic("workitem_free: type mismatch %s != %s", TYPENAME(item->wk_type), TYPENAME(type)); #endif + if (item->wk_state & IOWAITING) + wakeup(item); ump = VFSTOUFS(item->wk_mp); if (--ump->softdep_deps == 0 && ump->softdep_req) wakeup(&ump->softdep_deps); @@ -1101,14 +1197,18 @@ workitem_alloc(item, type, mp) int type; struct mount *mp; { + struct ufsmount *ump; + item->wk_type = type; item->wk_mp = mp; item->wk_state = 0; + + ump = VFSTOUFS(mp); ACQUIRE_LOCK(&lk); dep_current[type]++; dep_total[type]++; - VFSTOUFS(mp)->softdep_deps++; - VFSTOUFS(mp)->softdep_accdeps++; + ump->softdep_deps++; + ump->softdep_accdeps++; FREE_LOCK(&lk); } @@ -1270,8 +1370,7 @@ softdep_flush(void) vfslocked = VFS_LOCK_GIANT(mp); progress += softdep_process_worklist(mp, 0); ump = VFSTOUFS(mp); - remaining += ump->softdep_on_worklist - - ump->softdep_on_worklist_inprogress; + remaining += ump->softdep_on_worklist; VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); @@ -1314,10 +1413,14 @@ softdep_speedup(void) * The following routine is the only one that removes items * and does so in order from first to last. */ + +#define WK_HEAD 0x0001 /* Add to HEAD. */ +#define WK_NODELAY 0x0002 /* Process immediately. 
*/ + static void -add_to_worklist(wk, nodelay) +add_to_worklist(wk, flags) struct worklist *wk; - int nodelay; + int flags; { struct ufsmount *ump; @@ -1327,13 +1430,17 @@ add_to_worklist(wk, nodelay) panic("add_to_worklist: %s(0x%X) already on list", TYPENAME(wk->wk_type), wk->wk_state); wk->wk_state |= ONWORKLIST; - if (LIST_EMPTY(&ump->softdep_workitem_pending)) + if (ump->softdep_on_worklist == 0) { LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); - else + ump->softdep_worklist_tail = wk; + } else if (flags & WK_HEAD) { + LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list); + } else { LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list); - ump->softdep_worklist_tail = wk; + ump->softdep_worklist_tail = wk; + } ump->softdep_on_worklist += 1; - if (nodelay) + if (flags & WK_NODELAY) worklist_speedup(); } @@ -1346,19 +1453,35 @@ remove_from_worklist(wk) struct worklist *wk; { struct ufsmount *ump; - struct worklist *wkend; ump = VFSTOUFS(wk->wk_mp); WORKLIST_REMOVE(wk); - if (wk == ump->softdep_worklist_tail) { - LIST_FOREACH(wkend, &ump->softdep_workitem_pending, wk_list) - if (LIST_NEXT(wkend, wk_list) == NULL) - break; - ump->softdep_worklist_tail = wkend; - } + if (ump->softdep_worklist_tail == wk) + ump->softdep_worklist_tail = + (struct worklist *)wk->wk_list.le_prev; ump->softdep_on_worklist -= 1; } +static void +wake_worklist(wk) + struct worklist *wk; +{ + if (wk->wk_state & IOWAITING) { + wk->wk_state &= ~IOWAITING; + wakeup(wk); + } +} + +static void +wait_worklist(wk, wmesg) + struct worklist *wk; + char *wmesg; +{ + + wk->wk_state |= IOWAITING; + msleep(wk, &lk, PVM, wmesg, 0); +} + /* * Process that runs once per second to handle items in the background queue. 
* @@ -1389,7 +1512,7 @@ softdep_process_worklist(mp, full) starttime = time_second; softdep_process_journal(mp, NULL, full?MNT_WAIT:0); while (ump->softdep_on_worklist > 0) { - if ((cnt = process_worklist_item(mp, LK_NOWAIT)) == -1) + if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0) break; else matchcnt += cnt; @@ -1449,46 +1572,123 @@ process_removes(vp) mp = vp->v_mount; inum = VTOI(vp)->i_number; for (;;) { +top: if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) return; - LIST_FOREACH(dirrem, &inodedep->id_dirremhd, dm_inonext) - if ((dirrem->dm_state & (COMPLETE | ONWORKLIST)) == + LIST_FOREACH(dirrem, &inodedep->id_dirremhd, dm_inonext) { + /* + * If another thread is trying to lock this vnode + * it will fail but we must wait for it to do so + * before we can proceed. + */ + if (dirrem->dm_state & INPROGRESS) { + wait_worklist(&dirrem->dm_list, "pwrwait"); + goto top; + } + if ((dirrem->dm_state & (COMPLETE | ONWORKLIST)) == (COMPLETE | ONWORKLIST)) break; + } if (dirrem == NULL) return; - /* - * If another thread is trying to lock this vnode it will - * fail but we must wait for it to do so before we can - * proceed. - */ - if (dirrem->dm_state & INPROGRESS) { - dirrem->dm_state |= IOWAITING; - msleep(&dirrem->dm_list, &lk, PVM, "pwrwait", 0); - continue; - } remove_from_worklist(&dirrem->dm_list); FREE_LOCK(&lk); if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) panic("process_removes: suspended filesystem"); - handle_workitem_remove(dirrem, vp); + handle_workitem_remove(dirrem, 0); vn_finished_secondary_write(mp); ACQUIRE_LOCK(&lk); } } /* + * Process all truncations associated with a vnode if we are running out + * of journal space. This is called when the vnode lock is already held + * and no other process can clear the truncation. This function returns + * a value greater than zero if it did any work. 
+ */ +static void +process_truncates(vp) + struct vnode *vp; +{ + struct inodedep *inodedep; + struct freeblks *freeblks; + struct mount *mp; + ino_t inum; + int cgwait; + + mtx_assert(&lk, MA_OWNED); + + mp = vp->v_mount; + inum = VTOI(vp)->i_number; + for (;;) { + if (inodedep_lookup(mp, inum, 0, &inodedep) == 0) + return; + cgwait = 0; + TAILQ_FOREACH(freeblks, &inodedep->id_freeblklst, fb_next) { + /* Journal entries not yet written. */ + if (!LIST_EMPTY(&freeblks->fb_jblkdephd)) { + jwait(&LIST_FIRST( + &freeblks->fb_jblkdephd)->jb_list, + MNT_WAIT); + break; + } + /* Another thread is executing this item. */ + if (freeblks->fb_state & INPROGRESS) { + wait_worklist(&freeblks->fb_list, "ptrwait"); + break; + } + /* Freeblks is waiting on a inode write. */ + if ((freeblks->fb_state & COMPLETE) == 0) { + FREE_LOCK(&lk); + ffs_update(vp, 1); + ACQUIRE_LOCK(&lk); + break; + } + if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) == + (ALLCOMPLETE | ONWORKLIST)) { + remove_from_worklist(&freeblks->fb_list); + freeblks->fb_state |= INPROGRESS; + FREE_LOCK(&lk); + if (vn_start_secondary_write(NULL, &mp, + V_NOWAIT)) + panic("process_truncates: " + "suspended filesystem"); + handle_workitem_freeblocks(freeblks, 0); + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(&lk); + break; + } + if (freeblks->fb_cgwait) + cgwait++; + } + if (cgwait) { + FREE_LOCK(&lk); + sync_cgs(mp, MNT_WAIT); + ffs_sync_snap(mp, MNT_WAIT); + ACQUIRE_LOCK(&lk); + continue; + } + if (freeblks == NULL) + break; + } + return; +} + +/* * Process one item on the worklist. 
*/ static int -process_worklist_item(mp, flags) +process_worklist_item(mp, target, flags) struct mount *mp; + int target; int flags; { + struct worklist sintenel; struct worklist *wk; struct ufsmount *ump; - struct vnode *vp; - int matchcnt = 0; + int matchcnt; + int error; mtx_assert(&lk, MA_OWNED); KASSERT(mp != NULL, ("process_worklist_item: NULL mp")); @@ -1499,77 +1699,79 @@ process_worklist_item(mp, flags) */ if (curthread->td_pflags & TDP_COWINPROGRESS) return (-1); - /* - * Normally we just process each item on the worklist in order. - * However, if we are in a situation where we cannot lock any - * inodes, we have to skip over any dirrem requests whose - * vnodes are resident and locked. - */ - vp = NULL; + PHOLD(curproc); /* Don't let the stack go away. */ ump = VFSTOUFS(mp); - LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) { - if (wk->wk_state & INPROGRESS) + matchcnt = 0; + sintenel.wk_mp = NULL; + sintenel.wk_type = D_SENTINAL; + LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sintenel, wk_list); + for (wk = LIST_NEXT(&sintenel, wk_list); wk != NULL; + wk = LIST_NEXT(&sintenel, wk_list)) { + if (wk->wk_type == D_SENTINAL) { + LIST_REMOVE(&sintenel, wk_list); + LIST_INSERT_AFTER(wk, &sintenel, wk_list); continue; - if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM) - break; + } + if (wk->wk_state & INPROGRESS) + panic("process_worklist_item: %p already in progress.", + wk); wk->wk_state |= INPROGRESS; - ump->softdep_on_worklist_inprogress++; + remove_from_worklist(wk); FREE_LOCK(&lk); - ffs_vgetf(mp, WK_DIRREM(wk)->dm_oldinum, - LK_NOWAIT | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ); - ACQUIRE_LOCK(&lk); - if (wk->wk_state & IOWAITING) { - wk->wk_state &= ~IOWAITING; - wakeup(wk); - } - wk->wk_state &= ~INPROGRESS; - ump->softdep_on_worklist_inprogress--; - if (vp != NULL) + if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) + panic("process_worklist_item: suspended filesystem"); + switch (wk->wk_type) { + case D_DIRREM: + /* removal of a 
directory entry */ + error = handle_workitem_remove(WK_DIRREM(wk), flags); break; - } - if (wk == 0) - return (-1); - remove_from_worklist(wk); - FREE_LOCK(&lk); - if (vn_start_secondary_write(NULL, &mp, V_NOWAIT)) - panic("process_worklist_item: suspended filesystem"); - matchcnt++; - switch (wk->wk_type) { - - case D_DIRREM: - /* removal of a directory entry */ - handle_workitem_remove(WK_DIRREM(wk), vp); - if (vp) - vput(vp); - break; - - case D_FREEBLKS: - /* releasing blocks and/or fragments from a file */ - handle_workitem_freeblocks(WK_FREEBLKS(wk), flags & LK_NOWAIT); - break; - case D_FREEFRAG: - /* releasing a fragment when replaced as a file grows */ - handle_workitem_freefrag(WK_FREEFRAG(wk)); - break; + case D_FREEBLKS: + /* releasing blocks and/or fragments from a file */ + error = handle_workitem_freeblocks(WK_FREEBLKS(wk), + flags); + break; - case D_FREEFILE: - /* releasing an inode when its link count drops to 0 */ - handle_workitem_freefile(WK_FREEFILE(wk)); - break; + case D_FREEFRAG: + /* releasing a fragment when replaced as a file grows */ + handle_workitem_freefrag(WK_FREEFRAG(wk)); + error = 0; + break; - case D_FREEWORK: - /* Final block in an indirect was freed. */ - handle_workitem_indirblk(WK_FREEWORK(wk)); - break; + case D_FREEFILE: + /* releasing an inode when its link count drops to 0 */ + handle_workitem_freefile(WK_FREEFILE(wk)); + error = 0; + break; - default: - panic("%s_process_worklist: Unknown type %s", - "softdep", TYPENAME(wk->wk_type)); - /* NOTREACHED */ - } - vn_finished_secondary_write(mp); - ACQUIRE_LOCK(&lk); + default: + panic("%s_process_worklist: Unknown type %s", + "softdep", TYPENAME(wk->wk_type)); + /* NOTREACHED */ + } + vn_finished_secondary_write(mp); + ACQUIRE_LOCK(&lk); + if (error == 0) { + if (++matchcnt == target) + break; + continue; + } + /* + * We have to retry the worklist item later. 
Wake up any + * waiters who may be able to complete it immediately and + * add the item back to the head so we don't try to execute + * it again. + */ + wk->wk_state &= ~INPROGRESS; + wake_worklist(wk); + add_to_worklist(wk, WK_HEAD); + } + LIST_REMOVE(&sintenel, wk_list); + /* Sentinal could've become the tail from remove_from_worklist. */ + if (ump->softdep_worklist_tail == &sintenel) + ump->softdep_worklist_tail = + (struct worklist *)sintenel.wk_list.le_prev; + PRELE(curproc); return (matchcnt); } @@ -1774,31 +1976,26 @@ pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp) { struct pagedep *pagedep; - LIST_FOREACH(pagedep, pagedephd, pd_hash) - if (ino == pagedep->pd_ino && - lbn == pagedep->pd_lbn && - mp == pagedep->pd_list.wk_mp) - break; - if (pagedep) { - *pagedeppp = pagedep; - if ((flags & DEPALLOC) != 0 && - (pagedep->pd_state & ONWORKLIST) == 0) - return (0); - return (1); + LIST_FOREACH(pagedep, pagedephd, pd_hash) { + if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn && + mp == pagedep->pd_list.wk_mp) { + *pagedeppp = pagedep; + return (1); + } } *pagedeppp = NULL; return (0); } /* - * Look up a pagedep. Return 1 if found, 0 if not found or found - * when asked to allocate but not associated with any buffer. + * Look up a pagedep. Return 1 if found, 0 otherwise. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in pagedeppp. * This routine must be called with splbio interrupts blocked. 
*/ static int -pagedep_lookup(mp, ino, lbn, flags, pagedeppp) +pagedep_lookup(mp, bp, ino, lbn, flags, pagedeppp) struct mount *mp; + struct buf *bp; ino_t ino; ufs_lbn_t lbn; int flags; @@ -1806,15 +2003,28 @@ pagedep_lookup(mp, ino, lbn, flags, pagedeppp) { struct pagedep *pagedep; struct pagedep_hashhead *pagedephd; + struct worklist *wk; int ret; int i; mtx_assert(&lk, MA_OWNED); + if (bp) { + LIST_FOREACH(wk, &bp->b_dep, wk_list) { + if (wk->wk_type == D_PAGEDEP) { + *pagedeppp = WK_PAGEDEP(wk); + return (1); + } + } + } pagedephd = PAGEDEP_HASH(mp, ino, lbn); - ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); - if (*pagedeppp || (flags & DEPALLOC) == 0) - return (ret); + if (ret) { + if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp) + WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list); + return (1); + } + if ((flags & DEPALLOC) == 0) + return (0); FREE_LOCK(&lk); pagedep = malloc(sizeof(struct pagedep), M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO); @@ -1822,6 +2032,10 @@ pagedep_lookup(mp, ino, lbn, flags, pagedeppp) ACQUIRE_LOCK(&lk); ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp); if (*pagedeppp) { + /* + * This should never happen since we only create pagedeps + * with the vnode lock held. Could be an assert. 
+ */ WORKITEM_FREE(pagedep, D_PAGEDEP); return (ret); } @@ -1832,6 +2046,7 @@ pagedep_lookup(mp, ino, lbn, flags, pagedeppp) for (i = 0; i < DAHASHSZ; i++) LIST_INIT(&pagedep->pd_diraddhd[i]); LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); + WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); *pagedeppp = pagedep; return (0); } @@ -1922,6 +2137,7 @@ inodedep_lookup(mp, inum, flags, inodedeppp) TAILQ_INIT(&inodedep->id_newinoupdt); TAILQ_INIT(&inodedep->id_extupdt); TAILQ_INIT(&inodedep->id_newextupdt); + TAILQ_INIT(&inodedep->id_freeblklst); LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); *inodedeppp = inodedep; return (0); @@ -2008,43 +2224,86 @@ newblk_lookup(mp, newblkno, flags, newblkpp) } /* - * Structures and routines associated with indir caching. + * Structures and routines associated with freed indirect block caching. */ -struct workhead *indir_hashtbl; +struct freeworklst *indir_hashtbl; u_long indir_hash; /* size of hash table - 1 */ #define INDIR_HASH(mp, blkno) \ (&indir_hashtbl[((((register_t)(mp)) >> 13) + (blkno)) & indir_hash]) +/* + * Lookup an indirect block in the indir hash table. The freework is + * removed and potentially freed. The caller must do a blocking journal + * write before writing to the blkno. 
+ */ static int -indirblk_inseg(mp, blkno) +indirblk_lookup(mp, blkno) struct mount *mp; ufs2_daddr_t blkno; { struct freework *freework; - struct workhead *wkhd; - struct worklist *wk; + struct freeworklst *wkhd; wkhd = INDIR_HASH(mp, blkno); - LIST_FOREACH(wk, wkhd, wk_list) { - freework = WK_FREEWORK(wk); - if (freework->fw_blkno == blkno && - freework->fw_list.wk_mp == mp) { - LIST_REMOVE(freework, fw_next); - WORKLIST_REMOVE(&freework->fw_list); - WORKITEM_FREE(freework, D_FREEWORK); - return (1); - } + TAILQ_FOREACH(freework, wkhd, fw_next) { + if (freework->fw_blkno != blkno) + continue; + if (freework->fw_list.wk_mp != mp) + continue; + indirblk_remove(freework); + return (1); } return (0); } /* + * Insert an indirect block represented by freework into the indirblk + * hash table so that it may prevent the block from being re-used prior + * to the journal being written. + */ +static void +indirblk_insert(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + struct jsegdep *jsegdep; + struct worklist *wk; + + freeblks = freework->fw_freeblks; + LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list) + if (wk->wk_type == D_JSEGDEP) + break; + if (wk == NULL) + return; + + jsegdep = WK_JSEGDEP(wk); + LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, freework, fw_segs); + TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp, + freework->fw_blkno), freework, fw_next); + freework->fw_state &= ~DEPCOMPLETE; +} + +static void +indirblk_remove(freework) + struct freework *freework; +{ + + LIST_REMOVE(freework, fw_segs); + TAILQ_REMOVE(INDIR_HASH(freework->fw_list.wk_mp, + freework->fw_blkno), freework, fw_next); + freework->fw_state |= DEPCOMPLETE; + if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) + WORKITEM_FREE(freework, D_FREEWORK); +} + +/* * Executed during filesystem system initialization before * mounting any filesystems. 
*/ void softdep_initialize() { + int i; LIST_INIT(&mkdirlisthd); max_softdeps = desiredvnodes * 4; @@ -2052,7 +2311,12 @@ softdep_initialize() inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); newblk_hashtbl = hashinit(desiredvnodes / 5, M_NEWBLK, &newblk_hash); bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash); - indir_hashtbl = hashinit(desiredvnodes / 10, M_FREEWORK, &indir_hash); + i = 1 << (ffs(desiredvnodes / 10) - 1); + indir_hashtbl = malloc(i * sizeof(indir_hashtbl[0]), M_FREEWORK, + M_WAITOK); + indir_hash = i - 1; + for (i = 0; i <= indir_hash; i++) + TAILQ_INIT(&indir_hashtbl[i]); /* initialise bioops hack */ bioops.io_start = softdep_disk_io_initiation; @@ -2077,6 +2341,7 @@ softdep_uninitialize() hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash); hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash); hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash); + free(indir_hashtbl, M_FREEWORK); } /* @@ -2108,6 +2373,7 @@ softdep_mount(devvp, mp, fs, cred) LIST_INIT(&ump->softdep_workitem_pending); LIST_INIT(&ump->softdep_journal_pending); TAILQ_INIT(&ump->softdep_unlinked); + LIST_INIT(&ump->softdep_dirtycg); ump->softdep_worklist_tail = NULL; ump->softdep_on_worklist = 0; ump->softdep_deps = 0; @@ -2154,8 +2420,15 @@ softdep_unmount(mp) struct mount *mp; { - if (mp->mnt_kern_flag & MNTK_SUJ) - journal_unmount(mp); + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_SOFTDEP; + if ((mp->mnt_kern_flag & MNTK_SUJ) == 0) { + MNT_IUNLOCK(mp); + return; + } + mp->mnt_kern_flag &= ~MNTK_SUJ; + MNT_IUNLOCK(mp); + journal_unmount(mp); } struct jblocks { @@ -2570,6 +2843,7 @@ softdep_prealloc(vp, waitok) ffs_syncvnode(vp, waitok); ACQUIRE_LOCK(&lk); process_removes(vp); + process_truncates(vp); if (journal_space(ump, 0) == 0) { softdep_speedup(); if (journal_space(ump, 1) == 0) @@ -2604,12 +2878,14 @@ softdep_prelink(dvp, vp) ffs_syncvnode(dvp, MNT_WAIT); ACQUIRE_LOCK(&lk); /* Process vp before dvp as it may create .. removes. 
*/ - if (vp) + if (vp) { process_removes(vp); + process_truncates(vp); + } process_removes(dvp); + process_truncates(dvp); softdep_speedup(); - process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); - process_worklist_item(UFSTOVFS(ump), LK_NOWAIT); + process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT); if (journal_space(ump, 0) == 0) { softdep_speedup(); if (journal_space(ump, 1) == 0) @@ -2717,7 +2993,7 @@ jfreeblk_write(jfreeblk, jseg, data) { struct jblkrec *rec; - jfreeblk->jf_jsegdep->jd_seg = jseg; + jfreeblk->jf_dep.jb_jsegdep->jd_seg = jseg; rec = (struct jblkrec *)data; rec->jb_op = JOP_FREEBLK; rec->jb_ino = jfreeblk->jf_ino; @@ -2753,6 +3029,7 @@ jtrunc_write(jtrunc, jseg, data) { struct jtrncrec *rec; + jtrunc->jt_dep.jb_jsegdep->jd_seg = jseg; rec = (struct jtrncrec *)data; rec->jt_op = JOP_TRUNC; rec->jt_ino = jtrunc->jt_ino; @@ -2760,6 +3037,40 @@ jtrunc_write(jtrunc, jseg, data) rec->jt_extsize = jtrunc->jt_extsize; } +static void +jfsync_write(jfsync, jseg, data) + struct jfsync *jfsync; + struct jseg *jseg; + uint8_t *data; +{ + struct jtrncrec *rec; + + rec = (struct jtrncrec *)data; + rec->jt_op = JOP_SYNC; + rec->jt_ino = jfsync->jfs_ino; + rec->jt_size = jfsync->jfs_size; + rec->jt_extsize = jfsync->jfs_extsize; +} + +static void +softdep_flushjournal(mp) + struct mount *mp; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + + if ((mp->mnt_kern_flag & MNTK_SUJ) == 0) + return; + ump = VFSTOUFS(mp); + jblocks = ump->softdep_jblocks; + ACQUIRE_LOCK(&lk); + while (ump->softdep_on_journal) { + jblocks->jb_needseg = 1; + softdep_process_journal(mp, NULL, MNT_WAIT); + } + FREE_LOCK(&lk); +} + /* * Flush some journal records to disk. 
*/ @@ -2909,7 +3220,7 @@ softdep_process_journal(mp, needwk, flags) if (wk == needwk) needwk = NULL; remove_from_journal(wk); - wk->wk_state |= IOSTARTED; + wk->wk_state |= INPROGRESS; WORKLIST_INSERT(&jseg->js_entries, wk); switch (wk->wk_type) { case D_JADDREF: @@ -2933,6 +3244,9 @@ softdep_process_journal(mp, needwk, flags) case D_JTRUNC: jtrunc_write(WK_JTRUNC(wk), jseg, data); break; + case D_JFSYNC: + jfsync_write(WK_JFSYNC(wk), jseg, data); + break; default: panic("process_journal: Unknown type %s", TYPENAME(wk->wk_type)); @@ -2956,7 +3270,7 @@ softdep_process_journal(mp, needwk, flags) * We only do the blocking wait once we find the journal * entry we're looking for. */ - if (needwk == NULL && flags & MNT_WAIT) + if (needwk == NULL && flags == MNT_WAIT) bwrite(bp); else bawrite(bp); @@ -2996,7 +3310,7 @@ complete_jseg(jseg) while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) { WORKLIST_REMOVE(wk); waiting = wk->wk_state & IOWAITING; - wk->wk_state &= ~(IOSTARTED | IOWAITING); + wk->wk_state &= ~(INPROGRESS | IOWAITING); wk->wk_state |= COMPLETE; KASSERT(i++ < jseg->js_cnt, ("handle_written_jseg: overflow %d >= %d", @@ -3009,26 +3323,29 @@ complete_jseg(jseg) handle_written_jremref(WK_JREMREF(wk)); break; case D_JMVREF: - /* No jsegdep here. */ - rele_jseg(jseg); + rele_jseg(jseg); /* No jsegdep. */ jmvref = WK_JMVREF(wk); LIST_REMOVE(jmvref, jm_deps); - free_pagedep(jmvref->jm_pagedep); + if ((jmvref->jm_pagedep->pd_state & ONWORKLIST) == 0) + free_pagedep(jmvref->jm_pagedep); WORKITEM_FREE(jmvref, D_JMVREF); break; case D_JNEWBLK: handle_written_jnewblk(WK_JNEWBLK(wk)); break; case D_JFREEBLK: - handle_written_jfreeblk(WK_JFREEBLK(wk)); + handle_written_jblkdep(&WK_JFREEBLK(wk)->jf_dep); + break; + case D_JTRUNC: + handle_written_jblkdep(&WK_JTRUNC(wk)->jt_dep); + break; + case D_JFSYNC: + rele_jseg(jseg); /* No jsegdep. 
*/ + WORKITEM_FREE(wk, D_JFSYNC); break; case D_JFREEFRAG: handle_written_jfreefrag(WK_JFREEFRAG(wk)); break; - case D_JTRUNC: - WK_JTRUNC(wk)->jt_jsegdep->jd_seg = jseg; - WORKITEM_FREE(wk, D_JTRUNC); - break; default: panic("handle_written_jseg: Unknown type %s", TYPENAME(wk->wk_type)); @@ -3123,7 +3440,7 @@ handle_written_jremref(jremref) jremref->jr_dirrem = NULL; LIST_REMOVE(jremref, jr_deps); jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT; - WORKLIST_INSERT(&dirrem->dm_jwork, &jsegdep->jd_list); + jwork_insert(&dirrem->dm_jwork, jsegdep); if (LIST_EMPTY(&dirrem->dm_jremrefhd) && (dirrem->dm_state & COMPLETE) != 0) add_to_worklist(&dirrem->dm_list, 0); @@ -3183,7 +3500,7 @@ handle_written_jaddref(jaddref) mkdir->md_state |= DEPCOMPLETE; complete_mkdir(mkdir); } - WORKLIST_INSERT(&diradd->da_jwork, &jsegdep->jd_list); + jwork_insert(&diradd->da_jwork, jsegdep); if (jaddref->ja_state & NEWBLOCK) { inodedep->id_state |= ONDEPLIST; LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd, @@ -3205,10 +3522,9 @@ handle_written_jnewblk(jnewblk) { struct bmsafemap *bmsafemap; struct freefrag *freefrag; + struct freework *freework; struct jsegdep *jsegdep; struct newblk *newblk; - struct freework *freework; - struct indirdep *indirdep; /* Grab the jsegdep. 
*/ jsegdep = jnewblk->jn_jsegdep; @@ -3225,10 +3541,13 @@ handle_written_jnewblk(jnewblk) */ newblk = WK_NEWBLK(jnewblk->jn_dep); newblk->nb_jnewblk = NULL; - bmsafemap = newblk->nb_bmsafemap; - newblk->nb_state |= ONDEPLIST; - LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); - WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); + if ((newblk->nb_state & GOINGAWAY) == 0) { + bmsafemap = newblk->nb_bmsafemap; + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, + nb_deps); + } + jwork_insert(&newblk->nb_jwork, jsegdep); break; case D_FREEFRAG: /* @@ -3245,15 +3564,8 @@ handle_written_jnewblk(jnewblk) */ freework = WK_FREEWORK(jnewblk->jn_dep); freework->fw_jnewblk = NULL; - WORKLIST_INSERT(&freework->fw_jwork, &jsegdep->jd_list); - break; - case D_INDIRDEP: - /* - * An indirect block was removed by truncate. - */ - indirdep = WK_INDIRDEP(jnewblk->jn_dep); - LIST_REMOVE(jnewblk, jn_indirdeps); - WORKLIST_INSERT(&indirdep->ir_jwork, &jsegdep->jd_list); + WORKLIST_INSERT(&freework->fw_freeblks->fb_jwork, + &jsegdep->jd_list); break; default: panic("handle_written_jnewblk: Unknown type %d.", @@ -3293,7 +3605,7 @@ free_jfreefrag(jfreefrag) struct jfreefrag *jfreefrag; { - if (jfreefrag->fr_state & IOSTARTED) + if (jfreefrag->fr_state & INPROGRESS) WORKLIST_REMOVE(&jfreefrag->fr_list); else if (jfreefrag->fr_state & ONWORKLIST) remove_from_journal(&jfreefrag->fr_list); @@ -3321,7 +3633,7 @@ handle_written_jfreefrag(jfreefrag) panic("handle_written_jfreefrag: No freefrag."); freefrag->ff_state |= DEPCOMPLETE; freefrag->ff_jdep = NULL; - WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list); + jwork_insert(&freefrag->ff_jwork, jsegdep); if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) add_to_worklist(&freefrag->ff_list, 0); jfreefrag->fr_freefrag = NULL; @@ -3335,30 +3647,26 @@ handle_written_jfreefrag(jfreefrag) * have been reclaimed. 
*/ static void -handle_written_jfreeblk(jfreeblk) - struct jfreeblk *jfreeblk; +handle_written_jblkdep(jblkdep) + struct jblkdep *jblkdep; { struct freeblks *freeblks; struct jsegdep *jsegdep; /* Grab the jsegdep. */ - jsegdep = jfreeblk->jf_jsegdep; - jfreeblk->jf_jsegdep = NULL; - freeblks = jfreeblk->jf_freeblks; - LIST_REMOVE(jfreeblk, jf_deps); + jsegdep = jblkdep->jb_jsegdep; + jblkdep->jb_jsegdep = NULL; + freeblks = jblkdep->jb_freeblks; + LIST_REMOVE(jblkdep, jb_deps); WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list); /* * If the freeblks is all journaled, we can add it to the worklist. */ - if (LIST_EMPTY(&freeblks->fb_jfreeblkhd) && - (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) { - /* Remove from the b_dep that is waiting on this write. */ - if (freeblks->fb_state & ONWORKLIST) - WORKLIST_REMOVE(&freeblks->fb_list); - add_to_worklist(&freeblks->fb_list, 1); - } + if (LIST_EMPTY(&freeblks->fb_jblkdephd) && + (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freeblks->fb_list, WK_NODELAY); - free_jfreeblk(jfreeblk); + free_jblkdep(jblkdep); } static struct jsegdep * @@ -3480,9 +3788,12 @@ static void free_freedep(freedep) struct freedep *freedep; { + struct freework *freework; - if (--freedep->fd_freework->fw_ref == 0) - add_to_worklist(&freedep->fd_freework->fw_list, 1); + freework = freedep->fd_freework; + freework->fw_freeblks->fb_cgwait--; + if (--freework->fw_ref == 0) + freework_enqueue(freework); WORKITEM_FREE(freedep, D_FREEDEP); } @@ -3493,42 +3804,69 @@ free_freedep(freedep) * is visible outside of softdep_setup_freeblocks(). 
*/ static struct freework * -newfreework(ump, freeblks, parent, lbn, nb, frags, journal) +newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal) struct ufsmount *ump; struct freeblks *freeblks; struct freework *parent; ufs_lbn_t lbn; ufs2_daddr_t nb; int frags; + int off; int journal; { struct freework *freework; freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS); workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp); + freework->fw_state = ATTACHED; freework->fw_jnewblk = NULL; freework->fw_freeblks = freeblks; freework->fw_parent = parent; freework->fw_lbn = lbn; freework->fw_blkno = nb; freework->fw_frags = frags; + freework->fw_indir = NULL; freework->fw_ref = ((UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ) == 0 || lbn >= -NXADDR) ? 0 : NINDIR(ump->um_fs) + 1; - freework->fw_off = 0; - LIST_INIT(&freework->fw_jwork); - + freework->fw_start = freework->fw_off = off; + if (journal) + newjfreeblk(freeblks, lbn, nb, frags); if (parent == NULL) { - WORKLIST_INSERT_UNLOCKED(&freeblks->fb_freeworkhd, - &freework->fw_list); + ACQUIRE_LOCK(&lk); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); freeblks->fb_ref++; + FREE_LOCK(&lk); } - if (journal) - newjfreeblk(freeblks, lbn, nb, frags); return (freework); } /* + * Eliminate a jfreeblk for a block that does not need journaling. + */ +static void +cancel_jfreeblk(freeblks, blkno) + struct freeblks *freeblks; + ufs2_daddr_t blkno; +{ + struct jfreeblk *jfreeblk; + struct jblkdep *jblkdep; + + LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) { + if (jblkdep->jb_list.wk_type != D_JFREEBLK) + continue; + jfreeblk = WK_JFREEBLK(&jblkdep->jb_list); + if (jfreeblk->jf_blkno == blkno) + break; + } + if (jblkdep == NULL) + return; + free_jsegdep(jblkdep->jb_jsegdep); + LIST_REMOVE(jblkdep, jb_deps); + WORKITEM_FREE(jfreeblk, D_JFREEBLK); +} + +/* * Allocate a new jfreeblk to journal top level block pointer when truncating * a file. 
The caller must add this to the worklist when lk is held. */ @@ -3542,20 +3880,43 @@ newjfreeblk(freeblks, lbn, blkno, frags) struct jfreeblk *jfreeblk; jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS); - workitem_alloc(&jfreeblk->jf_list, D_JFREEBLK, freeblks->fb_list.wk_mp); - jfreeblk->jf_jsegdep = newjsegdep(&jfreeblk->jf_list); - jfreeblk->jf_state = ATTACHED | DEPCOMPLETE; - jfreeblk->jf_ino = freeblks->fb_previousinum; + workitem_alloc(&jfreeblk->jf_dep.jb_list, D_JFREEBLK, + freeblks->fb_list.wk_mp); + jfreeblk->jf_dep.jb_jsegdep = newjsegdep(&jfreeblk->jf_dep.jb_list); + jfreeblk->jf_dep.jb_freeblks = freeblks; + jfreeblk->jf_ino = freeblks->fb_inum; jfreeblk->jf_lbn = lbn; jfreeblk->jf_blkno = blkno; jfreeblk->jf_frags = frags; - jfreeblk->jf_freeblks = freeblks; - LIST_INSERT_HEAD(&freeblks->fb_jfreeblkhd, jfreeblk, jf_deps); + LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jfreeblk->jf_dep, jb_deps); return (jfreeblk); } -static void move_newblock_dep(struct jaddref *, struct inodedep *); +/* + * Allocate a new jtrunc to track a partial truncation. + */ +static struct jtrunc * +newjtrunc(freeblks, size, extsize) + struct freeblks *freeblks; + off_t size; + int extsize; +{ + struct jtrunc *jtrunc; + + jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS); + workitem_alloc(&jtrunc->jt_dep.jb_list, D_JTRUNC, + freeblks->fb_list.wk_mp); + jtrunc->jt_dep.jb_jsegdep = newjsegdep(&jtrunc->jt_dep.jb_list); + jtrunc->jt_dep.jb_freeblks = freeblks; + jtrunc->jt_ino = freeblks->fb_inum; + jtrunc->jt_size = size; + jtrunc->jt_extsize = extsize; + LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jtrunc->jt_dep, jb_deps); + + return (jtrunc); +} + /* * If we're canceling a new bitmap we have to search for another ref * to move into the bmsafemap dep. 
This might be better expressed @@ -3613,7 +3974,7 @@ cancel_jaddref(jaddref, inodedep, wkhd) KASSERT((jaddref->ja_state & COMPLETE) == 0, ("cancel_jaddref: Canceling complete jaddref")); - if (jaddref->ja_state & (IOSTARTED | COMPLETE)) + if (jaddref->ja_state & (INPROGRESS | COMPLETE)) needsj = 1; else needsj = 0; @@ -3637,15 +3998,12 @@ cancel_jaddref(jaddref, inodedep, wkhd) jsegdep = inoref_jseg(&jaddref->ja_ref); if (jaddref->ja_state & NEWBLOCK) move_newblock_dep(jaddref, inodedep); - if (jaddref->ja_state & IOWAITING) { - jaddref->ja_state &= ~IOWAITING; - wakeup(&jaddref->ja_list); - } + wake_worklist(&jaddref->ja_list); jaddref->ja_mkdir = NULL; - if (jaddref->ja_state & IOSTARTED) { - jaddref->ja_state &= ~IOSTARTED; + if (jaddref->ja_state & INPROGRESS) { + jaddref->ja_state &= ~INPROGRESS; WORKLIST_REMOVE(&jaddref->ja_list); - WORKLIST_INSERT(wkhd, &jsegdep->jd_list); + jwork_insert(wkhd, jsegdep); } else { free_jsegdep(jsegdep); if (jaddref->ja_state & DEPCOMPLETE) @@ -3694,7 +4052,7 @@ free_jaddref(jaddref) jaddref, jaddref->ja_state); if (jaddref->ja_state & NEWBLOCK) LIST_REMOVE(jaddref, ja_bmdeps); - if (jaddref->ja_state & (IOSTARTED | ONWORKLIST)) + if (jaddref->ja_state & (INPROGRESS | ONWORKLIST)) panic("free_jaddref: Bad state %p(0x%X)", jaddref, jaddref->ja_state); if (jaddref->ja_mkdir != NULL) @@ -3712,7 +4070,7 @@ free_jremref(jremref) if (jremref->jr_ref.if_jsegdep) free_jsegdep(jremref->jr_ref.if_jsegdep); - if (jremref->jr_state & IOSTARTED) + if (jremref->jr_state & INPROGRESS) panic("free_jremref: IO still pending"); WORKITEM_FREE(jremref, D_JREMREF); } @@ -3734,11 +4092,7 @@ free_jnewblk(jnewblk) } /* - * Cancel a jnewblk which has been superseded by a freeblk. The jnewblk - * is kept linked into the bmsafemap until the free completes, thus - * preventing the modified state from ever reaching disk. 
The free - * routine must pass this structure via ffs_blkfree() to - * softdep_setup_freeblks() so there is no race in releasing the space. + * Cancel a jnewblk which has been made redundant by frag extension. */ static void cancel_jnewblk(jnewblk, wkhd) @@ -3753,27 +4107,30 @@ cancel_jnewblk(jnewblk, wkhd) jnewblk->jn_jsegdep = NULL; jnewblk->jn_dep = NULL; jnewblk->jn_state |= GOINGAWAY; - if (jnewblk->jn_state & IOSTARTED) { - jnewblk->jn_state &= ~IOSTARTED; + if (jnewblk->jn_state & INPROGRESS) { + jnewblk->jn_state &= ~INPROGRESS; WORKLIST_REMOVE(&jnewblk->jn_list); - WORKLIST_INSERT(wkhd, &jsegdep->jd_list); + jwork_insert(wkhd, jsegdep); } else { free_jsegdep(jsegdep); remove_from_journal(&jnewblk->jn_list); } - if (jnewblk->jn_state & IOWAITING) { - jnewblk->jn_state &= ~IOWAITING; - wakeup(&jnewblk->jn_list); - } + wake_worklist(&jnewblk->jn_list); WORKLIST_INSERT(wkhd, &jnewblk->jn_list); } static void -free_jfreeblk(jfreeblk) - struct jfreeblk *jfreeblk; +free_jblkdep(jblkdep) + struct jblkdep *jblkdep; { - WORKITEM_FREE(jfreeblk, D_JFREEBLK); + if (jblkdep->jb_list.wk_type == D_JFREEBLK) + WORKITEM_FREE(jblkdep, D_JFREEBLK); + else if (jblkdep->jb_list.wk_type == D_JTRUNC) + WORKITEM_FREE(jblkdep, D_JTRUNC); + else + panic("free_jblkdep: Unexpected type %s", + TYPENAME(jblkdep->jb_list.wk_type)); } /* @@ -3792,11 +4149,8 @@ free_jseg(jseg, jblocks) * Free freework structures that were lingering to indicate freed * indirect blocks that forced journal write ordering on reallocate.
*/ - while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL) { - LIST_REMOVE(freework, fw_next); - WORKLIST_REMOVE(&freework->fw_list); - WORKITEM_FREE(freework, D_FREEWORK); - } + while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL) + indirblk_remove(freework); if (jblocks->jb_oldestseg == jseg) jblocks->jb_oldestseg = TAILQ_NEXT(jseg, js_next); TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next); @@ -3887,24 +4241,53 @@ free_jsegdep(jsegdep) * Wait for a journal item to make it to disk. Initiate journal processing * if required. */ -static void -jwait(wk) +static int +jwait(wk, waitfor) struct worklist *wk; + int waitfor; { - stat_journal_wait++; + /* + * Blocking journal waits cause slow synchronous behavior. Record + * stats on the frequency of these blocking operations. + */ + if (waitfor == MNT_WAIT) { + stat_journal_wait++; + switch (wk->wk_type) { + case D_JREMREF: + case D_JMVREF: + stat_jwait_filepage++; + break; + case D_JTRUNC: + case D_JFREEBLK: + stat_jwait_freeblks++; + break; + case D_JNEWBLK: + stat_jwait_newblk++; + break; + case D_JADDREF: + stat_jwait_inode++; + break; + default: + break; + } + } /* * If IO has not started we process the journal. We can't mark the * worklist item as IOWAITING because we drop the lock while * processing the journal and the worklist entry may be freed after * this point. The caller may call back in and re-issue the request. */ - if ((wk->wk_state & IOSTARTED) == 0) { - softdep_process_journal(wk->wk_mp, wk, MNT_WAIT); - return; + if ((wk->wk_state & INPROGRESS) == 0) { + softdep_process_journal(wk->wk_mp, wk, waitfor); + if (waitfor != MNT_WAIT) + return (EBUSY); + return (0); } - wk->wk_state |= IOWAITING; - msleep(wk, &lk, PRIBIO, "jwait", 0); + if (waitfor != MNT_WAIT) + return (EBUSY); + wait_worklist(wk, "jwait"); + return (0); } /* @@ -3928,68 +4311,6 @@ inodedep_lookup_ip(ip) } /* - * Create a journal entry that describes a truncate that we're about to - * perform. 
The inode allocations and frees between here and the completion - * of the operation are done asynchronously and without journaling. At - * the end of the operation the vnode is sync'd and the journal space - * is released. Recovery will discover the partially completed truncate - * and complete it. - */ -void * -softdep_setup_trunc(vp, length, flags) - struct vnode *vp; - off_t length; - int flags; -{ - struct jsegdep *jsegdep; - struct jtrunc *jtrunc; - struct ufsmount *ump; - struct inode *ip; - - softdep_prealloc(vp, MNT_WAIT); - ip = VTOI(vp); - ump = VFSTOUFS(vp->v_mount); - jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS); - workitem_alloc(&jtrunc->jt_list, D_JTRUNC, vp->v_mount); - jsegdep = jtrunc->jt_jsegdep = newjsegdep(&jtrunc->jt_list); - jtrunc->jt_ino = ip->i_number; - jtrunc->jt_extsize = 0; - jtrunc->jt_size = length; - if ((flags & IO_EXT) == 0 && ump->um_fstype == UFS2) - jtrunc->jt_extsize = ip->i_din2->di_extsize; - if ((flags & IO_NORMAL) == 0) - jtrunc->jt_size = DIP(ip, i_size); - ACQUIRE_LOCK(&lk); - add_to_journal(&jtrunc->jt_list); - while (jsegdep->jd_seg == NULL) { - stat_jwait_freeblks++; - jwait(&jtrunc->jt_list); - } - FREE_LOCK(&lk); - - return (jsegdep); -} - -/* - * After synchronous truncation is complete we free sync the vnode and - * release the jsegdep so the journal space can be freed. - */ -int -softdep_complete_trunc(vp, cookie) - struct vnode *vp; - void *cookie; -{ - int error; - - error = ffs_syncvnode(vp, MNT_WAIT); - ACQUIRE_LOCK(&lk); - free_jsegdep((struct jsegdep *)cookie); - FREE_LOCK(&lk); - - return (error); -} - -/* * Called prior to creating a new inode and linking it to a directory. 
The * jaddref structure must already be allocated by softdep_setup_inomapdep * and it is discovered here so we can initialize the mode and update @@ -4014,7 +4335,6 @@ softdep_setup_create(dp, ip) inoreflst); KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, ("softdep_setup_create: No addref structure present.")); - jaddref->ja_mode = ip->i_mode; } softdep_prelink(dvp, NULL); FREE_LOCK(&lk); @@ -4121,7 +4441,6 @@ softdep_setup_mkdir(dp, ip) KASSERT(jaddref->ja_parent == dp->i_number, ("softdep_setup_mkdir: bad parent %d", jaddref->ja_parent)); - jaddref->ja_mode = ip->i_mode; TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref, if_deps); } @@ -4341,10 +4660,11 @@ softdep_revert_rmdir(dp, ip) * Called just after updating the cylinder group block to allocate an inode. */ void -softdep_setup_inomapdep(bp, ip, newinum) +softdep_setup_inomapdep(bp, ip, newinum, mode) struct buf *bp; /* buffer for cylgroup block with inode map */ struct inode *ip; /* inode related to allocation */ ino_t newinum; /* new inode number being allocated */ + int mode; { struct inodedep *inodedep; struct bmsafemap *bmsafemap; @@ -4361,7 +4681,7 @@ softdep_setup_inomapdep(bp, ip, newinum) * can be dependent on it. 
*/ if (mp->mnt_kern_flag & MNTK_SUJ) { - jaddref = newjaddref(ip, newinum, 0, 0, 0); + jaddref = newjaddref(ip, newinum, 0, 0, mode); jaddref->ja_state |= NEWBLOCK; } @@ -4523,6 +4843,8 @@ bmsafemap_lookup(mp, bp, cg) LIST_INIT(&bmsafemap->sm_newblkwr); LIST_INIT(&bmsafemap->sm_jaddrefhd); LIST_INIT(&bmsafemap->sm_jnewblkhd); + LIST_INIT(&bmsafemap->sm_freehd); + LIST_INIT(&bmsafemap->sm_freewr); ACQUIRE_LOCK(&lk); if (bmsafemap_find(bmsafemaphd, mp, cg, &collision) == 1) { WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); @@ -4530,6 +4852,7 @@ bmsafemap_lookup(mp, bp, cg) } bmsafemap->sm_cg = cg; LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash); + LIST_INSERT_HEAD(&VFSTOUFS(mp)->softdep_dirtycg, bmsafemap, sm_next); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } @@ -4609,10 +4932,9 @@ softdep_setup_allocdirect(ip, off, newblkno, oldblkno, newsize, oldsize, bp) * allocate an associated pagedep to track additions and * deletions. */ - if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(mp, ip->i_number, off, DEPALLOC, - &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + if ((ip->i_mode & IFMT) == IFDIR) + pagedep_lookup(mp, bp, ip->i_number, off, DEPALLOC, + &pagedep); } if (newblk_lookup(mp, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); @@ -4716,14 +5038,12 @@ jnewblk_merge(new, old, wkhd) if (jnewblk->jn_blkno != njnewblk->jn_blkno) panic("jnewblk_merge: Merging disparate blocks."); /* - * The record may be rolled back in the cg update bits - * appropriately. NEWBLOCK here alerts the cg rollback code - * that the frag bits have changed. + * The record may be rolled back in the cg. 
*/ if (jnewblk->jn_state & UNDONE) { - njnewblk->jn_state |= UNDONE | NEWBLOCK; - njnewblk->jn_state &= ~ATTACHED; jnewblk->jn_state &= ~UNDONE; + njnewblk->jn_state |= UNDONE; + njnewblk->jn_state &= ~ATTACHED; } /* * We modify the newer addref and free the older so that if neither @@ -4751,7 +5071,6 @@ allocdirect_merge(adphead, newadp, oldadp) { struct worklist *wk; struct freefrag *freefrag; - struct newdirblk *newdirblk; freefrag = NULL; mtx_assert(&lk, MA_OWNED); @@ -4791,11 +5110,10 @@ allocdirect_merge(adphead, newadp, oldadp) * move it from the old allocdirect to the new allocdirect. */ if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) { - newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); + WORKLIST_REMOVE(wk); if (!LIST_EMPTY(&oldadp->ad_newdirblk)) panic("allocdirect_merge: extra newdirblk"); - WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); + WORKLIST_INSERT(&newadp->ad_newdirblk, wk); } TAILQ_REMOVE(adphead, oldadp, ad_next); /* @@ -4814,9 +5132,8 @@ allocdirect_merge(adphead, newadp, oldadp) &oldadp->ad_block.nb_jnewblk->jn_list, &newadp->ad_block.nb_jwork); oldadp->ad_block.nb_jnewblk = NULL; - if (cancel_newblk(&oldadp->ad_block, NULL, - &newadp->ad_block.nb_jwork)) - panic("allocdirect_merge: Unexpected dependency."); + cancel_newblk(&oldadp->ad_block, NULL, + &newadp->ad_block.nb_jwork); } else { wk = (struct worklist *) cancel_newblk(&oldadp->ad_block, &freefrag->ff_list, &freefrag->ff_jwork); @@ -4877,6 +5194,7 @@ newfreefrag(ip, blkno, size, lbn) freefrag->ff_state = ATTACHED; LIST_INIT(&freefrag->ff_jwork); freefrag->ff_inum = ip->i_number; + freefrag->ff_vtype = ITOV(ip)->v_type; freefrag->ff_blkno = blkno; freefrag->ff_fragsize = size; @@ -4921,7 +5239,7 @@ handle_workitem_freefrag(freefrag) } FREE_LOCK(&lk); ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno, - freefrag->ff_fragsize, freefrag->ff_inum, &wkhd); + freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd); 
ACQUIRE_LOCK(&lk); WORKITEM_FREE(freefrag, D_FREEFRAG); FREE_LOCK(&lk); @@ -5084,6 +5402,7 @@ newallocindir(ip, ptrno, newblkno, oldblkno, lbn) aip = (struct allocindir *)newblk; aip->ai_offset = ptrno; aip->ai_oldblkno = oldblkno; + aip->ai_lbn = lbn; if ((jnewblk = newblk->nb_jnewblk) != NULL) { jnewblk->jn_ino = ip->i_number; jnewblk->jn_lbn = lbn; @@ -5110,6 +5429,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) struct buf *nbp; /* buffer holding allocated page */ { struct inodedep *inodedep; + struct freefrag *freefrag; struct allocindir *aip; struct pagedep *pagedep; struct mount *mp; @@ -5126,12 +5446,13 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) * allocate an associated pagedep to track additions and * deletions. */ - if ((ip->i_mode & IFMT) == IFDIR && - pagedep_lookup(mp, ip->i_number, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); + if ((ip->i_mode & IFMT) == IFDIR) + pagedep_lookup(mp, nbp, ip->i_number, lbn, DEPALLOC, &pagedep); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); - setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); + freefrag = setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); FREE_LOCK(&lk); + if (freefrag) + handle_workitem_freefrag(freefrag); } /* @@ -5155,7 +5476,8 @@ softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) aip = newallocindir(ip, ptrno, newblkno, 0, lbn); inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, DEPALLOC, &inodedep); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list); - setup_allocindir_phase2(bp, ip, inodedep, aip, lbn); + if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn)) + panic("softdep_setup_allocindir_meta: Block already existed"); FREE_LOCK(&lk); } @@ -5166,7 +5488,7 @@ indirdep_complete(indirdep) struct allocindir *aip; LIST_REMOVE(indirdep, ir_next); - indirdep->ir_state &= ~ONDEPLIST; + indirdep->ir_state |= DEPCOMPLETE; while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 
NULL) { LIST_REMOVE(aip, ai_next); @@ -5181,124 +5503,131 @@ indirdep_complete(indirdep) free_indirdep(indirdep); } -/* - * Called to finish the allocation of the "aip" allocated - * by one of the two routines above. - */ -static void -setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) - struct buf *bp; /* in-memory copy of the indirect block */ - struct inode *ip; /* inode for file being extended */ - struct inodedep *inodedep; /* Inodedep for ip */ - struct allocindir *aip; /* allocindir allocated by the above routines */ - ufs_lbn_t lbn; /* Logical block number for this block. */ +static struct indirdep * +indirdep_lookup(mp, ip, bp) + struct mount *mp; + struct inode *ip; + struct buf *bp; { + struct indirdep *indirdep, *newindirdep; + struct newblk *newblk; struct worklist *wk; struct fs *fs; - struct newblk *newblk; - struct indirdep *indirdep, *newindirdep; - struct allocindir *oldaip; - struct freefrag *freefrag; - struct mount *mp; ufs2_daddr_t blkno; - mp = UFSTOVFS(ip->i_ump); - fs = ip->i_fs; mtx_assert(&lk, MA_OWNED); - if (bp->b_lblkno >= 0) - panic("setup_allocindir_phase2: not indir blk"); - for (freefrag = NULL, indirdep = NULL, newindirdep = NULL; ; ) { + indirdep = NULL; + newindirdep = NULL; + fs = ip->i_fs; + for (;;) { LIST_FOREACH(wk, &bp->b_dep, wk_list) { if (wk->wk_type != D_INDIRDEP) continue; indirdep = WK_INDIRDEP(wk); break; } - if (indirdep == NULL && newindirdep) { - indirdep = newindirdep; - newindirdep = NULL; - WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); - if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, - &newblk)) { - indirdep->ir_state |= ONDEPLIST; - LIST_INSERT_HEAD(&newblk->nb_indirdeps, - indirdep, ir_next); - } else - indirdep->ir_state |= DEPCOMPLETE; - } - if (indirdep) { - aip->ai_indirdep = indirdep; - /* - * Check to see if there is an existing dependency - * for this block. If there is, merge the old - * dependency into the new one. This happens - * as a result of reallocblk only. 
- */ - if (aip->ai_oldblkno == 0) - oldaip = NULL; - else - - LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, - ai_next) - if (oldaip->ai_offset == aip->ai_offset) - break; - if (oldaip != NULL) - freefrag = allocindir_merge(aip, oldaip); - LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); - KASSERT(aip->ai_offset >= 0 && - aip->ai_offset < NINDIR(ip->i_ump->um_fs), - ("setup_allocindir_phase2: Bad offset %d", - aip->ai_offset)); - KASSERT(indirdep->ir_savebp != NULL, - ("setup_allocindir_phase2 NULL ir_savebp")); - if (ip->i_ump->um_fstype == UFS1) - ((ufs1_daddr_t *)indirdep->ir_savebp->b_data) - [aip->ai_offset] = aip->ai_oldblkno; - else - ((ufs2_daddr_t *)indirdep->ir_savebp->b_data) - [aip->ai_offset] = aip->ai_oldblkno; - FREE_LOCK(&lk); - if (freefrag != NULL) - handle_workitem_freefrag(freefrag); - } else - FREE_LOCK(&lk); - if (newindirdep) { - newindirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE; - brelse(newindirdep->ir_savebp); - ACQUIRE_LOCK(&lk); - WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP); - if (indirdep) - break; - FREE_LOCK(&lk); - } - if (indirdep) { - ACQUIRE_LOCK(&lk); + /* Found on the buffer worklist, no new structure to free. */ + if (indirdep != NULL && newindirdep == NULL) + return (indirdep); + if (indirdep != NULL && newindirdep != NULL) + panic("indirdep_lookup: simultaneous create"); + /* None found on the buffer and a new structure is ready. */ + if (indirdep == NULL && newindirdep != NULL) break; - } + /* None found and no new structure available. 
*/ + FREE_LOCK(&lk); newindirdep = malloc(sizeof(struct indirdep), - M_INDIRDEP, M_SOFTDEP_FLAGS); + M_INDIRDEP, M_SOFTDEP_FLAGS); workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp); newindirdep->ir_state = ATTACHED; if (ip->i_ump->um_fstype == UFS1) newindirdep->ir_state |= UFS1FMT; + TAILQ_INIT(&newindirdep->ir_trunc); newindirdep->ir_saveddata = NULL; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); LIST_INIT(&newindirdep->ir_writehd); LIST_INIT(&newindirdep->ir_completehd); - LIST_INIT(&newindirdep->ir_jwork); - LIST_INIT(&newindirdep->ir_jnewblkhd); if (bp->b_blkno == bp->b_lblkno) { ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; } + newindirdep->ir_freeblks = NULL; newindirdep->ir_savebp = getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0); + newindirdep->ir_bp = bp; BUF_KERNPROC(newindirdep->ir_savebp); bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); ACQUIRE_LOCK(&lk); } + indirdep = newindirdep; + WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); + /* + * If the block is not yet allocated we don't set DEPCOMPLETE so + * that we don't free dependencies until the pointers are valid. + * This could search b_dep for D_ALLOCDIRECT/D_ALLOCINDIR rather + * than using the hash. + */ + if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk)) + LIST_INSERT_HEAD(&newblk->nb_indirdeps, indirdep, ir_next); + else + indirdep->ir_state |= DEPCOMPLETE; + return (indirdep); +} + +/* + * Called to finish the allocation of the "aip" allocated + * by one of the two routines above. + */ +static struct freefrag * +setup_allocindir_phase2(bp, ip, inodedep, aip, lbn) + struct buf *bp; /* in-memory copy of the indirect block */ + struct inode *ip; /* inode for file being extended */ + struct inodedep *inodedep; /* Inodedep for ip */ + struct allocindir *aip; /* allocindir allocated by the above routines */ + ufs_lbn_t lbn; /* Logical block number for this block. 
*/ +{ + struct fs *fs; + struct indirdep *indirdep; + struct allocindir *oldaip; + struct freefrag *freefrag; + struct mount *mp; + + mtx_assert(&lk, MA_OWNED); + mp = UFSTOVFS(ip->i_ump); + fs = ip->i_fs; + if (bp->b_lblkno >= 0) + panic("setup_allocindir_phase2: not indir blk"); + KASSERT(aip->ai_offset >= 0 && aip->ai_offset < NINDIR(fs), + ("setup_allocindir_phase2: Bad offset %d", aip->ai_offset)); + indirdep = indirdep_lookup(mp, ip, bp); + KASSERT(indirdep->ir_savebp != NULL, + ("setup_allocindir_phase2 NULL ir_savebp")); + aip->ai_indirdep = indirdep; + /* + * Check for an unwritten dependency for this indirect offset. If + * there is, merge the old dependency into the new one. This happens + * as a result of reallocblk only. + */ + freefrag = NULL; + if (aip->ai_oldblkno != 0) { + LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) { + if (oldaip->ai_offset == aip->ai_offset) { + freefrag = allocindir_merge(aip, oldaip); + goto done; + } + } + LIST_FOREACH(oldaip, &indirdep->ir_donehd, ai_next) { + if (oldaip->ai_offset == aip->ai_offset) { + freefrag = allocindir_merge(aip, oldaip); + goto done; + } + } + } +done: + LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); + return (freefrag); } /* @@ -5310,7 +5639,6 @@ allocindir_merge(aip, oldaip) struct allocindir *aip; struct allocindir *oldaip; { - struct newdirblk *newdirblk; struct freefrag *freefrag; struct worklist *wk; @@ -5326,11 +5654,10 @@ allocindir_merge(aip, oldaip) * move it from the old allocindir to the new allocindir. 
*/ if ((wk = LIST_FIRST(&oldaip->ai_newdirblk)) != NULL) { - newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); + WORKLIST_REMOVE(wk); if (!LIST_EMPTY(&oldaip->ai_newdirblk)) panic("allocindir_merge: extra newdirblk"); - WORKLIST_INSERT(&aip->ai_newdirblk, &newdirblk->db_list); + WORKLIST_INSERT(&aip->ai_newdirblk, wk); } /* * We can skip journaling for this freefrag and just complete @@ -5363,7 +5690,7 @@ setup_freedirect(freeblks, ip, i, needj) DIP_SET(ip, i_db[i], 0); frags = sblksize(ip->i_fs, ip->i_size, i); frags = numfrags(ip->i_fs, frags); - newfreework(ip->i_ump, freeblks, NULL, i, blkno, frags, needj); + newfreework(ip->i_ump, freeblks, NULL, i, blkno, frags, 0, needj); } static inline void @@ -5382,15 +5709,15 @@ setup_freeext(freeblks, ip, i, needj) ip->i_din2->di_extb[i] = 0; frags = sblksize(ip->i_fs, ip->i_din2->di_extsize, i); frags = numfrags(ip->i_fs, frags); - newfreework(ip->i_ump, freeblks, NULL, -1 - i, blkno, frags, needj); + newfreework(ip->i_ump, freeblks, NULL, -1 - i, blkno, frags, 0, needj); } static inline void setup_freeindir(freeblks, ip, i, lbn, needj) struct freeblks *freeblks; struct inode *ip; - ufs_lbn_t lbn; int i; + ufs_lbn_t lbn; int needj; { ufs2_daddr_t blkno; @@ -5400,7 +5727,7 @@ setup_freeindir(freeblks, ip, i, lbn, needj) return; DIP_SET(ip, i_ib[i], 0); newfreework(ip->i_ump, freeblks, NULL, lbn, blkno, ip->i_fs->fs_frag, - needj); + 0, needj); } static inline struct freeblks * @@ -5413,17 +5740,617 @@ newfreeblks(mp, ip) freeblks = malloc(sizeof(struct freeblks), M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO); workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp); - LIST_INIT(&freeblks->fb_jfreeblkhd); + LIST_INIT(&freeblks->fb_jblkdephd); LIST_INIT(&freeblks->fb_jwork); + freeblks->fb_ref = 0; + freeblks->fb_cgwait = 0; freeblks->fb_state = ATTACHED; freeblks->fb_uid = ip->i_uid; - freeblks->fb_previousinum = ip->i_number; + freeblks->fb_inum = ip->i_number; + freeblks->fb_vtype = ITOV(ip)->v_type; + 
freeblks->fb_modrev = DIP(ip, i_modrev); freeblks->fb_devvp = ip->i_devvp; freeblks->fb_chkcnt = 0; + freeblks->fb_freecnt = 0; + freeblks->fb_len = 0; return (freeblks); } +static void +trunc_indirdep(indirdep, freeblks, bp, off) + struct indirdep *indirdep; + struct freeblks *freeblks; + struct buf *bp; + int off; +{ + struct allocindir *aip, *aipn; + + /* + * The first set of allocindirs won't be in savedbp. + */ + LIST_FOREACH_SAFE(aip, &indirdep->ir_deplisthd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, bp, freeblks, 1); + LIST_FOREACH_SAFE(aip, &indirdep->ir_donehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, bp, freeblks, 1); + /* + * These will exist in savedbp. + */ + LIST_FOREACH_SAFE(aip, &indirdep->ir_writehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, NULL, freeblks, 0); + LIST_FOREACH_SAFE(aip, &indirdep->ir_completehd, ai_next, aipn) + if (aip->ai_offset > off) + cancel_allocindir(aip, NULL, freeblks, 0); +} + +/* + * Follow the chain of indirects down to lastlbn creating a freework + * structure for each. This will be used to start indir_trunc() at + * the right offset and create the journal records for the partial + * truncation. A second step will handle the truncated dependencies.
+ */ +static int +setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno) + struct freeblks *freeblks; + struct inode *ip; + ufs_lbn_t lbn; + ufs_lbn_t lastlbn; + ufs2_daddr_t blkno; +{ + struct indirdep *indirdep; + struct indirdep *indirn; + struct freework *freework; + struct newblk *newblk; + struct mount *mp; + struct buf *bp; + uint8_t *start; + uint8_t *end; + ufs_lbn_t lbnadd; + int level; + int error; + int off; + + + freework = NULL; + if (blkno == 0) + return (0); + mp = freeblks->fb_list.wk_mp; + bp = getblk(ITOV(ip), lbn, mp->mnt_stat.f_iosize, 0, 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_blkno = blkptrtodb(VFSTOUFS(mp), blkno); + bp->b_iocmd = BIO_READ; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); + curthread->td_ru.ru_inblock++; + error = bufwait(bp); + if (error) { + brelse(bp); + return (error); + } + } + level = lbn_level(lbn); + lbnadd = lbn_offset(ip->i_fs, level); + /* + * Compute the offset of the last block we want to keep. Store + * in the freework the first block we want to completely free. + */ + off = (lastlbn - -(lbn + level)) / lbnadd; + if (off + 1 == NINDIR(ip->i_fs)) + goto nowork; + freework = newfreework(ip->i_ump, freeblks, NULL, lbn, blkno, 0, off+1, + 0); + /* + * Link the freework into the indirdep. This will prevent any new + * allocations from proceeding until we are finished with the + * truncate and the block is written. + */ + ACQUIRE_LOCK(&lk); + indirdep = indirdep_lookup(mp, ip, bp); + if (indirdep->ir_freeblks) + panic("setup_trunc_indir: indirdep already truncated."); + TAILQ_INSERT_TAIL(&indirdep->ir_trunc, freework, fw_next); + freework->fw_indir = indirdep; + /* + * Cancel any allocindirs that will not make it to disk. + * We have to do this for all copies of the indirdep that + * live on this newblk. 
+ */ + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + newblk_lookup(mp, dbtofsb(ip->i_fs, bp->b_blkno), 0, &newblk); + LIST_FOREACH(indirn, &newblk->nb_indirdeps, ir_next) + trunc_indirdep(indirn, freeblks, bp, off); + } else + trunc_indirdep(indirdep, freeblks, bp, off); + FREE_LOCK(&lk); + /* + * Creation is protected by the buf lock. The saveddata is only + * needed if a full truncation follows a partial truncation but it + * is difficult to allocate in that case so we fetch it anyway. + */ + if (indirdep->ir_saveddata == NULL) + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); +nowork: + /* Fetch the blkno of the child and the zero start offset. */ + if (ip->i_ump->um_fstype == UFS1) { + blkno = ((ufs1_daddr_t *)bp->b_data)[off]; + start = (uint8_t *)&((ufs1_daddr_t *)bp->b_data)[off+1]; + } else { + blkno = ((ufs2_daddr_t *)bp->b_data)[off]; + start = (uint8_t *)&((ufs2_daddr_t *)bp->b_data)[off+1]; + } + if (freework) { + /* Zero the truncated pointers. */ + end = bp->b_data + bp->b_bcount; + bzero(start, end - start); + bdwrite(bp); + } else + bqrelse(bp); + if (level == 0) + return (0); + lbn++; /* adjust level */ + lbn -= (off * lbnadd); + return setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno); +} + +/* + * Complete the partial truncation of an indirect block set up by + * setup_trunc_indir(). This zeros the truncated pointers in the saved + * copy and writes them to disk before the freeblks is allowed to complete. + */ +static void +complete_trunc_indir(freework) + struct freework *freework; +{ + struct freework *fwn; + struct indirdep *indirdep; + struct buf *bp; + uintptr_t start; + int count; + + indirdep = freework->fw_indir; + for (;;) { + bp = indirdep->ir_bp; + /* See if the block was discarded. */ + if (bp == NULL) + break; + /* Inline part of getdirtybuf(). We don't want bremfree.
*/ + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) + break; + if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, &lk) == 0) + BUF_UNLOCK(bp); + ACQUIRE_LOCK(&lk); + } + mtx_assert(&lk, MA_OWNED); + freework->fw_state |= DEPCOMPLETE; + TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next); + /* + * Zero the pointers in the saved copy. + */ + if (indirdep->ir_state & UFS1FMT) + start = sizeof(ufs1_daddr_t); + else + start = sizeof(ufs2_daddr_t); + start *= freework->fw_start; + count = indirdep->ir_savebp->b_bcount - start; + start += (uintptr_t)indirdep->ir_savebp->b_data; + bzero((char *)start, count); + /* + * We need to start the next truncation in the list if it has not + * been started yet. + */ + fwn = TAILQ_FIRST(&indirdep->ir_trunc); + if (fwn != NULL) { + if (fwn->fw_freeblks == indirdep->ir_freeblks) + TAILQ_REMOVE(&indirdep->ir_trunc, fwn, fw_next); + if ((fwn->fw_state & ONWORKLIST) == 0) + freework_enqueue(fwn); + } + /* + * If bp is NULL the block was fully truncated, restore + * the saved block list otherwise free it if it is no + * longer needed. + */ + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + if (bp == NULL) + bcopy(indirdep->ir_saveddata, + indirdep->ir_savebp->b_data, + indirdep->ir_savebp->b_bcount); + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = NULL; + } + /* + * When bp is NULL there is a full truncation pending. We + * must wait for this full truncation to be journaled before + * we can release this freework because the disk pointers will + * never be written as zero. + */ + if (bp == NULL) { + if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd)) + handle_written_freework(freework); + else + WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd, + &freework->fw_list); + } else { + /* Complete when the real copy is written. 
*/ + WORKLIST_INSERT(&bp->b_dep, &freework->fw_list); + BUF_UNLOCK(bp); + } +} + +/* + * Calculate the number of blocks we are going to release where datablocks + * is the current total and length is the new file size. + */ +ufs2_daddr_t +blkcount(fs, datablocks, length) + struct fs *fs; + ufs2_daddr_t datablocks; + off_t length; +{ + off_t totblks, numblks; + + totblks = 0; + numblks = howmany(length, fs->fs_bsize); + if (numblks <= NDADDR) { + totblks = howmany(length, fs->fs_fsize); + goto out; + } + totblks = blkstofrags(fs, numblks); + numblks -= NDADDR; + /* + * Count all single, then double, then triple indirects required. + * Subtracting one indirects worth of blocks for each pass + * acknowledges one of each pointed to by the inode. + */ + for (;;) { + totblks += blkstofrags(fs, howmany(numblks, NINDIR(fs))); + numblks -= NINDIR(fs); + if (numblks <= 0) + break; + numblks = howmany(numblks, NINDIR(fs)); + } +out: + totblks = fsbtodb(fs, totblks); + /* + * Handle sparse files. We can't reclaim more blocks than the inode + * references. We will correct it later in handle_complete_freeblks() + * when we know the real count. + */ + if (totblks > datablocks) + return (0); + return (datablocks - totblks); +} + +/* + * Handle freeblocks for journaled softupdate filesystems. + * + * Contrary to normal softupdates, we must preserve the block pointers in + * indirects until their subordinates are free. This is to avoid journaling + * every block that is freed which may consume more space than the journal + * itself. The recovery program will see the free block journals at the + * base of the truncated area and traverse them to reclaim space. The + * pointers in the inode may be cleared immediately after the journal + * records are written because each direct and indirect pointer in the + * inode is recorded in a journal. This permits full truncation to proceed + * asynchronously. The write order is journal -> inode -> cgs -> indirects. 
+ * + * The algorithm is as follows: + * 1) Traverse the in-memory state and create journal entries to release + * the relevant blocks and full indirect trees. + * 2) Traverse the indirect block chain adding partial truncation freework + * records to indirects in the path to lastlbn. The freework will + * prevent new allocation dependencies from being satisfied in this + * indirect until the truncation completes. + * 3) Read and lock the inode block, performing an update with the new size + * and pointers. This prevents truncated data from becoming valid on + * disk through step 4. + * 4) Reap unsatisfied dependencies that are beyond the truncated area, + * eliminate journal work for those records that do not require it. + * 5) Schedule the journal records to be written followed by the inode block. + * 6) Allocate any necessary frags for the end of file. + * 7) Zero any partially truncated blocks. + * + * From this truncation proceeds asynchronously using the freework and + * indir_trunc machinery. The file will not be extended again into a + * partially truncated indirect block until all work is completed but + * the normal dependency mechanism ensures that it is rolled back/forward + * as appropriate. Further truncation may occur without delay and is + * serialized in indir_trunc(). 
+ */ +void +softdep_journal_freeblocks(ip, cred, length, flags) + struct inode *ip; /* The inode whose length is to be reduced */ + struct ucred *cred; + off_t length; /* The new length for the file */ + int flags; /* IO_EXT and/or IO_NORMAL */ +{ + struct freeblks *freeblks, *fbn; + struct inodedep *inodedep; + struct jblkdep *jblkdep; + struct allocdirect *adp, *adpn; + struct fs *fs; + struct buf *bp; + struct vnode *vp; + struct mount *mp; + ufs2_daddr_t extblocks, datablocks; + ufs_lbn_t tmpval, lbn, lastlbn; + int frags; + int lastoff, iboff; + int allocblock; + int error, i; + int needj; + + fs = ip->i_fs; + mp = UFSTOVFS(ip->i_ump); + vp = ITOV(ip); + needj = 1; + iboff = -1; + allocblock = 0; + extblocks = 0; + datablocks = 0; + frags = 0; + freeblks = newfreeblks(mp, ip); + ACQUIRE_LOCK(&lk); + /* + * If we're truncating a removed file that will never be written + * we don't need to journal the block frees. The canceled journals + * for the allocations will suffice. + */ + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED && + length == 0) + needj = 0; + FREE_LOCK(&lk); + /* + * Calculate the lbn that we are truncating to. This results in -1 + * if we're truncating the 0 bytes. So it is the last lbn we want + * to keep, not the first lbn we want to truncate. + */ + lastlbn = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastoff = blkoff(fs, length); + /* + * Compute frags we are keeping in lastlbn. 0 means all. + */ + if (lastlbn >= 0 && lastlbn < NDADDR) { + frags = fragroundup(fs, lastoff); + /* adp offset of last valid allocdirect. */ + iboff = lastlbn; + } else if (lastlbn > 0) + iboff = NDADDR; + if (fs->fs_magic == FS_UFS2_MAGIC) + extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); + /* + * Handle normal data blocks and indirects. This section saves + * values used after the inode update to complete frag and indirect + * truncation. 
+ */ + if ((flags & IO_NORMAL) != 0) { + /* + * Handle truncation of whole direct and indirect blocks. + */ + for (i = iboff + 1; i < NDADDR; i++) + setup_freedirect(freeblks, ip, i, needj); + for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; + i++, lbn += tmpval, tmpval *= NINDIR(fs)) { + /* Release a whole indirect tree. */ + if (lbn > lastlbn) { + setup_freeindir(freeblks, ip, i, -lbn -i, + needj); + continue; + } + iboff = i + NDADDR; + /* + * Traverse partially truncated indirect tree. + */ + if (lbn <= lastlbn && lbn + tmpval - 1 > lastlbn) + setup_trunc_indir(freeblks, ip, -lbn - i, + lastlbn, DIP(ip, i_ib[i])); + } + /* + * Handle partial truncation to a frag boundary. + */ + if (frags) { + ufs2_daddr_t blkno; + long oldfrags; + + oldfrags = blksize(fs, ip, lastlbn); + blkno = DIP(ip, i_db[lastlbn]); + if (blkno && oldfrags != frags) { + oldfrags -= frags; + oldfrags = numfrags(ip->i_fs, oldfrags); + blkno += numfrags(ip->i_fs, frags); + newfreework(ip->i_ump, freeblks, NULL, lastlbn, + blkno, oldfrags, 0, needj); + } else if (blkno == 0) + allocblock = 1; + } + /* + * Add a journal record for partial truncate if we are + * handling indirect blocks. Non-indirects need no extra + * journaling. + */ + if (length != 0 && lastlbn >= NDADDR) { + ip->i_flag |= IN_TRUNCATED; + newjtrunc(freeblks, length, 0); + } + ip->i_size = length; + DIP_SET(ip, i_size, ip->i_size); + datablocks = DIP(ip, i_blocks) - extblocks; + if (length != 0) + datablocks = blkcount(ip->i_fs, datablocks, length); + freeblks->fb_len = length; + } + if ((flags & IO_EXT) != 0) { + for (i = 0; i < NXADDR; i++) + setup_freeext(freeblks, ip, i, needj); + ip->i_din2->di_extsize = 0; + datablocks += extblocks; + } +#ifdef QUOTA + /* Reference the quotas in case the block count is wrong in the end. 
*/ + quotaref(vp, freeblks->fb_quota); + (void) chkdq(ip, -datablocks, NOCRED, 0); +#endif + freeblks->fb_chkcnt = datablocks; + UFS_LOCK(ip->i_ump); + fs->fs_pendingblocks += datablocks; + UFS_UNLOCK(ip->i_ump); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); + /* + * Handle truncation of incomplete alloc direct dependencies. We + * hold the inode block locked to prevent incomplete dependencies + * from reaching the disk while we are eliminating those that + * have been truncated. This is a partially inlined ffs_update(). + */ + ufs_itimes(vp); + ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); + error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, cred, &bp); + if (error) { + brelse(bp); + softdep_error("softdep_journal_freeblocks", error); + return; + } + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; + softdep_update_inodeblock(ip, bp, 0); + if (ip->i_ump->um_fstype == UFS1) + *((struct ufs1_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; + else + *((struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; + ACQUIRE_LOCK(&lk); + (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + if ((inodedep->id_state & IOSTARTED) != 0) + panic("softdep_setup_freeblocks: inode busy"); + /* + * Add the freeblks structure to the list of operations that + * must await the zero'ed inode being written to disk. If we + * still have a bitmap dependency (needj), then the inode + * has never been written to disk, so we can process the + * freeblks below once we have deleted the dependencies. + */ + if (needj) + WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); + else + freeblks->fb_state |= COMPLETE; + if ((flags & IO_NORMAL) != 0) { + TAILQ_FOREACH_SAFE(adp, &inodedep->id_inoupdt, ad_next, adpn) { + if (adp->ad_offset > iboff) + cancel_allocdirect(&inodedep->id_inoupdt, adp, + freeblks); + /* + * Truncate the allocdirect. 
We could eliminate + * or modify journal records as well. + */ + else if (adp->ad_offset == iboff && frags) + adp->ad_newsize = frags; + } + } + if ((flags & IO_EXT) != 0) + while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0) + cancel_allocdirect(&inodedep->id_extupdt, adp, + freeblks); + /* + * Add journal work. + */ + LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) + add_to_journal(&jblkdep->jb_list); + FREE_LOCK(&lk); + bdwrite(bp); + /* + * Truncate dependency structures beyond length. + */ + trunc_dependencies(ip, freeblks, lastlbn, frags, flags); + /* + * This is only set when we need to allocate a fragment because + * none existed at the end of a frag-sized file. It handles only + * allocating a new, zero filled block. + */ + if (allocblock) { + ip->i_size = length - lastoff; + DIP_SET(ip, i_size, ip->i_size); + error = UFS_BALLOC(vp, length - 1, 1, cred, BA_CLRBUF, &bp); + if (error != 0) { + softdep_error("softdep_journal_freeblks", error); + return; + } + ip->i_size = length; + DIP_SET(ip, i_size, length); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allocbuf(bp, frags); + ffs_update(vp, MNT_NOWAIT); + bawrite(bp); + } else if (lastoff != 0 && vp->v_type != VDIR) { + int size; + + /* + * Zero the end of a truncated frag or block. + */ + size = sblksize(fs, length, lastlbn); + error = bread(vp, lastlbn, size, cred, &bp); + if (error) { + softdep_error("softdep_journal_freeblks", error); + return; + } + bzero((char *)bp->b_data + lastoff, size - lastoff); + bawrite(bp); + + } + ACQUIRE_LOCK(&lk); + inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); + TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next); + freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST; + /* + * We zero earlier truncations so they don't erroneously + * update i_blocks. 
+ */ + if (freeblks->fb_len == 0 && (flags & IO_NORMAL) != 0) + TAILQ_FOREACH(fbn, &inodedep->id_freeblklst, fb_next) + fbn->fb_len = 0; + if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE && + LIST_EMPTY(&freeblks->fb_jblkdephd)) + freeblks->fb_state |= INPROGRESS; + else + freeblks = NULL; + FREE_LOCK(&lk); + if (freeblks) + handle_workitem_freeblocks(freeblks, 0); + trunc_pages(ip, length, extblocks, flags); + +} + +/* + * Flush a JOP_SYNC to the journal. + */ +void +softdep_journal_fsync(ip) + struct inode *ip; +{ + struct jfsync *jfsync; + + if ((ip->i_flag & IN_TRUNCATED) == 0) + return; + ip->i_flag &= ~IN_TRUNCATED; + jfsync = malloc(sizeof(*jfsync), M_JFSYNC, M_SOFTDEP_FLAGS | M_ZERO); + workitem_alloc(&jfsync->jfs_list, D_JFSYNC, UFSTOVFS(ip->i_ump)); + jfsync->jfs_size = ip->i_size; + jfsync->jfs_ino = ip->i_number; + ACQUIRE_LOCK(&lk); + add_to_journal(&jfsync->jfs_list); + jwait(&jfsync->jfs_list, MNT_WAIT); + FREE_LOCK(&lk); +} + /* * Block de-allocation dependencies. * @@ -5464,7 +6391,6 @@ softdep_setup_freeblocks(ip, length, flags) struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; - struct jfreeblk *jfreeblk; struct buf *bp; struct fs *fs; ufs2_daddr_t extblocks, datablocks; @@ -5472,52 +6398,42 @@ softdep_setup_freeblocks(ip, length, flags) int i, delay, error; ufs_lbn_t tmpval; ufs_lbn_t lbn; - int needj; fs = ip->i_fs; mp = UFSTOVFS(ip->i_ump); if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); freeblks = newfreeblks(mp, ip); - ACQUIRE_LOCK(&lk); - /* - * If we're truncating a removed file that will never be written - * we don't need to journal the block frees. The canceled journals - * for the allocations will suffice. 
- */ - inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); - if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED || - (fs->fs_flags & FS_SUJ) == 0) - needj = 0; - else - needj = 1; - FREE_LOCK(&lk); extblocks = 0; + datablocks = 0; if (fs->fs_magic == FS_UFS2_MAGIC) extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); - datablocks = DIP(ip, i_blocks) - extblocks; if ((flags & IO_NORMAL) != 0) { for (i = 0; i < NDADDR; i++) - setup_freedirect(freeblks, ip, i, needj); + setup_freedirect(freeblks, ip, i, 0); for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, lbn += tmpval, tmpval *= NINDIR(fs)) - setup_freeindir(freeblks, ip, i, -lbn -i, needj); + setup_freeindir(freeblks, ip, i, -lbn -i, 0); ip->i_size = 0; DIP_SET(ip, i_size, 0); - freeblks->fb_chkcnt = datablocks; - UFS_LOCK(ip->i_ump); - fs->fs_pendingblocks += datablocks; - UFS_UNLOCK(ip->i_ump); + datablocks = DIP(ip, i_blocks) - extblocks; } if ((flags & IO_EXT) != 0) { for (i = 0; i < NXADDR; i++) - setup_freeext(freeblks, ip, i, needj); + setup_freeext(freeblks, ip, i, 0); ip->i_din2->di_extsize = 0; - freeblks->fb_chkcnt += extblocks; + datablocks += extblocks; } - if (LIST_EMPTY(&freeblks->fb_jfreeblkhd)) - needj = 0; - DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - freeblks->fb_chkcnt); +#ifdef QUOTA + /* Reference the quotas in case the block count is wrong in the end. */ + quotaref(vp, freeblks->fb_quota); + (void) chkdq(ip, -datablocks, NOCRED, 0); +#endif + freeblks->fb_chkcnt = datablocks; + UFS_LOCK(ip->i_ump); + fs->fs_pendingblocks += datablocks; + UFS_UNLOCK(ip->i_ump); + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks); /* * Push the zero'ed inode to to its disk buffer so that we are free * to delete its dependencies below. 
Once the dependencies are gone @@ -5557,7 +6473,7 @@ softdep_setup_freeblocks(ip, length, flags) delay = (inodedep->id_state & DEPCOMPLETE); if (delay) WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list); - else if (needj) + else freeblks->fb_state |= COMPLETE; /* * Because the file length has been truncated to zero, any @@ -5573,67 +6489,145 @@ softdep_setup_freeblocks(ip, length, flags) &inodedep->id_inoupdt); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) cancel_allocdirect(&inodedep->id_inoupdt, adp, - freeblks, delay); + freeblks); } if (flags & IO_EXT) { merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt); while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0) cancel_allocdirect(&inodedep->id_extupdt, adp, - freeblks, delay); + freeblks); } - LIST_FOREACH(jfreeblk, &freeblks->fb_jfreeblkhd, jf_deps) - add_to_journal(&jfreeblk->jf_list); - FREE_LOCK(&lk); bdwrite(bp); - softdep_trunc_deps(ITOV(ip), freeblks, 0, 0, flags); + trunc_dependencies(ip, freeblks, -1, 0, flags); ACQUIRE_LOCK(&lk); if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); + freeblks->fb_state |= DEPCOMPLETE; + /* + * If the inode with zeroed block pointers is now on disk + * we can start freeing blocks. + */ + if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) + freeblks->fb_state |= INPROGRESS; + else + freeblks = NULL; + FREE_LOCK(&lk); + if (freeblks) + handle_workitem_freeblocks(freeblks, 0); + trunc_pages(ip, length, extblocks, flags); +} - if (delay || needj) - freeblks->fb_state |= DEPCOMPLETE; - if (delay) { - /* - * If the inode with zeroed block pointers is now on disk - * we can start freeing blocks. Add freeblks to the worklist - * instead of calling handle_workitem_freeblocks directly as - * it is more likely that additional IO is needed to complete - * the request here than in the !delay case. 
- */ - if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE) - add_to_worklist(&freeblks->fb_list, 1); - } - if (needj && LIST_EMPTY(&freeblks->fb_jfreeblkhd)) - needj = 0; +/* + * Eliminate pages from the page cache that back parts of this inode and + * adjust the vnode pager's idea of our size. This prevents stale data + * from hanging around in the page cache. + */ +static void +trunc_pages(ip, length, extblocks, flags) + struct inode *ip; + off_t length; + ufs2_daddr_t extblocks; + int flags; +{ + struct vnode *vp; + struct fs *fs; + ufs_lbn_t lbn; + off_t end, extend; - FREE_LOCK(&lk); + vp = ITOV(ip); + fs = ip->i_fs; + extend = OFF_TO_IDX(lblktosize(fs, -extblocks)); + if ((flags & IO_EXT) != 0) + ffs_pages_remove(vp, extend, 0); + if ((flags & IO_NORMAL) == 0) + return; + BO_LOCK(&vp->v_bufobj); + drain_output(vp); + BO_UNLOCK(&vp->v_bufobj); /* - * If the inode has never been written to disk (delay == 0) and - * we're not waiting on any journal writes, then we can process the - * freeblks now that we have deleted the dependencies. + * The vnode pager eliminates file pages we eliminate indirects + * below. */ - if (!delay && !needj) - handle_workitem_freeblocks(freeblks, 0); + vnode_pager_setsize(vp, length); + /* + * Calculate the end based on the last indirect we want to keep. If + * the block extends into indirects we can just use the negative of + * its lbn. Doubles and triples exist at lower numbers so we must + * be careful not to remove those, if they exist. double and triple + * indirect lbns do not overlap with others so it is not important + * to verify how many levels are required. + */ + lbn = lblkno(fs, length); + if (lbn >= NDADDR) { + /* Calculate the virtual lbn of the triple indirect. */ + lbn = -lbn - (NIADDR - 1); + end = OFF_TO_IDX(lblktosize(fs, lbn)); + } else + end = extend; + ffs_pages_remove(vp, OFF_TO_IDX(OFF_MAX), end); +} + +/* + * See if the buf bp is in the range eliminated by truncation. 
+ */ +static int +trunc_check_buf(bp, blkoffp, lastlbn, lastoff, flags) + struct buf *bp; + int *blkoffp; + ufs_lbn_t lastlbn; + int lastoff; + int flags; +{ + ufs_lbn_t lbn; + + *blkoffp = 0; + /* Only match ext/normal blocks as appropriate. */ + if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) || + ((flags & IO_NORMAL) == 0 && (bp->b_xflags & BX_ALTDATA) == 0)) + return (0); + /* ALTDATA is always a full truncation. */ + if ((bp->b_xflags & BX_ALTDATA) != 0) + return (1); + /* -1 is full truncation. */ + if (lastlbn == -1) + return (1); + /* + * If this is a partial truncate we only want those + * blocks and indirect blocks that cover the range + * we're after. + */ + lbn = bp->b_lblkno; + if (lbn < 0) + lbn = -(lbn + lbn_level(lbn)); + if (lbn < lastlbn) + return (0); + /* Here we only truncate lblkno if it's partial. */ + if (lbn == lastlbn) { + if (lastoff == 0) + return (0); + *blkoffp = lastoff; + } + return (1); } /* * Eliminate any dependencies that exist in memory beyond lblkno:off */ static void -softdep_trunc_deps(vp, freeblks, lblkno, off, flags) - struct vnode *vp; +trunc_dependencies(ip, freeblks, lastlbn, lastoff, flags) + struct inode *ip; struct freeblks *freeblks; - ufs_lbn_t lblkno; - int off; + ufs_lbn_t lastlbn; + int lastoff; int flags; { - struct inodedep *inodedep; struct bufobj *bo; + struct vnode *vp; struct buf *bp; - struct mount *mp; - ino_t ino; + struct fs *fs; + int blkoff; /* * We must wait for any I/O in progress to finish so that @@ -5641,96 +6635,140 @@ softdep_trunc_deps(vp, freeblks, lblkno, off, flags) * Once they are all there, walk the list and get rid of * any dependencies. 
*/ - ino = VTOI(vp)->i_number; - mp = vp->v_mount; + fs = ip->i_fs; + vp = ITOV(ip); bo = &vp->v_bufobj; BO_LOCK(bo); drain_output(vp); + TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) + bp->b_vflags &= ~BV_SCANNED; restart: TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { - if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) || - ((flags & IO_NORMAL) == 0 && - (bp->b_xflags & BX_ALTDATA) == 0)) + if (bp->b_vflags & BV_SCANNED) continue; + if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) { + bp->b_vflags |= BV_SCANNED; + continue; + } if ((bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT)) == NULL) goto restart; BO_UNLOCK(bo); - ACQUIRE_LOCK(&lk); - (void) inodedep_lookup(mp, ino, 0, &inodedep); - if (deallocate_dependencies(bp, inodedep, freeblks, 0)) - bp->b_flags |= B_INVAL | B_NOCACHE; - FREE_LOCK(&lk); - brelse(bp); + if (deallocate_dependencies(bp, freeblks, blkoff)) + bqrelse(bp); + else + brelse(bp); BO_LOCK(bo); goto restart; } + /* + * Now do the work of vtruncbuf while also matching indirect blocks. 
+ */ + TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs) + bp->b_vflags &= ~BV_SCANNED; +cleanrestart: + TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs) { + if (bp->b_vflags & BV_SCANNED) + continue; + if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) { + bp->b_vflags |= BV_SCANNED; + continue; + } + if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + BO_MTX(bo)) == ENOLCK) { + BO_LOCK(bo); + goto cleanrestart; + } + bp->b_vflags |= BV_SCANNED; + BO_LOCK(bo); + bremfree(bp); + BO_UNLOCK(bo); + if (blkoff != 0) { + allocbuf(bp, blkoff); + bqrelse(bp); + } else { + bp->b_flags |= B_INVAL | B_NOCACHE | B_RELBUF; + brelse(bp); + } + BO_LOCK(bo); + goto cleanrestart; + } + drain_output(vp); BO_UNLOCK(bo); } static int -cancel_pagedep(pagedep, inodedep, freeblks) +cancel_pagedep(pagedep, freeblks, blkoff) struct pagedep *pagedep; - struct inodedep *inodedep; struct freeblks *freeblks; + int blkoff; { - struct newdirblk *newdirblk; struct jremref *jremref; struct jmvref *jmvref; - struct dirrem *dirrem; + struct dirrem *dirrem, *tmp; int i; /* - * There should be no directory add dependencies present - * as the directory could not be truncated until all - * children were removed. - */ - KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, - ("deallocate_dependencies: pendinghd != NULL")); - for (i = 0; i < DAHASHSZ; i++) - KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, - ("deallocate_dependencies: diraddhd != NULL")); - /* * Copy any directory remove dependencies to the list - * to be processed after the zero'ed inode is written. - * If the inode has already been written, then they + * to be processed after the freeblks proceeds. If + * directory entry never made it to disk they * can be dumped directly onto the work list. */ - LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) { + LIST_FOREACH_SAFE(dirrem, &pagedep->pd_dirremhd, dm_next, tmp) { + /* Skip this directory removal if it is intended to remain. 
*/ + if (dirrem->dm_offset < blkoff) + continue; /* - * If there are any dirrems we wait for - * the journal write to complete and - * then restart the buf scan as the lock + * If there are any dirrems we wait for the journal write + * to complete and then restart the buf scan as the lock * has been dropped. */ - while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) - != NULL) { - stat_jwait_filepage++; - jwait(&jremref->jr_list); + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { + jwait(&jremref->jr_list, MNT_WAIT); return (ERESTART); } LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; - if (inodedep == NULL || - (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { - dirrem->dm_state |= COMPLETE; - add_to_worklist(&dirrem->dm_list, 0); - } else - WORKLIST_INSERT(&inodedep->id_bufwait, - &dirrem->dm_list); - } - if ((pagedep->pd_state & NEWBLOCK) != 0) { - newdirblk = pagedep->pd_newdirblk; - WORKLIST_REMOVE(&newdirblk->db_list); - free_newdirblk(newdirblk); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &dirrem->dm_list); } while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { - stat_jwait_filepage++; - jwait(&jmvref->jm_list); + jwait(&jmvref->jm_list, MNT_WAIT); return (ERESTART); } - WORKLIST_REMOVE(&pagedep->pd_list); - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + /* + * When we're partially truncating a pagedep we just want to flush + * journal entries and return. There can not be any adds in the + * truncated portion of the directory and newblk must remain if + * part of the block remains. 
+ */ + if (blkoff != 0) { + struct diradd *dap; + + LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) + if (dap->da_offset > blkoff) + panic("cancel_pagedep: diradd %p off %d > %d", + dap, dap->da_offset, blkoff); + for (i = 0; i < DAHASHSZ; i++) + LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) + if (dap->da_offset > blkoff) + panic("cancel_pagedep: diradd %p off %d > %d", + dap, dap->da_offset, blkoff); + return (0); + } + /* + * There should be no directory add dependencies present + * as the directory could not be truncated until all + * children were removed. + */ + KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL, + ("deallocate_dependencies: pendinghd != NULL")); + for (i = 0; i < DAHASHSZ; i++) + KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL, + ("deallocate_dependencies: diraddhd != NULL")); + if ((pagedep->pd_state & NEWBLOCK) != 0) + free_newdirblk(pagedep->pd_newdirblk); + if (free_pagedep(pagedep) == 0) + panic("Failed to free pagedep %p", pagedep); return (0); } @@ -5739,58 +6777,82 @@ cancel_pagedep(pagedep, inodedep, freeblks) * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's - * associated with related dependencies do not occur. Returns 1 if - * all dependencies were cleared, 0 otherwise. + * associated with related dependencies do not occur. 
*/ static int -deallocate_dependencies(bp, inodedep, freeblks, off) +deallocate_dependencies(bp, freeblks, off) struct buf *bp; - struct inodedep *inodedep; struct freeblks *freeblks; int off; { - struct worklist *wk; struct indirdep *indirdep; - struct allocindir *aip; struct pagedep *pagedep; + struct allocdirect *adp; + struct worklist *wk, *wkn; - mtx_assert(&lk, MA_OWNED); - while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { + ACQUIRE_LOCK(&lk); + LIST_FOREACH_SAFE(wk, &bp->b_dep, wk_list, wkn) { switch (wk->wk_type) { - case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (bp->b_lblkno >= 0 || bp->b_blkno != indirdep->ir_savebp->b_lblkno) panic("deallocate_dependencies: not indir"); - cancel_indirdep(indirdep, bp, inodedep, freeblks); + cancel_indirdep(indirdep, bp, freeblks); continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); - if (cancel_pagedep(pagedep, inodedep, freeblks)) - return (0); + if (cancel_pagedep(pagedep, freeblks, off)) { + FREE_LOCK(&lk); + return (ERESTART); + } continue; case D_ALLOCINDIR: - aip = WK_ALLOCINDIR(wk); - cancel_allocindir(aip, inodedep, freeblks); + /* + * Simply remove the allocindir, we'll find it via + * the indirdep where we can clear pointers if + * needed. + */ + WORKLIST_REMOVE(wk); continue; - case D_ALLOCDIRECT: - case D_INODEDEP: - panic("deallocate_dependencies: Unexpected type %s", - TYPENAME(wk->wk_type)); - /* NOTREACHED */ + case D_FREEWORK: + /* + * A truncation is waiting for the zero'd pointers + * to be written. It can be freed when the freeblks + * is journaled. 
+ */ + WORKLIST_REMOVE(wk); + wk->wk_state |= ONDEPLIST; + WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk); + break; + case D_ALLOCDIRECT: + adp = WK_ALLOCDIRECT(wk); + if (off != 0) + continue; + /* FALLTHROUGH */ default: - panic("deallocate_dependencies: Unknown type %s", + panic("deallocate_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } + FREE_LOCK(&lk); + /* + * Don't throw away this buf, we were partially truncating and + * some deps may always remain. + */ + if (off) { + allocbuf(bp, off); + bp->b_vflags |= BV_SCANNED; + return (EBUSY); + } + bp->b_flags |= B_INVAL | B_NOCACHE; - return (1); + return (0); } /* @@ -5800,19 +6862,35 @@ deallocate_dependencies(bp, inodedep, freeblks, off) * space is no longer pointed to by the inode or in the bitmap. */ static void -cancel_allocdirect(adphead, adp, freeblks, delay) +cancel_allocdirect(adphead, adp, freeblks) struct allocdirectlst *adphead; struct allocdirect *adp; struct freeblks *freeblks; - int delay; { struct freework *freework; struct newblk *newblk; struct worklist *wk; - ufs_lbn_t lbn; TAILQ_REMOVE(adphead, adp, ad_next); newblk = (struct newblk *)adp; + freework = NULL; + /* + * Find the correct freework structure. + */ + LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { + if (wk->wk_type != D_FREEWORK) + continue; + freework = WK_FREEWORK(wk); + if (freework->fw_blkno == newblk->nb_newblkno) + break; + } + if (freework == NULL) + panic("cancel_allocdirect: Freework not found"); + /* + * If a newblk exists at all we still have the journal entry that + * initiated the allocation so we do not need to journal the free. + */ + cancel_jfreeblk(freeblks, freework->fw_blkno); /* * If the journal hasn't been written the jnewblk must be passed * to the call to ffs_blkfree that reclaims the space. We accomplish @@ -5821,33 +6899,9 @@ cancel_allocdirect(adphead, adp, freeblks, delay) * been written we can simply reclaim the journal space when the * freeblks work is complete. 
*/ - if (newblk->nb_jnewblk == NULL) { - if (cancel_newblk(newblk, NULL, &freeblks->fb_jwork) != NULL) - panic("cancel_allocdirect: Unexpected dependency"); - goto found; - } - lbn = newblk->nb_jnewblk->jn_lbn; - /* - * Find the correct freework structure so it releases the canceled - * journal when the bitmap is cleared. This preserves rollback - * until the allocation is reverted. - */ - LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) { - freework = WK_FREEWORK(wk); - if (freework->fw_lbn != lbn) - continue; - freework->fw_jnewblk = cancel_newblk(newblk, &freework->fw_list, - &freework->fw_jwork); - goto found; - } - panic("cancel_allocdirect: Freework not found for lbn %jd\n", lbn); -found: - if (delay) - WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, - &newblk->nb_list); - else - free_newblk(newblk); - return; + freework->fw_jnewblk = cancel_newblk(newblk, &freework->fw_list, + &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); } @@ -5865,33 +6919,18 @@ cancel_newblk(newblk, wk, wkhd) struct worklist *wk; struct workhead *wkhd; { - struct indirdep *indirdep; - struct allocindir *aip; struct jnewblk *jnewblk; - while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { - indirdep->ir_state &= ~ONDEPLIST; - LIST_REMOVE(indirdep, ir_next); - /* - * If an indirdep is not on the buf worklist we need to - * free it here as deallocate_dependencies() will never - * find it. These pointers were never visible on disk and - * can be discarded immediately. - */ - while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) { - LIST_REMOVE(aip, ai_next); - if (cancel_newblk(&aip->ai_block, NULL, wkhd) != NULL) - panic("cancel_newblk: aip has journal entry"); - free_newblk(&aip->ai_block); - } - /* - * If this indirdep is not attached to a buf it was simply - * waiting on completion to clear completehd. free_indirdep() - * asserts that nothing is dangling. 
- */ - if ((indirdep->ir_state & ONWORKLIST) == 0) - free_indirdep(indirdep); - } + newblk->nb_state |= GOINGAWAY; + /* + * Previously we traversed the completedhd on each indirdep + * attached to this newblk to cancel them and gather journal + * work. Since we need only the oldest journal segment and + * the lowest point on the tree will always have the oldest + * journal segment we are free to release the segments + * of any subordinates and may leave the indirdep list to + * indirdep_complete() when this newblk is freed. + */ if (newblk->nb_state & ONDEPLIST) { newblk->nb_state &= ~ONDEPLIST; LIST_REMOVE(newblk, nb_deps); @@ -5904,17 +6943,44 @@ cancel_newblk(newblk, wk, wkhd) * superseding operation completes. */ jnewblk = newblk->nb_jnewblk; - if (jnewblk != NULL) { + if (jnewblk != NULL && wk != NULL) { newblk->nb_jnewblk = NULL; jnewblk->jn_dep = wk; } if (!LIST_EMPTY(&newblk->nb_jwork)) jwork_move(wkhd, &newblk->nb_jwork); + /* + * When truncating we must free the newdirblk early to remove + * the pagedep from the hash before returning. + */ + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) + free_newdirblk(WK_NEWDIRBLK(wk)); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("cancel_newblk: extra newdirblk"); return (jnewblk); } /* + * Schedule the freefrag associated with a newblk to be released once + * the pointers are written and the previous block is no longer needed. + */ +static void +newblk_freefrag(newblk) + struct newblk *newblk; +{ + struct freefrag *freefrag; + + if (newblk->nb_freefrag == NULL) + return; + freefrag = newblk->nb_freefrag; + newblk->nb_freefrag = NULL; + freefrag->ff_state |= COMPLETE; + if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) + add_to_worklist(&freefrag->ff_list, 0); +} + +/* * Free a newblk. Generate a new freefrag work request if appropriate. * This must be called after the inode pointer and any direct block pointers * are valid or fully removed via truncate or frag extension. 
@@ -5924,34 +6990,23 @@ free_newblk(newblk) struct newblk *newblk; { struct indirdep *indirdep; - struct newdirblk *newdirblk; - struct freefrag *freefrag; struct worklist *wk; + KASSERT(newblk->nb_jnewblk == NULL, + ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk)); mtx_assert(&lk, MA_OWNED); + newblk_freefrag(newblk); if (newblk->nb_state & ONDEPLIST) LIST_REMOVE(newblk, nb_deps); if (newblk->nb_state & ONWORKLIST) WORKLIST_REMOVE(&newblk->nb_list); LIST_REMOVE(newblk, nb_hash); - if ((freefrag = newblk->nb_freefrag) != NULL) { - freefrag->ff_state |= COMPLETE; - if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE) - add_to_worklist(&freefrag->ff_list, 0); - } - if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) { - newdirblk = WK_NEWDIRBLK(wk); - WORKLIST_REMOVE(&newdirblk->db_list); - if (!LIST_EMPTY(&newblk->nb_newdirblk)) - panic("free_newblk: extra newdirblk"); - free_newdirblk(newdirblk); - } - while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) { - indirdep->ir_state |= DEPCOMPLETE; + if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL) + free_newdirblk(WK_NEWDIRBLK(wk)); + if (!LIST_EMPTY(&newblk->nb_newdirblk)) + panic("free_newblk: extra newdirblk"); + while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL) indirdep_complete(indirdep); - } - KASSERT(newblk->nb_jnewblk == NULL, - ("free_newblk; jnewblk %p still attached", newblk->nb_jnewblk)); handle_jwork(&newblk->nb_jwork); newblk->nb_list.wk_type = D_NEWBLK; WORKITEM_FREE(newblk, D_NEWBLK); @@ -5968,9 +7023,9 @@ free_newdirblk(newdirblk) struct pagedep *pagedep; struct diradd *dap; struct worklist *wk; - int i; mtx_assert(&lk, MA_OWNED); + WORKLIST_REMOVE(&newdirblk->db_list); /* * If the pagedep is still linked onto the directory buffer * dependency chain, then some of the entries on the @@ -5983,21 +7038,13 @@ free_newdirblk(newdirblk) */ pagedep = newdirblk->db_pagedep; pagedep->pd_state &= ~NEWBLOCK; - if ((pagedep->pd_state & ONWORKLIST) == 0) + if 
((pagedep->pd_state & ONWORKLIST) == 0) { while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap, NULL); - /* - * If no dependencies remain, the pagedep will be freed. - */ - for (i = 0; i < DAHASHSZ; i++) - if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) - break; - if (i == DAHASHSZ && (pagedep->pd_state & ONWORKLIST) == 0 && - LIST_EMPTY(&pagedep->pd_jmvrefhd)) { - KASSERT(LIST_FIRST(&pagedep->pd_dirremhd) == NULL, - ("free_newdirblk: Freeing non-free pagedep %p", pagedep)); - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); + /* + * If no dependencies remain, the pagedep will be freed. + */ + free_pagedep(pagedep); } /* Should only ever be one item in the list. */ while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) { @@ -6020,6 +7067,7 @@ softdep_freefile(pvp, ino, mode) struct inode *ip = VTOI(pvp); struct inodedep *inodedep; struct freefile *freefile; + struct freeblks *freeblks; /* * This sets up the inode de-allocation dependency. @@ -6048,28 +7096,38 @@ softdep_freefile(pvp, ino, mode) */ ACQUIRE_LOCK(&lk); inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); - /* - * Remove this inode from the unlinked list and set - * GOINGAWAY as appropriate to indicate that this inode - * will never be written. - */ - if (inodedep && inodedep->id_state & UNLINKED) { + if (inodedep) { /* - * Save the journal work to be freed with the bitmap - * before we clear UNLINKED. Otherwise it can be lost - * if the inode block is written. + * Clear out freeblks that no longer need to reference + * this inode. */ - handle_bufwait(inodedep, &freefile->fx_jwork); - clear_unlinked_inodedep(inodedep); - /* Re-acquire inodedep as we've dropped lk. */ - inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + while ((freeblks = + TAILQ_FIRST(&inodedep->id_freeblklst)) != NULL) { + TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, + fb_next); + freeblks->fb_state &= ~ONDEPLIST; + } + /* + * Remove this inode from the unlinked list. 
+ */ + if (inodedep->id_state & UNLINKED) { + /* + * Save the journal work to be freed with the bitmap + * before we clear UNLINKED. Otherwise it can be lost + * if the inode block is written. + */ + handle_bufwait(inodedep, &freefile->fx_jwork); + clear_unlinked_inodedep(inodedep); + /* Re-acquire inodedep as we've dropped lk. */ + inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); + } } if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(&lk); handle_workitem_freefile(freefile); return; } - if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) + if ((inodedep->id_state & DEPCOMPLETE) == 0) inodedep->id_state |= GOINGAWAY; WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); FREE_LOCK(&lk); @@ -6154,6 +7212,7 @@ free_inodedep(inodedep) !TAILQ_EMPTY(&inodedep->id_newinoupdt) || !TAILQ_EMPTY(&inodedep->id_extupdt) || !TAILQ_EMPTY(&inodedep->id_newextupdt) || + !TAILQ_EMPTY(&inodedep->id_freeblklst) || inodedep->id_mkdiradd != NULL || inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL) @@ -6181,54 +7240,81 @@ freework_freeblock(freework) struct ufsmount *ump; struct workhead wkhd; struct fs *fs; - int pending; int bsize; int needj; + mtx_assert(&lk, MA_OWNED); + /* + * Handle partial truncate separately. + */ + if (freework->fw_indir) { + complete_trunc_indir(freework); + return; + } freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; needj = freeblks->fb_list.wk_mp->mnt_kern_flag & MNTK_SUJ; bsize = lfragtosize(fs, freework->fw_frags); - pending = btodb(bsize); LIST_INIT(&wkhd); /* + * DEPCOMPLETE is cleared in indirblk_insert() if the block lives + * on the indirblk hashtable and prevents premature freeing. + */ + freework->fw_state |= DEPCOMPLETE; + /* + * SUJ needs to wait for the segment referencing freed indirect + * blocks to expire so that we know the checker will not confuse + * a re-allocated indirect block with its old contents. 
+ */ + if (needj && freework->fw_lbn <= -NDADDR) + indirblk_insert(freework); + /* * If we are canceling an existing jnewblk pass it to the free * routine, otherwise pass the freeblk which will ultimately * release the freeblks. If we're not journaling, we can just * free the freeblks immediately. */ - ACQUIRE_LOCK(&lk); - LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list); jnewblk = freework->fw_jnewblk; if (jnewblk != NULL) { - /* Could've already been canceled in indir_trunc(). */ - if ((jnewblk->jn_state & GOINGAWAY) == 0) - cancel_jnewblk(jnewblk, &wkhd); + cancel_jnewblk(jnewblk, &wkhd); needj = 0; - } else if (needj) + } else if (needj) { + freework->fw_state |= DELAYEDFREE; + freeblks->fb_cgwait++; WORKLIST_INSERT(&wkhd, &freework->fw_list); - freeblks->fb_chkcnt -= pending; - FREE_LOCK(&lk); - /* - * extattr blocks don't show up in pending blocks. XXX why? - */ - if (freework->fw_lbn >= 0 || freework->fw_lbn <= -NDADDR) { - UFS_LOCK(ump); - fs->fs_pendingblocks -= pending; - UFS_UNLOCK(ump); } - ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, - bsize, freeblks->fb_previousinum, &wkhd); - if (needj) - return; + freeblks->fb_freecnt += btodb(bsize); + FREE_LOCK(&lk); + ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, + freeblks->fb_inum, freeblks->fb_vtype, &wkhd); + ACQUIRE_LOCK(&lk); /* * The jnewblk will be discarded and the bits in the map never * made it to disk. We can immediately free the freeblk. */ - ACQUIRE_LOCK(&lk); - handle_written_freework(freework); - FREE_LOCK(&lk); + if (needj == 0) + handle_written_freework(freework); +} + +/* + * We enqueue freework items that need processing back on the freeblks and + * add the freeblks to the worklist. This makes it easier to find all work + * required to flush a truncation in process_truncates(). 
+ */ +static void +freework_enqueue(freework) + struct freework *freework; +{ + struct freeblks *freeblks; + + freeblks = freework->fw_freeblks; + if ((freework->fw_state & INPROGRESS) == 0) + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); + if ((freeblks->fb_state & + (ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE && + LIST_EMPTY(&freeblks->fb_jblkdephd)) + add_to_worklist(&freeblks->fb_list, WK_NODELAY); } /* @@ -6246,21 +7332,28 @@ handle_workitem_indirblk(freework) struct ufsmount *ump; struct fs *fs; - freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - if (freework->fw_off == NINDIR(fs)) + if (freework->fw_state & DEPCOMPLETE) { + handle_written_freework(freework); + return; + } + if (freework->fw_off == NINDIR(fs)) { freework_freeblock(freework); - else - indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), - freework->fw_lbn); + return; + } + freework->fw_state |= INPROGRESS; + FREE_LOCK(&lk); + indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), + freework->fw_lbn); + ACQUIRE_LOCK(&lk); } /* * Called when a freework structure attached to a cg buf is written. The * ref on either the parent or the freeblks structure is released and - * either may be added to the worklist if it is the final ref. + * the freeblks is added back to the worklist if there is more work to do. */ static void handle_written_freework(freework) @@ -6268,48 +7361,24 @@ handle_written_freework(freework) { struct freeblks *freeblks; struct freework *parent; - struct jsegdep *jsegdep; - struct worklist *wk; - int needj; - needj = 0; freeblks = freework->fw_freeblks; parent = freework->fw_parent; - /* - * SUJ needs to wait for the segment referencing freed indirect - * blocks to expire so that we know the checker will not confuse - * a re-allocated indirect block with its old contents. 
- */ - if (freework->fw_lbn <= -NDADDR && - freework->fw_list.wk_mp->mnt_kern_flag & MNTK_SUJ) { - LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list) - if (wk->wk_type == D_JSEGDEP) - break; - if (wk) { - jsegdep = WK_JSEGDEP(wk); - LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, - freework, fw_next); - WORKLIST_INSERT(INDIR_HASH(freework->fw_list.wk_mp, - freework->fw_blkno), &freework->fw_list); - needj = 1; - } - } - if (parent) { - if (--parent->fw_ref != 0) - parent = NULL; - freeblks = NULL; - } else if (--freeblks->fb_ref != 0) - freeblks = NULL; - if (needj == 0) + if (freework->fw_state & DELAYEDFREE) + freeblks->fb_cgwait--; + freework->fw_state |= COMPLETE; + if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) WORKITEM_FREE(freework, D_FREEWORK); - /* - * Don't delay these block frees or it takes an intolerable amount - * of time to process truncates and free their journal entries. - */ - if (freeblks) - add_to_worklist(&freeblks->fb_list, 1); - if (parent) - add_to_worklist(&parent->fw_list, 1); + if (parent) { + if (--parent->fw_ref == 0) + freework_enqueue(parent); + return; + } + if (--freeblks->fb_ref != 0) + return; + if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST | INPROGRESS)) == + ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) + add_to_worklist(&freeblks->fb_list, WK_NODELAY); } /* @@ -6320,38 +7389,73 @@ handle_written_freework(freework) * to the number of blocks allocated for the file) are also * performed in this function. 
*/ -static void +static int handle_workitem_freeblocks(freeblks, flags) struct freeblks *freeblks; int flags; { struct freework *freework; + struct newblk *newblk; + struct allocindir *aip; + struct ufsmount *ump; struct worklist *wk; - KASSERT(LIST_EMPTY(&freeblks->fb_jfreeblkhd), + KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd), ("handle_workitem_freeblocks: Journal entries not written.")); - if (LIST_EMPTY(&freeblks->fb_freeworkhd)) { - handle_complete_freeblocks(freeblks); - return; - } - freeblks->fb_ref++; + ump = VFSTOUFS(freeblks->fb_list.wk_mp); + ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) { - KASSERT(wk->wk_type == D_FREEWORK, - ("handle_workitem_freeblocks: Unknown type %s", - TYPENAME(wk->wk_type))); - WORKLIST_REMOVE_UNLOCKED(wk); - freework = WK_FREEWORK(wk); - if (freework->fw_lbn <= -NDADDR) - handle_workitem_indirblk(freework); - else - freework_freeblock(freework); + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_DIRREM: + wk->wk_state |= COMPLETE; + add_to_worklist(wk, 0); + continue; + + case D_ALLOCDIRECT: + free_newblk(WK_NEWBLK(wk)); + continue; + + case D_ALLOCINDIR: + aip = WK_ALLOCINDIR(wk); + freework = NULL; + if (aip->ai_state & DELAYEDFREE) { + FREE_LOCK(&lk); + freework = newfreework(ump, freeblks, NULL, + aip->ai_lbn, aip->ai_newblkno, + ump->um_fs->fs_frag, 0, 0); + ACQUIRE_LOCK(&lk); + } + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk) { + freework->fw_jnewblk = newblk->nb_jnewblk; + newblk->nb_jnewblk->jn_dep = &freework->fw_list; + newblk->nb_jnewblk = NULL; + } + free_newblk(newblk); + continue; + + case D_FREEWORK: + freework = WK_FREEWORK(wk); + if (freework->fw_lbn <= -NDADDR) + handle_workitem_indirblk(freework); + else + freework_freeblock(freework); + continue; + default: + panic("handle_workitem_freeblocks: Unknown type %s", + TYPENAME(wk->wk_type)); + } } - ACQUIRE_LOCK(&lk); - if (--freeblks->fb_ref != 0) + if (freeblks->fb_ref != 0) { + freeblks->fb_state &= ~INPROGRESS; + 
wake_worklist(&freeblks->fb_list); freeblks = NULL; + } FREE_LOCK(&lk); if (freeblks) - handle_complete_freeblocks(freeblks); + return handle_complete_freeblocks(freeblks, flags); + return (0); } /* @@ -6359,41 +7463,64 @@ handle_workitem_freeblocks(freeblks, flags) * freeblocks dependency and any journal work awaiting completion. This * can not be called until all other dependencies are stable on disk. */ -static void -handle_complete_freeblocks(freeblks) +static int +handle_complete_freeblocks(freeblks, flags) struct freeblks *freeblks; + int flags; { + struct inodedep *inodedep; struct inode *ip; struct vnode *vp; struct fs *fs; struct ufsmount *ump; - int flags; + ufs2_daddr_t spare; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - flags = LK_NOWAIT; + flags = LK_EXCLUSIVE | flags; + spare = freeblks->fb_freecnt - freeblks->fb_chkcnt; /* - * If we still have not finished background cleanup, then check - * to see if the block count needs to be adjusted. + * If we did not release the expected number of blocks we may have + * to adjust the inode block count here. Only do so if it wasn't + * a truncation to zero and the modrev still matches. */ - if (freeblks->fb_chkcnt != 0 && (fs->fs_flags & FS_UNCLEAN) != 0 && - ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum, - (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) == 0) { + if (spare && freeblks->fb_len != 0) { + if (ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_inum, + flags, &vp, FFSV_FORCEINSMQ) != 0) + return (EBUSY); ip = VTOI(vp); - DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + freeblks->fb_chkcnt); - ip->i_flag |= IN_CHANGE; + if (DIP(ip, i_modrev) == freeblks->fb_modrev) { + DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - spare); + ip->i_flag |= IN_CHANGE; + /* + * We must wait so this happens before the + * journal is reclaimed. 
+ */ + ffs_update(vp, 1); + } vput(vp); } - - if (!(freeblks->fb_chkcnt == 0 || - ((fs->fs_flags & FS_UNCLEAN) != 0 && (flags & LK_NOWAIT) == 0))) - printf( - "handle_workitem_freeblocks: inode %ju block count %jd\n", - (uintmax_t)freeblks->fb_previousinum, - (intmax_t)freeblks->fb_chkcnt); - + if (freeblks->fb_chkcnt) { + UFS_LOCK(ump); + fs->fs_pendingblocks -= freeblks->fb_chkcnt; + UFS_UNLOCK(ump); + } +#ifdef QUOTA + /* Handle spare. */ + if (spare) + quotaadj(freeblks->fb_quota, ump, -spare); + quotarele(freeblks->fb_quota); +#endif ACQUIRE_LOCK(&lk); + if (freeblks->fb_state & ONDEPLIST) { + inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum, + 0, &inodedep); + TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, fb_next); + freeblks->fb_state &= ~ONDEPLIST; + if (TAILQ_EMPTY(&inodedep->id_freeblklst)) + free_inodedep(inodedep); + } /* * All of the freeblock deps must be complete prior to this call * so it's now safe to complete earlier outstanding journal entries. @@ -6401,13 +7528,19 @@ handle_complete_freeblocks(freeblks) handle_jwork(&freeblks->fb_jwork); WORKITEM_FREE(freeblks, D_FREEBLKS); FREE_LOCK(&lk); + return (0); } /* - * Release blocks associated with the inode ip and stored in the indirect + * Release blocks associated with the freeblks and stored in the indirect * block dbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. + * + * This handles partial and complete truncation of blocks. Partial is noted + * with goingaway == 0. In this case the freework is completed after the + * zero'd indirects are written to disk. For full truncation the freework + * is completed after the block is freed. 
*/ static void indir_trunc(freework, dbn, lbn) @@ -6417,165 +7550,113 @@ indir_trunc(freework, dbn, lbn) { struct freework *nfreework; struct workhead wkhd; - struct jnewblk *jnewblkn; - struct jnewblk *jnewblk; struct freeblks *freeblks; struct buf *bp; struct fs *fs; - struct worklist *wkn; - struct worklist *wk; struct indirdep *indirdep; struct ufsmount *ump; ufs1_daddr_t *bap1 = 0; ufs2_daddr_t nb, nnb, *bap2 = 0; - ufs_lbn_t lbnadd; + ufs_lbn_t lbnadd, nlbn; int i, nblocks, ufs1fmt; int fs_pendingblocks; + int goingaway; int freedeps; int needj; int level; int cnt; - LIST_INIT(&wkhd); - level = lbn_level(lbn); - if (level == -1) - panic("indir_trunc: Invalid lbn %jd\n", lbn); freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; - fs_pendingblocks = 0; - freedeps = 0; - needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ; - lbnadd = lbn_offset(fs, level); /* - * Get buffer of block pointers to be freed. This routine is not - * called until the zero'ed inode has been written, so it is safe - * to free blocks as they are encountered. Because the inode has - * been zero'ed, calls to bmap on these blocks will fail. So, we - * have to use the on-disk address and the block device for the - * filesystem to look them up. If the file was deleted before its - * indirect blocks were all written to disk, the routine that set - * us up (deallocate_dependencies) will have arranged to leave - * a complete copy of the indirect block in memory for our use. - * Otherwise we have to read the blocks in from the disk. - */ -#ifdef notyet - bp = getblk(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, 0, 0, - GB_NOCREAT); -#else - bp = incore(&freeblks->fb_devvp->v_bufobj, dbn); -#endif + * Get buffer of block pointers to be freed. There are three cases: + * + * 1) Partial truncate caches the indirdep pointer in the freework + * which provides us a back copy to the save bp which holds the + * pointers we want to clear. 
When this completes the zero + * pointers are written to the real copy. + * 2) The indirect is being completely truncated, cancel_indirdep() + * eliminated the real copy and placed the indirdep on the saved + * copy. The indirdep and buf are discarded when this completes. + * 3) The indirect was not in memory, we read a copy off of the disk + * using the devvp and drop and invalidate the buffer when we're + * done. + */ + goingaway = 1; + indirdep = NULL; + if (freework->fw_indir != NULL) { + goingaway = 0; + indirdep = freework->fw_indir; + bp = indirdep->ir_savebp; + if (bp == NULL || bp->b_blkno != dbn) + panic("indir_trunc: Bad saved buf %p blkno %jd", + bp, (intmax_t)dbn); + } else if ((bp = incore(&freeblks->fb_devvp->v_bufobj, dbn)) != NULL) { + /* + * The lock prevents the buf dep list from changing and + * indirects on devvp should only ever have one dependency. + */ + indirdep = WK_INDIRDEP(LIST_FIRST(&bp->b_dep)); + if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0) + panic("indir_trunc: Bad indirdep %p from buf %p", + indirdep, bp); + } else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, + NOCRED, &bp) != 0) { + brelse(bp); + return; + } ACQUIRE_LOCK(&lk); - if (bp != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { - if (wk->wk_type != D_INDIRDEP || - (wk->wk_state & GOINGAWAY) == 0) - panic("indir_trunc: lost indirdep %p", wk); - indirdep = WK_INDIRDEP(wk); - LIST_SWAP(&wkhd, &indirdep->ir_jwork, worklist, wk_list); - LIST_FOREACH_SAFE(jnewblk, &indirdep->ir_jnewblkhd, - jn_indirdeps, jnewblkn) { + /* Protects against a race with complete_trunc_indir(). */ + freework->fw_state &= ~INPROGRESS; + /* + * If we have an indirdep we need to enforce the truncation order + * and discard it when it is complete. + */ + if (indirdep) { + if (freework != TAILQ_FIRST(&indirdep->ir_trunc) && + !TAILQ_EMPTY(&indirdep->ir_trunc)) { /* - * XXX This cancel may cause some lengthy delay - * before the record is reclaimed below. 
+ * Add the complete truncate to the list on the + * indirdep to enforce in-order processing. */ - LIST_REMOVE(jnewblk, jn_indirdeps); - cancel_jnewblk(jnewblk, &wkhd); - } - - free_indirdep(indirdep); - if (!LIST_EMPTY(&bp->b_dep)) - panic("indir_trunc: dangling dep %p", - LIST_FIRST(&bp->b_dep)); - ump->um_numindirdeps -= 1; - FREE_LOCK(&lk); - } else { -#ifdef notyet - if (bp) - brelse(bp); -#endif - FREE_LOCK(&lk); - if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, - NOCRED, &bp) != 0) { - brelse(bp); + if (freework->fw_indir == NULL) + TAILQ_INSERT_TAIL(&indirdep->ir_trunc, + freework, fw_next); + FREE_LOCK(&lk); return; } + /* + * If we're goingaway, free the indirdep. Otherwise it will + * linger until the write completes. + */ + if (goingaway) { + free_indirdep(indirdep); + ump->um_numindirdeps -= 1; + } } - /* - * Recursively free indirect blocks. - */ + FREE_LOCK(&lk); + /* Initialize pointers depending on block size. */ if (ump->um_fstype == UFS1) { - ufs1fmt = 1; bap1 = (ufs1_daddr_t *)bp->b_data; + nb = bap1[freework->fw_off]; + ufs1fmt = 1; } else { - ufs1fmt = 0; bap2 = (ufs2_daddr_t *)bp->b_data; + nb = bap2[freework->fw_off]; + ufs1fmt = 0; } - - /* - * Reclaim indirect blocks which never made it to disk. - */ - cnt = 0; - LIST_FOREACH_SAFE(wk, &wkhd, wk_list, wkn) { - if (wk->wk_type != D_JNEWBLK) - continue; - /* XXX Is the lock necessary here for more than an assert? */ - ACQUIRE_LOCK(&lk); - WORKLIST_REMOVE(wk); - FREE_LOCK(&lk); - jnewblk = WK_JNEWBLK(wk); - if (jnewblk->jn_lbn > 0) - i = (jnewblk->jn_lbn - -lbn) / lbnadd; - else - i = (-(jnewblk->jn_lbn + level - 1) - -(lbn + level)) / - lbnadd; - KASSERT(i >= 0 && i < NINDIR(fs), - ("indir_trunc: Index out of range %d parent %jd lbn %jd level %d", - i, lbn, jnewblk->jn_lbn, level)); - /* Clear the pointer so it isn't found below. 
*/ - if (ufs1fmt) { - nb = bap1[i]; - bap1[i] = 0; - } else { - nb = bap2[i]; - bap2[i] = 0; - } - KASSERT(nb == jnewblk->jn_blkno, - ("indir_trunc: Block mismatch %jd != %jd", - nb, jnewblk->jn_blkno)); - if (level != 0) { - ufs_lbn_t nlbn; - - nlbn = (lbn + 1) - (i * lbnadd); - nfreework = newfreework(ump, freeblks, freework, - nlbn, nb, fs->fs_frag, 0); - nfreework->fw_jnewblk = jnewblk; - freedeps++; - indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); - } else { - struct workhead freewk; - - LIST_INIT(&freewk); - ACQUIRE_LOCK(&lk); - WORKLIST_INSERT(&freewk, wk); - FREE_LOCK(&lk); - ffs_blkfree(ump, fs, freeblks->fb_devvp, - jnewblk->jn_blkno, fs->fs_bsize, - freeblks->fb_previousinum, &freewk); - } - cnt++; - } - ACQUIRE_LOCK(&lk); - /* Any remaining journal work can be completed with freeblks. */ - jwork_move(&freeblks->fb_jwork, &wkhd); - FREE_LOCK(&lk); + level = lbn_level(lbn); + needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ; + lbnadd = lbn_offset(fs, level); nblocks = btodb(fs->fs_bsize); - if (ufs1fmt) - nb = bap1[0]; - else - nb = bap2[0]; nfreework = freework; + freedeps = 0; + cnt = 0; /* - * Reclaim on disk blocks. + * Reclaim blocks. Traverses into nested indirect levels and + * arranges for the current level to be freed when subordinates + * are free when journaling. 
*/ for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) { if (i != NINDIR(fs) - 1) { @@ -6589,12 +7670,10 @@ indir_trunc(freework, dbn, lbn) continue; cnt++; if (level != 0) { - ufs_lbn_t nlbn; - nlbn = (lbn + 1) - (i * lbnadd); if (needj != 0) { nfreework = newfreework(ump, freeblks, freework, - nlbn, nb, fs->fs_frag, 0); + nlbn, nb, fs->fs_frag, 0, 0); freedeps++; } indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); @@ -6614,85 +7693,106 @@ indir_trunc(freework, dbn, lbn) freedeps++; } ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, - fs->fs_bsize, freeblks->fb_previousinum, &wkhd); + fs->fs_bsize, freeblks->fb_inum, + freeblks->fb_vtype, &wkhd); } } + if (goingaway) { + bp->b_flags |= B_INVAL | B_NOCACHE; + brelse(bp); + } + fs_pendingblocks = 0; if (level == 0) fs_pendingblocks = (nblocks * cnt); /* - * If we're not journaling we can free the indirect now. Otherwise - * setup the ref counts and offset so this indirect can be completed - * when its children are free. + * If we are journaling set up the ref counts and offset so this + * indirect can be completed when its children are free. 
*/ - if (needj == 0) { - fs_pendingblocks += nblocks; - dbn = dbtofsb(fs, dbn); - ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, - freeblks->fb_previousinum, NULL); - ACQUIRE_LOCK(&lk); - freeblks->fb_chkcnt -= fs_pendingblocks; - if (freework->fw_blkno == dbn) - handle_written_freework(freework); - FREE_LOCK(&lk); - freework = NULL; - } else { + if (needj) { ACQUIRE_LOCK(&lk); + freeblks->fb_freecnt += fs_pendingblocks; freework->fw_off = i; freework->fw_ref += freedeps; freework->fw_ref -= NINDIR(fs) + 1; - if (freework->fw_ref != 0) - freework = NULL; - freeblks->fb_chkcnt -= fs_pendingblocks; + if (level == 0) + freeblks->fb_cgwait += freedeps; + if (freework->fw_ref == 0) + freework_freeblock(freework); FREE_LOCK(&lk); + return; } - if (fs_pendingblocks) { - UFS_LOCK(ump); - fs->fs_pendingblocks -= fs_pendingblocks; - UFS_UNLOCK(ump); + /* + * If we're not journaling we can free the indirect now. + */ + fs_pendingblocks += nblocks; + dbn = dbtofsb(fs, dbn); + ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, + freeblks->fb_inum, freeblks->fb_vtype, NULL); + /* Non SUJ softdep does single-threaded truncations. */ + freeblks->fb_freecnt += fs_pendingblocks; + if (freework->fw_blkno == dbn) { + freework->fw_state |= ALLCOMPLETE; + ACQUIRE_LOCK(&lk); + handle_written_freework(freework); + FREE_LOCK(&lk); } - bp->b_flags |= B_INVAL | B_NOCACHE; - brelse(bp); - if (freework) - handle_workitem_indirblk(freework); return; } /* - * Cancel an allocindir when it is removed via truncation. + * Cancel an allocindir when it is removed via truncation. When bp is not + * NULL the indirect never appeared on disk and is scheduled to be freed + * independently of the indir so we can more easily track journal work. 
*/ static void -cancel_allocindir(aip, inodedep, freeblks) +cancel_allocindir(aip, bp, freeblks, trunc) struct allocindir *aip; - struct inodedep *inodedep; + struct buf *bp; struct freeblks *freeblks; + int trunc; { - struct jnewblk *jnewblk; + struct indirdep *indirdep; + struct freefrag *freefrag; struct newblk *newblk; + newblk = (struct newblk *)aip; + LIST_REMOVE(aip, ai_next); + /* + * We must eliminate the pointer in bp if it must be freed on its + * own due to partial truncate or pending journal work. + */ + if (bp && (trunc || newblk->nb_jnewblk)) { + /* + * Clear the pointer and mark the aip to be freed + * directly if it never existed on disk. + */ + aip->ai_state |= DELAYEDFREE; + indirdep = aip->ai_indirdep; + if (indirdep->ir_state & UFS1FMT) + ((ufs1_daddr_t *)bp->b_data)[aip->ai_offset] = 0; + else + ((ufs2_daddr_t *)bp->b_data)[aip->ai_offset] = 0; + } + /* + * When truncating the previous pointer will be freed via + * savedbp. Eliminate the freefrag which would dup free. + */ + if (trunc && (freefrag = newblk->nb_freefrag) != NULL) { + newblk->nb_freefrag = NULL; + if (freefrag->ff_jdep) + cancel_jfreefrag( + WK_JFREEFRAG(freefrag->ff_jdep)); + jwork_move(&freeblks->fb_jwork, &freefrag->ff_jwork); + WORKITEM_FREE(freefrag, D_FREEFRAG); + } /* * If the journal hasn't been written the jnewblk must be passed * to the call to ffs_blkfree that reclaims the space. We accomplish - * this by linking the journal dependency into the indirdep to be - * freed when indir_trunc() is called. If the journal has already - * been written we can simply reclaim the journal space when the - * freeblks work is complete. + * this by leaving the journal dependency on the newblk to be freed + * when a freework is created in handle_workitem_freeblocks(). 
*/ - LIST_REMOVE(aip, ai_next); - newblk = (struct newblk *)aip; - if (newblk->nb_jnewblk == NULL) { - if (cancel_newblk(newblk, NULL, &freeblks->fb_jwork)) - panic("cancel_allocindir: Unexpected dependency."); - } else { - jnewblk = cancel_newblk(newblk, &aip->ai_indirdep->ir_list, - &aip->ai_indirdep->ir_jwork); - if (jnewblk) - LIST_INSERT_HEAD(&aip->ai_indirdep->ir_jnewblkhd, - jnewblk, jn_indirdeps); - } - if (inodedep && inodedep->id_state & DEPCOMPLETE) - WORKLIST_INSERT(&inodedep->id_bufwait, &newblk->nb_list); - else - free_newblk(newblk); + cancel_newblk(newblk, NULL, &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list); } /* @@ -6750,13 +7850,15 @@ setup_newdir(dap, newinum, dinum, newdirbp, mkdirp) * any subsequent additions are not marked live until the * block is reachable via the inode. */ - if (pagedep_lookup(mp, newinum, 0, 0, &pagedep) == 0) + if (pagedep_lookup(mp, newdirbp, newinum, 0, 0, &pagedep) == 0) panic("setup_newdir: lost pagedep"); LIST_FOREACH(wk, &newdirbp->b_dep, wk_list) if (wk->wk_type == D_ALLOCDIRECT) break; if (wk == NULL) panic("setup_newdir: lost allocdirect"); + if (pagedep->pd_state & NEWBLOCK) + panic("setup_newdir: NEWBLOCK already set"); newblk = WK_NEWBLK(wk); pagedep->pd_state |= NEWBLOCK; pagedep->pd_newdirblk = newdirblk; @@ -6788,7 +7890,7 @@ setup_newdir(dap, newinum, dinum, newdirbp, mkdirp) WORKITEM_FREE(mkdir2, D_MKDIR); } else { LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); - WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); + WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list); } *mkdirp = mkdir2; @@ -6885,8 +7987,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) /* * Link into parent directory pagedep to await its being written. 
*/ - if (pagedep_lookup(mp, dp->i_number, lbn, DEPALLOC, &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + pagedep_lookup(mp, bp, dp->i_number, lbn, DEPALLOC, &pagedep); #ifdef DEBUG if (diradd_lookup(pagedep, offset) != NULL) panic("softdep_setup_directory_add: %p already at off %d\n", @@ -7027,11 +8128,8 @@ softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize) oldoffset = offset + (oldloc - base); newoffset = offset + (newloc - base); ACQUIRE_LOCK(&lk); - if (pagedep_lookup(mp, dp->i_number, lbn, flags, &pagedep) == 0) { - if (pagedep) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + if (pagedep_lookup(mp, bp, dp->i_number, lbn, flags, &pagedep) == 0) goto done; - } dap = diradd_lookup(pagedep, oldoffset); if (dap) { dap->da_offset = newoffset; @@ -7327,7 +8425,7 @@ softdep_setup_remove(bp, dp, ip, isrmdir) direct = LIST_EMPTY(&dirrem->dm_jremrefhd); FREE_LOCK(&lk); if (direct) - handle_workitem_remove(dirrem, NULL); + handle_workitem_remove(dirrem, 0); } } @@ -7367,7 +8465,7 @@ cancel_diradd_dotdot(ip, dirrem, jremref) struct diradd *dap; struct worklist *wk; - if (pagedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, 0, + if (pagedep_lookup(UFSTOVFS(ip->i_ump), NULL, ip->i_number, 0, 0, &pagedep) == 0) return (jremref); dap = diradd_lookup(pagedep, DOTDOT_OFFSET); @@ -7536,10 +8634,10 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp) ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); - if (pagedep_lookup(UFSTOVFS(dp->i_ump), dp->i_number, lbn, DEPALLOC, - &pagedep) == 0) - WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); + pagedep_lookup(UFSTOVFS(dp->i_ump), bp, dp->i_number, lbn, DEPALLOC, + &pagedep); dirrem->dm_pagedep = pagedep; + dirrem->dm_offset = offset; /* * If we're renaming a .. link to a new directory, cancel any * existing MKDIR_PARENT mkdir. 
If it has already been canceled @@ -8092,10 +9190,10 @@ clear_unlinked_inodedep(inodedep) * This workitem decrements the inode's link count. * If the link count reaches zero, the file is removed. */ -static void -handle_workitem_remove(dirrem, xp) +static int +handle_workitem_remove(dirrem, flags) struct dirrem *dirrem; - struct vnode *xp; + int flags; { struct inodedep *inodedep; struct workhead dotdotwk; @@ -8105,7 +9203,6 @@ handle_workitem_remove(dirrem, xp) struct vnode *vp; struct inode *ip; ino_t oldinum; - int error; if (dirrem->dm_state & ONWORKLIST) panic("handle_workitem_remove: dirrem %p still on worklist", @@ -8113,12 +9210,9 @@ handle_workitem_remove(dirrem, xp) oldinum = dirrem->dm_oldinum; mp = dirrem->dm_list.wk_mp; ump = VFSTOUFS(mp); - if ((vp = xp) == NULL && - (error = ffs_vgetf(mp, oldinum, LK_EXCLUSIVE, &vp, - FFSV_FORCEINSMQ)) != 0) { - softdep_error("handle_workitem_remove: vget", error); - return; - } + flags |= LK_EXCLUSIVE; + if (ffs_vgetf(mp, oldinum, flags, &vp, FFSV_FORCEINSMQ) != 0) + return (EBUSY); ip = VTOI(vp); ACQUIRE_LOCK(&lk); if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0) @@ -8209,22 +9303,17 @@ handle_workitem_remove(dirrem, xp) if (inodedep == NULL || (inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED || check_inode_unwritten(inodedep)) { - if (xp != NULL) - add_to_worklist(&dirrem->dm_list, 0); FREE_LOCK(&lk); - if (xp == NULL) { - vput(vp); - handle_workitem_remove(dirrem, NULL); - } - return; + vput(vp); + return handle_workitem_remove(dirrem, flags); } WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); FREE_LOCK(&lk); ip->i_flag |= IN_CHANGE; out: ffs_update(vp, 0); - if (xp == NULL) - vput(vp); + vput(vp); + return (0); } /* @@ -8318,7 +9407,7 @@ softdep_disk_io_initiation(bp) struct worklist marker; struct inodedep *inodedep; struct freeblks *freeblks; - struct jfreeblk *jfreeblk; + struct jblkdep *jblkdep; struct newblk *newblk; /* @@ -8370,19 +9459,18 @@ softdep_disk_io_initiation(bp) case 
D_FREEBLKS: freeblks = WK_FREEBLKS(wk); - jfreeblk = LIST_FIRST(&freeblks->fb_jfreeblkhd); + jblkdep = LIST_FIRST(&freeblks->fb_jblkdephd); /* - * We have to wait for the jfreeblks to be journaled + * We have to wait for the freeblks to be journaled * before we can write an inodeblock with updated * pointers. Be careful to arrange the marker so - * we revisit the jfreeblk if it's not removed by + * we revisit the freeblks if it's not removed by * the first jwait(). */ - if (jfreeblk != NULL) { + if (jblkdep != NULL) { LIST_REMOVE(&marker, wk_list); LIST_INSERT_BEFORE(wk, &marker, wk_list); - stat_jwait_freeblks++; - jwait(&jfreeblk->jf_list); + jwait(&jblkdep->jb_list, MNT_WAIT); } continue; case D_ALLOCDIRECT: @@ -8396,12 +9484,11 @@ softdep_disk_io_initiation(bp) */ newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk != NULL && - indirblk_inseg(newblk->nb_list.wk_mp, + indirblk_lookup(newblk->nb_list.wk_mp, newblk->nb_newblkno)) { LIST_REMOVE(&marker, wk_list); LIST_INSERT_BEFORE(wk, &marker, wk_list); - stat_jwait_newblk++; - jwait(&newblk->nb_jnewblk->jn_list); + jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); } continue; @@ -8461,14 +9548,10 @@ initiate_write_filepage(pagedep, bp) * locked so the dependency can not go away. 
*/ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) - while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { - stat_jwait_filepage++; - jwait(&jremref->jr_list); - } - while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { - stat_jwait_filepage++; - jwait(&jmvref->jm_list); - } + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) + jwait(&jremref->jr_list, MNT_WAIT); + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) + jwait(&jmvref->jm_list, MNT_WAIT); for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { ep = (struct direct *) @@ -8811,6 +9894,8 @@ initiate_write_inodeblock_ufs2(inodedep, bp) #ifdef INVARIANTS if (deplist != 0 && prevlbn >= adp->ad_offset) panic("softdep_write_inodeblock: lbn order"); + if ((adp->ad_state & ATTACHED) == 0) + panic("inodedep %p and adp %p not attached", inodedep, adp); prevlbn = adp->ad_offset; if (adp->ad_offset < NDADDR && dp->di_db[adp->ad_offset] != adp->ad_newblkno) @@ -8900,10 +9985,9 @@ initiate_write_inodeblock_ufs2(inodedep, bp) * list. */ static void -cancel_indirdep(indirdep, bp, inodedep, freeblks) +cancel_indirdep(indirdep, bp, freeblks) struct indirdep *indirdep; struct buf *bp; - struct inodedep *inodedep; struct freeblks *freeblks; { struct allocindir *aip; @@ -8924,24 +10008,38 @@ cancel_indirdep(indirdep, bp, inodedep, freeblks) */ if (indirdep->ir_state & GOINGAWAY) panic("cancel_indirdep: already gone"); - if (indirdep->ir_state & ONDEPLIST) { - indirdep->ir_state &= ~ONDEPLIST; + if ((indirdep->ir_state & DEPCOMPLETE) == 0) { + indirdep->ir_state |= DEPCOMPLETE; LIST_REMOVE(indirdep, ir_next); } indirdep->ir_state |= GOINGAWAY; VFSTOUFS(indirdep->ir_list.wk_mp)->um_numindirdeps += 1; + /* + * Pass in bp for blocks still have journal writes + * pending so we can cancel them on their own. 
+ */ while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); + cancel_allocindir(aip, bp, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); + cancel_allocindir(aip, NULL, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); + cancel_allocindir(aip, NULL, freeblks, 0); while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != 0) - cancel_allocindir(aip, inodedep, freeblks); - bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); + cancel_allocindir(aip, NULL, freeblks, 0); + /* + * If there are pending partial truncations we need to keep the + * old block copy around until they complete. This is because + * the current b_data is not a perfect superset of the available + * blocks. + */ + if (TAILQ_EMPTY(&indirdep->ir_trunc)) + bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); + else + bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); WORKLIST_REMOVE(&indirdep->ir_list); WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list); - indirdep->ir_savebp = NULL; + indirdep->ir_bp = NULL; + indirdep->ir_freeblks = freeblks; } /* @@ -8952,10 +10050,8 @@ free_indirdep(indirdep) struct indirdep *indirdep; { - KASSERT(LIST_EMPTY(&indirdep->ir_jwork), - ("free_indirdep: Journal work not empty.")); - KASSERT(LIST_EMPTY(&indirdep->ir_jnewblkhd), - ("free_indirdep: Journal new block list not empty.")); + KASSERT(TAILQ_EMPTY(&indirdep->ir_trunc), + ("free_indirdep: Indir trunc list not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_completehd), ("free_indirdep: Complete head not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_writehd), @@ -8964,10 +10060,10 @@ free_indirdep(indirdep) ("free_indirdep: done head not empty.")); KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd), ("free_indirdep: deplist head not empty.")); - KASSERT(indirdep->ir_savebp == NULL, - ("free_indirdep: %p ir_savebp != 
NULL", indirdep)); - KASSERT((indirdep->ir_state & ONDEPLIST) == 0, - ("free_indirdep: %p still on deplist.", indirdep)); + KASSERT((indirdep->ir_state & DEPCOMPLETE), + ("free_indirdep: %p still on newblk list.", indirdep)); + KASSERT(indirdep->ir_saveddata == NULL, + ("free_indirdep: %p still has saved data.", indirdep)); if (indirdep->ir_state & ONWORKLIST) WORKLIST_REMOVE(&indirdep->ir_list); WORKITEM_FREE(indirdep, D_INDIRDEP); @@ -8984,22 +10080,25 @@ initiate_write_indirdep(indirdep, bp) struct buf *bp; { + indirdep->ir_state |= IOSTARTED; if (indirdep->ir_state & GOINGAWAY) panic("disk_io_initiation: indirdep gone"); - /* * If there are no remaining dependencies, this will be writing * the real pointers. */ - if (LIST_EMPTY(&indirdep->ir_deplisthd)) + if (LIST_EMPTY(&indirdep->ir_deplisthd) && + TAILQ_EMPTY(&indirdep->ir_trunc)) return; /* * Replace up-to-date version with safe version. */ - FREE_LOCK(&lk); - indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, - M_SOFTDEP_FLAGS); - ACQUIRE_LOCK(&lk); + if (indirdep->ir_saveddata == NULL) { + FREE_LOCK(&lk); + indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP, + M_SOFTDEP_FLAGS); + ACQUIRE_LOCK(&lk); + } indirdep->ir_state &= ~ATTACHED; indirdep->ir_state |= UNDONE; bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); @@ -9066,11 +10165,11 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) int frags; struct workhead *wkhd; { - struct jnewblk *jnewblk; - struct worklist *wk, *wkn; -#ifdef SUJ_DEBUG struct bmsafemap *bmsafemap; + struct jnewblk *jnewblk; + struct worklist *wk; struct fs *fs; +#ifdef SUJ_DEBUG uint8_t *blksfree; struct cg *cgp; ufs2_daddr_t jstart; @@ -9081,25 +10180,29 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) #endif ACQUIRE_LOCK(&lk); + /* Lookup the bmsafemap so we track when it is dirty. */ + fs = VFSTOUFS(mp)->um_fs; + bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno)); /* * Detach any jnewblks which have been canceled. 
They must linger * until the bitmap is cleared again by ffs_blkfree() to prevent * an unjournaled allocation from hitting the disk. */ if (wkhd) { - LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) { - if (wk->wk_type != D_JNEWBLK) + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + if (wk->wk_type != D_JNEWBLK) { + WORKLIST_INSERT(&bmsafemap->sm_freehd, wk); continue; + } jnewblk = WK_JNEWBLK(wk); KASSERT(jnewblk->jn_state & GOINGAWAY, ("softdep_setup_blkfree: jnewblk not canceled.")); - WORKLIST_REMOVE(wk); #ifdef SUJ_DEBUG /* * Assert that this block is free in the bitmap * before we discard the jnewblk. */ - fs = VFSTOUFS(mp)->um_fs; cgp = (struct cg *)bp->b_data; blksfree = cg_blksfree(cgp); bno = dtogd(fs, jnewblk->jn_blkno); @@ -9117,12 +10220,6 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) wk->wk_state |= COMPLETE | ATTACHED; free_jnewblk(jnewblk); } - /* - * The buf must be locked by the caller otherwise these could - * be added while it's being written and the write would - * complete them before they made it to disk. - */ - jwork_move(&bp->b_dep, wkhd); } #ifdef SUJ_DEBUG @@ -9158,6 +10255,70 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd) FREE_LOCK(&lk); } +/* + * Revert a block allocation when the journal record that describes it + * is not yet written. + */ +int +jnewblk_rollback(jnewblk, fs, cgp, blksfree) + struct jnewblk *jnewblk; + struct fs *fs; + struct cg *cgp; + uint8_t *blksfree; +{ + ufs1_daddr_t fragno; + long cgbno, bbase; + int frags, blk; + int i; + + frags = 0; + cgbno = dtogd(fs, jnewblk->jn_blkno); + /* + * We have to test which frags need to be rolled back. We may + * be operating on a stale copy when doing background writes. + */ + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) + if (isclr(blksfree, cgbno + i)) + frags++; + if (frags == 0) + return (0); + /* + * This is mostly ffs_blkfree() sans some validation and + * superblock updates. 
+ */ + if (frags == fs->fs_frag) { + fragno = fragstoblks(fs, cgbno); + ffs_setblock(fs, blksfree, fragno); + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + } else { + cgbno += jnewblk->jn_oldfrags; + bbase = cgbno - fragnum(fs, cgbno); + /* Decrement the old frags. */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* Deallocate the fragment */ + for (i = 0; i < frags; i++) + setbit(blksfree, cgbno + i); + cgp->cg_cs.cs_nffree += frags; + /* Add back in counts associated with the new frags */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + /* If a complete block has been reassembled, account for it. */ + fragno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, blksfree, fragno)) { + cgp->cg_cs.cs_nffree -= fs->fs_frag; + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + } + } + stat_jnewblk++; + jnewblk->jn_state &= ~ATTACHED; + jnewblk->jn_state |= UNDONE; + + return (frags); +} + static void initiate_write_bmsafemap(bmsafemap, bp) struct bmsafemap *bmsafemap; @@ -9169,10 +10330,7 @@ initiate_write_bmsafemap(bmsafemap, bp) uint8_t *blksfree; struct cg *cgp; struct fs *fs; - int cleared; ino_t ino; - long bno; - int i; if (bmsafemap->sm_state & IOSTARTED) panic("initiate_write_bmsafemap: Already started\n"); @@ -9211,25 +10369,9 @@ initiate_write_bmsafemap(bmsafemap, bp) fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs; blksfree = cg_blksfree(cgp); LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { - bno = dtogd(fs, jnewblk->jn_blkno); - cleared = 0; - for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; - i++) { - if (isclr(blksfree, bno + i)) { - cleared = 1; - setbit(blksfree, bno + i); - } - } - /* - * We may not clear the block if it's a background - * copy. In that case there is no reason to detach - * it. 
- */ - if (cleared) { - stat_jnewblk++; - jnewblk->jn_state &= ~ATTACHED; - jnewblk->jn_state |= UNDONE; - } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + if (jnewblk_rollback(jnewblk, fs, cgp, blksfree)) + continue; + if ((bp->b_xflags & BX_BKGRDMARKER) == 0) panic("initiate_write_bmsafemap: block %jd " "marked free", jnewblk->jn_blkno); } @@ -9242,6 +10384,8 @@ initiate_write_bmsafemap(bmsafemap, bp) inodedep, id_deps); LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr, newblk, nb_deps); + LIST_SWAP(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist, + wk_list); } /* @@ -9260,6 +10404,7 @@ softdep_disk_write_complete(bp) struct worklist *wk; struct worklist *owk; struct workhead reattach; + struct freeblks *freeblks; struct buf *sbp; /* @@ -9277,6 +10422,7 @@ softdep_disk_write_complete(bp) ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); + dep_write[wk->wk_type]++; if (wk == owk) panic("duplicate worklist: %p\n", wk); owk = wk; @@ -9318,18 +10464,16 @@ softdep_disk_write_complete(bp) case D_FREEBLKS: wk->wk_state |= COMPLETE; - if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE) - add_to_worklist(wk, 1); + freeblks = WK_FREEBLKS(wk); + if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE && + LIST_EMPTY(&freeblks->fb_jblkdephd)) + add_to_worklist(wk, WK_NODELAY); continue; case D_FREEWORK: handle_written_freework(WK_FREEWORK(wk)); break; - case D_FREEDEP: - free_freedep(WK_FREEDEP(wk)); - continue; - case D_JSEGDEP: free_jsegdep(WK_JSEGDEP(wk)); continue; @@ -9343,6 +10487,10 @@ softdep_disk_write_complete(bp) WORKLIST_INSERT(&reattach, wk); continue; + case D_FREEDEP: + free_freedep(WK_FREEDEP(wk)); + continue; + default: panic("handle_disk_write_complete: Unknown type %s", TYPENAME(wk->wk_type)); @@ -9459,7 +10607,11 @@ handle_allocindir_partdone(aip) return; indirdep = aip->ai_indirdep; LIST_REMOVE(aip, ai_next); - if (indirdep->ir_state & UNDONE) { + /* + * Don't set a pointer while the buffer is undergoing IO 
or while + * we have active truncations. + */ + if (indirdep->ir_state & UNDONE || !TAILQ_EMPTY(&indirdep->ir_trunc)) { LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); return; } @@ -9490,6 +10642,15 @@ handle_jwork(wkhd) case D_JSEGDEP: free_jsegdep(WK_JSEGDEP(wk)); continue; + case D_FREEDEP: + free_freedep(WK_FREEDEP(wk)); + continue; + case D_FREEFRAG: + rele_jseg(WK_JSEG(WK_FREEFRAG(wk)->ff_jdep)); + WORKITEM_FREE(wk, D_FREEFRAG); + case D_FREEWORK: + handle_written_freework(WK_FREEWORK(wk)); + continue; default: panic("handle_jwork: Unknown type %s\n", TYPENAME(wk->wk_type)); @@ -9852,21 +11013,26 @@ handle_written_indirdep(indirdep, bp, bpp) struct buf **bpp; { struct allocindir *aip; + struct buf *sbp; int chgs; if (indirdep->ir_state & GOINGAWAY) - panic("disk_write_complete: indirdep gone"); + panic("handle_written_indirdep: indirdep gone"); + if ((indirdep->ir_state & IOSTARTED) == 0) + panic("handle_written_indirdep: IO not started"); chgs = 0; /* * If there were rollbacks revert them here. */ if (indirdep->ir_saveddata) { bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); - free(indirdep->ir_saveddata, M_INDIRDEP); - indirdep->ir_saveddata = 0; + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + free(indirdep->ir_saveddata, M_INDIRDEP); + indirdep->ir_saveddata = NULL; + } chgs = 1; } - indirdep->ir_state &= ~UNDONE; + indirdep->ir_state &= ~(UNDONE | IOSTARTED); indirdep->ir_state |= ATTACHED; /* * Move allocindirs with written pointers to the completehd if @@ -9878,6 +11044,7 @@ handle_written_indirdep(indirdep, bp, bpp) if ((indirdep->ir_state & DEPCOMPLETE) == 0) { LIST_INSERT_HEAD(&indirdep->ir_completehd, aip, ai_next); + newblk_freefrag(&aip->ai_block); continue; } free_newblk(&aip->ai_block); @@ -9886,50 +11053,42 @@ handle_written_indirdep(indirdep, bp, bpp) * Move allocindirs that have finished dependency processing from * the done list to the write list after updating the pointers. 
*/ - while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { - handle_allocindir_partdone(aip); - if (aip == LIST_FIRST(&indirdep->ir_donehd)) - panic("disk_write_complete: not gone"); - chgs = 1; + if (TAILQ_EMPTY(&indirdep->ir_trunc)) { + while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { + handle_allocindir_partdone(aip); + if (aip == LIST_FIRST(&indirdep->ir_donehd)) + panic("disk_write_complete: not gone"); + chgs = 1; + } } /* - * If this indirdep has been detached from its newblk during - * I/O we need to keep this dep attached to the buffer so - * deallocate_dependencies can find it and properly resolve - * any outstanding dependencies. + * Preserve the indirdep if there were any changes or if it is not + * yet valid on disk. */ - if ((indirdep->ir_state & (ONDEPLIST | DEPCOMPLETE)) == 0) - chgs = 1; - if ((bp->b_flags & B_DELWRI) == 0) + if (chgs) { stat_indir_blk_ptrs++; + bdirty(bp); + return (1); + } /* * If there were no changes we can discard the savedbp and detach * ourselves from the buf. We are only carrying completed pointers * in this case. */ - if (chgs == 0) { - struct buf *sbp; - - sbp = indirdep->ir_savebp; - sbp->b_flags |= B_INVAL | B_NOCACHE; - indirdep->ir_savebp = NULL; - if (*bpp != NULL) - panic("handle_written_indirdep: bp already exists."); - *bpp = sbp; - } else - bdirty(bp); + sbp = indirdep->ir_savebp; + sbp->b_flags |= B_INVAL | B_NOCACHE; + indirdep->ir_savebp = NULL; + indirdep->ir_bp = NULL; + if (*bpp != NULL) + panic("handle_written_indirdep: bp already exists."); + *bpp = sbp; /* - * If there are no fresh dependencies and none waiting on writes - * we can free the indirdep. + * The indirdep may not be freed until its parent points at it. 
*/ - if ((indirdep->ir_state & DEPCOMPLETE) && chgs == 0) { - if (indirdep->ir_state & ONDEPLIST) - LIST_REMOVE(indirdep, ir_next); + if (indirdep->ir_state & DEPCOMPLETE) free_indirdep(indirdep); - return (0); - } - return (chgs); + return (0); } /* @@ -9961,6 +11120,58 @@ bmsafemap_rollbacks(bmsafemap) } /* + * Re-apply an allocation when a cg write is complete. + */ +static int +jnewblk_rollforward(jnewblk, fs, cgp, blksfree) + struct jnewblk *jnewblk; + struct fs *fs; + struct cg *cgp; + uint8_t *blksfree; +{ + ufs1_daddr_t fragno; + ufs2_daddr_t blkno; + long cgbno, bbase; + int frags, blk; + int i; + + frags = 0; + cgbno = dtogd(fs, jnewblk->jn_blkno); + for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) { + if (isclr(blksfree, cgbno + i)) + panic("jnewblk_rollforward: re-allocated fragment"); + frags++; + } + if (frags == fs->fs_frag) { + blkno = fragstoblks(fs, cgbno); + ffs_clrblock(fs, blksfree, (long)blkno); + ffs_clusteracct(fs, cgp, blkno, -1); + cgp->cg_cs.cs_nbfree--; + } else { + bbase = cgbno - fragnum(fs, cgbno); + cgbno += jnewblk->jn_oldfrags; + /* If a complete block had been reassembled, account for it. */ + fragno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, blksfree, fragno)) { + cgp->cg_cs.cs_nffree += fs->fs_frag; + ffs_clusteracct(fs, cgp, fragno, -1); + cgp->cg_cs.cs_nbfree--; + } + /* Decrement the old frags. */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* Allocate the fragment */ + for (i = 0; i < frags; i++) + clrbit(blksfree, cgbno + i); + cgp->cg_cs.cs_nffree -= frags; + /* Add back in counts associated with the new frags */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + } + return (frags); +} + +/* * Complete a write to a bmsafemap structure. Roll forward any bitmap * changes if it's not a background write. Set all written dependencies * to DEPCOMPLETE and free the structure if possible. 
@@ -9974,20 +11185,25 @@ handle_written_bmsafemap(bmsafemap, bp) struct inodedep *inodedep; struct jaddref *jaddref, *jatmp; struct jnewblk *jnewblk, *jntmp; + struct ufsmount *ump; uint8_t *inosused; uint8_t *blksfree; struct cg *cgp; struct fs *fs; ino_t ino; - long bno; int chgs; - int i; if ((bmsafemap->sm_state & IOSTARTED) == 0) panic("initiate_write_bmsafemap: Not started\n"); + ump = VFSTOUFS(bmsafemap->sm_list.wk_mp); chgs = 0; bmsafemap->sm_state &= ~IOSTARTED; /* + * Release journal work that was waiting on the write. + */ + handle_jwork(&bmsafemap->sm_freewr); + + /* * Restore unwritten inode allocation pending jaddref writes. */ if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) { @@ -10025,18 +11241,9 @@ handle_written_bmsafemap(bmsafemap, bp) jntmp) { if ((jnewblk->jn_state & UNDONE) == 0) continue; - bno = dtogd(fs, jnewblk->jn_blkno); - for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; - i++) { - if (bp->b_xflags & BX_BKGRDMARKER) - break; - if ((jnewblk->jn_state & NEWBLOCK) == 0 && - isclr(blksfree, bno + i)) - panic("handle_written_bmsafemap: " - "re-allocated fragment"); - clrbit(blksfree, bno + i); + if ((bp->b_xflags & BX_BKGRDMARKER) == 0 && + jnewblk_rollforward(jnewblk, fs, cgp, blksfree)) chgs = 1; - } jnewblk->jn_state &= ~(UNDONE | NEWBLOCK); jnewblk->jn_state |= ATTACHED; free_jnewblk(jnewblk); @@ -10063,16 +11270,17 @@ handle_written_bmsafemap(bmsafemap, bp) LIST_REMOVE(inodedep, id_deps); inodedep->id_bmsafemap = NULL; } - if (LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && + LIST_REMOVE(bmsafemap, sm_next); + if (chgs == 0 && LIST_EMPTY(&bmsafemap->sm_jaddrefhd) && LIST_EMPTY(&bmsafemap->sm_jnewblkhd) && LIST_EMPTY(&bmsafemap->sm_newblkhd) && - LIST_EMPTY(&bmsafemap->sm_inodedephd)) { - if (chgs) - bdirty(bp); + LIST_EMPTY(&bmsafemap->sm_inodedephd) && + LIST_EMPTY(&bmsafemap->sm_freehd)) { LIST_REMOVE(bmsafemap, sm_hash); WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); return (0); } + LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next); 
bdirty(bp); return (1); } @@ -10113,25 +11321,29 @@ handle_written_mkdir(mkdir, type) complete_mkdir(mkdir); } -static void +static int free_pagedep(pagedep) struct pagedep *pagedep; { int i; - if (pagedep->pd_state & (NEWBLOCK | ONWORKLIST)) - return; + if (pagedep->pd_state & NEWBLOCK) + return (0); + if (!LIST_EMPTY(&pagedep->pd_dirremhd)) + return (0); for (i = 0; i < DAHASHSZ; i++) if (!LIST_EMPTY(&pagedep->pd_diraddhd[i])) - return; - if (!LIST_EMPTY(&pagedep->pd_jmvrefhd)) - return; - if (!LIST_EMPTY(&pagedep->pd_dirremhd)) - return; + return (0); if (!LIST_EMPTY(&pagedep->pd_pendinghd)) - return; + return (0); + if (!LIST_EMPTY(&pagedep->pd_jmvrefhd)) + return (0); + if (pagedep->pd_state & ONWORKLIST) + WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); + + return (1); } /* @@ -10217,11 +11429,7 @@ handle_written_filepage(pagedep, bp) * Otherwise it will remain to track any new entries on * the page in case they are fsync'ed. */ - if ((pagedep->pd_state & NEWBLOCK) == 0 && - LIST_EMPTY(&pagedep->pd_jmvrefhd)) { - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); - } + free_pagedep(pagedep); return (0); } @@ -10324,8 +11532,7 @@ again: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { - stat_jwait_inode++; - jwait(&inoref->if_list); + jwait(&inoref->if_list, MNT_WAIT); goto again; } } @@ -10463,8 +11670,7 @@ restart: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { - stat_jwait_inode++; - jwait(&inoref->if_list); + jwait(&inoref->if_list, MNT_WAIT); goto restart; } } @@ -10610,6 +11816,8 @@ restart: * Flush all the dirty bitmaps associated with the block device * before flushing the rest of the dirty blocks so as to reduce * the number of dependencies that will have to be rolled back. + * + * XXX Unused? 
*/ void softdep_fsync_mountdev(vp) @@ -10656,76 +11864,129 @@ restart: } /* + * Sync all cylinder groups that were dirty at the time this function is + * called. Newly dirtied cgs will be inserted before the sintenel. This + * is used to flush freedep activity that may be holding up writes to a + * indirect block. + */ +static int +sync_cgs(mp, waitfor) + struct mount *mp; + int waitfor; +{ + struct bmsafemap *bmsafemap; + struct bmsafemap *sintenel; + struct ufsmount *ump; + struct buf *bp; + int error; + + sintenel = malloc(sizeof(*sintenel), M_BMSAFEMAP, M_ZERO | M_WAITOK); + sintenel->sm_cg = -1; + ump = VFSTOUFS(mp); + error = 0; + ACQUIRE_LOCK(&lk); + LIST_INSERT_HEAD(&ump->softdep_dirtycg, sintenel, sm_next); + for (bmsafemap = LIST_NEXT(sintenel, sm_next); bmsafemap != NULL; + bmsafemap = LIST_NEXT(sintenel, sm_next)) { + /* Skip sintenels and cgs with no work to release. */ + if (bmsafemap->sm_cg == -1 || + (LIST_EMPTY(&bmsafemap->sm_freehd) && + LIST_EMPTY(&bmsafemap->sm_freewr))) { + LIST_REMOVE(sintenel, sm_next); + LIST_INSERT_AFTER(bmsafemap, sintenel, sm_next); + continue; + } + /* + * If we don't get the lock and we're waiting try again, if + * not move on to the next buf and try to sync it. + */ + bp = getdirtybuf(bmsafemap->sm_buf, &lk, waitfor); + if (bp == NULL && waitfor == MNT_WAIT) + continue; + LIST_REMOVE(sintenel, sm_next); + LIST_INSERT_AFTER(bmsafemap, sintenel, sm_next); + if (bp == NULL) + continue; + FREE_LOCK(&lk); + if (waitfor == MNT_NOWAIT) + bawrite(bp); + else + error = bwrite(bp); + ACQUIRE_LOCK(&lk); + if (error) + break; + } + LIST_REMOVE(sintenel, sm_next); + FREE_LOCK(&lk); + free(sintenel, M_BMSAFEMAP); + return (error); +} + +/* * This routine is called when we are trying to synchronously flush a * file. This routine must eliminate any filesystem metadata dependencies - * so that the syncing routine can succeed by pushing the dirty blocks - * associated with the file. If any I/O errors occur, they are returned. 
+ * so that the syncing routine can succeed. */ int softdep_sync_metadata(struct vnode *vp) { - struct pagedep *pagedep; - struct allocindir *aip; - struct newblk *newblk; - struct buf *bp, *nbp; - struct worklist *wk; - struct bufobj *bo; - int i, error, waitfor; + int error; - if (!DOINGSOFTDEP(vp)) - return (0); /* - * Ensure that any direct block dependencies have been cleared. + * Ensure that any direct block dependencies have been cleared, + * truncations are started, and inode references are journaled. */ ACQUIRE_LOCK(&lk); - if ((error = flush_inodedep_deps(vp->v_mount, VTOI(vp)->i_number))) { - FREE_LOCK(&lk); - return (error); - } - FREE_LOCK(&lk); /* - * For most files, the only metadata dependencies are the - * cylinder group maps that allocate their inode or blocks. - * The block allocation dependencies can be found by traversing - * the dependency lists for any buffers that remain on their - * dirty buffer list. The inode allocation dependency will - * be resolved when the inode is updated with MNT_WAIT. - * This work is done in two passes. The first pass grabs most - * of the buffers and begins asynchronously writing them. The - * only way to wait for these asynchronous writes is to sleep - * on the filesystem vnode which may stay busy for a long time - * if the filesystem is active. So, instead, we make a second - * pass over the dependencies blocking on each write. In the - * usual case we will be blocking against a write that we - * initiated, so when it is done the dependency will have been - * resolved. Thus the second pass is expected to end quickly. + * Write all journal records to prevent rollbacks on devvp. */ - waitfor = MNT_NOWAIT; - bo = &vp->v_bufobj; + if (vp->v_type == VCHR) + softdep_flushjournal(vp->v_mount); + error = flush_inodedep_deps(vp, vp->v_mount, VTOI(vp)->i_number); + /* + * Ensure that all truncates are written so we won't find deps on + * indirect blocks. 
+ */ + process_truncates(vp); + FREE_LOCK(&lk); + + return (error); +} + +/* + * This routine is called when we are attempting to sync a buf with + * dependencies. If waitfor is MNT_NOWAIT it attempts to schedule any + * other IO it can but returns EBUSY if the buffer is not yet able to + * be written. Dependencies which will not cause rollbacks will always + * return 0. + */ +int +softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor) +{ + struct indirdep *indirdep; + struct pagedep *pagedep; + struct allocindir *aip; + struct newblk *newblk; + struct buf *nbp; + struct worklist *wk; + int i, error; -top: /* - * We must wait for any I/O in progress to finish so that - * all potential buffers on the dirty list will be visible. + * For VCHR we just don't want to force flush any dependencies that + * will cause rollbacks. */ - BO_LOCK(bo); - drain_output(vp); - while ((bp = TAILQ_FIRST(&bo->bo_dirty.bv_hd)) != NULL) { - bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT); - if (bp) - break; - } - BO_UNLOCK(bo); - if (bp == NULL) + if (vp->v_type == VCHR) { + if (waitfor == MNT_NOWAIT && softdep_count_dependencies(bp, 0)) + return (EBUSY); return (0); -loop: - /* While syncing snapshots, we must allow recursive lookups */ - BUF_AREC(bp); + } ACQUIRE_LOCK(&lk); /* * As we hold the buffer locked, none of its dependencies * will disappear. 
*/ + error = 0; +top: LIST_FOREACH(wk, &bp->b_dep, wk_list) { switch (wk->wk_type) { @@ -10733,46 +11994,54 @@ loop: case D_ALLOCINDIR: newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk != NULL) { - stat_jwait_newblk++; - jwait(&newblk->nb_jnewblk->jn_list); - goto restart; + if (waitfor == MNT_NOWAIT) { + error = EBUSY; + goto out_unlock; + } + jwait(&newblk->nb_jnewblk->jn_list, waitfor); + goto top; } - if (newblk->nb_state & DEPCOMPLETE) + if (newblk->nb_state & DEPCOMPLETE || + waitfor == MNT_NOWAIT) continue; nbp = newblk->nb_bmsafemap->sm_buf; nbp = getdirtybuf(nbp, &lk, waitfor); if (nbp == NULL) - continue; + goto top; FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { - bawrite(nbp); - } else if ((error = bwrite(nbp)) != 0) { - break; - } + if ((error = bwrite(nbp)) != 0) + goto out; ACQUIRE_LOCK(&lk); continue; case D_INDIRDEP: + indirdep = WK_INDIRDEP(wk); + if (waitfor == MNT_NOWAIT) { + if (!TAILQ_EMPTY(&indirdep->ir_trunc) || + !LIST_EMPTY(&indirdep->ir_deplisthd)) { + error = EBUSY; + goto out_unlock; + } + } + if (!TAILQ_EMPTY(&indirdep->ir_trunc)) + panic("softdep_sync_buf: truncation pending."); restart: - - LIST_FOREACH(aip, - &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { + LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { newblk = (struct newblk *)aip; if (newblk->nb_jnewblk != NULL) { - stat_jwait_newblk++; - jwait(&newblk->nb_jnewblk->jn_list); + jwait(&newblk->nb_jnewblk->jn_list, + waitfor); goto restart; } if (newblk->nb_state & DEPCOMPLETE) continue; nbp = newblk->nb_bmsafemap->sm_buf; - nbp = getdirtybuf(nbp, &lk, MNT_WAIT); + nbp = getdirtybuf(nbp, &lk, waitfor); if (nbp == NULL) goto restart; FREE_LOCK(&lk); - if ((error = bwrite(nbp)) != 0) { - goto loop_end; - } + if ((error = bwrite(nbp)) != 0) + goto out; ACQUIRE_LOCK(&lk); goto restart; } @@ -10780,6 +12049,18 @@ loop: case D_PAGEDEP: /* + * Only flush directory entries in synchronous passes. 
+ */ + if (waitfor != MNT_WAIT) { + error = EBUSY; + goto out_unlock; + } + /* + * While syncing snapshots, we must allow recursive + * lookups. + */ + BUF_AREC(bp); + /* * We are trying to sync a directory that may * have dependencies on both its own metadata * and/or dependencies on the inodes of any @@ -10790,64 +12071,28 @@ loop: for (i = 0; i < DAHASHSZ; i++) { if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0) continue; - if ((error = - flush_pagedep_deps(vp, wk->wk_mp, - &pagedep->pd_diraddhd[i]))) { - FREE_LOCK(&lk); - goto loop_end; + if ((error = flush_pagedep_deps(vp, wk->wk_mp, + &pagedep->pd_diraddhd[i]))) { + BUF_NOREC(bp); + goto out_unlock; } } + BUF_NOREC(bp); + continue; + + case D_FREEWORK: continue; default: - panic("softdep_sync_metadata: Unknown type %s", + panic("softdep_sync_buf: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } - loop_end: - /* We reach here only in error and unlocked */ - if (error == 0) - panic("softdep_sync_metadata: zero error"); - BUF_NOREC(bp); - bawrite(bp); - return (error); } +out_unlock: FREE_LOCK(&lk); - BO_LOCK(bo); - while ((nbp = TAILQ_NEXT(bp, b_bobufs)) != NULL) { - nbp = getdirtybuf(nbp, BO_MTX(bo), MNT_WAIT); - if (nbp) - break; - } - BO_UNLOCK(bo); - BUF_NOREC(bp); - bawrite(bp); - if (nbp != NULL) { - bp = nbp; - goto loop; - } - /* - * The brief unlock is to allow any pent up dependency - * processing to be done. Then proceed with the second pass. - */ - if (waitfor == MNT_NOWAIT) { - waitfor = MNT_WAIT; - goto top; - } - - /* - * If we have managed to get rid of all the dirty buffers, - * then we are done. For certain directories and block - * devices, we may need to do further work. - * - * We must wait for any I/O in progress to finish so that - * all potential buffers on the dirty list will be visible. - */ - BO_LOCK(bo); - drain_output(vp); - BO_UNLOCK(bo); - return ffs_update(vp, 1); - /* return (0); */ +out: + return (error); } /* @@ -10855,7 +12100,8 @@ loop: * Called with splbio blocked. 
*/ static int -flush_inodedep_deps(mp, ino) +flush_inodedep_deps(vp, mp, ino) + struct vnode *vp; struct mount *mp; ino_t ino; { @@ -10887,8 +12133,7 @@ restart: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { - stat_jwait_inode++; - jwait(&inoref->if_list); + jwait(&inoref->if_list, MNT_WAIT); goto restart; } } @@ -10930,8 +12175,7 @@ flush_deplist(listhead, waitfor, errorp) TAILQ_FOREACH(adp, listhead, ad_next) { newblk = (struct newblk *)adp; if (newblk->nb_jnewblk != NULL) { - stat_jwait_newblk++; - jwait(&newblk->nb_jnewblk->jn_list); + jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); return (1); } if (newblk->nb_state & DEPCOMPLETE) @@ -10944,12 +12188,10 @@ flush_deplist(listhead, waitfor, errorp) return (1); } FREE_LOCK(&lk); - if (waitfor == MNT_NOWAIT) { + if (waitfor == MNT_NOWAIT) bawrite(bp); - } else if ((*errorp = bwrite(bp)) != 0) { - ACQUIRE_LOCK(&lk); - return (1); - } + else + *errorp = bwrite(bp); ACQUIRE_LOCK(&lk); return (1); } @@ -10995,8 +12237,7 @@ flush_newblk_dep(vp, mp, lbn) * Flush the journal. 
*/ if (newblk->nb_jnewblk != NULL) { - stat_jwait_newblk++; - jwait(&newblk->nb_jnewblk->jn_list); + jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT); continue; } /* @@ -11105,8 +12346,7 @@ restart: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { - stat_jwait_inode++; - jwait(&inoref->if_list); + jwait(&inoref->if_list, MNT_WAIT); goto restart; } } @@ -11237,6 +12477,8 @@ softdep_slowdown(vp) softdep_speedup(); stat_sync_limit_hit += 1; FREE_LOCK(&lk); + if (DOINGSUJ(vp)) + return (0); return (1); } @@ -11338,8 +12580,9 @@ retry: UFS_UNLOCK(ump); ACQUIRE_LOCK(&lk); process_removes(vp); + process_truncates(vp); if (ump->softdep_on_worklist > 0 && - process_worklist_item(UFSTOVFS(ump), LK_NOWAIT) != -1) { + process_worklist_item(UFSTOVFS(ump), 1, LK_NOWAIT) != 0) { stat_worklist_push += 1; FREE_LOCK(&lk); UFS_LOCK(ump); @@ -11363,24 +12606,14 @@ retry: UFS_UNLOCK(ump); MNT_ILOCK(mp); MNT_VNODE_FOREACH(lvp, mp, mvp) { - UFS_LOCK(ump); - if (ump->softdep_on_worklist > 0) { - UFS_UNLOCK(ump); - MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); - MNT_IUNLOCK(mp); - UFS_LOCK(ump); - stat_cleanup_retries += 1; - goto retry; - } - UFS_UNLOCK(ump); VI_LOCK(lvp); - if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0 || - VOP_ISLOCKED(lvp) != 0) { + if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0) { VI_UNLOCK(lvp); continue; } MNT_IUNLOCK(mp); - if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) { + if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, + curthread)) { MNT_ILOCK(mp); continue; } @@ -11394,8 +12627,17 @@ retry: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); - stat_cleanup_failures += 1; + lvp = ump->um_devvp; + if (vn_lock(lvp, LK_EXCLUSIVE | LK_NOWAIT) == 0) { + VOP_FSYNC(lvp, MNT_NOWAIT, curthread); + VOP_UNLOCK(lvp, 0); + } UFS_LOCK(ump); + if (ump->softdep_on_worklist > 0) { + stat_cleanup_retries += 1; + goto retry; + } + stat_cleanup_failures += 1; } if (time_second - starttime > 
stat_cleanup_high_delay) stat_cleanup_high_delay = time_second - starttime; @@ -11432,8 +12674,7 @@ request_cleanup(mp, resource) */ if (ump->softdep_on_worklist > max_softdeps / 10) { td->td_pflags |= TDP_SOFTDEP; - process_worklist_item(mp, LK_NOWAIT); - process_worklist_item(mp, LK_NOWAIT); + process_worklist_item(mp, 2, LK_NOWAIT); td->td_pflags &= ~TDP_SOFTDEP; stat_worklist_push += 2; return(1); @@ -11652,6 +12893,53 @@ clear_inodedeps(td) } } +void +softdep_buf_append(bp, wkhd) + struct buf *bp; + struct workhead *wkhd; +{ + struct worklist *wk; + + ACQUIRE_LOCK(&lk); + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(&bp->b_dep, wk); + } + FREE_LOCK(&lk); + +} + +void +softdep_inode_append(ip, cred, wkhd) + struct inode *ip; + struct ucred *cred; + struct workhead *wkhd; +{ + struct buf *bp; + struct fs *fs; + int error; + + fs = ip->i_fs; + error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, cred, &bp); + if (error) { + softdep_freework(wkhd); + return; + } + softdep_buf_append(bp, wkhd); + bqrelse(bp); +} + +void +softdep_freework(wkhd) + struct workhead *wkhd; +{ + + ACQUIRE_LOCK(&lk); + handle_jwork(wkhd); + FREE_LOCK(&lk); +} + /* * Function to determine if the buffer has outstanding dependencies * that will cause a roll-back if the buffer is written. 
If wantcount @@ -11664,6 +12952,7 @@ softdep_count_dependencies(bp, wantcount) { struct worklist *wk; struct bmsafemap *bmsafemap; + struct freework *freework; struct inodedep *inodedep; struct indirdep *indirdep; struct freeblks *freeblks; @@ -11711,6 +13000,13 @@ softdep_count_dependencies(bp, wantcount) case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); + TAILQ_FOREACH(freework, &indirdep->ir_trunc, fw_next) { + /* indirect truncation dependency */ + retval += 1; + if (!wantcount) + goto out; + } + LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) { /* indirect block pointer dependency */ retval += 1; @@ -11758,7 +13054,7 @@ softdep_count_dependencies(bp, wantcount) case D_FREEBLKS: freeblks = WK_FREEBLKS(wk); - if (LIST_FIRST(&freeblks->fb_jfreeblkhd)) { + if (LIST_FIRST(&freeblks->fb_jblkdephd)) { /* Freeblk journal dependency. */ retval += 1; if (!wantcount) diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 68b9619..b0f2d7e 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -269,6 +269,8 @@ ffs_mount(struct mount *mp) vfs_write_resume(mp); return (error); } + if (mp->mnt_flag & MNT_SOFTDEP) + softdep_unmount(mp); DROP_GIANT(); g_topology_lock(); g_access(ump->um_cp, 0, -1, 0); @@ -2034,12 +2036,10 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp) static void db_print_ffs(struct ufsmount *ump) { - db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d " - "su_req %d\n", + db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n", ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname, ump->um_devvp, ump->um_fs, ump->softdep_on_worklist, - ump->softdep_on_worklist_inprogress, ump->softdep_deps, - ump->softdep_req); + ump->softdep_deps, ump->softdep_req); } DB_SHOW_COMMAND(ffs, db_show_ffs) diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index cf6a5a8..9f528da 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -212,26 +212,32 @@ retry: int ffs_syncvnode(struct vnode 
*vp, int waitfor) { - struct inode *ip = VTOI(vp); + struct inode *ip; struct bufobj *bo; struct buf *bp; struct buf *nbp; - int s, error, wait, passes, skipmeta; ufs_lbn_t lbn; + int error, wait, passes; - wait = (waitfor == MNT_WAIT); - lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); - bo = &vp->v_bufobj; + ip = VTOI(vp); ip->i_flag &= ~IN_NEEDSYNC; + bo = &vp->v_bufobj; + + /* + * When doing MNT_WAIT we must first flush all dependencies + * on the inode. + */ + if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT && + (error = softdep_sync_metadata(vp)) != 0) + return (error); /* * Flush all dirty buffers associated with a vnode. */ - passes = NIADDR + 1; - skipmeta = 0; - if (wait) - skipmeta = 1; - s = splbio(); + error = 0; + passes = 0; + wait = 0; /* Always do an async pass first. */ + lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); BO_LOCK(bo); loop: TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) @@ -239,70 +245,53 @@ loop: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { /* * Reasons to skip this buffer: it has already been considered - * on this pass, this pass is the first time through on a - * synchronous flush request and the buffer being considered - * is metadata, the buffer has dependencies that will cause + * on this pass, the buffer has dependencies that will cause * it to be redirtied and it has not already been deferred, * or it is already being written. */ if ((bp->b_vflags & BV_SCANNED) != 0) continue; bp->b_vflags |= BV_SCANNED; - if ((skipmeta == 1 && bp->b_lblkno < 0)) + /* Flush indirects in order. 
*/ + if (waitfor == MNT_WAIT && bp->b_lblkno <= -NDADDR && + lbn_level(bp->b_lblkno) >= passes) continue; + if (bp->b_lblkno > lbn) + panic("ffs_syncvnode: syncing truncated data."); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) continue; BO_UNLOCK(bo); - if (!wait && !LIST_EMPTY(&bp->b_dep) && - (bp->b_flags & B_DEFERRED) == 0 && - buf_countdeps(bp, 0)) { - bp->b_flags |= B_DEFERRED; - BUF_UNLOCK(bp); - BO_LOCK(bo); - continue; - } if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); /* - * If this is a synchronous flush request, or it is not a - * file or device, start the write on this buffer immediately. + * Check for dependencies and potentially complete them. */ - if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) { - - /* - * On our final pass through, do all I/O synchronously - * so that we can find out if our flush is failing - * because of write errors. - */ - if (passes > 0 || !wait) { - if ((bp->b_flags & B_CLUSTEROK) && !wait) { - (void) vfs_bio_awrite(bp); - } else { - bremfree(bp); - splx(s); - (void) bawrite(bp); - s = splbio(); - } - } else { - bremfree(bp); - splx(s); - if ((error = bwrite(bp)) != 0) - return (error); - s = splbio(); + if (!LIST_EMPTY(&bp->b_dep) && + (error = softdep_sync_buf(vp, bp, + wait ? MNT_WAIT : MNT_NOWAIT)) != 0) { + /* I/O error. */ + if (error != EBUSY) { + BUF_UNLOCK(bp); + return (error); } - } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { - /* - * If the buffer is for data that has been truncated - * off the file, then throw it away. - */ + /* If we deferred once, don't defer again. 
*/ + if ((bp->b_flags & B_DEFERRED) == 0) { + bp->b_flags |= B_DEFERRED; + BUF_UNLOCK(bp); + goto next; + } + } + if (wait) { bremfree(bp); - bp->b_flags |= B_INVAL | B_NOCACHE; - splx(s); - brelse(bp); - s = splbio(); - } else - vfs_bio_awrite(bp); - + if ((error = bwrite(bp)) != 0) + return (error); + } else if ((bp->b_flags & B_CLUSTEROK)) { + (void) vfs_bio_awrite(bp); + } else { + bremfree(bp); + (void) bawrite(bp); + } +next: /* * Since we may have slept during the I/O, we need * to start from a known point. @@ -310,51 +299,44 @@ loop: BO_LOCK(bo); nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd); } + if (waitfor != MNT_WAIT) { + BO_UNLOCK(bo); + return (ffs_update(vp, waitfor)); + } + /* Drain IO to see if we're done. */ + bufobj_wwait(bo, 0, 0); /* - * If we were asked to do this synchronously, then go back for - * another pass, this time doing the metadata. + * Block devices associated with filesystems may have new I/O + * requests posted for them even if the vnode is locked, so no + * amount of trying will get them clean. We make several passes + * as a best effort. + * + * Regular files may need multiple passes to flush all dependency + * work as it is possible that we must write once per indirect + * level, once for the leaf, and once for the inode and each of + * these will be done with one sync and one async pass. */ - if (skipmeta) { - skipmeta = 0; - goto loop; - } - - if (wait) { - bufobj_wwait(bo, 3, 0); - BO_UNLOCK(bo); - - /* - * Ensure that any filesystem metatdata associated - * with the vnode has been written. - */ - splx(s); - if ((error = softdep_sync_metadata(vp)) != 0) - return (error); - s = splbio(); - - BO_LOCK(bo); - if (bo->bo_dirty.bv_cnt > 0) { - /* - * Block devices associated with filesystems may - * have new I/O requests posted for them even if - * the vnode is locked, so no amount of trying will - * get them clean. Thus we give block devices a - * good effort, then just give up. 
For all other file - * types, go around and try again until it is clean. - */ - if (passes > 0) { - passes -= 1; - goto loop; - } + if (bo->bo_dirty.bv_cnt > 0) { + /* Write the inode after sync passes to flush deps. */ + if (wait && DOINGSOFTDEP(vp)) { + BO_UNLOCK(bo); + ffs_update(vp, MNT_WAIT); + BO_LOCK(bo); + } + /* switch between sync/async. */ + wait = !wait; + if (wait == 1 || ++passes < NIADDR + 2) + goto loop; #ifdef INVARIANTS - if (!vn_isdisk(vp, NULL)) - vprint("ffs_fsync: dirty", vp); + if (!vn_isdisk(vp, NULL)) + vprint("ffs_fsync: dirty", vp); #endif - } } BO_UNLOCK(bo); - splx(s); - return (ffs_update(vp, wait)); + error = ffs_update(vp, MNT_WAIT); + if (DOINGSUJ(vp)) + softdep_journal_fsync(VTOI(vp)); + return (error); } static int diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index 0b7e908..a0b8e5b 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -664,6 +664,7 @@ lbn_offset(struct fs *fs, int level) #define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */ #define JOP_MVREF 5 /* Move a reference from one off to another. */ #define JOP_TRUNC 6 /* Partial truncation record. */ +#define JOP_SYNC 7 /* fsync() complete record. */ #define JREC_SIZE 32 /* Record and segment header size. */ @@ -729,7 +730,7 @@ struct jblkrec { /* * Truncation record. Records a partial truncation so that it may be - * completed later. + * completed at check time. Also used for sync records. */ struct jtrncrec { uint32_t jt_op; diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h index d864a4a..80c7315 100644 --- a/sys/ufs/ffs/softdep.h +++ b/sys/ufs/ffs/softdep.h @@ -127,7 +127,7 @@ #define DIRCHG 0x000080 /* diradd, dirrem only */ #define GOINGAWAY 0x000100 /* indirdep, jremref only */ #define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */ -#define UNUSED400 0x000400 /* currently available. */ +#define DELAYEDFREE 0x000400 /* allocindirect free delayed. 
*/ #define NEWBLOCK 0x000800 /* pagedep, jaddref only */ #define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */ #define UFS1FMT 0x002000 /* indirdep only */ @@ -195,8 +195,9 @@ struct worklist { #define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk)) #define WK_FREEDEP(wk) ((struct freedep *)(wk)) #define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk)) -#define WK_SBDEP(wk) ((struct sbdep *)wk) +#define WK_SBDEP(wk) ((struct sbdep *)(wk)) #define WK_JTRUNC(wk) ((struct jtrunc *)(wk)) +#define WK_JFSYNC(wk) ((struct jfsync *)(wk)) /* * Various types of lists @@ -213,10 +214,12 @@ LIST_HEAD(jaddrefhd, jaddref); LIST_HEAD(jremrefhd, jremref); LIST_HEAD(jmvrefhd, jmvref); LIST_HEAD(jnewblkhd, jnewblk); -LIST_HEAD(jfreeblkhd, jfreeblk); +LIST_HEAD(jblkdephd, jblkdep); LIST_HEAD(freeworkhd, freework); +TAILQ_HEAD(freeworklst, freework); TAILQ_HEAD(jseglst, jseg); TAILQ_HEAD(inoreflst, inoref); +TAILQ_HEAD(freeblklst, freeblks); /* * The "pagedep" structure tracks the various dependencies related to @@ -321,6 +324,7 @@ struct inodedep { struct allocdirectlst id_newinoupdt; /* updates when inode written */ struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */ struct allocdirectlst id_newextupdt; /* extdata updates at ino write */ + struct freeblklst id_freeblklst; /* List of partial truncates. */ union { struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */ struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */ @@ -342,8 +346,9 @@ struct inodedep { struct bmsafemap { struct worklist sm_list; /* cylgrp buffer */ # define sm_state sm_list.wk_state - int sm_cg; LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */ + LIST_ENTRY(bmsafemap) sm_next; /* Mount list. 
*/ + int sm_cg; struct buf *sm_buf; /* associated buffer */ struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */ @@ -355,6 +360,8 @@ struct bmsafemap { struct newblkhd sm_newblkwr; /* writing newblk deps */ struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */ struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */ + struct workhead sm_freehd; /* Freedep deps. */ + struct workhead sm_freewr; /* Written freedeps. */ }; /* @@ -442,14 +449,15 @@ struct indirdep { struct worklist ir_list; /* buffer holding indirect block */ # define ir_state ir_list.wk_state /* indirect block pointer state */ LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */ + TAILQ_HEAD(, freework) ir_trunc; /* List of truncations. */ caddr_t ir_saveddata; /* buffer cache contents */ struct buf *ir_savebp; /* buffer holding safe copy */ + struct buf *ir_bp; /* buffer holding live copy */ struct allocindirhd ir_completehd; /* waiting for indirdep complete */ struct allocindirhd ir_writehd; /* Waiting for the pointer write. */ struct allocindirhd ir_donehd; /* done waiting to update safecopy */ struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ - struct jnewblkhd ir_jnewblkhd; /* Canceled block allocations. */ - struct workhead ir_jwork; /* Journal work pending. */ + struct freeblks *ir_freeblks; /* Freeblks that frees this indir. */ }; /* @@ -471,6 +479,7 @@ struct allocindir { LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ struct indirdep *ai_indirdep; /* address of associated indirdep */ ufs2_daddr_t ai_oldblkno; /* old value of block pointer */ + ufs_lbn_t ai_lbn; /* Logical block number. */ int ai_offset; /* Pointer offset in parent. 
*/ }; #define ai_newblkno ai_block.nb_newblkno @@ -502,6 +511,7 @@ struct freefrag { ufs2_daddr_t ff_blkno; /* fragment physical block number */ long ff_fragsize; /* size of fragment being deleted */ ino_t ff_inum; /* owning inode number */ + enum vtype ff_vtype; /* owning inode's file type */ }; /* @@ -516,14 +526,23 @@ struct freefrag { struct freeblks { struct worklist fb_list; /* id_inowait or delayed worklist */ # define fb_state fb_list.wk_state /* inode and dirty block state */ - struct jfreeblkhd fb_jfreeblkhd; /* Journal entries pending */ + TAILQ_ENTRY(freeblks) fb_next; /* List of inode truncates. */ + struct jblkdephd fb_jblkdephd; /* Journal entries pending */ struct workhead fb_freeworkhd; /* Work items pending */ struct workhead fb_jwork; /* Journal work pending */ - ino_t fb_previousinum; /* inode of previous owner of blocks */ - uid_t fb_uid; /* uid of previous owner of blocks */ struct vnode *fb_devvp; /* filesystem device vnode */ - ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ +#ifdef QUOTA + struct dquot *fb_quota[MAXQUOTAS]; /* quotas to be adjusted */ +#endif + uint64_t fb_modrev; /* Inode revision at start of trunc. */ + off_t fb_len; /* Length we're truncating to. */ + ufs2_daddr_t fb_chkcnt; /* Expected blks released. */ + ufs2_daddr_t fb_freecnt; /* Actual blocks released. */ + ino_t fb_inum; /* inode owner of blocks */ + enum vtype fb_vtype; /* inode owner's file type */ + uid_t fb_uid; /* uid of previous owner of blocks */ int fb_ref; /* Children outstanding. */ + int fb_cgwait; /* cg writes outstanding. */ }; /* @@ -538,16 +557,18 @@ struct freeblks { struct freework { struct worklist fw_list; /* Delayed worklist. */ # define fw_state fw_list.wk_state - LIST_ENTRY(freework) fw_next; /* For seg journal list. */ - struct jnewblk *fw_jnewblk; /* Journal entry to cancel. */ + LIST_ENTRY(freework) fw_segs; /* Seg list. */ + TAILQ_ENTRY(freework) fw_next; /* Hash/Trunc list. 
*/ + struct jnewblk *fw_jnewblk; /* Journal entry to cancel. */ struct freeblks *fw_freeblks; /* Root of operation. */ struct freework *fw_parent; /* Parent indirect. */ + struct indirdep *fw_indir; /* indirect block. */ ufs2_daddr_t fw_blkno; /* Our block #. */ ufs_lbn_t fw_lbn; /* Original lbn before free. */ - int fw_frags; /* Number of frags. */ - int fw_ref; /* Number of children out. */ - int fw_off; /* Current working position. */ - struct workhead fw_jwork; /* Journal work pending. */ + uint16_t fw_frags; /* Number of frags. */ + uint16_t fw_ref; /* Number of children out. */ + uint16_t fw_off; /* Current working position. */ + uint16_t fw_start; /* Start of partial truncate. */ }; /* @@ -674,6 +695,7 @@ struct dirrem { LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */ struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */ ino_t dm_oldinum; /* inum of the removed dir entry */ + doff_t dm_offset; /* offset of removed dir entry in blk */ union { struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ ino_t dmu_dirinum; /* parent inode number (for rmdir) */ @@ -707,7 +729,7 @@ struct dirrem { */ struct newdirblk { struct worklist db_list; /* id_inowait or pg_newdirblk */ -# define db_state db_list.wk_state /* unused */ +# define db_state db_list.wk_state struct pagedep *db_pagedep; /* associated pagedep */ struct workhead db_mkdir; }; @@ -807,29 +829,36 @@ struct jnewblk { # define jn_state jn_list.wk_state struct jsegdep *jn_jsegdep; /* Will track our journal record. */ LIST_ENTRY(jnewblk) jn_deps; /* Jnewblks on sm_jnewblkhd. */ - LIST_ENTRY(jnewblk) jn_indirdeps; /* Jnewblks on ir_jnewblkhd. */ struct worklist *jn_dep; /* Dependency to ref completed seg. */ - ino_t jn_ino; /* Ino to which allocated. */ ufs_lbn_t jn_lbn; /* Lbn to which allocated. */ ufs2_daddr_t jn_blkno; /* Blkno allocated */ + ino_t jn_ino; /* Ino to which allocated. */ int jn_oldfrags; /* Previous fragments when extended. 
*/ int jn_frags; /* Number of fragments. */ }; /* + * A "jblkdep" structure tracks jfreeblk and jtrunc records attached to a + * freeblks structure. + */ +struct jblkdep { + struct worklist jb_list; /* For softdep journal pending. */ + struct jsegdep *jb_jsegdep; /* Reference to the jseg. */ + struct freeblks *jb_freeblks; /* Back pointer to freeblks. */ + LIST_ENTRY(jblkdep) jb_deps; /* Dep list on freeblks. */ + +}; + +/* * A "jfreeblk" structure tracks the journal write for freeing a block * or tree of blocks. The block pointer must not be cleared in the inode * or indirect prior to the jfreeblk being written to the journal. */ struct jfreeblk { - struct worklist jf_list; /* Linked to softdep_journal_pending. */ -# define jf_state jf_list.wk_state - struct jsegdep *jf_jsegdep; /* Will track our journal record. */ - struct freeblks *jf_freeblks; /* Back pointer to freeblks. */ - LIST_ENTRY(jfreeblk) jf_deps; /* Jfreeblk on fb_jfreeblkhd. */ - ino_t jf_ino; /* Ino from which blocks freed. */ + struct jblkdep jf_dep; /* freeblks linkage. */ ufs_lbn_t jf_lbn; /* Lbn from which blocks freed. */ ufs2_daddr_t jf_blkno; /* Blkno being freed. */ + ino_t jf_ino; /* Ino from which blocks freed. */ int jf_frags; /* Number of frags being freed. */ }; @@ -843,24 +872,31 @@ struct jfreefrag { # define fr_state fr_list.wk_state struct jsegdep *fr_jsegdep; /* Will track our journal record. */ struct freefrag *fr_freefrag; /* Back pointer to freefrag. */ - ino_t fr_ino; /* Ino from which frag freed. */ ufs_lbn_t fr_lbn; /* Lbn from which frag freed. */ ufs2_daddr_t fr_blkno; /* Blkno being freed. */ + ino_t fr_ino; /* Ino from which frag freed. */ int fr_frags; /* Size of frag being freed. */ }; /* - * A "jtrunc" journals the intent to truncate an inode to a non-zero - * value. This is done synchronously prior to the synchronous partial - * truncation process. The jsegdep is not released until the truncation - * is complete and the truncated inode is fsync'd. 
+ * A "jtrunc" journals the intent to truncate an inode's data or extent area. */ struct jtrunc { - struct worklist jt_list; /* Linked to softdep_journal_pending. */ - struct jsegdep *jt_jsegdep; /* Will track our journal record. */ - ino_t jt_ino; /* Ino being truncated. */ - off_t jt_size; /* Final file size. */ - int jt_extsize; /* Final extent size. */ + struct jblkdep jt_dep; /* freeblks linkage. */ + off_t jt_size; /* Final file size. */ + int jt_extsize; /* Final extent size. */ + ino_t jt_ino; /* Ino being truncated. */ +}; + +/* + * A "jfsync" journals the completion of an fsync which invalidates earlier + * jtrunc records in the journal. + */ +struct jfsync { + struct worklist jfs_list; /* For softdep journal pending. */ + off_t jfs_size; /* Sync file size. */ + int jfs_extsize; /* Sync extent size. */ + ino_t jfs_ino; /* ino being synced. */ }; /* diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index f6c4bb5..7adcc73 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -127,6 +127,8 @@ struct inode { #define IN_EA_LOCKED 0x0200 #define IN_EA_LOCKWAIT 0x0400 +#define IN_TRUNCATED 0x0800 /* Journaled truncation pending. 
*/ + #define i_devvp i_ump->um_devvp #define i_umbufobj i_ump->um_bo #define i_dirhash i_un.dirhash diff --git a/sys/ufs/ufs/quota.h b/sys/ufs/ufs/quota.h index ca0dcce..3dfcf26 100644 --- a/sys/ufs/ufs/quota.h +++ b/sys/ufs/ufs/quota.h @@ -239,6 +239,12 @@ int setuse(struct thread *, struct mount *, u_long, int, void *); int getquotasize(struct thread *, struct mount *, u_long, int, void *); vfs_quotactl_t ufs_quotactl; +#ifdef SOFTUPDATES +int quotaref(struct vnode *, struct dquot **); +void quotarele(struct dquot **); +void quotaadj(struct dquot **, struct ufsmount *, int64_t); +#endif /* SOFTUPDATES */ + #else /* !_KERNEL */ #include <sys/cdefs.h> diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 45ebea1..391b3e9 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -967,7 +967,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) return (0); if (tvp != NULL) VOP_UNLOCK(tvp, 0); - error = VOP_FSYNC(dvp, MNT_WAIT, td); + (void) VOP_FSYNC(dvp, MNT_WAIT, td); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); return (error); diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index be82b8f..59e89f9 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -1613,6 +1613,101 @@ dqflush(struct vnode *vp) } /* + * The following three functions are provided for the adjustment of + * quotas by the soft updates code. + */ +#ifdef SOFTUPDATES +/* + * Acquire a reference to the quota structures associated with a vnode. + * Return count of number of quota structures found. + */ +int +quotaref(vp, qrp) + struct vnode *vp; + struct dquot **qrp; +{ + struct inode *ip; + struct dquot *dq; + int i, found; + + for (i = 0; i < MAXQUOTAS; i++) + qrp[i] = NODQUOT; + /* + * Disk quotas must be turned off for system files. Currently + * snapshot and quota files. + */ + if ((vp->v_vflag & VV_SYSTEM) != 0) + return (0); + /* + * Iterate through and copy active quotas. 
+ */ + found = 0; + ip = VTOI(vp); + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + DQREF(dq); + qrp[i] = dq; + found++; + } + return (found); +} + +/* + * Release a set of quota structures obtained from a vnode. + */ +void +quotarele(qrp) + struct dquot **qrp; +{ + struct dquot *dq; + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + dqrele(NULL, dq); + } +} + +/* + * Adjust the number of blocks associated with a quota. + * Positive numbers when adding blocks; negative numbers when freeing blocks. + */ +void +quotaadj(qrp, ump, blkcount) + struct dquot **qrp; + struct ufsmount *ump; + int64_t blkcount; +{ + struct dquot *dq; + ufs2_daddr_t ncurblocks; + int i; + + if (blkcount == 0) + return; + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "adjqta"); + ncurblocks = dq->dq_curblocks + blkcount; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + if (blkcount < 0) + dq->dq_flags &= ~DQ_BLKS; + else if (dq->dq_curblocks + blkcount >= dq->dq_bsoftlimit && + dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_btime = time_second + ump->um_btime[i]; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } +} +#endif /* SOFTUPDATES */ + +/* * 32-bit / 64-bit conversion functions. * * 32-bit quota records are stored in native byte order. 
Attention must diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 34b1758..733413d 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1838,6 +1838,8 @@ ufs_mkdir(ap) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); @@ -1850,6 +1852,8 @@ ufs_mkdir(ap) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); @@ -2608,6 +2612,8 @@ ufs_makeinode(mode, dvp, vpp, cnp) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error); @@ -2620,6 +2626,8 @@ ufs_makeinode(mode, dvp, vpp, cnp) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error); diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index c2cfcfb..7874105 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -61,6 +61,7 @@ struct jblocks; struct inodedep; TAILQ_HEAD(inodedeplst, inodedep); +LIST_HEAD(bmsafemaphd, bmsafemap); /* This structure describes the UFS specific mount structure data. 
*/ struct ufsmount { @@ -82,10 +83,10 @@ struct ufsmount { struct workhead softdep_journal_pending; /* journal work queue */ struct worklist *softdep_journal_tail; /* Tail pointer for above */ struct jblocks *softdep_jblocks; /* Journal block information */ - struct inodedeplst softdep_unlinked; /* Unlinked inodes */ + struct inodedeplst softdep_unlinked; /* Unlinked inodes */ + struct bmsafemaphd softdep_dirtycg; /* Dirty CGs */ int softdep_on_journal; /* Items on the journal list */ int softdep_on_worklist; /* Items on the worklist */ - int softdep_on_worklist_inprogress; /* Busy items on worklist */ int softdep_deps; /* Total dependency count */ int softdep_accdeps; /* accumulated dep count */ int softdep_req; /* Wakeup when deps hits 0. */ diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index d417a84..7568f57 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1089,10 +1089,20 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. + * + * The object lock is not held here. Therefore, like + * a pmap operation, the page queues lock may be + * required in order to call vm_page_dirty(). See + * vm_page_clear_dirty_mask(). */ +#if defined(__amd64__) || defined(__i386__) || defined(__ia64__) || \ + defined(__mips__) + vm_page_dirty(*mp); +#else vm_page_lock_queues(); vm_page_dirty(*mp); vm_page_unlock_queues(); +#endif } } if (pmap_failed) { diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index dbcac85..1a3d398 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -852,6 +852,21 @@ rescan: flags, &clearobjflags); if (object->generation != curgeneration) goto rescan; + + /* + * If the VOP_PUTPAGES() did a truncated write, so + * that even the first page of the run is not fully + * written, vm_pageout_flush() returns 0 as the run + * length. 
Since the condition that caused truncated + * write may be permanent, e.g. exhausted free space, + * accepting n == 0 would cause an infinite loop. + * + * Forwarding the iterator leaves the unwritten page + * behind, but there is not much we can do there if + * filesystem refuses to write it. + */ + if (n == 0) + n = 1; np = vm_page_find_least(object, pi + n); } #if 0 diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index e2758ec..033c4c9 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -729,7 +729,12 @@ vm_page_sleep(vm_page_t m, const char *msg) /* * vm_page_dirty: * - * make page all dirty + * Set all bits in the page's dirty field. + * + * The object containing the specified page must be locked if the call is + * made from the machine-independent layer. If, however, the call is + * made from the pmap layer, then the page queues lock may be required. + * See vm_page_clear_dirty_mask(). */ void vm_page_dirty(vm_page_t m) @@ -2325,15 +2330,41 @@ vm_page_clear_dirty_mask(vm_page_t m, int pagebits) /* * If the object is locked and the page is neither VPO_BUSY nor * PG_WRITEABLE, then the page's dirty field cannot possibly be - * modified by a concurrent pmap operation. + * set by a concurrent pmap operation. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) m->dirty &= ~pagebits; else { +#if defined(__amd64__) || defined(__i386__) || defined(__ia64__) || \ + defined(__mips__) + /* + * On the aforementioned architectures, the page queues lock + * is not required by the following read-modify-write + * operation. The combination of the object's lock and an + * atomic operation suffice. Moreover, the pmap layer on + * these architectures can call vm_page_dirty() without + * holding the page queues lock. 
+ */ +#if PAGE_SIZE == 4096 + atomic_clear_char(&m->dirty, pagebits); +#elif PAGE_SIZE == 8192 + atomic_clear_short(&m->dirty, pagebits); +#elif PAGE_SIZE == 16384 + atomic_clear_int(&m->dirty, pagebits); +#else +#error "PAGE_SIZE is not supported." +#endif +#else + /* + * Otherwise, the page queues lock is required to ensure that + * a concurrent pmap operation does not set the page's dirty + * field during the following read-modify-write operation. + */ vm_page_lock_queues(); m->dirty &= ~pagebits; vm_page_unlock_queues(); +#endif } } @@ -2636,6 +2667,23 @@ vm_page_cowsetup(vm_page_t m) return (0); } +#ifdef INVARIANTS +void +vm_page_object_lock_assert(vm_page_t m) +{ + + /* + * Certain of the page's fields may only be modified by the + * holder of the containing object's lock or the setter of the + * page's VPO_BUSY flag. Unfortunately, the setter of the + * VPO_BUSY flag is not recorded, and thus cannot be checked + * here. + */ + if (m->object != NULL && (m->oflags & VPO_BUSY) == 0) + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); +} +#endif + #include "opt_ddb.h" #ifdef DDB #include <sys/kernel.h> diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index c34d2f0..e852313 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -89,10 +89,26 @@ * and offset to which this page belongs (for pageout), * and sundry status bits. * - * Fields in this structure are locked either by the lock on the - * object that the page belongs to (O), its corresponding page lock (P), - * or by the lock on the page queues (Q). - * + * In general, operations on this structure's mutable fields are + * synchronized using either one of or a combination of the lock on the + * object that the page belongs to (O), the pool lock for the page (P), + * or the lock for either the free or paging queues (Q). If a field is + * annotated below with two of these locks, then holding either lock is + * sufficient for read access, but both locks are required for write + * access. 
+ * + * In contrast, the synchronization of accesses to the page's dirty field + * is machine dependent (M). In the machine-independent layer, the lock + * on the object that the page belongs to must be held in order to + * operate on the field. However, the pmap layer is permitted to set + * all bits within the field without holding that lock. Therefore, if + * the underlying architecture does not support atomic read-modify-write + * operations on the field's type, then the machine-independent layer + * must also hold the page queues lock when performing read-modify-write + * operations and the pmap layer must hold the page queues lock when + * setting the field. In the machine-independent layer, the + * implementation of read-modify-write operations on the field is + * encapsulated in vm_page_clear_dirty_mask(). */ TAILQ_HEAD(pglist, vm_page); @@ -120,18 +136,19 @@ struct vm_page { u_char busy; /* page busy count (O) */ /* NOTE that these must support one bit per DEV_BSIZE in a page!!! */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ + /* In reality, support for 32KB pages is not fully implemented. 
*/ #if PAGE_SIZE == 4096 u_char valid; /* map of valid DEV_BSIZE chunks (O) */ - u_char dirty; /* map of dirty DEV_BSIZE chunks (O) */ + u_char dirty; /* map of dirty DEV_BSIZE chunks (M) */ #elif PAGE_SIZE == 8192 u_short valid; /* map of valid DEV_BSIZE chunks (O) */ - u_short dirty; /* map of dirty DEV_BSIZE chunks (O) */ + u_short dirty; /* map of dirty DEV_BSIZE chunks (M) */ #elif PAGE_SIZE == 16384 u_int valid; /* map of valid DEV_BSIZE chunks (O) */ - u_int dirty; /* map of dirty DEV_BSIZE chunks (O) */ + u_int dirty; /* map of dirty DEV_BSIZE chunks (M) */ #elif PAGE_SIZE == 32768 u_long valid; /* map of valid DEV_BSIZE chunks (O) */ - u_long dirty; /* map of dirty DEV_BSIZE chunks (O) */ + u_long dirty; /* map of dirty DEV_BSIZE chunks (M) */ #endif }; @@ -383,6 +400,13 @@ void vm_page_cowfault (vm_page_t); int vm_page_cowsetup(vm_page_t); void vm_page_cowclear (vm_page_t); +#ifdef INVARIANTS +void vm_page_object_lock_assert(vm_page_t m); +#define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) +#else +#define VM_PAGE_OBJECT_LOCK_ASSERT(m) (void)0 +#endif + /* * vm_page_sleep_if_busy: * @@ -412,6 +436,8 @@ vm_page_sleep_if_busy(vm_page_t m, int also_m_busy, const char *msg) static __inline void vm_page_undirty(vm_page_t m) { + + VM_PAGE_OBJECT_LOCK_ASSERT(m); m->dirty = 0; } diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index f497d41..a8eca20 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -1089,7 +1089,7 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, count = bytecount / PAGE_SIZE; for (i = 0; i < count; i++) - rtvals[i] = VM_PAGER_AGAIN; + rtvals[i] = VM_PAGER_ERROR; if ((int64_t)ma[0]->pindex < 0) { printf("vnode_pager_putpages: attempt to write meta-data!!! 
-- 0x%lx(%lx)\n", @@ -1191,3 +1191,26 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, } return rtvals[0]; } + +void +vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written) +{ + vm_object_t obj; + int i, pos; + + if (written == 0) + return; + obj = ma[0]->object; + VM_OBJECT_LOCK(obj); + for (i = 0, pos = 0; pos < written; i++, pos += PAGE_SIZE) { + if (pos < trunc_page(written)) { + rtvals[i] = VM_PAGER_OK; + vm_page_undirty(ma[i]); + } else { + /* Partially written page. */ + rtvals[i] = VM_PAGER_AGAIN; + vm_page_clear_dirty(ma[i], 0, written & PAGE_MASK); + } + } + VM_OBJECT_UNLOCK(obj); +} diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h index 88ae306..5e3d5eb 100644 --- a/sys/vm/vnode_pager.h +++ b/sys/vm/vnode_pager.h @@ -49,5 +49,8 @@ int vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m, int count, boolean_t sync, int *rtvals); + +void vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written); + #endif /* _KERNEL */ #endif /* _VNODE_PAGER_ */ diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index e188d62..44e3a2e 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -1192,7 +1192,7 @@ lapic_handle_timer(struct trapframe *frame) * and unlike other schedulers it actually schedules threads to * those CPUs. 
*/ - if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0) + if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c index d82bd73..9501eee 100644 --- a/sys/x86/x86/tsc.c +++ b/sys/x86/x86/tsc.c @@ -79,7 +79,8 @@ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status); static void tsc_freq_changing(void *arg, const struct cf_level *level, int *status); -static unsigned tsc_get_timecount(struct timecounter *tc); +static unsigned tsc_get_timecount(struct timecounter *tc); +static unsigned tsc_get_timecount_low(struct timecounter *tc); static void tsc_levels_changed(void *arg, int unit); static struct timecounter tsc_timecounter = { @@ -166,9 +167,6 @@ tsc_freq_vmware(void) tsc_freq = regs[0] | ((uint64_t)regs[1] << 32); } tsc_is_invariant = 1; -#ifdef SMP - smp_tsc = 1; /* XXX */ -#endif return (1); } @@ -385,7 +383,29 @@ test_smp_tsc(void) if (bootverbose) printf("SMP: %sed TSC synchronization test\n", smp_tsc ? "pass" : "fail"); - return (smp_tsc ? 800 : -100); + if (smp_tsc && tsc_is_invariant) { + switch (cpu_vendor_id) { + case CPU_VENDOR_AMD: + /* + * Starting with Family 15h processors, TSC clock + * source is in the north bridge. Check whether + * we have a single-socket/multi-core platform. + * XXX Need more work for complex cases. + */ + if (CPUID_TO_FAMILY(cpu_id) < 0x15 || + (amd_feature2 & AMDID2_CMP) == 0 || + smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1) + break; + return (1000); + case CPU_VENDOR_INTEL: + /* + * XXX Assume Intel platforms have synchronized TSCs. + */ + return (1000); + } + return (800); + } + return (-100); } #undef N @@ -395,11 +415,19 @@ test_smp_tsc(void) static void init_TSC_tc(void) { + uint64_t max_freq; + int shift; if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) return; /* + * Limit timecounter frequency to fit in an int and prevent it from + * overflowing too fast. 
+ */ + max_freq = UINT_MAX; + + /* * We can not use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fools pursuit, since * any and all of the time spent in various SMM code can't @@ -421,13 +449,30 @@ init_TSC_tc(void) * We can not use the TSC in SMP mode unless the TSCs on all CPUs are * synchronized. If the user is sure that the system has synchronized * TSCs, set kern.timecounter.smp_tsc tunable to a non-zero value. + * We also limit the frequency even lower to avoid "temporal anomalies" + * as much as possible. */ - if (smp_cpus > 1) + if (smp_cpus > 1) { tsc_timecounter.tc_quality = test_smp_tsc(); + max_freq >>= 8; + } else #endif + if (tsc_is_invariant) + tsc_timecounter.tc_quality = 1000; + init: + for (shift = 0; shift < 31 && (tsc_freq >> shift) > max_freq; shift++) + ; + if (shift > 0) { + tsc_timecounter.tc_get_timecount = tsc_get_timecount_low; + tsc_timecounter.tc_name = "TSC-low"; + if (bootverbose) + printf("TSC timecounter discards lower %d bit(s)\n", + shift); + } if (tsc_freq != 0) { - tsc_timecounter.tc_frequency = tsc_freq; + tsc_timecounter.tc_frequency = tsc_freq >> shift; + tsc_timecounter.tc_priv = (void *)(intptr_t)shift; tc_init(&tsc_timecounter); } } @@ -499,7 +544,8 @@ tsc_freq_changed(void *arg, const struct cf_level *level, int status) /* Total setting for this level gives the new frequency in MHz. 
*/ freq = (uint64_t)level->total_set.freq * 1000000; atomic_store_rel_64(&tsc_freq, freq); - atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq); + tsc_timecounter.tc_frequency = + freq >> (int)(intptr_t)tsc_timecounter.tc_priv; } static int @@ -514,7 +560,8 @@ sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) error = sysctl_handle_64(oidp, &freq, 0, req); if (error == 0 && req->newptr != NULL) { atomic_store_rel_64(&tsc_freq, freq); - atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq); + atomic_store_rel_64(&tsc_timecounter.tc_frequency, + freq >> (int)(intptr_t)tsc_timecounter.tc_priv); } return (error); } @@ -523,8 +570,18 @@ SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW, 0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency"); static u_int -tsc_get_timecount(struct timecounter *tc) +tsc_get_timecount(struct timecounter *tc __unused) { return (rdtsc32()); } + +static u_int +tsc_get_timecount_low(struct timecounter *tc) +{ + uint32_t rv; + + __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" + : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); + return (rv); +} diff --git a/sys/xen/interface/io/xenbus.h b/sys/xen/interface/io/xenbus.h index 5e24f31..672c7d4 100644 --- a/sys/xen/interface/io/xenbus.h +++ b/sys/xen/interface/io/xenbus.h @@ -64,6 +64,15 @@ enum xenbus_state { /* * Closed: No connection exists between front and back end. + * + * For backend devices with the "online" attribute, the front can + * request a reconnect at any time. To handle this transition + * gracefully, backend devices must reinitialize any XenStore data + * used to negotiate features with a peer before transitioning to + * the closed state. When a reconnect request occurs, the + * XenBus backend support code will automatically transition the + * backend device from Closed to InitWait, kicking off the ring + * and feature negotiation process. 
*/ XenbusStateClosed = 6, diff --git a/sys/xen/xenbus/xenbus.c b/sys/xen/xenbus/xenbus.c index c3e5fee..8887066 100644 --- a/sys/xen/xenbus/xenbus.c +++ b/sys/xen/xenbus/xenbus.c @@ -103,12 +103,13 @@ xenbus_strstate(XenbusState state) int xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, - xs_watch_cb_t *callback) + xs_watch_cb_t *callback, uintptr_t callback_data) { int error; watch->node = path; watch->callback = callback; + watch->callback_data = callback_data; error = xs_register_watch(watch); @@ -124,7 +125,7 @@ xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, int xenbus_watch_path2(device_t dev, const char *path, const char *path2, struct xs_watch *watch, - xs_watch_cb_t *callback) + xs_watch_cb_t *callback, uintptr_t callback_data) { int error; char *state = malloc(strlen(path) + 1 + strlen(path2) + 1, @@ -134,7 +135,7 @@ xenbus_watch_path2(device_t dev, const char *path, strcat(state, "/"); strcat(state, path2); - error = xenbus_watch_path(dev, state, watch, callback); + error = xenbus_watch_path(dev, state, watch, callback, callback_data); if (error) { free(state,M_XENBUS); } @@ -286,3 +287,8 @@ xenbus_dev_is_online(device_t dev) return (value); } + +void +xenbus_localend_changed(device_t dev, const char *path) +{ +} diff --git a/sys/xen/xenbus/xenbus_if.m b/sys/xen/xenbus/xenbus_if.m index d671418..87d7c7f 100644 --- a/sys/xen/xenbus/xenbus_if.m +++ b/sys/xen/xenbus/xenbus_if.m @@ -27,7 +27,11 @@ # #include <sys/bus.h> -#include <xen/interface/io/xenbus.h> + +#include <machine/atomic.h> +#include <machine/xen/xen-os.h> +#include <xen/evtchn.h> +#include <xen/xenbus/xenbusvar.h> INTERFACE xenbus; @@ -39,7 +43,21 @@ INTERFACE xenbus; * state has changed.. * \param _newstate The new state of the otherend device. 
*/ -METHOD int otherend_changed { +METHOD void otherend_changed { device_t _dev; enum xenbus_state _newstate; }; + +/** + * \brief Callback triggered when the XenStore tree of the local end + * of a split device changes. + * + * \param _dev NewBus device_t for this XenBus device whose otherend's + * state has changed.. + * \param _path The tree relative sub-path to the modified node. The empty + * string indicates the root of the tree was destroyed. + */ +METHOD void localend_changed { + device_t _dev; + const char * _path; +} DEFAULT xenbus_localend_changed; diff --git a/sys/xen/xenbus/xenbusb.c b/sys/xen/xenbus/xenbusb.c index 4bc86aa..cc519c5 100644 --- a/sys/xen/xenbus/xenbusb.c +++ b/sys/xen/xenbus/xenbusb.c @@ -90,10 +90,16 @@ xenbusb_free_child_ivars(struct xenbus_device_ivars *ivars) ivars->xd_otherend_watch.node = NULL; } + if (ivars->xd_local_watch.node != NULL) { + xs_unregister_watch(&ivars->xd_local_watch); + ivars->xd_local_watch.node = NULL; + } + if (ivars->xd_node != NULL) { free(ivars->xd_node, M_XENBUS); ivars->xd_node = NULL; } + ivars->xd_node_len = 0; if (ivars->xd_type != NULL) { free(ivars->xd_type, M_XENBUS); @@ -104,6 +110,7 @@ xenbusb_free_child_ivars(struct xenbus_device_ivars *ivars) free(ivars->xd_otherend_path, M_XENBUS); ivars->xd_otherend_path = NULL; } + ivars->xd_otherend_path_len = 0; free(ivars, M_XENBUS); } @@ -121,30 +128,64 @@ xenbusb_free_child_ivars(struct xenbus_device_ivars *ivars) * watch event data. The vector should be indexed via the * xs_watch_type enum in xs_wire.h. * \param vec_size The number of elements in vec. - * - * \return The device_t of the found device if any, or NULL. - * - * \note device_t is a pointer type, so it can be compared against - * NULL for validity. 
*/ static void -xenbusb_otherend_changed(struct xs_watch *watch, const char **vec, +xenbusb_otherend_watch_cb(struct xs_watch *watch, const char **vec, unsigned int vec_size __unused) { struct xenbus_device_ivars *ivars; - device_t dev; + device_t child; + device_t bus; + const char *path; enum xenbus_state newstate; - ivars = (struct xenbus_device_ivars *) watch; - dev = ivars->xd_dev; + ivars = (struct xenbus_device_ivars *)watch->callback_data; + child = ivars->xd_dev; + bus = device_get_parent(child); - if (!ivars->xd_otherend_path - || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH], - strlen(ivars->xd_otherend_path))) + path = vec[XS_WATCH_PATH]; + if (ivars->xd_otherend_path == NULL + || strncmp(ivars->xd_otherend_path, path, ivars->xd_otherend_path_len)) return; newstate = xenbus_read_driver_state(ivars->xd_otherend_path); - XENBUS_OTHEREND_CHANGED(dev, newstate); + XENBUSB_OTHEREND_CHANGED(bus, child, newstate); +} + +/** + * XenBus watch callback registered against the XenStore sub-tree + * representing the local half of a split device connection. + * + * This callback is invoked whenever any XenStore data in the subtree + * is modified, either by us or another privileged domain. + * + * \param watch The xs_watch object used to register this callback + * function. + * \param vec An array of pointers to NUL terminated strings containing + * watch event data. The vector should be indexed via the + * xs_watch_type enum in xs_wire.h. + * \param vec_size The number of elements in vec.
+ * + */ +static void +xenbusb_local_watch_cb(struct xs_watch *watch, const char **vec, + unsigned int vec_size __unused) +{ + struct xenbus_device_ivars *ivars; + device_t child; + device_t bus; + const char *path; + + ivars = (struct xenbus_device_ivars *)watch->callback_data; + child = ivars->xd_dev; + bus = device_get_parent(child); + + path = vec[XS_WATCH_PATH]; + if (ivars->xd_node == NULL + || strncmp(ivars->xd_node, path, ivars->xd_node_len)) + return; + + XENBUSB_LOCALEND_CHANGED(bus, child, &path[ivars->xd_node_len]); } /** @@ -193,12 +234,14 @@ xenbusb_delete_child(device_t dev, device_t child) /* * We no longer care about the otherend of the - * connection. Cancel the watch now so that we + * connection. Cancel the watches now so that we * don't try to handle an event for a partially * detached child. */ if (ivars->xd_otherend_watch.node != NULL) xs_unregister_watch(&ivars->xd_otherend_watch); + if (ivars->xd_local_watch.node != NULL) + xs_unregister_watch(&ivars->xd_local_watch); device_delete_child(dev, child); xenbusb_free_child_ivars(ivars); @@ -421,6 +464,7 @@ xenbusb_probe_children(device_t dev) */ ivars = device_get_ivars(kids[i]); xs_register_watch(&ivars->xd_otherend_watch); + xs_register_watch(&ivars->xd_local_watch); } free(kids, M_TEMP); } @@ -475,7 +519,7 @@ xenbusb_devices_changed(struct xs_watch *watch, const char **vec, char *p; u_int component; - xbs = (struct xenbusb_softc *)watch; + xbs = (struct xenbusb_softc *)watch->callback_data; dev = xbs->xbs_dev; if (len <= XS_WATCH_PATH) { @@ -620,6 +664,7 @@ xenbusb_add_device(device_t dev, const char *type, const char *id) sx_init(&ivars->xd_lock, "xdlock"); ivars->xd_flags = XDF_CONNECTING; ivars->xd_node = strdup(devpath, M_XENBUS); + ivars->xd_node_len = strlen(devpath); ivars->xd_type = strdup(type, M_XENBUS); ivars->xd_state = XenbusStateInitialising; @@ -630,12 +675,16 @@ xenbusb_add_device(device_t dev, const char *type, const char *id) goto out; } - statepath = 
malloc(strlen(ivars->xd_otherend_path) + statepath = malloc(ivars->xd_otherend_path_len + strlen("/state") + 1, M_XENBUS, M_WAITOK); sprintf(statepath, "%s/state", ivars->xd_otherend_path); - ivars->xd_otherend_watch.node = statepath; - ivars->xd_otherend_watch.callback = xenbusb_otherend_changed; + ivars->xd_otherend_watch.callback = xenbusb_otherend_watch_cb; + ivars->xd_otherend_watch.callback_data = (uintptr_t)ivars; + + ivars->xd_local_watch.node = ivars->xd_node; + ivars->xd_local_watch.callback = xenbusb_local_watch_cb; + ivars->xd_local_watch.callback_data = (uintptr_t)ivars; mtx_lock(&xbs->xbs_lock); xbs->xbs_connecting_children++; @@ -693,6 +742,7 @@ xenbusb_attach(device_t dev, char *bus_node, u_int id_components) xbs->xbs_device_watch.node = bus_node; xbs->xbs_device_watch.callback = xenbusb_devices_changed; + xbs->xbs_device_watch.callback_data = (uintptr_t)xbs; TASK_INIT(&xbs->xbs_probe_children, 0, xenbusb_probe_children_cb, dev); @@ -735,7 +785,7 @@ xenbusb_resume(device_t dev) DEVICE_RESUME(kids[i]); - statepath = malloc(strlen(ivars->xd_otherend_path) + statepath = malloc(ivars->xd_otherend_path_len + strlen("/state") + 1, M_XENBUS, M_WAITOK); sprintf(statepath, "%s/state", ivars->xd_otherend_path); @@ -819,7 +869,7 @@ xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value) { int error; - newstate = (enum xenbus_state) value; + newstate = (enum xenbus_state)value; sx_xlock(&ivars->xd_lock); if (ivars->xd_state == newstate) { error = 0; @@ -876,3 +926,24 @@ xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value) return (ENOENT); } + +void +xenbusb_otherend_changed(device_t bus, device_t child, enum xenbus_state state) +{ + XENBUS_OTHEREND_CHANGED(child, state); +} + +void +xenbusb_localend_changed(device_t bus, device_t child, const char *path) +{ + + if (strcmp(path, "/state") != 0) { + struct xenbus_device_ivars *ivars; + + ivars = device_get_ivars(child); + sx_xlock(&ivars->xd_lock); + ivars->xd_state = 
xenbus_read_driver_state(ivars->xd_node); + sx_xunlock(&ivars->xd_lock); + } + XENBUS_LOCALEND_CHANGED(child, path); +} diff --git a/sys/xen/xenbus/xenbusb.h b/sys/xen/xenbus/xenbusb.h index 75abb98..33008f7 100644 --- a/sys/xen/xenbus/xenbusb.h +++ b/sys/xen/xenbus/xenbusb.h @@ -41,7 +41,6 @@ * Datastructures and function declarations for use in implementing * bus attachements (e.g. frontend and backend device busses) for XenBus. */ -#include "xenbusb_if.h" /** * Enumeration of state flag values for the xbs_flags field of @@ -61,10 +60,6 @@ struct xenbusb_softc { * XenStore watch used to monitor the subtree of the * XenStore where devices for this bus attachment arrive * and depart. - * - * \note This field must be the first in the softc structure - * so that a simple cast can be used to retrieve the - * softc from within a XenStore watch event callback. */ struct xs_watch xbs_device_watch; @@ -129,14 +124,17 @@ struct xenbus_device_ivars { * XenStore watch used to monitor the subtree of the * XenStore where information about the otherend of * the split Xen device this device instance represents. - * - * \note This field must be the first in the instance - * variable structure so that a simple cast can be - * used to retrieve ivar data from within a XenStore - * watch event callback. */ struct xs_watch xd_otherend_watch; + /** + * XenStore watch used to monitor the XenStore sub-tree + * associated with this device. This watch will fire + * for modifications that we make from our domain as + * well as for those made by the control domain. + */ + struct xs_watch xd_local_watch; + /** Sleepable lock used to protect instance data. */ struct sx xd_lock; @@ -152,6 +150,9 @@ struct xenbus_device_ivars { */ char *xd_node; + /** The length of xd_node. */ + int xd_node_len; + /** XenBus device type ("vbd", "vif", etc.). */ char *xd_type; @@ -168,6 +169,9 @@ struct xenbus_device_ivars { * about the otherend of this split device instance. 
*/ char *xd_otherend_path; + + /** The length of xd_otherend_path. */ + int xd_otherend_path_len; }; /** @@ -247,6 +251,26 @@ int xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value); /** + * \brief Common XenBus method implementing responses to peer state changes. + * + * \param bus The XenBus bus parent of child. + * \param child The XenBus child whose peer state has changed. + * \param state The current state of the peer. + */ +void xenbusb_otherend_changed(device_t bus, device_t child, + enum xenbus_state state); + +/** + * \brief Common XenBus method implementing responses to local XenStore changes. + * + * \param bus The XenBus bus parent of child. + * \param child The XenBus child whose peer state has changed. + * \param path The tree relative sub-path to the modified node. The empty + * string indicates the root of the tree was destroyed. + */ +void xenbusb_localend_changed(device_t bus, device_t child, const char *path); + +/** * \brief Attempt to add a XenBus device instance to this XenBus bus. * * \param dev The NewBus device representing this XenBus bus. @@ -269,4 +293,6 @@ int xenbusb_write_ivar(device_t dev, device_t child, int index, */ int xenbusb_add_device(device_t dev, const char *type, const char *id); +#include "xenbusb_if.h" + #endif /* _XEN_XENBUS_XENBUSB_H */ diff --git a/sys/xen/xenbus/xenbusb_back.c b/sys/xen/xenbus/xenbusb_back.c index 32bbc04..1252abe 100644 --- a/sys/xen/xenbus/xenbusb_back.c +++ b/sys/xen/xenbus/xenbusb_back.c @@ -208,57 +208,79 @@ xenbusb_back_get_otherend_node(device_t dev, struct xenbus_device_ivars *ivars) if (error == 0) { ivars->xd_otherend_path = strdup(otherend_path, M_XENBUS); + ivars->xd_otherend_path_len = strlen(otherend_path); free(otherend_path, M_XENSTORE); } return (error); } /** - * \brief Backend XenBus child instance variable write access method. - * - * \param dev The NewBus device representing this XenBus bus.
- * \param child The NewBus device representing a child of dev%'s XenBus bus. - * \param index The index of the instance variable to access. - * \param value The new value to set in the instance variable accessed. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * Xenbus_back overrides this method so that it can trap state transitions - * of local backend devices and clean up their XenStore entries as necessary - * during device instance teardown. + * \brief Backend XenBus method implementing responses to peer state changes. + * + * \param bus The XenBus bus parent of child. + * \param child The XenBus child whose peer state has changed. + * \param peer_state The current state of the peer. */ -static int -xenbusb_back_write_ivar(device_t dev, device_t child, int index, - uintptr_t value) +static void +xenbusb_back_otherend_changed(device_t bus, device_t child, + enum xenbus_state peer_state) { - int error; + /* Perform default processing of state. */ + xenbusb_otherend_changed(bus, child, peer_state); - error = xenbusb_write_ivar(dev, child, index, value); + /* + * "Online" devices are never fully detached in the + * newbus sense. Only the front<->back connection is + * torn down. If the front returns to the initialising + * state after closing a previous connection, signal + * our willingness to reconnect and that all necessary + * XenStore data for feature negotiation is present. + */ + if (peer_state == XenbusStateInitialising + && xenbus_dev_is_online(child) != 0 + && xenbus_get_state(child) == XenbusStateClosed) + xenbus_set_state(child, XenbusStateInitWait); +} - if (index == XENBUS_IVAR_STATE - && (enum xenbus_state)value == XenbusStateClosed - && xenbus_dev_is_online(child) == 0) { +/** + * \brief Backend XenBus method implementing responses to local + * XenStore changes. + * + * \param bus The XenBus bus parent of child. + * \param child The XenBus child whose peer state has changed.
+ * \param path The tree relative sub-path to the modified node. The empty + * string indicates the root of the tree was destroyed. + */ +static void +xenbusb_back_localend_changed(device_t bus, device_t child, const char *path) +{ - /* - * Cleanup the hotplug entry in the XenStore if - * present. The control domain expects any userland - * component associated with this device to destroy - * this node in order to signify it is safe to - * teardown the device. However, not all backends - * rely on userland components, and those that - * do should either use a communication channel - * other than the XenStore, or ensure the hotplug - * data is already cleaned up. - * - * This removal ensures that no matter what path - * is taken to mark a back-end closed, the control - * domain will understand that it is closed. - */ - xs_rm(XST_NIL, xenbus_get_node(child), "hotplug-status"); - } + xenbusb_localend_changed(bus, child, path); - return (error); + if (strcmp(path, "/state") != 0 + && strcmp(path, "/online") != 0) + return; + + if (xenbus_get_state(child) != XenbusStateClosed + || xenbus_dev_is_online(child) != 0) + return; + + /* + * Cleanup the hotplug entry in the XenStore if + * present. The control domain expects any userland + * component associated with this device to destroy + * this node in order to signify it is safe to + * teardown the device. However, not all backends + * rely on userland components, and those that + * do should either use a communication channel + * other than the XenStore, or ensure the hotplug + * data is already cleaned up. + * + * This removal ensures that no matter what path + * is taken to mark a back-end closed, the control + * domain will understand that it is closed.
+ */ + xs_rm(XST_NIL, xenbus_get_node(child), "hotplug-status"); } /*-------------------- Private Device Attachment Data -----------------------*/ @@ -275,7 +297,7 @@ static device_method_t xenbusb_back_methods[] = { /* Bus Interface */ DEVMETHOD(bus_print_child, xenbusb_print_child), DEVMETHOD(bus_read_ivar, xenbusb_read_ivar), - DEVMETHOD(bus_write_ivar, xenbusb_back_write_ivar), + DEVMETHOD(bus_write_ivar, xenbusb_write_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), @@ -284,6 +306,8 @@ static device_method_t xenbusb_back_methods[] = { /* XenBus Bus Interface */ DEVMETHOD(xenbusb_enumerate_type, xenbusb_back_enumerate_type), DEVMETHOD(xenbusb_get_otherend_node, xenbusb_back_get_otherend_node), + DEVMETHOD(xenbusb_otherend_changed, xenbusb_back_otherend_changed), + DEVMETHOD(xenbusb_localend_changed, xenbusb_back_localend_changed), { 0, 0 } }; diff --git a/sys/xen/xenbus/xenbusb_front.c b/sys/xen/xenbus/xenbusb_front.c index 0bc06a4..b4e470e 100644 --- a/sys/xen/xenbus/xenbusb_front.c +++ b/sys/xen/xenbus/xenbusb_front.c @@ -156,6 +156,7 @@ xenbusb_front_get_otherend_node(device_t dev, struct xenbus_device_ivars *ivars) if (error == 0) { ivars->xd_otherend_path = strdup(otherend_path, M_XENBUS); + ivars->xd_otherend_path_len = strlen(otherend_path); free(otherend_path, M_XENSTORE); } return (error); diff --git a/sys/xen/xenbus/xenbusb_if.m b/sys/xen/xenbus/xenbusb_if.m index a32e3f6..c49f333 100644 --- a/sys/xen/xenbus/xenbusb_if.m +++ b/sys/xen/xenbus/xenbusb_if.m @@ -31,10 +31,12 @@ # #include <sys/bus.h> +#include <sys/lock.h> +#include <sys/sx.h> +#include <sys/taskqueue.h> -HEADER { -struct xenbus_device_ivars; -} +#include <xen/xenstore/xenstorevar.h> +#include <xen/xenbus/xenbusb.h> INTERFACE xenbusb; @@ -76,3 +78,34 @@ METHOD int get_otherend_node { device_t _dev; struct xenbus_device_ivars *_ivars; } + +/** + * 
\brief Handle a XenStore change detected in the peer tree of a child + * device of the bus. + * + * \param _bus NewBus device_t for this XenBus (front/back) bus instance. + * \param _child NewBus device_t for the child device whose peer XenStore + * tree has changed. + * \param _state The current state of the peer. + */ +METHOD void otherend_changed { + device_t _bus; + device_t _child; + enum xenbus_state _state; +} DEFAULT xenbusb_otherend_changed; + +/** + * \brief Handle a XenStore change detected in the local tree of a child + * device of the bus. + * + * \param _bus NewBus device_t for this XenBus (front/back) bus instance. + * \param _child NewBus device_t for the child device whose peer XenStore + * tree has changed. + * \param _path The tree relative sub-path to the modified node. The empty + * string indicates the root of the tree was destroyed. + */ +METHOD void localend_changed { + device_t _bus; + device_t _child; + const char * _path; +} DEFAULT xenbusb_localend_changed; diff --git a/sys/xen/xenbus/xenbusvar.h b/sys/xen/xenbus/xenbusvar.h index 55d7f29..bf2a342 100644 --- a/sys/xen/xenbus/xenbusvar.h +++ b/sys/xen/xenbus/xenbusvar.h @@ -51,8 +51,6 @@ #include <xen/xenstore/xenstorevar.h> -#include "xenbus_if.h" - /* XenBus allocations including XenStore data returned to clients. */ MALLOC_DECLARE(M_XENBUS); @@ -116,6 +114,8 @@ XenbusState xenbus_read_driver_state(const char *path); * must be stable for the lifetime of the watch. * \param callback The function to call when XenStore objects at or below * path are modified. + * \param cb_data Client data that can be retrieved from the watch object + * during the callback. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. 
@@ -126,7 +126,8 @@ XenbusState xenbus_read_driver_state(const char *path); */ int xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, - xs_watch_cb_t *callback); + xs_watch_cb_t *callback, + uintptr_t cb_data); /** * Initialize and register a watch at path/path2 in the XenStore. @@ -138,6 +139,8 @@ int xenbus_watch_path(device_t dev, char *path, * must be stable for the lifetime of the watch. * \param callback The function to call when XenStore objects at or below * path are modified. + * \param cb_data Client data that can be retrieved from the watch object + * during the callback. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. @@ -153,7 +156,8 @@ int xenbus_watch_path(device_t dev, char *path, */ int xenbus_watch_path2(device_t dev, const char *path, const char *path2, struct xs_watch *watch, - xs_watch_cb_t *callback); + xs_watch_cb_t *callback, + uintptr_t cb_data); /** * Grant access to the given ring_mfn to the peer of the given device. @@ -275,4 +279,16 @@ const char *xenbus_strstate(enum xenbus_state state); */ int xenbus_dev_is_online(device_t dev); +/** + * Default callback invoked when the local XenStore sub-tree + * for a device is modified. + * + * \param dev The XenBus device whose tree was modified. + * \param path The tree relative sub-path to the modified node. The empty + * string indicates the root of the tree was destroyed. 
+ */ +void xenbus_localend_changed(device_t dev, const char *path); + +#include "xenbus_if.h" + #endif /* _XEN_XENBUS_XENBUSVAR_H */ diff --git a/sys/xen/xenstore/xenstorevar.h b/sys/xen/xenstore/xenstorevar.h index df41e31..4a1382d 100644 --- a/sys/xen/xenstore/xenstorevar.h +++ b/sys/xen/xenstore/xenstorevar.h @@ -56,8 +56,8 @@ struct xenstore_domain_interface; struct xs_watch; extern struct xenstore_domain_interface *xen_store; -typedef void (xs_watch_cb_t)(struct xs_watch *, - const char **vec, unsigned int len); +typedef void (xs_watch_cb_t)(struct xs_watch *, const char **vec, + unsigned int len); /* Register callback to watch subtree (node) in the XenStore. */ struct xs_watch @@ -69,6 +69,9 @@ struct xs_watch /* Callback (executed in a process context with no locks held). */ xs_watch_cb_t *callback; + + /* Callback client data untouched by the XenStore watch mechanism. */ + uintptr_t callback_data; }; LIST_HEAD(xs_watch_list, xs_watch); @@ -301,7 +304,7 @@ int xs_gather(struct xs_transaction t, const char *dir, ...); * XenStore watches allow a client to be notified via a callback (embedded * within the watch object) of changes to an object in the XenStore. * - * \param watch A xenbus_watch struct with it's node and callback fields + * \param watch An xs_watch struct with its node and callback fields * properly initialized. * * \return On success, 0. Otherwise an errno value indicating the |