diff options
Diffstat (limited to 'sys')
339 files changed, 20254 insertions, 3033 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index ff47afb..952539f 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -87,7 +87,6 @@ END(cpu_throw) ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ movq TD_PCB(%rdi),%r8 - orl $PCB_FULL_IRET,PCB_FLAGS(%r8) movq (%rsp),%rax /* Hardware registers */ movq %r15,PCB_R15(%r8) @@ -99,6 +98,30 @@ ENTRY(cpu_switch) movq %rbx,PCB_RBX(%r8) movq %rax,PCB_RIP(%r8) + testl $PCB_FULL_IRET,PCB_FLAGS(%r8) + jnz 2f + orl $PCB_FULL_IRET,PCB_FLAGS(%r8) + testl $TDP_KTHREAD,TD_PFLAGS(%rdi) + jnz 2f + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + movl %fs,%eax + cmpl $KUF32SEL,%eax + jne 1f + rdfsbase %rax + movq %rax,PCB_FSBASE(%r8) +1: movl %gs,%eax + cmpl $KUG32SEL,%eax + jne 2f + movq %rdx,%r12 + movl $MSR_KGSBASE,%ecx /* Read user gs base */ + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%r8) + movq %r12,%rdx + +2: testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz store_dr /* static predict not taken */ done_store_dr: @@ -149,7 +172,7 @@ sw1: * to load up the rest of the next context. */ - /* Skip loading user fsbase/gsbase for kthreads */ + /* Skip loading LDT and user fsbase/gsbase for kthreads */ testl $TDP_KTHREAD,TD_PFLAGS(%r12) jnz do_kthread diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index ca07adc..4e9476a 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -84,6 +84,25 @@ struct sysentvec elf64_freebsd_sysvec = { }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); +void +amd64_lower_shared_page(struct sysentvec *sv) +{ + if (hw_lower_amd64_sharedpage != 0) { + sv->sv_maxuser -= PAGE_SIZE; + sv->sv_shared_page_base -= PAGE_SIZE; + sv->sv_usrstack -= PAGE_SIZE; + sv->sv_psstrings -= PAGE_SIZE; + } +} + +/* + * Do this fixup before INIT_SYSENTVEC (SI_ORDER_ANY) because the latter + * uses the value of sv_shared_page_base. 
+ */ +SYSINIT(elf64_sysvec_fixup, SI_SUB_EXEC, SI_ORDER_FIRST, + (sysinit_cfunc_t) amd64_lower_shared_page, + &elf64_freebsd_sysvec); + static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 39ecfe2..e3afe22 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -387,8 +387,24 @@ prot_addrf: je 5f /* kernel but with user gsbase!! */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 6f /* already running with kernel GS.base */ - swapgs + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 1f + rdfsbase %rax +1: cmpw $KUG32SEL,TF_GS(%rsp) + jne 2f + rdgsbase %rdx +2: swapgs movq PCPU(CURPCB),%rdi + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 4f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 3f + movq %rax,PCB_FSBASE(%rdi) +3: cmpw $KUG32SEL,TF_GS(%rsp) + jne 4f + movq %rdx,PCB_GSBASE(%rdi) 4: call handle_ibrs_entry orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ movw %es,TF_ES(%rsp) @@ -408,8 +424,8 @@ prot_addrf: * pointer. We have to juggle a few things around to find our stack etc. * swapgs gives us access to our PCPU space only. * - * We do not support invoking this from a custom %cs or %ss (e.g. using - * entries from an LDT). + * We do not support invoking this from a custom segment registers, + * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT. 
*/ SUPERALIGN_TEXT IDTVEC(fast_syscall_pti) @@ -711,6 +727,19 @@ nmi_fromuserspace: testq %rdi,%rdi jz 3f orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 3f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 2f + rdfsbase %rax + movq %rax,PCB_FSBASE(%rdi) +2: cmpw $KUG32SEL,TF_GS(%rsp) + jne 3f + movl $MSR_KGSBASE,%ecx + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%rdi) 3: /* Note: this label is also used by ddb and gdb: */ nmi_calltrap: @@ -1023,6 +1052,7 @@ doreti_exit: jz ld_regs testl $PCB_FULL_IRET,PCB_FLAGS(%r8) jz ld_regs + andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) testl $TF_HASSEGS,TF_FLAGS(%rsp) je set_segs diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 4c70e88..540966b 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -806,6 +806,7 @@ fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, struct pcb *pcb; int error; + addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c index 2818111..de3e461 100644 --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -48,6 +48,11 @@ __FBSDID("$FreeBSD$"); static int hw_instruction_sse; SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); +static int lower_sharedpage_init; +int hw_lower_amd64_sharedpage; +SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN, + &hw_lower_amd64_sharedpage, 0, + "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory"); /* * -1: automatic (default) * 0: keep enable CLFLUSH @@ -122,6 +127,28 @@ init_amd(void) wrmsr(0xc0011020, msr); } } + + /* + * Work around a problem on Ryzen that is triggered by executing + * code near the top of user memory, in our case the signal + * trampoline code in the shared page on amd64. 
+ * + * This function is executed once for the BSP before tunables take + * effect so the value determined here can be overridden by the + * tunable. This function is then executed again for each AP and + * also on resume. Set a flag the first time so that value set by + * the tunable is not overwritten. + * + * The stepping and/or microcode versions should be checked after + * this issue is fixed by AMD so that we don't use this mode if not + * needed. + */ + if (lower_sharedpage_init == 0) { + lower_sharedpage_init = 1; + if (CPUID_TO_FAMILY(cpu_id) == 0x17) { + hw_lower_amd64_sharedpage = 1; + } + } } /* diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 3419dea..ab48566 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -382,6 +382,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); + update_pcb_bases(pcb); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, @@ -452,7 +453,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; - set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -558,6 +558,7 @@ sys_sigreturn(td, uap) return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); + update_pcb_bases(pcb); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; @@ -569,7 +570,6 @@ sys_sigreturn(td, uap) #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); - set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); } @@ -597,11 +597,11 @@ exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) else mtx_unlock(&dt_lock); + update_pcb_bases(pcb); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; 
clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; - set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; @@ -1573,6 +1573,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) kmdp = init_ops.parse_preload_data(modulep); identify_cpu1(); + identify_hypervisor(); /* Init basic tunables, hz etc */ init_param1(); @@ -2200,6 +2201,7 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); + update_pcb_bases(pcb); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; @@ -2270,11 +2272,11 @@ set_mcontext(struct thread *td, mcontext_t *mcp) tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } + set_pcb_flags(pcb, PCB_FULL_IRET); if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } - set_pcb_flags(pcb, PCB_FULL_IRET); return (0); } @@ -2305,7 +2307,6 @@ static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { - struct savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) @@ -2318,9 +2319,8 @@ set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - fpstate = (struct savefpu *)&mcp->mc_fpstate; - fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); + error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate, + xfpustate, xfpustate_len); } else return (EINVAL); return (error); @@ -2545,6 +2545,71 @@ user_dbreg_trap(void) return 0; } +/* + * The pcb_flags is only modified by current thread, or by other threads + * when current thread is stopped. However, current thread may change it + * from the interrupt context in cpu_switch(), or in the trap handler. 
+ * When we read-modify-write pcb_flags from C sources, compiler may generate + * code that is not atomic regarding the interrupt handler. If a trap or + * interrupt happens and any flag is modified from the handler, it can be + * clobbered with the cached value later. Therefore, we implement setting + * and clearing flags with single-instruction functions, which do not race + * with possible modification of the flags from the trap or interrupt context, + * because traps and interrupts are executed only on instruction boundary. + */ +void +set_pcb_flags_raw(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("orl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) + : "cc", "memory"); + +} + +/* + * The support for RDFSBASE, WRFSBASE and similar instructions for %gs + * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into + * pcb if user space modified the bases. We must save on the context + * switch or if the return to usermode happens through the doreti. + * + * Tracking of both events is performed by the pcb flag PCB_FULL_IRET, + * which have a consequence that the base MSRs must be saved each time + * the PCB_FULL_IRET flag is set. We disable interrupts to sync with + * context switches. 
+ */ +void +set_pcb_flags(struct pcb *pcb, const u_int flags) +{ + register_t r; + + if (curpcb == pcb && + (flags & PCB_FULL_IRET) != 0 && + (pcb->pcb_flags & PCB_FULL_IRET) == 0 && + (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) { + r = intr_disable(); + if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) { + if (rfs() == _ufssel) + pcb->pcb_fsbase = rdfsbase(); + if (rgs() == _ugssel) + pcb->pcb_gsbase = rdmsr(MSR_KGSBASE); + } + set_pcb_flags_raw(pcb, flags); + intr_restore(r); + } else { + set_pcb_flags_raw(pcb, flags); + } +} + +void +clear_pcb_flags(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("andl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) + : "cc", "memory"); +} + #ifdef KDB /* diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 8e2f048..53de162 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -2566,9 +2566,8 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) /* * allocate the page directory page */ - while ((pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) - VM_WAIT; + pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); pml4phys = VM_PAGE_TO_PHYS(pml4pg); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys); diff --git a/sys/amd64/amd64/ptrace_machdep.c b/sys/amd64/amd64/ptrace_machdep.c index dbb3f13..169e15c 100644 --- a/sys/amd64/amd64/ptrace_machdep.c +++ b/sys/amd64/amd64/ptrace_machdep.c @@ -117,15 +117,17 @@ cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data) static void cpu_ptrace_setbase(struct thread *td, int req, register_t r) { + struct pcb *pcb; + pcb = td->td_pcb; + set_pcb_flags(pcb, PCB_FULL_IRET); if (req == PT_SETFSBASE) { - td->td_pcb->pcb_fsbase = r; + pcb->pcb_fsbase = r; td->td_frame->tf_fs = _ufssel; } else { - td->td_pcb->pcb_gsbase = r; + pcb->pcb_gsbase = r; td->td_frame->tf_gs = _ugssel; } - 
set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } #ifdef COMPAT_FREEBSD32 @@ -136,6 +138,7 @@ static int cpu32_ptrace(struct thread *td, int req, void *addr, int data) { struct savefpu *fpstate; + struct pcb *pcb; uint32_t r; int error; @@ -167,8 +170,10 @@ cpu32_ptrace(struct thread *td, int req, void *addr, int data) error = EINVAL; break; } - r = req == PT_GETFSBASE ? td->td_pcb->pcb_fsbase : - td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + update_pcb_bases(pcb); + r = req == PT_GETFSBASE ? pcb->pcb_fsbase : pcb->pcb_gsbase; error = copyout(&r, addr, sizeof(r)); break; @@ -197,6 +202,7 @@ int cpu_ptrace(struct thread *td, int req, void *addr, int data) { register_t *r, rv; + struct pcb *pcb; int error; #ifdef COMPAT_FREEBSD32 @@ -221,8 +227,10 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data) case PT_GETFSBASE: case PT_GETGSBASE: - r = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsbase : - &td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + update_pcb_bases(pcb); + r = req == PT_GETFSBASE ? 
&pcb->pcb_fsbase : &pcb->pcb_gsbase; error = copyout(r, addr, sizeof(*r)); break; diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index f6be94e..aed23d6 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -51,7 +51,6 @@ ENTRY(bzero) movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx - cld rep stosq movq %rsi,%rcx @@ -77,7 +76,6 @@ ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx - cld /* compare forwards */ repe cmpsq jne 1f @@ -109,7 +107,6 @@ ENTRY(bcopy) jb 1f shrq $3,%rcx /* copy by 64-bit words */ - cld /* nope, copy forwards */ rep movsq movq %rdx,%rcx @@ -148,7 +145,6 @@ ENTRY(memcpy) movq %rdi,%rax movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ - cld /* copy forwards */ rep movsq movq %rdx,%rcx @@ -195,7 +191,6 @@ ENTRY(fillw) movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx - cld rep stosw POP_FRAME_POINTER @@ -215,7 +210,7 @@ END(fillw) */ /* - * copyout(from_kernel, to_user, len) - MP SAFE + * copyout(from_kernel, to_user, len) * %rdi, %rsi, %rdx */ ENTRY(copyout) @@ -226,12 +221,11 @@ ENTRY(copyout) jz done_copyout /* - * Check explicitly for non-user addresses. If 486 write protection - * is being used, this check is essential because we are in kernel - * mode so the h/w does not provide any protection against writing - * kernel addresses. + * Check explicitly for non-user addresses. This check is essential + * because it prevents usermode from writing into the kernel. We do + * not verify anywhere else that the user did not specify a rogue + * address. */ - /* * First, prevent address wrapping. 
*/ @@ -253,7 +247,6 @@ ENTRY(copyout) movq %rdx,%rcx shrq $3,%rcx - cld rep movsq movb %dl,%cl @@ -278,7 +271,7 @@ copyout_fault: END(copyout) /* - * copyin(from_user, to_kernel, len) - MP SAFE + * copyin(from_user, to_kernel, len) * %rdi, %rsi, %rdx */ ENTRY(copyin) @@ -302,7 +295,6 @@ ENTRY(copyin) movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ - cld rep movsq movb %al,%cl @@ -495,7 +487,7 @@ fusufault: /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to - * user memory. All these functions are MPSAFE. + * user memory. * addr = %rdi, value = %rsi */ ALTENTRY(suword64) @@ -570,7 +562,7 @@ ENTRY(subyte) END(subyte) /* - * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE + * copyinstr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. @@ -599,7 +591,6 @@ ENTRY(copyinstr) movq %rax,%r8 1: incq %rdx - cld 2: decq %rdx @@ -641,7 +632,7 @@ cpystrflt_x: END(copyinstr) /* - * copystr(from, to, maxlen, int *lencopied) - MP SAFE + * copystr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) @@ -650,7 +641,6 @@ ENTRY(copystr) xchgq %rdi,%rsi incq %rdx - cld 1: decq %rdx jz 4f @@ -681,7 +671,6 @@ END(copystr) /* * Handling of special amd64 registers and descriptor tables etc - * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index 8867aed..42cae4a 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -256,39 +256,45 @@ sysarch(td, uap) error = amd64_set_ioperm(td, &iargs); break; case I386_GET_FSBASE: + update_pcb_bases(pcb); i386base = pcb->pcb_fsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_FSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = i386base; td->td_frame->tf_fs = _ufssel; 
update_gdt_fsbase(td, i386base); } break; case I386_GET_GSBASE: + update_pcb_bases(pcb); i386base = pcb->pcb_gsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_GSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_gsbase = i386base; td->td_frame->tf_gs = _ugssel; update_gdt_gsbase(td, i386base); } break; case AMD64_GET_FSBASE: - error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase)); + update_pcb_bases(pcb); + error = copyout(&pcb->pcb_fsbase, uap->parms, + sizeof(pcb->pcb_fsbase)); break; case AMD64_SET_FSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_fsbase = a64base; set_pcb_flags(pcb, PCB_FULL_IRET); + pcb->pcb_fsbase = a64base; td->td_frame->tf_fs = _ufssel; } else error = EINVAL; @@ -296,15 +302,17 @@ sysarch(td, uap) break; case AMD64_GET_GSBASE: - error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase)); + update_pcb_bases(pcb); + error = copyout(&pcb->pcb_gsbase, uap->parms, + sizeof(pcb->pcb_gsbase)); break; case AMD64_SET_GSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_gsbase = a64base; set_pcb_flags(pcb, PCB_FULL_IRET); + pcb->pcb_gsbase = a64base; td->td_frame->tf_gs = _ugssel; } else error = EINVAL; diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index dd5d891..4562902 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -172,37 +172,41 @@ trap(struct trapframe *frame) #ifdef KDTRACE_HOOKS struct reg regs; #endif - struct thread *td = curthread; - struct proc *p = td->td_proc; + ksiginfo_t ksi; + struct thread *td; + struct proc *p; + register_t addr; #ifdef KDB register_t dr6; #endif - int i = 0, ucode = 0; + int signo, ucode; u_int type; - register_t addr = 0; - ksiginfo_t ksi; + + td = curthread; + p = td->td_proc; + signo = 0; + ucode = 
0; + addr = 0; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ - if (type == T_NMI) { - if (ipi_nmi_handler() == 0) - goto out; - } -#endif /* SMP */ + if (type == T_NMI && ipi_nmi_handler() == 0) + return; +#endif #ifdef KDB if (kdb_active) { kdb_reenter(); - goto out; + return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); - goto out; + return; } if (type == T_NMI) { @@ -215,12 +219,12 @@ trap(struct trapframe *frame) */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) - goto out; + return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) - goto out; + return; #endif } @@ -265,7 +269,7 @@ trap(struct trapframe *frame) switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ - i = SIGILL; + signo = SIGILL; ucode = ILL_PRVOPC; break; @@ -277,41 +281,41 @@ trap(struct trapframe *frame) fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) - goto out; + return; } #endif frame->tf_rflags &= ~PSL_T; - i = SIGTRAP; + signo = SIGTRAP; ucode = (type == T_TRCTRAP ? 
TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) - goto userout; - i = SIGFPE; + return; + signo = SIGFPE; break; case T_PROTFLT: /* general protection fault */ - i = SIGBUS; + signo = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ - i = SIGBUS; + signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ - i = SIGBUS; + signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: - i = SIGBUS; + signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: - i = SIGBUS; + signo = SIGBUS; ucode = BUS_OBJERR; break; @@ -321,67 +325,64 @@ trap(struct trapframe *frame) */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) - goto userout; + return; addr = frame->tf_addr; - i = trap_pfault(frame, TRUE); - if (i == -1) - goto userout; - if (i == 0) - goto user; - - if (i == SIGSEGV) + signo = trap_pfault(frame, TRUE); + if (signo == -1) + return; + if (signo == 0) + goto userret; + if (signo == SIGSEGV) { ucode = SEGV_MAPERR; - else { - if (prot_fault_translation == 0) { - /* - * Autodetect. - * This check also covers the images - * without the ABI-tag ELF note. - */ - if (SV_CURPROC_ABI() == SV_ABI_FREEBSD - && p->p_osrel >= P_OSREL_SIGSEGV) { - i = SIGSEGV; - ucode = SEGV_ACCERR; - } else { - i = SIGBUS; - ucode = BUS_PAGE_FAULT; - } - } else if (prot_fault_translation == 1) { - /* - * Always compat mode. - */ - i = SIGBUS; - ucode = BUS_PAGE_FAULT; - } else { - /* - * Always SIGSEGV mode. - */ - i = SIGSEGV; + } else if (prot_fault_translation == 0) { + /* + * Autodetect. This check also covers + * the images without the ABI-tag ELF + * note. 
+ */ + if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && + p->p_osrel >= P_OSREL_SIGSEGV) { + signo = SIGSEGV; ucode = SEGV_ACCERR; + } else { + signo = SIGBUS; + ucode = BUS_PAGE_FAULT; } + } else if (prot_fault_translation == 1) { + /* + * Always compat mode. + */ + signo = SIGBUS; + ucode = BUS_PAGE_FAULT; + } else { + /* + * Always SIGSEGV mode. + */ + signo = SIGSEGV; + ucode = SEGV_ACCERR; } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; - i = SIGFPE; + signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); - break; -#endif /* DEV_ISA */ + return; +#endif case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; - i = SIGFPE; + signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; - i = SIGFPE; + signo = SIGFPE; break; case T_DNA: @@ -389,27 +390,26 @@ trap(struct trapframe *frame) KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); - goto userout; + return; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; - i = SIGILL; + signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) - goto userout; - i = SIGFPE; + return; + signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, ®s); - if (dtrace_return_probe_ptr != NULL && - dtrace_return_probe_ptr(®s) == 0) - goto out; - break; + if (dtrace_return_probe_ptr != NULL) + dtrace_return_probe_ptr(®s); + return; #endif } } else { @@ -420,13 +420,13 @@ trap(struct trapframe *frame) switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); - goto out; + return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); - goto out; + return; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ @@ -436,7 +436,7 @@ trap(struct trapframe *frame) * registration for FPU traps is overkill. 
*/ trap_fatal(frame, 0); - goto out; + return; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ @@ -475,35 +475,35 @@ trap(struct trapframe *frame) sizeof(register_t)) frame->tf_rsp = PCPU_GET(rsp0) - 5 * sizeof(register_t); - goto out; + return; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; - goto out; + return; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; - goto out; + return; } break; @@ -519,7 +519,7 @@ trap(struct trapframe *frame) */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; - goto out; + return; } break; @@ -540,7 +540,7 @@ trap(struct trapframe *frame) * processor doesn't */ load_dr6(rdr6() & ~0xf); - goto out; + return; } /* @@ -601,27 +601,27 @@ trap(struct trapframe *frame) dr6 = rdr6(); load_dr6(dr6 & ~0x4000); if (kdb_trap(type, dr6, frame)) - goto out; + return; #endif break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); - goto out; -#endif /* DEV_ISA */ + return; +#endif } trap_fatal(frame, 0); - goto out; + return; } /* Translate fault for emulators (e.g. 
Linux) */ - if (*p->p_sysent->sv_transtrap) - i = (*p->p_sysent->sv_transtrap)(i, type); + if (*p->p_sysent->sv_transtrap != NULL) + signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); - ksi.ksi_signo = i; + ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; @@ -629,8 +629,8 @@ trap(struct trapframe *frame) uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", - p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, - frame->tf_rsp, frame->tf_rip, + p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, + addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), @@ -642,14 +642,10 @@ trap(struct trapframe *frame) } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); - -user: +userret: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); -userout: -out: - return; } /* @@ -669,17 +665,20 @@ trap_check(struct trapframe *frame) } static int -trap_pfault(frame, usermode) - struct trapframe *frame; - int usermode; +trap_pfault(struct trapframe *frame, int usermode) { - vm_offset_t va; + struct thread *td; + struct proc *p; vm_map_t map; - int rv = 0; + vm_offset_t va; + int rv; vm_prot_t ftype; - struct thread *td = curthread; - struct proc *p = td->td_proc; - vm_offset_t eva = frame->tf_addr; + vm_offset_t eva; + + td = curthread; + p = td->td_proc; + eva = frame->tf_addr; + rv = 0; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* @@ -906,16 +905,18 @@ dblfault_handler(struct trapframe *frame) } int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; register_t *argp; + struct syscall_args *sa; caddr_t params; int reg, 
regcnt, error; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; reg = 0; regcnt = 6; @@ -966,7 +967,6 @@ cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) void amd64_syscall(struct thread *td, int traced) { - struct syscall_args sa; int error; ksiginfo_t ksi; @@ -976,7 +976,7 @@ amd64_syscall(struct thread *td, int traced) /* NOT REACHED */ } #endif - error = syscallenter(td, &sa); + error = syscallenter(td); /* * Traced syscall. @@ -992,15 +992,16 @@ amd64_syscall(struct thread *td, int traced) KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", - syscallname(td->td_proc, sa.code))); + syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", - syscallname(td->td_proc, sa.code))); + syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_md.md_invl_gen.gen == 0, ("System call %s returning with leaked invl_gen %lu", - syscallname(td->td_proc, sa.code), td->td_md.md_invl_gen.gen)); + syscallname(td->td_proc, td->td_sa.code), + td->td_md.md_invl_gen.gen)); - syscallret(td, error, &sa); + syscallret(td, error); /* * If the user-supplied value of %rip is not a canonical @@ -1010,6 +1011,6 @@ amd64_syscall(struct thread *td, int traced) * not be safe. Instead, use the full return path which * catches the problem safely. 
*/ - if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS) + if (__predict_false(td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c index db566ae..220d095 100644 --- a/sys/amd64/amd64/uma_machdep.c +++ b/sys/amd64/amd64/uma_machdep.c @@ -46,20 +46,12 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) vm_page_t m; vm_paddr_t pa; void *va; - int pflags; *flags = UMA_SLAB_PRIV; - pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; - for (;;) { - m = vm_page_alloc(NULL, 0, pflags); - if (m == NULL) { - if (wait & M_NOWAIT) - return (NULL); - else - VM_WAIT; - } else - break; - } + m = vm_page_alloc(NULL, 0, + malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + if (m == NULL) + return (NULL); pa = m->phys_addr; if ((wait & M_NODUMP) == 0) dump_add_page(pa); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 8846eb8..d95bb56 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -176,6 +176,7 @@ cpu_fork(td1, p2, td2, flags) /* Ensure that td1's pcb is up to date. */ fpuexit(td1); + update_pcb_bases(td1->td_pcb); /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); @@ -242,7 +243,7 @@ cpu_fork(td1, p2, td2, flags) pcb2->pcb_tssp = NULL; /* New segment registers. */ - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* Copy the LDT, if necessary. */ mdp1 = &td1->td_proc->p_md; @@ -439,13 +440,14 @@ cpu_copy_thread(struct thread *td, struct thread *td0) * Those not loaded individually below get their default * values here. 
*/ + update_pcb_bases(td0->td_pcb); bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE | PCB_KERNFPU); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* * Create a new fresh stack for the new thread. diff --git a/sys/amd64/cloudabi32/cloudabi32_sysvec.c b/sys/amd64/cloudabi32/cloudabi32_sysvec.c index abede1a..d5dd58c 100644 --- a/sys/amd64/cloudabi32/cloudabi32_sysvec.c +++ b/sys/amd64/cloudabi32/cloudabi32_sysvec.c @@ -90,11 +90,15 @@ cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, } static int -cloudabi32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cloudabi32_fetch_syscall_args(struct thread *td) { - struct trapframe *frame = td->td_frame; + struct trapframe *frame; + struct syscall_args *sa; int error; + frame = td->td_frame; + sa = &td->td_sa; + /* Obtain system call number. */ sa->code = frame->tf_rax; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) diff --git a/sys/amd64/cloudabi64/cloudabi64_sysvec.c b/sys/amd64/cloudabi64/cloudabi64_sysvec.c index 84f0cb3..679888b 100644 --- a/sys/amd64/cloudabi64/cloudabi64_sysvec.c +++ b/sys/amd64/cloudabi64/cloudabi64_sysvec.c @@ -87,9 +87,13 @@ cloudabi64_proc_setregs(struct thread *td, struct image_params *imgp, } static int -cloudabi64_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cloudabi64_fetch_syscall_args(struct thread *td) { - struct trapframe *frame = td->td_frame; + struct trapframe *frame; + struct syscall_args *sa; + + frame = td->td_frame; + sa = &td->td_sa; /* Obtain system call number. 
*/ sa->code = frame->tf_rax; diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index de7cbe6..b4eb43d 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -29,6 +29,7 @@ options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem diff --git a/sys/amd64/conf/pfSense b/sys/amd64/conf/pfSense new file mode 100644 index 0000000..26c071e --- /dev/null +++ b/sys/amd64/conf/pfSense @@ -0,0 +1,182 @@ +include GENERIC + +nooptions KDB_TRACE +options DDB + +ident pfSense + +nooptions MAC # TrustedBSD MAC Framework +nooptions COMPAT_FREEBSD4 # Compatible with FreeBSD4 +nooptions COMPAT_FREEBSD5 # Compatible with FreeBSD5 +nooptions COMPAT_FREEBSD6 # Compatible with FreeBSD6 +nooptions COMPAT_FREEBSD7 # Compatible with FreeBSD7 + +options GEOM_MIRROR +options GEOM_UZIP +options GEOM_ELI +options GEOM_BDE + +options TMPFS +options UNIONFS +options NULLFS +options PPS_SYNC + +# Wireless +#nooptions IEEE80211_DEBUG # enable debug msgs +device wlan_rssadapt +device wlan_xauth +device wlan_acl +device iwifw +device ipwfw # Firmware for Intel PRO/Wireless 2100 IEEE 802.11 driver +device wpifw # Firmware for Intel 3945ABG Wireless LAN IEEE 802.11 driver +device iwnfw # Firmware for Intel Wireless WiFi Link 4965AGN IEEE 802.11n driver +device uath # Atheros USB IEEE 802.11a/b/g wireless network device +device ralfw # Firmware for Ralink Technology RT2500 wireless NICs. 
+device ural # Ralink Technology RT2500USB IEEE 802.11 driver +device urtw # Realtek RTL8187B/L USB IEEE 802.11b/g wireless network device +device rum # Ralink Technology USB IEEE 802.11a/b/g wireless network device +device mwlfw # Firmware for Marvell 88W8363 IEEE 802.11n wireless network driver +device zyd # ZyDAS ZD1211/ZD1211B USB IEEE 802.11b/g wireless network device +device upgt # Conexant/Intersil PrismGT SoftMAC USB IEEE 802.11b/g wireless +device udav # Davicom DM9601 USB Ethernet driver +device axe +device axge +device aue +device cue +device kue +device mos +device rsu +device rsufw +device run # Ralink RT2700U/RT2800U/RT3000U USB 802.11agn +device runfw +device rue +device rtwn +device rtwnfw +device siba_bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwi # Broadcom BCM43xx IEEE 802.11b/g wireless network driver + +# Pseudo devices. +#device pty # Pseudo-ttys (telnet etc) + +# USB support +nooptions USB_DEBUG # enable debug msgs + +# 3G devices +device ufoma +device ucom +device uslcom +device uplcom +device umct +device uvisor +device uark +device uftdi +device uvscom +device umodem +device u3g +device cdce + +device uhid # "Human Interface Devices" + +# FireWire support +device firewire # FireWire bus code +device sbp # SCSI over FireWire (Requires scbus and da) + +# pfsense addons + +device tap +device gre +device if_bridge +device carp +device lagg +device vte + +# IP/IPFW +options IPFIREWALL_DEFAULT_TO_ACCEPT +options IPFIREWALL_VERBOSE +options IPSTEALTH + +# Netgraph +options NETGRAPH #netgraph(4) system +options NETGRAPH_VLAN +options NETGRAPH_L2TP +options NETGRAPH_BPF +options NETGRAPH_ETHER +options NETGRAPH_IFACE +options NETGRAPH_EIFACE +options NETGRAPH_PPP +options NETGRAPH_PPPOE +options NETGRAPH_PPTPGRE +options NETGRAPH_RFC1490 +options NETGRAPH_SOCKET +options NETGRAPH_TTY +options NETGRAPH_MPPC_ENCRYPTION +options NETGRAPH_UI +options 
NETGRAPH_VJC +options NETGRAPH_KSOCKET +options NETGRAPH_LMI +options NETGRAPH_ONE2MANY +options NETGRAPH_BRIDGE +options NETGRAPH_CISCO +options NETGRAPH_ECHO +options NETGRAPH_ASYNC +options NETGRAPH_FRAME_RELAY +options NETGRAPH_HOLE +options NETGRAPH_TEE +options NETGRAPH_TCPMSS +options NETGRAPH_PIPE +options NETGRAPH_CAR +options NETGRAPH_DEFLATE +options NETGRAPH_PRED1 + +# IPSEC +options TCP_SIGNATURE +device enc + +# ALTQ +options ALTQ +options ALTQ_CBQ +options ALTQ_RED +options ALTQ_RIO +options ALTQ_HFSC +options ALTQ_PRIQ +options ALTQ_FAIRQ +options ALTQ_NOPCC +options ALTQ_CODEL + +# Squid related settings +options MSGMNB=8192 # max # of bytes in a queue +options MSGMNI=40 # number of message queue identifiers +options MSGSEG=512 # number of message segments per queue +options MSGSSZ=32 # size of a message segment +options MSGTQL=2048 # max messages in system + +device pf +device pflog +device pfsync + +device rndtest # FIPS 140-2 entropy tester +device hifn # Hifn 7951, 7781, etc. 
+options HIFN_DEBUG # enable debugging support: hw.hifn.debug +options HIFN_RNDTEST # enable rndtest support +device ubsec # Broadcom 5501, 5601, 58xx +device safe # safe -- SafeNet crypto accelerator +device padlock + +device speaker + +options MROUTING + +# Additional cards +device mxge # mxge - Myricom Myri10GE 10 Gigabit Ethernet adapter driver +device cxgb # cxgb -- Chelsio T3 10 Gigabit Ethernet adapter driver +device cxgbe # cxgbe -- Chelsio T5 10 Gigabit Ethernet adapter driver +#device nve # nVidia nForce MCP on-board Ethernet Networking +device oce + +# Default serial speed +options CONSPEED=115200 + +# Enable gpioapu +#device gpioapu +#device gpiorcc diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c index c2bf2fb..f743440 100644 --- a/sys/amd64/ia32/ia32_syscall.c +++ b/sys/amd64/ia32/ia32_syscall.c @@ -106,16 +106,18 @@ ia32_set_syscall_retval(struct thread *td, int error) } int -ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +ia32_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; caddr_t params; u_int32_t args[8], tmp; int error, i; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t); sa->code = frame->tf_rax; @@ -176,7 +178,6 @@ void ia32_syscall(struct trapframe *frame) { struct thread *td; - struct syscall_args sa; register_t orig_tf_rflags; int error; ksiginfo_t ksi; @@ -185,7 +186,7 @@ ia32_syscall(struct trapframe *frame) td = curthread; td->td_frame = frame; - error = syscallenter(td, &sa); + error = syscallenter(td); /* * Traced syscall. 
@@ -199,7 +200,7 @@ ia32_syscall(struct trapframe *frame) trapsignal(td, &ksi); } - syscallret(td, error, &sa); + syscallret(td, error); } static void diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 5fa0d77..2107b2ba 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -651,6 +651,38 @@ load_gs(u_short sel) } #endif +static __inline uint64_t +rdfsbase(void) +{ + uint64_t x; + + __asm __volatile("rdfsbase %0" : "=r" (x)); + return (x); +} + +static __inline void +wrfsbase(uint64_t x) +{ + + __asm __volatile("wrfsbase %0" : : "r" (x)); +} + +static __inline uint64_t +rdgsbase(void) +{ + uint64_t x; + + __asm __volatile("rdgsbase %0" : "=r" (x)); + return (x); +} + +static __inline void +wrgsbase(uint64_t x) +{ + + __asm __volatile("wrgsbase %0" : : "r" (x)); +} + static __inline void bare_lgdt(struct region_descriptor *addr) { diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index b81f497..f4d6e60 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -34,7 +34,8 @@ #include <x86/x86_var.h> -extern uint64_t *vm_page_dump; +extern uint64_t *vm_page_dump; +extern int hw_lower_amd64_sharedpage; extern int hw_ibrs_disable; /* @@ -44,9 +45,11 @@ extern int hw_ibrs_disable; extern char kernphys[]; struct savefpu; +struct sysentvec; void amd64_conf_fast_syscall(void); void amd64_db_resume_dbreg(void); +void amd64_lower_shared_page(struct sysentvec *); void amd64_syscall(struct thread *td, int traced); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index 2b7bb6e..09aea36 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -119,40 +119,15 @@ struct susppcb { #ifdef _KERNEL struct trapframe; -/* - * The pcb_flags is only modified by current thread, or by other threads - * when current thread is stopped. 
However, current thread may change it - * from the interrupt context in cpu_switch(), or in the trap handler. - * When we read-modify-write pcb_flags from C sources, compiler may generate - * code that is not atomic regarding the interrupt handler. If a trap or - * interrupt happens and any flag is modified from the handler, it can be - * clobbered with the cached value later. Therefore, we implement setting - * and clearing flags with single-instruction functions, which do not race - * with possible modification of the flags from the trap or interrupt context, - * because traps and interrupts are executed only on instruction boundary. - */ -static __inline void -set_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("orl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) - : "cc"); -} - -static __inline void -clear_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("andl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) - : "cc"); -} - +void clear_pcb_flags(struct pcb *pcb, const u_int flags); void makectx(struct trapframe *, struct pcb *); +void set_pcb_flags(struct pcb *pcb, const u_int flags); +void set_pcb_flags_raw(struct pcb *pcb, const u_int flags); int savectx(struct pcb *) __returns_twice; void resumectx(struct pcb *); +/* Ensure that pcb_gsbase and pcb_fsbase are up to date */ +#define update_pcb_bases(pcb) set_pcb_flags((pcb), PCB_FULL_IRET) #endif #endif /* _AMD64_PCB_H_ */ diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index f4b59aa..4c2b244 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -70,6 +70,13 @@ struct mdproc { #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 768 +struct syscall_args { + u_int code; + struct sysent *callp; + register_t args[8]; + int narg; +}; + #ifdef _KERNEL /* Get the current kernel thread stack usage. 
*/ @@ -92,13 +99,6 @@ int amd64_set_ldt_data(struct thread *td, int start, int num, extern struct mtx dt_lock; extern int max_ldt_segment; - -struct syscall_args { - u_int code; - struct sysent *callp; - register_t args[8]; - int narg; -}; #endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c index 6e12d41..942819b 100644 --- a/sys/amd64/linux/linux_sysvec.c +++ b/sys/amd64/linux/linux_sysvec.c @@ -126,7 +126,7 @@ static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux_set_syscall_retval(struct thread *td, int error); -static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa); +static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); static int linux_vsyscall(struct thread *td); @@ -217,13 +217,15 @@ translate_traps(int signal, int trap_code) } static int -linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; @@ -831,6 +833,8 @@ static void linux_vdso_install(void *param) { + amd64_lower_shared_page(&elf_linux_sysvec); + linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index c264f3a..ea849ba 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -725,13 +725,15 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) } static int -linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +linux32_fetch_syscall_args(struct thread 
*td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; sa->args[0] = frame->tf_rbx; sa->args[1] = frame->tf_rcx; diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c index 91b2c01..a48cb76 100644 --- a/sys/amd64/vmm/intel/vmx_msr.c +++ b/sys/amd64/vmm/intel/vmx_msr.c @@ -31,10 +31,12 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/proc.h> #include <machine/clock.h> #include <machine/cpufunc.h> #include <machine/md_var.h> +#include <machine/pcb.h> #include <machine/specialreg.h> #include <machine/vmm.h> @@ -356,7 +358,8 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; - /* Save host MSRs (if any) and restore guest MSRs */ + /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ + update_pcb_bases(curpcb); wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); diff --git a/sys/arm/allwinner/aw_ccu.c b/sys/arm/allwinner/aw_ccu.c index 051a950..d6f7264 100644 --- a/sys/arm/allwinner/aw_ccu.c +++ b/sys/arm/allwinner/aw_ccu.c @@ -194,7 +194,7 @@ aw_ccu_search_compatible(void) root = OF_finddevice("/"); for (compat = compat_data; compat->ocd_str != NULL; compat++) - if (fdt_is_compatible(root, compat->ocd_str)) + if (ofw_bus_node_is_compatible(root, compat->ocd_str)) break; return (compat); diff --git a/sys/arm/annapurna/alpine/alpine_machdep_mp.c b/sys/arm/annapurna/alpine/alpine_machdep_mp.c index ada7b02..7dbdd24 100644 --- a/sys/arm/annapurna/alpine/alpine_machdep_mp.c +++ b/sys/arm/annapurna/alpine/alpine_machdep_mp.c @@ -89,7 +89,7 @@ static boolean_t alpine_validate_cpu(u_int, phandle_t, u_int, pcell_t *); static boolean_t alpine_validate_cpu(u_int id, phandle_t child, u_int addr_cell, pcell_t *reg) { - return fdt_is_compatible(child, "arm,cortex-a15"); + return 
ofw_bus_node_is_compatible(child, "arm,cortex-a15"); } static int diff --git a/sys/arm/arm/busdma_machdep-v6.c b/sys/arm/arm/busdma_machdep-v6.c index 58e2012..a6da87a 100644 --- a/sys/arm/arm/busdma_machdep-v6.c +++ b/sys/arm/arm/busdma_machdep-v6.c @@ -491,6 +491,7 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); newtag->alignment = MAX(parent->alignment, newtag->alignment); newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE; + newtag->flags |= parent->flags & BUS_DMA_COHERENT; if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) @@ -755,11 +756,19 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, } map->flags = DMAMAP_DMAMEM_ALLOC; - /* Choose a busdma buffer allocator based on memory type flags. */ - if (flags & BUS_DMA_COHERENT) { + /* For coherent memory, set the map flag that disables sync ops. */ + if (flags & BUS_DMA_COHERENT) + map->flags |= DMAMAP_COHERENT; + + /* + * Choose a busdma buffer allocator based on memory type flags. + * If the tag's COHERENT flag is set, that means normal memory + * is already coherent, use the normal allocator. 
+ */ + if ((flags & BUS_DMA_COHERENT) && + ((dmat->flags & BUS_DMA_COHERENT) == 0)) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; - map->flags |= DMAMAP_COHERENT; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; @@ -829,7 +838,8 @@ bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; - if (map->flags & DMAMAP_COHERENT) + if ((map->flags & DMAMAP_COHERENT) && + ((dmat->flags & BUS_DMA_COHERENT) == 0)) ba = coherent_allocator; else ba = standard_allocator; @@ -1030,7 +1040,7 @@ _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); - } else { + } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { if (map->sync_count > 0) sl_end = sl->paddr + sl->datacount; @@ -1144,7 +1154,7 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, sgsize)) { curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); - } else { + } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { if (map->sync_count > 0) { sl_pend = sl->paddr + sl->datacount; sl_vend = sl->vaddr + sl->datacount; @@ -1353,8 +1363,9 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); - dcache_wb_poc(bpage->vaddr, bpage->busaddr, - bpage->datacount); + if ((dmat->flags & BUS_DMA_COHERENT) == 0) + dcache_wb_poc(bpage->vaddr, + bpage->busaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; @@ -1374,8 +1385,9 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) { bpage = STAILQ_FIRST(&map->bpages); while (bpage != NULL) { - dcache_inv_poc_dma(bpage->vaddr, bpage->busaddr, - bpage->datacount); + if ((dmat->flags & BUS_DMA_COHERENT) == 0) + 
dcache_inv_poc_dma(bpage->vaddr, + bpage->busaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } } @@ -1391,8 +1403,9 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) */ if (op & BUS_DMASYNC_POSTREAD) { while (bpage != NULL) { - dcache_inv_poc(bpage->vaddr, bpage->busaddr, - bpage->datacount); + if ((dmat->flags & BUS_DMA_COHERENT) == 0) + dcache_inv_poc(bpage->vaddr, + bpage->busaddr, bpage->datacount); tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { @@ -1421,7 +1434,8 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) if (map->flags & DMAMAP_COHERENT) { if (op & BUS_DMASYNC_PREWRITE) { dsb(); - cpu_l2cache_drain_writebuf(); + if ((dmat->flags & BUS_DMA_COHERENT) == 0) + cpu_l2cache_drain_writebuf(); } return; } diff --git a/sys/arm/arm/cpufunc.c b/sys/arm/arm/cpufunc.c index e608cc7..304a015 100644 --- a/sys/arm/arm/cpufunc.c +++ b/sys/arm/arm/cpufunc.c @@ -905,6 +905,24 @@ pj4bv7_setup(void) void cortexa_setup(void) { + int cpuaux; + + cpuaux = (1 << 2); /* L1 prefetch by default */ +#ifdef ARM_L2_PREFETCH + /* + * L2 prefetch. + * Keep this if L2 enabled and cache controller supports it. 
+ */ + cpuaux |= (1 << 1); +#endif + +#ifdef SMP + cpuaux |= (1 << 6) | (1 << 0); /* Enable SMP + TLB broadcasting */ +#else + /* Set SMP bit in CPU to add it to the coherency domain */ + cpuaux |= (1 << 6); +#endif + armv7_auxctrl(cpuaux, cpuaux); cpu_scc_setup_ccnt(); } diff --git a/sys/arm/arm/cpuinfo.c b/sys/arm/arm/cpuinfo.c index 8099882..0f65985 100644 --- a/sys/arm/arm/cpuinfo.c +++ b/sys/arm/arm/cpuinfo.c @@ -30,10 +30,16 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> #include <machine/cpu.h> #include <machine/cpuinfo.h> +#if __ARM_ARCH >= 6 +void reinit_mmu(uint32_t ttb, uint32_t aux_clr, uint32_t aux_set); +#endif + struct cpuinfo cpuinfo = { /* Use safe defaults for start */ @@ -43,11 +49,43 @@ struct cpuinfo cpuinfo = .icache_line_mask = 31, }; +static SYSCTL_NODE(_hw, OID_AUTO, cpu, CTLFLAG_RD, 0, + "CPU"); +static SYSCTL_NODE(_hw_cpu, OID_AUTO, quirks, CTLFLAG_RD, 0, + "CPU quirks"); + +/* + * Tunable CPU quirks. + * Be careful, ACTRL cannot be changed if CPU is started in secure + * mode(world) and write to ACTRL can cause exception! + * These quirks are intended for optimizing CPU performance, not for + * applying errata workarounds. Nobody can expect that CPU with unfixed + * errata is stable enough to execute the kernel until quirks are applied. + */ +static uint32_t cpu_quirks_actlr_mask; +SYSCTL_INT(_hw_cpu_quirks, OID_AUTO, actlr_mask, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &cpu_quirks_actlr_mask, 0, + "Bits to be masked in ACTLR"); + +static uint32_t cpu_quirks_actlr_set; +SYSCTL_INT(_hw_cpu_quirks, OID_AUTO, actlr_set, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &cpu_quirks_actlr_set, 0, + "Bits to be set in ACTLR"); + + /* Read and parse CPU id scheme */ void cpuinfo_init(void) { + /* + * Prematurely fetch CPU quirks. Standard fetch for tunable + * sysctls is handled using SYSINIT, thus too late for boot CPU. + * Keep names in sync with sysctls. 
+ */ + TUNABLE_INT_FETCH("hw.cpu.quirks.actlr_mask", &cpu_quirks_actlr_mask); + TUNABLE_INT_FETCH("hw.cpu.quirks.actlr_set", &cpu_quirks_actlr_set); + cpuinfo.midr = cp15_midr_get(); /* Test old version id schemes first */ if ((cpuinfo.midr & CPU_ID_IMPLEMENTOR_MASK) == CPU_ID_ARM_LTD) { @@ -153,15 +191,17 @@ cpuinfo_init(void) #endif } +#if __ARM_ARCH >= 6 /* * Get bits that must be set or cleared in ACLR register. * Note: Bits in ACLR register are IMPLEMENTATION DEFINED. * Its expected that SCU is in operational state before this * function is called. */ -void +static void cpuinfo_get_actlr_modifier(uint32_t *actlr_mask, uint32_t *actlr_set) { + *actlr_mask = 0; *actlr_set = 0; @@ -236,3 +276,18 @@ cpuinfo_get_actlr_modifier(uint32_t *actlr_mask, uint32_t *actlr_set) return; } } + +/* Reinitialize MMU to final kernel mapping and apply all CPU quirks. */ +void +cpuinfo_reinit_mmu(uint32_t ttb) +{ + uint32_t actlr_mask; + uint32_t actlr_set; + + cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set); + actlr_mask |= cpu_quirks_actlr_mask; + actlr_set |= cpu_quirks_actlr_set; + reinit_mmu(ttb, actlr_mask, actlr_set); +} + +#endif /* __ARM_ARCH >= 6 */ diff --git a/sys/arm/arm/elf_trampoline.c b/sys/arm/arm/elf_trampoline.c index 0dee12b..53bf23a 100644 --- a/sys/arm/arm/elf_trampoline.c +++ b/sys/arm/arm/elf_trampoline.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <machine/pte-v4.h> #include <machine/cpufunc.h> #include <machine/armreg.h> +#include <machine/cpu.h> extern char kernel_start[]; extern char kernel_end[]; @@ -47,7 +48,7 @@ extern void *_end; void _start(void); void __start(void); -void __startC(void); +void __startC(unsigned r0, unsigned r1, unsigned r2, unsigned r3); extern unsigned int cpu_ident(void); extern void armv6_idcache_wbinv_all(void); @@ -124,6 +125,10 @@ static int arm_dcache_l2_nsets; static int arm_dcache_l2_assoc; static int arm_dcache_l2_linesize; +/* + * Boot parameters + */ +static struct arm_boot_params s_boot_params; extern int 
arm9_dcache_sets_inc; extern int arm9_dcache_sets_max; @@ -172,12 +177,17 @@ bzero(void *addr, int count) static void arm9_setup(void); void -_startC(void) +_startC(unsigned r0, unsigned r1, unsigned r2, unsigned r3) { int tmp1; unsigned int sp = ((unsigned int)&_end & ~3) + 4; unsigned int pc, kernphysaddr; + s_boot_params.abp_r0 = r0; + s_boot_params.abp_r1 = r1; + s_boot_params.abp_r2 = r2; + s_boot_params.abp_r3 = r3; + /* * Figure out the physical address the kernel was loaded at. This * assumes the entry point (this code right here) is in the first page, @@ -211,8 +221,15 @@ _startC(void) /* Temporary set the sp and jump to the new location. */ __asm __volatile( "mov sp, %1\n" + "mov r0, %2\n" + "mov r1, %3\n" + "mov r2, %4\n" + "mov r3, %5\n" "mov pc, %0\n" - : : "r" (target_addr), "r" (tmp_sp)); + : : "r" (target_addr), "r" (tmp_sp), + "r" (s_boot_params.abp_r0), "r" (s_boot_params.abp_r1), + "r" (s_boot_params.abp_r2), "r" (s_boot_params.abp_r3) + : "r0", "r1", "r2", "r3"); } #endif @@ -487,6 +504,7 @@ load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end, vm_offset_t lastaddr = 0; Elf_Addr ssym = 0; Elf_Dyn *dp; + struct arm_boot_params local_boot_params; eh = (Elf32_Ehdr *)kstart; ssym = 0; @@ -555,6 +573,12 @@ load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end, if (!d) return ((void *)lastaddr); + /* + * Now the stack is fixed, copy boot params + * before it's overrided + */ + memcpy(&local_boot_params, &s_boot_params, sizeof(local_boot_params)); + j = eh->e_phnum; for (i = 0; i < j; i++) { volatile char c; @@ -604,7 +628,10 @@ load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end, "mcr p15, 0, %0, c1, c0, 0\n" /* CP15_SCTLR(%0)*/ : "=r" (ssym)); /* Jump to the entry point. 
*/ - ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))(); + ((void(*)(unsigned, unsigned, unsigned, unsigned)) + (entry_point - KERNVIRTADDR + curaddr)) + (local_boot_params.abp_r0, local_boot_params.abp_r1, + local_boot_params.abp_r2, local_boot_params.abp_r3); __asm __volatile(".globl func_end\n" "func_end:"); diff --git a/sys/arm/arm/gic.c b/sys/arm/arm/gic.c index 7fe5f2d..2fe4a999 100644 --- a/sys/arm/arm/gic.c +++ b/sys/arm/arm/gic.c @@ -376,7 +376,7 @@ gic_decode_fdt(phandle_t iparent, pcell_t *intr, int *interrupt, if (self == 0) { for (ocd = compat_data; ocd->ocd_str != NULL; ocd++) { - if (fdt_is_compatible(iparent, ocd->ocd_str)) { + if (ofw_bus_node_is_compatible(iparent, ocd->ocd_str)) { self = iparent; break; } @@ -1656,11 +1656,11 @@ arm_gicv2m_alloc_msi(device_t dev, device_t child, int count, int maxcount, break; } - KASSERT((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI)!= 0, + KASSERT((psc->gic_irqs[end_irq].gi_flags & GI_FLAG_MSI)!= 0, ("%s: Non-MSI interrupt found", __func__)); /* This is already used */ - if ((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI_USED) == + if ((psc->gic_irqs[end_irq].gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED) { found = false; break; diff --git a/sys/arm/arm/identcpu-v4.c b/sys/arm/arm/identcpu-v4.c index 769f0b3..4627ab2 100644 --- a/sys/arm/arm/identcpu-v4.c +++ b/sys/arm/arm/identcpu-v4.c @@ -54,7 +54,10 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> char machine[] = "arm"; +static char cpu_model[128]; +SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE, + cpu_model, 0, "Machine model"); SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); @@ -309,11 +312,13 @@ identify_arm_cpu(void) for (i = 0; cpuids[i].cpuid != 0; i++) if (cpuids[i].cpuid == (cpuid & CPU_ID_CPU_MASK)) { cpu_class = cpuids[i].cpu_class; - printf("CPU: %s %s (%s core)\n", - cpuids[i].cpu_name, + memset(cpu_model, 0, sizeof(cpu_model)); + snprintf(cpu_model, sizeof(cpu_model) - 1, + "%s %s (%s 
core)", cpuids[i].cpu_name, cpuids[i].cpu_steppings[cpuid & CPU_ID_REVISION_MASK], cpu_classes[cpu_class].class_name); + printf("CPU: %s\n", cpu_model); break; } if (cpuids[i].cpuid == 0) diff --git a/sys/arm/arm/identcpu-v6.c b/sys/arm/arm/identcpu-v6.c index 7cc8170..9868cb4 100644 --- a/sys/arm/arm/identcpu-v6.c +++ b/sys/arm/arm/identcpu-v6.c @@ -52,7 +52,13 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> char machine[] = "arm"; - +static char cpu_info[128]; +static char cpu_model[128]; + +SYSCTL_STRING(_hw, OID_AUTO, cpuinfo, CTLFLAG_RD | CTLFLAG_MPSAFE, + cpu_info, 0, "Machine CPU information"); +SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE, + cpu_model, 0, "Machine model"); SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); @@ -60,29 +66,47 @@ static char hw_buf[81]; static int hw_buf_idx; static bool hw_buf_newline; +enum cpu_class cpu_class = CPU_CLASS_NONE; + static struct { int implementer; int part_number; char *impl_name; char *core_name; + enum cpu_class cpu_class; } cpu_names[] = { - {CPU_IMPLEMENTER_ARM, CPU_ARCH_ARM1176, "ARM", "ARM1176"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A5 , "ARM", "Cortex-A5"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A7 , "ARM", "Cortex-A7"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A8 , "ARM", "Cortex-A8"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A9 , "ARM", "Cortex-A9"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A12, "ARM", "Cortex-A12"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A15, "ARM", "Cortex-A15"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A17, "ARM", "Cortex-A17"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A53, "ARM", "Cortex-A53"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A57, "ARM", "Cortex-A57"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A72, "ARM", "Cortex-A72"}, - {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A73, "ARM", "Cortex-A73"}, - - {CPU_IMPLEMENTER_MRVL, CPU_ARCH_SHEEVA_581, "Marwell", "PJ4 v7"}, - {CPU_IMPLEMENTER_MRVL, CPU_ARCH_SHEEVA_584, "Marwell", "PJ4MP 
v7"}, - - {CPU_IMPLEMENTER_QCOM, CPU_ARCH_KRAIT_300, "Qualcomm", "Krait 300"}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_ARM1176, "ARM", "ARM1176", + CPU_CLASS_ARM11J}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A5 , "ARM", "Cortex-A5", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A7 , "ARM", "Cortex-A7", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A8 , "ARM", "Cortex-A8", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A9 , "ARM", "Cortex-A9", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A12, "ARM", "Cortex-A12", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A15, "ARM", "Cortex-A15", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A17, "ARM", "Cortex-A17", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A53, "ARM", "Cortex-A53", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A57, "ARM", "Cortex-A57", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A72, "ARM", "Cortex-A72", + CPU_CLASS_CORTEXA}, + {CPU_IMPLEMENTER_ARM, CPU_ARCH_CORTEX_A73, "ARM", "Cortex-A73", + CPU_CLASS_CORTEXA}, + + {CPU_IMPLEMENTER_MRVL, CPU_ARCH_SHEEVA_581, "Marvell", "PJ4 v7", + CPU_CLASS_MARVELL}, + {CPU_IMPLEMENTER_MRVL, CPU_ARCH_SHEEVA_584, "Marvell", "PJ4MP v7", + CPU_CLASS_MARVELL}, + + {CPU_IMPLEMENTER_QCOM, CPU_ARCH_KRAIT_300, "Qualcomm", "Krait 300", + CPU_CLASS_KRAIT}, }; @@ -263,9 +287,17 @@ identify_arm_cpu(void) /* * CPU */ + memset(cpu_model, 0, sizeof(cpu_model)); for(i = 0; i < nitems(cpu_names); i++) { if (cpu_names[i].implementer == cpuinfo.implementer && cpu_names[i].part_number == cpuinfo.part_number) { + cpu_class = cpu_names[i].cpu_class; + snprintf(cpu_model, sizeof(cpu_model) - 1, + "CPU: %s %s r%dp%d (ECO: 0x%08X)", + cpu_names[i].impl_name, cpu_names[i].core_name, + cpuinfo.revision, cpuinfo.patch, + cpuinfo.midr != cpuinfo.revidr ? 
+ cpuinfo.revidr : 0); printf("CPU: %s %s r%dp%d (ECO: 0x%08X)\n", cpu_names[i].impl_name, cpu_names[i].core_name, cpuinfo.revision, cpuinfo.patch, @@ -275,8 +307,11 @@ identify_arm_cpu(void) } } - if (i >= nitems(cpu_names)) + if (i >= nitems(cpu_names)) { printf("unknown CPU (ID = 0x%x)\n", cpuinfo.midr); + snprintf(cpu_model, sizeof(cpu_model) - 1, + "unknown CPU (ID = 0x%x)", cpuinfo.midr); + } printf("CPU Features: \n"); hw_buf_idx = 0; @@ -318,8 +353,11 @@ identify_arm_cpu(void) if (val == 1) add_cap("Coherent Walk"); - if (hw_buf_idx != 0) + memset(cpu_info, 0, sizeof(cpu_info)); + if (hw_buf_idx != 0) { printf("%s\n", hw_buf); + snprintf(cpu_info, sizeof(cpu_info) - 1, "%s", hw_buf); + } printf("Optional instructions: \n"); hw_buf_idx = 0; diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index 032a78b..1530d3c 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -813,9 +813,10 @@ initarm(struct arm_boot_params *abp) /* * Add one table for end of kernel map, one for stacks, msgbuf and - * L1 and L2 tables map and one for vectors map. + * L1 and L2 tables map, one for vectors map and two for + * l2 structures from pmap_bootstrap. */ - l2size += 3; + l2size += 5; /* Make it divisible by 4 */ l2size = (l2size + 3) & ~3; diff --git a/sys/arm/arm/mp_machdep.c b/sys/arm/arm/mp_machdep.c index 2dc7171..ae3d724 100644 --- a/sys/arm/arm/mp_machdep.c +++ b/sys/arm/arm/mp_machdep.c @@ -155,11 +155,9 @@ init_secondary(int cpu) #ifndef INTRNG int start = 0, end = 0; #endif - uint32_t actlr_mask, actlr_set; pmap_set_tex(); - cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set); - reinit_mmu(pmap_kern_ttb, actlr_mask, actlr_set); + cpuinfo_reinit_mmu(pmap_kern_ttb); cpu_setup(); /* Provide stack pointers for other processor modes. 
*/ diff --git a/sys/arm/arm/nexus.c b/sys/arm/arm/nexus.c index 879bc30..423e11f 100644 --- a/sys/arm/arm/nexus.c +++ b/sys/arm/arm/nexus.c @@ -62,6 +62,8 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> #include <machine/intr.h> +#include <arm/arm/nexusvar.h> + #ifdef FDT #include <machine/fdt.h> #include <dev/ofw/ofw_bus_subr.h> @@ -87,6 +89,7 @@ static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, static int nexus_activate_resource(device_t, device_t, int, int, struct resource *); static bus_space_tag_t nexus_get_bus_tag(device_t, device_t); +static bus_dma_tag_t nexus_get_dma_tag(device_t dev, device_t child); #ifdef INTRNG #ifdef SMP static int nexus_bind_intr(device_t, device_t, struct resource *, int); @@ -112,6 +115,13 @@ static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr); #endif +/* + * Normally NULL (which results in defaults which are handled in + * busdma_machdep), platform init code can use nexus_set_dma_tag() to set this + * to a tag that will be inherited by all busses and devices on the platform. 
+ */ +static bus_dma_tag_t nexus_dma_tag; + static device_method_t nexus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_probe), @@ -127,6 +137,7 @@ static device_method_t nexus_methods[] = { DEVMETHOD(bus_setup_intr, nexus_setup_intr), DEVMETHOD(bus_teardown_intr, nexus_teardown_intr), DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag), + DEVMETHOD(bus_get_dma_tag, nexus_get_dma_tag), #ifdef INTRNG DEVMETHOD(bus_describe_intr, nexus_describe_intr), #ifdef SMP @@ -275,6 +286,20 @@ nexus_get_bus_tag(device_t bus __unused, device_t child __unused) #endif } +static bus_dma_tag_t +nexus_get_dma_tag(device_t dev, device_t child) +{ + + return nexus_dma_tag; +} + +void +nexus_set_dma_tag(bus_dma_tag_t tag) +{ + + nexus_dma_tag = tag; +} + static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) diff --git a/sys/arm/arm/nexusvar.h b/sys/arm/arm/nexusvar.h new file mode 100644 index 0000000..ec2c808 --- /dev/null +++ b/sys/arm/arm/nexusvar.h @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2017 Ian Lepore <ian@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _ARM_ARM_NEXUSVAR_H_ +#define _ARM_ARM_NEXUSVAR_H_ + +/* Set a platform busdma tag to be inherited by all busses and devices. */ +void nexus_set_dma_tag(bus_dma_tag_t _tag); + +#endif + diff --git a/sys/arm/arm/pl310.c b/sys/arm/arm/pl310.c index c269b47..3ccace3 100644 --- a/sys/arm/arm/pl310.c +++ b/sys/arm/arm/pl310.c @@ -207,6 +207,10 @@ pl310_cache_sync(void) if ((pl310_softc == NULL) || !pl310_softc->sc_enabled) return; + /* Do not sync outer cache on IO coherent platform */ + if (pl310_softc->sc_io_coherent) + return; + #ifdef PL310_ERRATA_753970 if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r3p0) /* Write uncached PL310 register */ @@ -441,6 +445,7 @@ pl310_attach(device_t dev) struct pl310_softc *sc = device_get_softc(dev); int rid; uint32_t cache_id, debug_ctrl; + phandle_t node; sc->sc_dev = dev; rid = 0; @@ -468,6 +473,15 @@ pl310_attach(device_t dev) (cache_id >> CACHE_ID_RELEASE_SHIFT) & CACHE_ID_RELEASE_MASK); /* + * Test for "arm,io-coherent" property and disable sync operation if + * platform is I/O coherent. Outer sync operations are not needed + * on coherent platform and may be harmful in certain situations. + */ + node = ofw_bus_get_node(dev); + if (OF_hasprop(node, "arm,io-coherent")) + sc->sc_io_coherent = true; + + /* * If L2 cache is already enabled then something has violated the rules, * because caches are supposed to be off at kernel entry. 
The cache * must be disabled to write the configuration registers without diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 3f41192..494541c 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -522,11 +522,11 @@ void pmap_remap_vm_attr(vm_memattr_t old_attr, vm_memattr_t new_attr) { int old_idx, new_idx; - + /* Map VM memattrs to indexes to tex_class table. */ - old_idx = pte2_attr_tab[(int)old_attr]; - new_idx = pte2_attr_tab[(int)new_attr]; - + old_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)old_attr]); + new_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)new_attr]); + /* Replace TEX attribute and apply it. */ tex_class[old_idx] = tex_class[new_idx]; pmap_set_tex(); @@ -762,7 +762,7 @@ pmap_bootstrap_prepare(vm_paddr_t last) pt1_entry_t *pte1p; pt2_entry_t *pte2p; u_int i; - uint32_t actlr_mask, actlr_set, l1_attr; + uint32_t l1_attr; /* * Now, we are going to make real kernel mapping. Note that we are @@ -879,8 +879,7 @@ pmap_bootstrap_prepare(vm_paddr_t last) /* Finally, switch from 'boot_pt1' to 'kern_pt1'. */ pmap_kern_ttb = base_pt1 | ttb_flags; - cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set); - reinit_mmu(pmap_kern_ttb, actlr_mask, actlr_set); + cpuinfo_reinit_mmu(pmap_kern_ttb); /* * Initialize the first available KVA. As kernel image is mapped by * sections, we are leaving some gap behind. 
diff --git a/sys/arm/arm/syscall.c b/sys/arm/arm/syscall.c index 1ff48ab..8c33fff 100644 --- a/sys/arm/arm/syscall.c +++ b/sys/arm/arm/syscall.c @@ -99,12 +99,14 @@ __FBSDID("$FreeBSD$"); void swi_handler(struct trapframe *); int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; + struct syscall_args *sa; int error; + sa = &td->td_sa; sa->code = td->td_frame->tf_r7; ap = &td->td_frame->tf_r0; if (sa->code == SYS_syscall) { @@ -141,15 +143,14 @@ cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) static void syscall(struct thread *td, struct trapframe *frame) { - struct syscall_args sa; int error; - sa.nap = 4; + td->td_sa.nap = 4; - error = syscallenter(td, &sa); + error = syscallenter(td); KASSERT(error != 0 || td->td_ar == NULL, ("returning from syscall with td_ar set!")); - syscallret(td, error, &sa); + syscallret(td, error); } void diff --git a/sys/arm/at91/at91_common.c b/sys/arm/at91/at91_common.c index 696161f..8ef16fe 100644 --- a/sys/arm/at91/at91_common.c +++ b/sys/arm/at91/at91_common.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <dev/fdt/fdt_common.h> #include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus_subr.h> #include <machine/fdt.h> @@ -56,7 +57,7 @@ fdt_aic_decode_ic(phandle_t node, pcell_t *intr, int *interrupt, int *trig, { int offset; - if (fdt_is_compatible(node, "atmel,at91rm9200-aic")) + if (ofw_bus_node_is_compatible(node, "atmel,at91rm9200-aic")) offset = 0; else return (ENXIO); diff --git a/sys/arm/at91/at91_spi.c b/sys/arm/at91/at91_spi.c index f96114a..e9b4b13 100644 --- a/sys/arm/at91/at91_spi.c +++ b/sys/arm/at91/at91_spi.c @@ -302,6 +302,8 @@ at91_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) /* get the proper chip select */ spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + sc = device_get_softc(dev); i = 0; diff --git a/sys/arm/broadcom/bcm2835/bcm2835_cpufreq.c 
b/sys/arm/broadcom/bcm2835/bcm2835_cpufreq.c index 46856f2..4921779 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_cpufreq.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_cpufreq.c @@ -44,6 +44,11 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu.h> #include <machine/intr.h> +#include <dev/fdt/fdt_common.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + #include <arm/broadcom/bcm2835/bcm2835_mbox.h> #include <arm/broadcom/bcm2835/bcm2835_mbox_prop.h> #include <arm/broadcom/bcm2835/bcm2835_vcbus.h> @@ -119,6 +124,13 @@ struct bcm2835_cpufreq_softc { struct intr_config_hook init_hook; }; +static struct ofw_compat_data compat_data[] = { + { "broadcom,bcm2835-vc", 1 }, + { "broadcom,bcm2708-vc", 1 }, + { "brcm,bcm2709", 1 }, + { NULL, 0 } +}; + static int cpufreq_verbose = 0; TUNABLE_INT("hw.bcm2835.cpufreq.verbose", &cpufreq_verbose); static int cpufreq_lowest_freq = DEFAULT_LOWEST_FREQ; @@ -1244,6 +1256,16 @@ bcm2835_cpufreq_init(void *arg) static void bcm2835_cpufreq_identify(driver_t *driver, device_t parent) { + const struct ofw_compat_data *compat; + phandle_t root; + + root = OF_finddevice("/"); + for (compat = compat_data; compat->ocd_str != NULL; compat++) + if (ofw_bus_node_is_compatible(root, compat->ocd_str)) + break; + + if (compat->ocd_data == 0) + return; DPRINTF("driver=%p, parent=%p\n", driver, parent); if (device_find_child(parent, "bcm2835_cpufreq", -1) != NULL) diff --git a/sys/arm/broadcom/bcm2835/bcm2835_spi.c b/sys/arm/broadcom/bcm2835/bcm2835_spi.c index 4ade83f..8396426 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_spi.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_spi.c @@ -434,6 +434,9 @@ bcm_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) /* Get the proper chip select for this child. 
*/ spibus_get_cs(child, &cs); + + cs &= ~SPIBUS_CS_HIGH; + if (cs > 2) { device_printf(dev, "Invalid chip select %d requested by %s\n", cs, diff --git a/sys/arm/cloudabi32/cloudabi32_sysvec.c b/sys/arm/cloudabi32/cloudabi32_sysvec.c index 100fa47..a0bebcc 100644 --- a/sys/arm/cloudabi32/cloudabi32_sysvec.c +++ b/sys/arm/cloudabi32/cloudabi32_sysvec.c @@ -67,11 +67,15 @@ cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, } static int -cloudabi32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cloudabi32_fetch_syscall_args(struct thread *td) { - struct trapframe *frame = td->td_frame; + struct trapframe *frame; + struct syscall_args *sa; int error; + frame = td->td_frame; + sa = &td->td_sa; + /* Obtain system call number. */ sa->code = frame->tf_r12; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) diff --git a/sys/arm/conf/ARMADA38X b/sys/arm/conf/ARMADA38X index aa5f23d..e9f21a6 100644 --- a/sys/arm/conf/ARMADA38X +++ b/sys/arm/conf/ARMADA38X @@ -12,31 +12,29 @@ ident ARMADA38X options SOC_MV_ARMADA38X makeoptions WERROR="-Werror" +makeoptions MODULES_EXTRA="dtb/mv" -options MD_ROOT +#options MD_ROOT #makeoptions MFS_IMAGE=/path/to/miniroot #options ROOTDEVNAME=\"ufs:md0\" -options ROOTDEVNAME=\"/dev/da0s1a\" +#options ROOTDEVNAME=\"/dev/da0s1a\" options SCHED_ULE # ULE scheduler options SMP -# Pseudo devices -device random -device pty -device loop -device md +options VM_KMEM_SIZE_MAX=0x9CCD000 # Serial ports device uart device uart_snps # Network -device ether -device vlan -device mii device bpf -device re +device mii +device mdio +device etherswitch +device e6000sw +device neta # PCI device pci @@ -57,17 +55,79 @@ device scbus device pass device da +# USB ethernet support, requires miibus +device miibus + +# MMC/SD/SDIO Card slot support +device mmc # mmc/sd bus +device mmcsd # mmc/sd flash cards +device sdhci # mmc/sd host controller + +# SATA +device ahci + +# SPI +device mx25l +device mv_spi +device spibus + # I2C device iic device 
iicbus device twsi +# LED drivers +device gpio +device pca9552 +device is31fl319x + +# Wireless NIC cards +device wlan # 802.11 support +options IEEE80211_DEBUG # enable debug msgs +options IEEE80211_AMPDU_AGE # age frames in AMPDU reorder q's +options IEEE80211_SUPPORT_MESH # enable 802.11s draft support +device wlan_wep # 802.11 WEP support +device wlan_ccmp # 802.11 CCMP support +device wlan_tkip # 802.11 TKIP support +device wlan_amrr # AMRR transmit rate control algorithm +device an # Aironet 4500/4800 802.11 wireless NICs. +device ath # Atheros NICs +device ath_pci # Atheros pci/cardbus glue +device ath_hal # pci/cardbus chip support +options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors +options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation +options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later +device ath_rate_sample # SampleRate tx rate control for ath +#device bwi # Broadcom BCM430x/BCM431x wireless NICs. +#device bwn # Broadcom BCM43xx wireless NICs. +device ipw # Intel 2100 wireless NICs. +device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. +device iwn # Intel 4965/1000/5000/6000 wireless NICs. +device malo # Marvell Libertas wireless NICs. +device mwl # Marvell 88W8363 802.11n wireless NICs. +device ral # Ralink Technology RT2500 wireless NICs. +device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. +device wpi # Intel 3945ABG wireless NICs. + +# Pseudo devices. +device loop # Network loopback +device random # Entropy device +device ether # Ethernet support +device vlan # 802.1Q VLAN support +device tun # Packet tunnel. 
+device md # Memory "disks" +device gif # IPv6 and IPv4 tunneling +device firmware # firmware assist module +device pty + # CESA device cesa device crypto device cryptodev -#FDT +# L2 Cache +device pl310 # PL310 L2 cache controller +options ARM_L2_PREFETCH # Disabled by default on Marvell Linux + +# FDT options FDT -options FDT_DTB_STATIC -makeoptions FDT_DTS_FILE=armada-388-gp.dts diff --git a/sys/arm/conf/ARMADA38X-netboot b/sys/arm/conf/ARMADA38X-netboot new file mode 100644 index 0000000..f4f4552 --- /dev/null +++ b/sys/arm/conf/ARMADA38X-netboot @@ -0,0 +1,33 @@ +# +# ARMADA38X-netboot -- Custom configuration for the SG-3100 with netboot +# options, for factory installation. Check out # http://www.netgate.com +# +# For more information on this file, please read the config(5) manual page, +# and/or the handbook section on Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. 
+# +# $FreeBSD$ + +include "ARMADA38X" + +ident ARMADA38X-netboot + +# MD_ROOT options +options MD_ROOT +options MD_ROOT_SIZE=16384 +options ROOTDEVNAME=\"ufs:md0.uzip\" +options GEOM_UZIP + +options FDT_DTB_STATIC +makeoptions FDT_DTS_FILE=armada-385-80300-0148-G00-X100.dts diff --git a/sys/arm/conf/pfSense b/sys/arm/conf/pfSense new file mode 100644 index 0000000..bde8b68 --- /dev/null +++ b/sys/arm/conf/pfSense @@ -0,0 +1,155 @@ + +nooptions KDB_TRACE + +ident pfSense + +nooptions MAC # TrustedBSD MAC Framework +nooptions COMPAT_FREEBSD4 # Compatible with FreeBSD4 +nooptions COMPAT_FREEBSD5 # Compatible with FreeBSD5 +nooptions COMPAT_FREEBSD6 # Compatible with FreeBSD6 +nooptions COMPAT_FREEBSD7 # Compatible with FreeBSD7 + +options GEOM_MIRROR +options GEOM_UZIP +options GEOM_ELI +options GEOM_BDE + +options UNIONFS +options NULLFS +options PPS_SYNC + +# Wireless +nooptions IEEE80211_DEBUG # enable debug msgs +device wlan_rssadapt +device wlan_xauth +device wlan_acl +device iwifw +device ipwfw # Firmware for Intel PRO/Wireless 2100 IEEE 802.11 driver +device wpifw # Firmware for Intel 3945ABG Wireless LAN IEEE 802.11 driver +device iwnfw # Firmware for Intel Wireless WiFi Link 4965AGN IEEE 802.11n driver +device uath # Atheros USB IEEE 802.11a/b/g wireless network device +device ralfw # Firmware for Ralink Technology RT2500 wireless NICs. 
+device ural # Ralink Technology RT2500USB IEEE 802.11 driver +device urtw # Realtek RTL8187B/L USB IEEE 802.11b/g wireless network device +device rum # Ralink Technology USB IEEE 802.11a/b/g wireless network device +device mwlfw # Firmware for Marvell 88W8363 IEEE 802.11n wireless network driver +device upgt # Conexant/Intersil PrismGT SoftMAC USB IEEE 802.11b/g wireless +device udav # Davicom DM9601 USB Ethernet driver +device axe +device axge +device aue +device cue +device kue +device mos +device rsu +device rsufw +device run # Ralink RT2700U/RT2800U/RT3000U USB 802.11agn +device runfw +device rue +#device rtwn +#device urtwnfw +device bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwi # Broadcom BCM43xx IEEE 802.11b/g wireless network driver + +# USB support +nooptions USB_DEBUG # enable debug msgs + +# 3G devices +device ufoma +device ucom +device uslcom +device uplcom +device umct +device uvisor +device uark +device uftdi +device uvscom +device umodem +device u3g +device cdce + +device uhid # "Human Interface Devices" + +# pfsense addons + +device tap +device gre +device if_bridge +device carp +device lagg +device vte + +# IP/IPFW +options IPFIREWALL_DEFAULT_TO_ACCEPT +options IPFIREWALL_VERBOSE +options IPSTEALTH + +# Netgraph +options NETGRAPH #netgraph(4) system +options NETGRAPH_VLAN +options NETGRAPH_L2TP +options NETGRAPH_BPF +options NETGRAPH_ETHER +options NETGRAPH_IFACE +options NETGRAPH_EIFACE +options NETGRAPH_PPP +options NETGRAPH_PPPOE +options NETGRAPH_PPTPGRE +options NETGRAPH_RFC1490 +options NETGRAPH_SOCKET +options NETGRAPH_TTY +options NETGRAPH_MPPC_ENCRYPTION +options NETGRAPH_UI +options NETGRAPH_VJC +options NETGRAPH_KSOCKET +options NETGRAPH_LMI +options NETGRAPH_ONE2MANY +options NETGRAPH_BRIDGE +options NETGRAPH_CISCO +options NETGRAPH_ECHO +options NETGRAPH_ASYNC +options NETGRAPH_FRAME_RELAY +options NETGRAPH_HOLE +options NETGRAPH_TEE +options NETGRAPH_TCPMSS +options NETGRAPH_PIPE +options NETGRAPH_CAR 
+options NETGRAPH_DEFLATE +options NETGRAPH_PRED1 + +# IPSEC +options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 +options TCP_SIGNATURE +device enc + +# ALTQ +options ALTQ +options ALTQ_CBQ +options ALTQ_RED +options ALTQ_RIO +options ALTQ_HFSC +options ALTQ_PRIQ +options ALTQ_FAIRQ +options ALTQ_NOPCC +options ALTQ_CODEL + +# Squid related settings +options MSGMNB=8192 # max # of bytes in a queue +options MSGMNI=40 # number of message queue identifiers +options MSGSEG=512 # number of message segments per queue +options MSGSSZ=32 # size of a message segment +options MSGTQL=2048 # max messages in system + +device pf +device pflog +device pfsync + +device rndtest # FIPS 140-2 entropy tester +device hifn # Hifn 7951, 7781, etc. +options HIFN_DEBUG # enable debugging support: hw.hifn.debug +options HIFN_RNDTEST # enable rndtest support +device ubsec # Broadcom 5501, 5601, 58xx +device safe # safe -- SafeNet crypto accelerator + +options MROUTING diff --git a/sys/arm/conf/pfSense-ClearFog b/sys/arm/conf/pfSense-ClearFog new file mode 100644 index 0000000..374fed1 --- /dev/null +++ b/sys/arm/conf/pfSense-ClearFog @@ -0,0 +1,4 @@ +include ARMADA38X +include pfSense + +ident pfSense-Clearfog diff --git a/sys/arm/conf/pfSense-SG-3100 b/sys/arm/conf/pfSense-SG-3100 new file mode 100644 index 0000000..68cd76a --- /dev/null +++ b/sys/arm/conf/pfSense-SG-3100 @@ -0,0 +1,4 @@ +include ARMADA38X +include pfSense + +ident pfSense-SG-3100 diff --git a/sys/arm/conf/pfSense-uBMC b/sys/arm/conf/pfSense-uBMC new file mode 100644 index 0000000..cdcd25b --- /dev/null +++ b/sys/arm/conf/pfSense-uBMC @@ -0,0 +1,4 @@ +include uBMC +include pfSense + +ident pfSense-uBMC diff --git a/sys/arm/conf/pfSense-uFW b/sys/arm/conf/pfSense-uFW new file mode 100644 index 0000000..4254b0f --- /dev/null +++ b/sys/arm/conf/pfSense-uFW @@ -0,0 +1,4 @@ +include uFW +include pfSense + +ident pfSense-uFW diff --git a/sys/arm/conf/uBMC b/sys/arm/conf/uBMC new file 
mode 100644 index 0000000..baff3ef --- /dev/null +++ b/sys/arm/conf/uBMC @@ -0,0 +1,126 @@ +# +# uBMC -- Custom configuration for the micro-BMC development kit, check out +# http://www.netgate.com and http://www.adi.com +# +# For more information on this file, please read the config(5) manual page, +# and/or the handbook section on Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. +# +# $FreeBSD$ + +ident uBMC + +include "std.armv6" +include "../ti/am335x/std.am335x" + +makeoptions MODULES_EXTRA="dtb/am335x am335x_dmtpps" + +options INTRNG + +options SCHED_4BSD # 4BSD scheduler +options PLATFORM + +# NFS server support +#options NFSD + +# NFS root from boopt/dhcp +#options BOOTP +#options BOOTP_NFSROOT +#options BOOTP_COMPAT +#options BOOTP_NFSV3 +#options BOOTP_WIRED_TO=cpsw0 + +# Boot device is 2nd slice on eMMC card +options ROOTDEVNAME=\"ufs:mmcsd0s2a\" + +# MMC/SD/SDIO Card slot support +device mmc # mmc/sd bus +device mmcsd # mmc/sd flash cards +device sdhci # mmc/sd host controller + +# I2C support +device iicbus +device iic +device ti_i2c + +# eeprom +device icee + +# lm75 (carrier board) +device lm75 + +#define am335x_dmtpps # Pulse Per Second capture driver + +# Console and misc +device uart +device uart_ns8250 +device pty +device snp +device md +device random # Entropy device + +# GPIO +device gpio +device gpioled + +# ADC support +device ti_adc + +# SPI +device ti_spi +device spibus +device mx25l + +# Watchdog support +# If we 
don't enable the watchdog driver, the system could potentially +# reboot automatically because the boot loader might have enabled the +# watchdog. +device ti_wdt + +# Mailbox support +device ti_mbox + +# PMU support (for CCNT). +device pmu + +# USB support +device usb +options USB_HOST_ALIGN=64 # Align usb buffers to cache line size. +device musb +device umass +device scbus # SCSI bus (required for ATA/SCSI) +device da # Direct Access (disks) + +# USB ethernet support, requires miibus +device miibus + +# Ethernet +device loop +device ether +device micphy +device cpsw +device bpf + +# Netmap provides direct access to TX/RX rings on supported NICs +#device netmap # netmap(4) support + +# Device mode support and USFS template +device usb_template # Control of the gadget +device usfs + +# Pinmux +device fdt_pinctrl + +# Flattened Device Tree +options FDT # Configure using FDT/DTB data diff --git a/sys/arm/conf/uBMC-netboot b/sys/arm/conf/uBMC-netboot new file mode 100644 index 0000000..9d1160b --- /dev/null +++ b/sys/arm/conf/uBMC-netboot @@ -0,0 +1,128 @@ +# +# uBMC -- Custom configuration for the micro-BMC development kit, check out +# http://www.netgate.com and http://www.adi.com +# +# For more information on this file, please read the config(5) manual page, +# and/or the handbook section on Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. 
+# +# $FreeBSD$ + +ident uBMC-netboot + +include "std.armv6" +include "../ti/am335x/std.am335x" + +makeoptions MODULES_EXTRA="dtb/am335x am335x_dmtpps" + +options INTRNG + +options SCHED_4BSD # 4BSD scheduler +options PLATFORM + +# NFS server support +#options NFSD + +# NFS root from boopt/dhcp +options BOOTP +options BOOTP_NFSROOT +options BOOTP_COMPAT +options BOOTP_NFSV3 +options BOOTP_WIRED_TO=cpsw0 + +# Boot device is 2nd slice on eMMC card +options ROOTDEVNAME=\"ufs:mmcsd0s2a\" + +# MMC/SD/SDIO Card slot support +device mmc # mmc/sd bus +device mmcsd # mmc/sd flash cards +device sdhci # mmc/sd host controller + +# I2C support +device iicbus +device iic +device ti_i2c + +# eeprom +device icee + +# lm75 (carrier board) +device lm75 + +#define am335x_dmtpps # Pulse Per Second capture driver + +# Console and misc +device uart +device uart_ns8250 +device pty +device snp +device md +device random # Entropy device + +# GPIO +device gpio +device gpioled + +# ADC support +device ti_adc + +# SPI +device ti_spi +device spibus +device mx25l + +# Watchdog support +# If we don't enable the watchdog driver, the system could potentially +# reboot automatically because the boot loader might have enabled the +# watchdog. +device ti_wdt + +# Mailbox support +device ti_mbox + +# PMU support (for CCNT). +device pmu + +# USB support +device usb +options USB_HOST_ALIGN=64 # Align usb buffers to cache line size. 
+device musb +device umass +device scbus # SCSI bus (required for ATA/SCSI) +device da # Direct Access (disks) + +# USB ethernet support, requires miibus +device miibus + +# Ethernet +device loop +device ether +device micphy +device cpsw +device bpf + +# Netmap provides direct access to TX/RX rings on supported NICs +device netmap # netmap(4) support + +# Device mode support and USFS template +device usb_template # Control of the gadget +device usfs + +# Pinmux +device fdt_pinctrl + +# Flattened Device Tree +options FDT # Configure using FDT/DTB data +options FDT_DTB_STATIC +makeoptions FDT_DTS_FILE=ubmc.dts diff --git a/sys/arm/conf/uFW b/sys/arm/conf/uFW new file mode 100644 index 0000000..002e6db --- /dev/null +++ b/sys/arm/conf/uFW @@ -0,0 +1,37 @@ +# +# uFW-netboot -- Custom configuration for the micro-firewall kit with netboot +# options, check out # http://www.netgate.com +# +# For more information on this file, please read the config(5) manual page, +# and/or the handbook section on Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. 
+# +# $FreeBSD$ + +include "uFW-base" + +ident uFW + +# NFS server support +#options NFSD + +# NFS root from boopt/dhcp +#options BOOTP +#options BOOTP_NFSROOT +#options BOOTP_COMPAT +#options BOOTP_NFSV3 +#options BOOTP_WIRED_TO=cpsw0 + +# Boot device is 2nd slice on MMC/SD card +options ROOTDEVNAME=\"ufs:mmcsd0s2a\" diff --git a/sys/arm/conf/uFW-base b/sys/arm/conf/uFW-base new file mode 100644 index 0000000..9f160ff --- /dev/null +++ b/sys/arm/conf/uFW-base @@ -0,0 +1,141 @@ +# +# uFW -- Custom configuration for the microFirewall kit, check out +# https://netgate.com/products/sg-1000.html +# +# For more information on this file, please read the config(5) manual page, +# and/or the handbook section on Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. 
+# +# $FreeBSD$ + +ident uFW + +include "std.armv6" +include "../ti/am335x/std.am335x" + +makeoptions MODULES_EXTRA="dtb/am335x am335x_dmtpps" + +options INTRNG + +options SCHED_4BSD # 4BSD scheduler +options PLATFORM + +# MMC/SD/SDIO Card slot support +device mmc # mmc/sd bus +device mmcsd # mmc/sd flash cards +device sdhci # mmc/sd host controller + +# I2C support +device iicbus +device iic +device ti_i2c + +# eeprom +device icee + +#define am335x_dmtpps # Pulse Per Second capture driver + +# Console and misc +device uart +device uart_ns8250 +device pty +device snp + +# GPIO +device gpio +#device gpioled + +# ADC support +device ti_adc + +# Watchdog support +# If we don't enable the watchdog driver, the system could potentially +# reboot automatically because the boot loader might have enabled the +# watchdog. +device ti_wdt + +# Mailbox support +device ti_mbox + +# PMU support (for CCNT). +device pmu + +# Wireless NIC cards +device wlan # 802.11 support +options IEEE80211_DEBUG # enable debug msgs +options IEEE80211_AMPDU_AGE # age frames in AMPDU reorder q's +options IEEE80211_SUPPORT_MESH # enable 802.11s draft support +device wlan_wep # 802.11 WEP support +device wlan_ccmp # 802.11 CCMP support +device wlan_tkip # 802.11 TKIP support +device wlan_amrr # AMRR transmit rate control algorithm +device an # Aironet 4500/4800 802.11 wireless NICs. +device ath # Atheros NICs +device ath_pci # Atheros pci/cardbus glue +device ath_hal # pci/cardbus chip support +options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors +options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation +options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later +device ath_rate_sample # SampleRate tx rate control for ath +#device bwi # Broadcom BCM430x/BCM431x wireless NICs. +#device bwn # Broadcom BCM43xx wireless NICs. +device ipw # Intel 2100 wireless NICs. +device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. +device iwn # Intel 4965/1000/5000/6000 wireless NICs. 
+device malo # Marvell Libertas wireless NICs. +device mwl # Marvell 88W8363 802.11n wireless NICs. +device ral # Ralink Technology RT2500 wireless NICs. +device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. +device wpi # Intel 3945ABG wireless NICs. + +# Pseudo devices. +device loop # Network loopback +device random # Entropy device +device ether # Ethernet support +device vlan # 802.1Q VLAN support +device tun # Packet tunnel. +device md # Memory "disks" +device gif # IPv6 and IPv4 tunneling +device firmware # firmware assist module + +# USB support +device usb +options USB_HOST_ALIGN=64 # Align usb buffers to cache line size. +device musb +device umass +device scbus # SCSI bus (required for ATA/SCSI) +device da # Direct Access (disks) + +# USB ethernet support, requires miibus +device miibus + +# Ethernet +device micphy +device cpsw +options CPSW_ETHERSWITCH +device etherswitch +device bpf + +# Netmap provides direct access to TX/RX rings on supported NICs +#device netmap # netmap(4) support + +# Device mode support and USFS template +#device usb_template # Control of the gadget +#device usfs + +# Pinmux +device fdt_pinctrl + +# Flattened Device Tree +options FDT # Configure using FDT/DTB data diff --git a/sys/arm/conf/uFW-netboot b/sys/arm/conf/uFW-netboot new file mode 100644 index 0000000..7a05d1c --- /dev/null +++ b/sys/arm/conf/uFW-netboot @@ -0,0 +1,33 @@ +# +# uFW-netboot -- Custom configuration for the micro-firewall kit with netboot +# options, check out # http://www.netgate.com +# +# For more information on this file, please read the config(5) manual page, +# and/or the handbook section on Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. 
+# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. +# +# $FreeBSD$ + +include "uFW-base" + +ident uFW-netboot + +# MD_ROOT options +options MD_ROOT +options MD_ROOT_SIZE=16384 +options ROOTDEVNAME=\"ufs:md0.uzip\" +options GEOM_UZIP + +options FDT_DTB_STATIC +makeoptions FDT_DTS_FILE=ufw.dts diff --git a/sys/arm/freescale/vybrid/vf_spi.c b/sys/arm/freescale/vybrid/vf_spi.c index 8fa466b..a327dea 100644 --- a/sys/arm/freescale/vybrid/vf_spi.c +++ b/sys/arm/freescale/vybrid/vf_spi.c @@ -263,6 +263,8 @@ spi_transfer(device_t dev, device_t child, struct spi_command *cmd) /* get the proper chip select */ spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + /* Command */ spi_txrx(sc, cmd->tx_cmd, cmd->rx_cmd, cmd->tx_cmd_sz, cs); diff --git a/sys/arm/include/cpuinfo.h b/sys/arm/include/cpuinfo.h index 41b5e6d..c05875c 100644 --- a/sys/arm/include/cpuinfo.h +++ b/sys/arm/include/cpuinfo.h @@ -121,5 +121,7 @@ struct cpuinfo { extern struct cpuinfo cpuinfo; void cpuinfo_init(void); -void cpuinfo_get_actlr_modifier(uint32_t *actlr_mask, uint32_t *actlr_set); +#if __ARM_ARCH >= 6 +void cpuinfo_reinit_mmu(uint32_t ttb); +#endif #endif /* _MACHINE_CPUINFO_H_ */ diff --git a/sys/arm/include/pl310.h b/sys/arm/include/pl310.h index 5fcb7af..6160135 100644 --- a/sys/arm/include/pl310.h +++ b/sys/arm/include/pl310.h @@ -148,6 +148,7 @@ struct pl310_softc { struct mtx sc_mtx; u_int sc_rtl_revision; struct intr_config_hook *sc_ich; + boolean_t sc_io_coherent; }; /** diff --git a/sys/arm/include/pmap-v6.h b/sys/arm/include/pmap-v6.h index e25f7f0..958415e 100644 --- a/sys/arm/include/pmap-v6.h +++ b/sys/arm/include/pmap-v6.h @@ -177,7 +177,6 @@ vm_paddr_t pmap_dump_kextract(vm_offset_t, pt2_entry_t *); int pmap_fault(pmap_t, vm_offset_t, uint32_t, int, bool); void pmap_set_tex(void); -void 
reinit_mmu(ttb_entry_t ttb, u_int aux_clr, u_int aux_set); /* * Pre-bootstrap epoch functions set. diff --git a/sys/arm/include/pte-v6.h b/sys/arm/include/pte-v6.h index cc92b28..d9aaaea 100644 --- a/sys/arm/include/pte-v6.h +++ b/sys/arm/include/pte-v6.h @@ -149,10 +149,12 @@ #define L2_NX 0x00000001 /* Not executable */ #define L2_B 0x00000004 /* Bufferable page */ #define L2_C 0x00000008 /* Cacheable page */ +#define L2_CB_SHIFT 2 /* C,B bit field shift */ #define L2_AP(x) ((x) << 4) #define L2_AP0 0x00000010 /* access permissions bit 0*/ #define L2_AP1 0x00000020 /* access permissions bit 1*/ -#define L2_TEX(x) ((x) << 6) /* type extension */ +#define L2_TEX_SHIFT 6 /* type extension field shift */ +#define L2_TEX(x) ((x) << L2_TEX_SHIFT) /* type extension */ #define L2_TEX0 0x00000040 /* type extension bit 0 */ #define L2_TEX1 0x00000080 /* type extension bit 1 */ #define L2_TEX2 0x00000100 /* type extension bit 2 */ @@ -271,6 +273,10 @@ #define PTE2_FRAME L2_S_FRAME #define PTE2_ATTR_MASK (L2_TEX0 | L2_C | L2_B) +/* PTE2 attributes to TEX class index: (TEX0 C B) */ +#define PTE2_ATTR2IDX(attr) \ + ((((attr) & (L2_C | L2_B)) >> L2_CB_SHIFT) | \ + (((attr) & L2_TEX0) >> (L2_TEX_SHIFT - L2_CB_SHIFT))) #define PTE2_AP_KR (PTE2_RO | PTE2_NM) #define PTE2_AP_KRW 0 diff --git a/sys/arm/lpc/lpc_intc.c b/sys/arm/lpc/lpc_intc.c index d26ef90..a5ee160 100644 --- a/sys/arm/lpc/lpc_intc.c +++ b/sys/arm/lpc/lpc_intc.c @@ -232,7 +232,7 @@ static int fdt_pic_decode_ic(phandle_t node, pcell_t *intr, int *interrupt, int *trig, int *pol) { - if (!fdt_is_compatible(node, "lpc,pic")) + if (!ofw_bus_node_is_compatible(node, "lpc,pic")) return (ENXIO); *interrupt = fdt32_to_cpu(intr[0]); diff --git a/sys/arm/lpc/lpc_spi.c b/sys/arm/lpc/lpc_spi.c index c9accc8..87d3dcf 100644 --- a/sys/arm/lpc/lpc_spi.c +++ b/sys/arm/lpc/lpc_spi.c @@ -147,6 +147,8 @@ lpc_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + /* Set 
CS active */ lpc_gpio_set_state(child, cs, 0); diff --git a/sys/arm/mv/armada/thermal.c b/sys/arm/mv/armada/thermal.c new file mode 100644 index 0000000..e27add4 --- /dev/null +++ b/sys/arm/mv/armada/thermal.c @@ -0,0 +1,314 @@ +/*- + * Copyright (c) 2017 Semihalf. + * Copyright (c) 2017 Stormshield. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/conf.h> +#include <sys/rman.h> +#include <sys/types.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/resource.h> + +#include <machine/fdt.h> + +#include <dev/ofw/ofw_bus_subr.h> + +#define READOUT_TO_C(temp) ((temp) / 1000) + +#define STAT_RID 0 +#define CTRL_RID 1 + +#define TSEN_STAT_READOUT_VALID 0x1 + +#define A380_TSEN_CTRL_RESET (1 << 8) + +struct armada_thermal_softc; + +typedef struct armada_thermal_data { + /* Initialize the sensor */ + void (*tsen_init)(struct armada_thermal_softc *); + + /* Test for a valid sensor value */ + boolean_t (*is_valid)(struct armada_thermal_softc *); + + /* Formula coefficients: temp = (b + m * reg) / div */ + u_long coef_b; + u_long coef_m; + u_long coef_div; + + boolean_t inverted; + + /* Shift and mask to access the sensor temperature */ + u_int temp_shift; + u_int temp_mask; + u_int is_valid_shift; +} armada_tdata_t; + +static boolean_t armada_tsen_readout_valid(struct armada_thermal_softc *); +static int armada_tsen_get_temp(struct armada_thermal_softc *, u_long *); +static void armada380_tsen_init(struct armada_thermal_softc *); +static void armada_temp_update(void *); + +static const armada_tdata_t armada380_tdata = { + .tsen_init = armada380_tsen_init, + .is_valid = armada_tsen_readout_valid, + .is_valid_shift = 10, + .temp_shift = 0, + .temp_mask = 0x3ff, + .coef_b = 1172499100UL, + .coef_m = 2000096UL, + .coef_div = 4201, + .inverted = TRUE, +}; + +static int armada_thermal_probe(device_t); +static int armada_thermal_attach(device_t); +static int armada_thermal_detach(device_t); + +static device_method_t armada_thermal_methods[] = { + DEVMETHOD(device_probe, armada_thermal_probe), + DEVMETHOD(device_attach, armada_thermal_attach), + DEVMETHOD(device_detach, armada_thermal_detach), + + DEVMETHOD_END +}; + +struct armada_thermal_softc { + 
device_t dev; + + struct resource *stat_res; + struct resource *ctrl_res; + + struct callout temp_upd; + struct mtx temp_upd_mtx; + + const armada_tdata_t *tdata; + + u_long chip_temperature; +}; + +static driver_t armada_thermal_driver = { + "armada_thermal", + armada_thermal_methods, + sizeof(struct armada_thermal_softc) +}; + +static devclass_t armada_thermal_devclass; + +DRIVER_MODULE(armada_thermal, simplebus, armada_thermal_driver, + armada_thermal_devclass, 0, 0); +DRIVER_MODULE(armada_thermal, ofwbus, armada_thermal_driver, + armada_thermal_devclass, 0, 0); + +static int +armada_thermal_probe(device_t dev) +{ + struct armada_thermal_softc *sc; + + sc = device_get_softc(dev); + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (ofw_bus_is_compatible(dev, "marvell,armada380-thermal")) { + device_set_desc(dev, "Armada380 Thermal Control"); + sc->tdata = &armada380_tdata; + + return (BUS_PROBE_DEFAULT); + } + + return (ENXIO); +} + +static int +armada_thermal_attach(device_t dev) +{ + struct armada_thermal_softc *sc; + const armada_tdata_t *tdata; + struct sysctl_ctx_list *sctx; + struct sysctl_oid_list *schildren; + int timeout; + int rid; + + sc = device_get_softc(dev); + + /* Allocate CTRL and STAT register spaces */ + rid = STAT_RID; + sc->stat_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + if (sc->stat_res == NULL) { + device_printf(dev, + "Could not allocate memory for the status register\n"); + return (ENXIO); + } + + rid = CTRL_RID; + sc->ctrl_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + if (sc->ctrl_res == NULL) { + device_printf(dev, + "Could not allocate memory for the control register\n"); + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->stat_res), sc->stat_res); + sc->stat_res = NULL; + return (ENXIO); + } + + /* Now initialize the sensor */ + tdata = sc->tdata; + tdata->tsen_init(sc); + /* Set initial temperature value */ + for (timeout = 1000; timeout > 0; timeout--) { + if 
(armada_tsen_get_temp(sc, &sc->chip_temperature) == 0) + break; + DELAY(10); + } + if (timeout <= 0) { + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->stat_res), sc->stat_res); + sc->stat_res = NULL; + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->ctrl_res), sc->ctrl_res); + sc->ctrl_res = NULL; + return (ENXIO); + } + /* Initialize mutex */ + mtx_init(&sc->temp_upd_mtx, "Armada Thermal", NULL, MTX_DEF); + /* Set up the temperature update callout */ + callout_init_mtx(&sc->temp_upd, &sc->temp_upd_mtx, 0); + /* Schedule callout */ + callout_reset(&sc->temp_upd, hz, armada_temp_update, sc); + + sctx = device_get_sysctl_ctx(dev); + schildren = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + SYSCTL_ADD_LONG(sctx, schildren, OID_AUTO, "temperature", + CTLFLAG_RD, &sc->chip_temperature, "SoC temperature"); + + return (0); +} + +static int +armada_thermal_detach(device_t dev) +{ + struct armada_thermal_softc *sc; + + sc = device_get_softc(dev); + + if (!device_is_attached(dev)) + return (0); + + callout_drain(&sc->temp_upd); + + sc->chip_temperature = 0; + + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->stat_res), sc->stat_res); + sc->stat_res = NULL; + + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->ctrl_res), sc->ctrl_res); + sc->ctrl_res = NULL; + + return (0); +} + +static boolean_t +armada_tsen_readout_valid(struct armada_thermal_softc *sc) +{ + const armada_tdata_t *tdata; + uint32_t tsen_stat; + boolean_t is_valid; + + tdata = sc->tdata; + tsen_stat = bus_read_4(sc->stat_res, 0); + + tsen_stat >>= tdata->is_valid_shift; + is_valid = ((tsen_stat & TSEN_STAT_READOUT_VALID) != 0); + + return (is_valid); +} + +static int +armada_tsen_get_temp(struct armada_thermal_softc *sc, u_long *temp) +{ + const armada_tdata_t *tdata; + uint32_t reg; + u_long tmp; + u_long m, b, div; + + tdata = sc->tdata; + /* Check if the readout is valid */ + if ((tdata->is_valid != NULL) && !tdata->is_valid(sc)) + return (EIO); + + reg 
= bus_read_4(sc->stat_res, 0); + reg = (reg >> tdata->temp_shift) & tdata->temp_mask; + + /* Get formula coefficients */ + b = tdata->coef_b; + m = tdata->coef_m; + div = tdata->coef_div; + + if (tdata->inverted) + tmp = ((m * reg) - b) / div; + else + tmp = (b - (m * reg)) / div; + + *temp = READOUT_TO_C(tmp); + + return (0); +} + +static void +armada380_tsen_init(struct armada_thermal_softc *sc) +{ + uint32_t tsen_ctrl; + + tsen_ctrl = bus_read_4(sc->ctrl_res, 0); + if ((tsen_ctrl & A380_TSEN_CTRL_RESET) == 0) { + tsen_ctrl |= A380_TSEN_CTRL_RESET; + bus_write_4(sc->ctrl_res, 0, tsen_ctrl); + DELAY(10000); + } +} + +static void +armada_temp_update(void *arg) +{ + struct armada_thermal_softc *sc; + + sc = arg; + /* Update temperature value, keel old if the readout is not valid */ + (void)armada_tsen_get_temp(sc, &sc->chip_temperature); + + callout_reset(&sc->temp_upd, hz, armada_temp_update, sc); +} diff --git a/sys/arm/mv/armada/wdt.c b/sys/arm/mv/armada/wdt.c new file mode 100644 index 0000000..8d6dc3c --- /dev/null +++ b/sys/arm/mv/armada/wdt.c @@ -0,0 +1,285 @@ +/*- + * Copyright (c) 2006 Benno Rice. + * Copyright (C) 2007-2008 MARVELL INTERNATIONAL LTD. + * All rights reserved. + * + * Adapted to Marvell SoC by Semihalf. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: FreeBSD: //depot/projects/arm/src/sys/arm/xscale/pxa2x0/pxa2x0_timer.c, rev 1 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/malloc.h> +#include <sys/rman.h> +#include <sys/kdb.h> +#include <sys/timeet.h> +#include <sys/timetc.h> +#include <sys/watchdog.h> +#include <machine/bus.h> +#include <machine/cpu.h> + +#include <arm/mv/mvreg.h> +#include <arm/mv/mvvar.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#define INITIAL_TIMECOUNTER (0xffffffff) +#define MAX_WATCHDOG_TICKS (0xffffffff) + +#if defined(SOC_MV_ARMADAXP) || defined(SOC_MV_ARMADA38X) +#define MV_CLOCK_SRC 25000000 /* Timers' 25MHz mode */ +#else +#define MV_CLOCK_SRC get_tclk() +#endif + +#if defined(SOC_MV_ARMADA38X) +#define WATCHDOG_TIMER 4 +#else +#define WATCHDOG_TIMER 2 +#endif + +struct mv_wdt_softc { + struct resource * wdt_res; + struct mtx wdt_mtx; +}; + +static struct resource_spec mv_wdt_spec[] = { + { SYS_RES_MEMORY, 0, RF_ACTIVE }, + { -1, 0 } +}; + +static struct ofw_compat_data mv_wdt_compat[] = { + {"marvell,armada-380-wdt", true}, + {NULL, false} +}; + +static struct mv_wdt_softc *wdt_softc = NULL; +int timers_initialized = 0; + +static int mv_wdt_probe(device_t); +static int mv_wdt_attach(device_t); + +static uint32_t mv_get_timer_control(void); +static void mv_set_timer_control(uint32_t); 
+static void mv_set_timer(uint32_t, uint32_t); + +static void mv_watchdog_enable(void); +static void mv_watchdog_disable(void); +static void mv_watchdog_event(void *, unsigned int, int *); + +static device_method_t mv_wdt_methods[] = { + DEVMETHOD(device_probe, mv_wdt_probe), + DEVMETHOD(device_attach, mv_wdt_attach), + + { 0, 0 } +}; + +static driver_t mv_wdt_driver = { + "wdt", + mv_wdt_methods, + sizeof(struct mv_wdt_softc), +}; + +static devclass_t mv_wdt_devclass; + +DRIVER_MODULE(wdt, simplebus, mv_wdt_driver, mv_wdt_devclass, 0, 0); +static int +mv_wdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, mv_wdt_compat)->ocd_data) + return (ENXIO); + + device_set_desc(dev, "Marvell Watchdog Timer"); + return (0); +} + +static int +mv_wdt_attach(device_t dev) +{ + struct mv_wdt_softc *sc; + int error; + + if (wdt_softc != NULL) + return (ENXIO); + + sc = device_get_softc(dev); + wdt_softc = sc; + + error = bus_alloc_resources(dev, mv_wdt_spec, &sc->wdt_res); + if (error) { + device_printf(dev, "could not allocate resources\n"); + return (ENXIO); + } + + mtx_init(&sc->wdt_mtx, "watchdog", NULL, MTX_DEF); + + mv_watchdog_disable(); + EVENTHANDLER_REGISTER(watchdog_list, mv_watchdog_event, sc, 0); + + return (0); +} + +static __inline uint32_t +mv_get_timer_control(void) +{ + + return (bus_read_4(wdt_softc->wdt_res, CPU_TIMER_CONTROL)); +} + +static __inline void +mv_set_timer_control(uint32_t val) +{ + + bus_write_4(wdt_softc->wdt_res, CPU_TIMER_CONTROL, val); +} + +static __inline void +mv_set_timer(uint32_t timer, uint32_t val) +{ + + bus_write_4(wdt_softc->wdt_res, CPU_TIMER0 + timer * 0x8, val); +} + +static void +mv_watchdog_enable(void) +{ + uint32_t val, irq_cause; +#if !defined(SOC_MV_ARMADAXP) && !defined(SOC_MV_ARMADA38X) + uint32_t irq_mask; +#endif + + irq_cause = read_cpu_ctrl(BRIDGE_IRQ_CAUSE); + irq_cause &= IRQ_TIMER_WD_CLR; + write_cpu_ctrl(BRIDGE_IRQ_CAUSE, irq_cause); + +#if 
defined(SOC_MV_ARMADAXP) || defined(SOC_MV_ARMADA38X) + val = read_cpu_mp_clocks(WD_RSTOUTn_MASK); + val |= (WD_GLOBAL_MASK | WD_CPU0_MASK); + write_cpu_mp_clocks(WD_RSTOUTn_MASK, val); + + val = read_cpu_misc(RSTOUTn_MASK); + val &= ~RSTOUTn_MASK_WD; + write_cpu_misc(RSTOUTn_MASK, val); +#else + irq_mask = read_cpu_ctrl(BRIDGE_IRQ_MASK); + irq_mask |= IRQ_TIMER_WD_MASK; + write_cpu_ctrl(BRIDGE_IRQ_MASK, irq_mask); + + val = read_cpu_ctrl(RSTOUTn_MASK); + val |= WD_RST_OUT_EN; + write_cpu_ctrl(RSTOUTn_MASK, val); +#endif + + val = mv_get_timer_control(); +#if defined(SOC_MV_ARMADA38X) + val |= CPU_TIMER_WD_EN | CPU_TIMER_WD_AUTO | CPU_TIMER_WD_25MHZ_EN; +#elif defined(SOC_MV_ARMADAXP) + val |= CPU_TIMER2_EN | CPU_TIMER2_AUTO | CPU_TIMER_WD_25MHZ_EN; +#else + val |= CPU_TIMER2_EN | CPU_TIMER2_AUTO; +#endif + mv_set_timer_control(val); +} + +static void +mv_watchdog_disable(void) +{ + uint32_t val, irq_cause; +#if !defined(SOC_MV_ARMADAXP) && !defined(SOC_MV_ARMADA38X) + uint32_t irq_mask; +#endif + + val = mv_get_timer_control(); +#if defined(SOC_MV_ARMADA38X) + val &= ~(CPU_TIMER_WD_EN | CPU_TIMER_WD_AUTO); +#else + val &= ~(CPU_TIMER2_EN | CPU_TIMER2_AUTO); +#endif + mv_set_timer_control(val); + +#if defined(SOC_MV_ARMADAXP) || defined(SOC_MV_ARMADA38X) + val = read_cpu_mp_clocks(WD_RSTOUTn_MASK); + val &= ~(WD_GLOBAL_MASK | WD_CPU0_MASK); + write_cpu_mp_clocks(WD_RSTOUTn_MASK, val); + + val = read_cpu_misc(RSTOUTn_MASK); + val |= RSTOUTn_MASK_WD; + write_cpu_misc(RSTOUTn_MASK, RSTOUTn_MASK_WD); +#else + val = read_cpu_ctrl(RSTOUTn_MASK); + val &= ~WD_RST_OUT_EN; + write_cpu_ctrl(RSTOUTn_MASK, val); + + irq_mask = read_cpu_ctrl(BRIDGE_IRQ_MASK); + irq_mask &= ~(IRQ_TIMER_WD_MASK); + write_cpu_ctrl(BRIDGE_IRQ_MASK, irq_mask); +#endif + + irq_cause = read_cpu_ctrl(BRIDGE_IRQ_CAUSE); + irq_cause &= IRQ_TIMER_WD_CLR; + write_cpu_ctrl(BRIDGE_IRQ_CAUSE, irq_cause); +} + +/* + * Watchdog event handler. 
+ */ +static void +mv_watchdog_event(void *arg, unsigned int cmd, int *error) +{ + struct mv_wdt_softc *sc; + uint64_t ns; + uint64_t ticks; + + sc = arg; + mtx_lock(&sc->wdt_mtx); + if (cmd == 0) + mv_watchdog_disable(); + else { + /* + * Watchdog timeout is in nanosecs, calculation according to + * watchdog(9) + */ + ns = (uint64_t)1 << (cmd & WD_INTERVAL); + ticks = (uint64_t)(ns * MV_CLOCK_SRC) / 1000000000; + if (ticks > MAX_WATCHDOG_TICKS) + mv_watchdog_disable(); + else { + mv_set_timer(WATCHDOG_TIMER, ticks); + mv_watchdog_enable(); + *error = 0; + } + } + mtx_unlock(&sc->wdt_mtx); +} diff --git a/sys/arm/mv/armada38x/armada38x.c b/sys/arm/mv/armada38x/armada38x.c index ae4d459..a72d314 100644 --- a/sys/arm/mv/armada38x/armada38x.c +++ b/sys/arm/mv/armada38x/armada38x.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> +#include <sys/sysctl.h> #include <sys/systm.h> #include <sys/bus.h> @@ -41,6 +42,11 @@ __FBSDID("$FreeBSD$"); int armada38x_open_bootrom_win(void); int armada38x_scu_enable(void); int armada38x_win_set_iosync_barrier(void); +int armada38x_mbus_optimization(void); + +static int hw_clockrate; +SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, + &hw_clockrate, 0, "CPU instruction clock rate"); uint32_t get_tclk(void) @@ -59,6 +65,29 @@ get_tclk(void) return (TCLK_200MHZ); } +uint32_t +get_cpu_freq(void) +{ + uint32_t sar; + + static const uint32_t cpu_frequencies[] = { + 0, 0, 0, 0, + 1066, 0, 0, 0, + 1332, 0, 0, 0, + 1600, 0, 0, 0, + 1866, 0, 0, 2000 + }; + + sar = (uint32_t)get_sar_value(); + sar = (sar & A38X_CPU_DDR_CLK_MASK) >> A38X_CPU_DDR_CLK_SHIFT; + if (sar >= nitems(cpu_frequencies)) + return (0); + + hw_clockrate = cpu_frequencies[sar]; + + return (hw_clockrate * 1000 * 1000); +} + int armada38x_win_set_iosync_barrier(void) { @@ -115,6 +144,50 @@ armada38x_open_bootrom_win(void) } int +armada38x_mbus_optimization(void) +{ + bus_space_handle_t vaddr_iowind; + int rv; + + rv = bus_space_map(fdtbus_bs_tag, 
(bus_addr_t)MV_MBUS_CTRL_BASE, + MV_MBUS_CTRL_REGS_LEN, 0, &vaddr_iowind); + if (rv != 0) + return (rv); + + /* + * MBUS Units Priority Control Register - Prioritize XOR, + * PCIe and GbEs (ID=4,6,3,7,8) DRAM access + * GbE is High and others are Medium. + */ + bus_space_write_4(fdtbus_bs_tag, vaddr_iowind, 0, 0x19180); + + /* + * Fabric Units Priority Control Register - + * Prioritize CPUs requests. + */ + bus_space_write_4(fdtbus_bs_tag, vaddr_iowind, 0x4, 0x3000A); + + /* + * MBUS Units Prefetch Control Register - + * Pre-fetch enable for all IO masters. + */ + bus_space_write_4(fdtbus_bs_tag, vaddr_iowind, 0x8, 0xFFFF); + + /* + * Fabric Units Prefetch Control Register - + * Enable the CPUs Instruction and Data prefetch. + */ + bus_space_write_4(fdtbus_bs_tag, vaddr_iowind, 0xc, 0x303); + + bus_space_barrier(fdtbus_bs_tag, vaddr_iowind, 0, MV_MBUS_CTRL_REGS_LEN, + BUS_SPACE_BARRIER_WRITE); + + bus_space_unmap(fdtbus_bs_tag, vaddr_iowind, MV_MBUS_CTRL_REGS_LEN); + + return (rv); +} + +int armada38x_scu_enable(void) { bus_space_handle_t vaddr_scu; @@ -128,9 +201,13 @@ armada38x_scu_enable(void) /* Enable SCU */ val = bus_space_read_4(fdtbus_bs_tag, vaddr_scu, MV_SCU_REG_CTRL); - if (!(val & MV_SCU_ENABLE)) + if (!(val & MV_SCU_ENABLE)) { + /* Enable SCU Speculative linefills to L2 */ + val |= MV_SCU_SL_L2_ENABLE; + bus_space_write_4(fdtbus_bs_tag, vaddr_scu, 0, val | MV_SCU_ENABLE); + } bus_space_unmap(fdtbus_bs_tag, vaddr_scu, MV_SCU_REGS_LEN); return (0); diff --git a/sys/arm/mv/armada38x/armada38x_mp.c b/sys/arm/mv/armada38x/armada38x_mp.c index 8dc404f..ab655023 100644 --- a/sys/arm/mv/armada38x/armada38x_mp.c +++ b/sys/arm/mv/armada38x/armada38x_mp.c @@ -127,7 +127,7 @@ platform_mp_setmaxid(void) /* Armada38x family supports maximum 2 cores */ mp_ncpus = platform_cnt_cpus(); - mp_maxid = 1; + mp_maxid = mp_ncpus - 1; } void diff --git a/sys/arm/mv/armada38x/armada38x_pl310.c b/sys/arm/mv/armada38x/armada38x_pl310.c new file mode 100644 index 0000000..52e818d 
--- /dev/null +++ b/sys/arm/mv/armada38x/armada38x_pl310.c @@ -0,0 +1,77 @@ +/*- + * Copyright (c) 2017 Stormshield. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * The machine-dependent part of the arm/pl310 driver for Armada 38x SoCs. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/rman.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +#include <machine/bus.h> +#include <machine/pl310.h> + +void +platform_pl310_init(struct pl310_softc *sc) +{ + uint32_t reg; + + /* + * Enable power saving modes: + * - Dynamic Gating stops the clock when the controller is idle. 
+ */ + reg = pl310_read4(sc, PL310_POWER_CTRL); + reg |= POWER_CTRL_ENABLE_GATING; + pl310_write4(sc, PL310_POWER_CTRL, reg); + + pl310_write4(sc, PL310_PREFETCH_CTRL, PREFETCH_CTRL_DL | + PREFETCH_CTRL_DATA_PREFETCH | PREFETCH_CTRL_INCR_DL | + PREFETCH_CTRL_DL_ON_WRAP); + + /* Disable L2 cache sync for IO coherent operation */ + sc->sc_io_coherent = true; +} + +void +platform_pl310_write_ctrl(struct pl310_softc *sc, uint32_t val) +{ + + pl310_write4(sc, PL310_CTRL, val); +} + +void +platform_pl310_write_debug(struct pl310_softc *sc, uint32_t val) +{ + + pl310_write4(sc, PL310_DEBUG_CTRL, val); +} diff --git a/sys/arm/mv/armada38x/files.armada38x b/sys/arm/mv/armada38x/files.armada38x index 599919d..5459bba 100644 --- a/sys/arm/mv/armada38x/files.armada38x +++ b/sys/arm/mv/armada38x/files.armada38x @@ -1,7 +1,12 @@ # $FreeBSD$ arm/mv/mpic.c standard +arm/mv/armada/thermal.c optional fdt +arm/mv/armada/wdt.c optional fdt + arm/mv/armada38x/armada38x.c standard arm/mv/armada38x/armada38x_mp.c optional smp arm/mv/armada38x/pmsu.c standard arm/mv/armada38x/rtc.c standard +arm/mv/armada38x/armada38x_pl310.c optional pl310 +dev/sdhci/sdhci_fdt.c optional sdhci diff --git a/sys/arm/mv/armada38x/rtc.c b/sys/arm/mv/armada38x/rtc.c index 8b3821c..a24a559 100644 --- a/sys/arm/mv/armada38x/rtc.c +++ b/sys/arm/mv/armada38x/rtc.c @@ -56,18 +56,43 @@ __FBSDID("$FreeBSD$"); #define RTC_STATUS 0x0 #define RTC_TIME 0xC +#define RTC_TEST_CONFIG 0x1C +#define RTC_IRQ_1_CONFIG 0x4 +#define RTC_IRQ_2_CONFIG 0x8 +#define RTC_ALARM_1 0x10 +#define RTC_ALARM_2 0x14 +#define RTC_CLOCK_CORR 0x18 + +#define RTC_NOMINAL_TIMING 0x2000 +#define RTC_NOMINAL_TIMING_MASK 0x7fff + +#define RTC_STATUS_ALARM1_MASK 0x1 +#define RTC_STATUS_ALARM2_MASK 0x2 + +#define MV_RTC_LOCK(sc) mtx_lock_spin(&(sc)->mutex) +#define MV_RTC_UNLOCK(sc) mtx_unlock_spin(&(sc)->mutex) + +#define RTC_BRIDGE_TIMING_CTRL 0x0 +#define RTC_WRCLK_PERIOD_SHIFT 0 +#define RTC_WRCLK_PERIOD_MASK 0x00000003FF +#define 
RTC_WRCLK_PERIOD_MAX 0x3FF +#define RTC_READ_OUTPUT_DELAY_SHIFT 26 +#define RTC_READ_OUTPUT_DELAY_MASK 0x007C000000 +#define RTC_READ_OUTPUT_DELAY_MAX 0x1F + +#define RTC_RES 0 +#define RTC_SOC_RES 1 -#define MV_RTC_LOCK(sc) mtx_lock(&(sc)->mutex) -#define MV_RTC_UNLOCK(sc) mtx_unlock(&(sc)->mutex) static struct resource_spec res_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, + { SYS_RES_MEMORY, 1, RF_ACTIVE }, { -1, 0 } }; struct mv_rtc_softc { device_t dev; - struct resource *res; + struct resource *res[2]; struct mtx mutex; }; @@ -78,9 +103,11 @@ static int mv_rtc_detach(device_t dev); static int mv_rtc_gettime(device_t dev, struct timespec *ts); static int mv_rtc_settime(device_t dev, struct timespec *ts); -static uint32_t mv_rtc_reg_read(struct mv_rtc_softc *sc, bus_size_t off); -static int mv_rtc_reg_write(struct mv_rtc_softc *sc, bus_size_t off, +static inline uint32_t mv_rtc_reg_read(struct mv_rtc_softc *sc, + bus_size_t off); +static inline int mv_rtc_reg_write(struct mv_rtc_softc *sc, bus_size_t off, uint32_t val); +static inline void mv_rtc_configure_bus(struct mv_rtc_softc *sc); static device_method_t mv_rtc_methods[] = { DEVMETHOD(device_probe, mv_rtc_probe), @@ -103,6 +130,43 @@ static devclass_t mv_rtc_devclass; DRIVER_MODULE(mv_rtc, simplebus, mv_rtc_driver, mv_rtc_devclass, 0, 0); +static void +mv_rtc_reset(device_t dev) +{ + struct mv_rtc_softc *sc; + + sc = device_get_softc(dev); + + /* Reset Test register */ + mv_rtc_reg_write(sc, RTC_TEST_CONFIG, 0); + DELAY(500000); + + /* Reset Time register */ + mv_rtc_reg_write(sc, RTC_TIME, 0); + DELAY(62); + + /* Reset Status register */ + mv_rtc_reg_write(sc, RTC_STATUS, (RTC_STATUS_ALARM1_MASK | RTC_STATUS_ALARM2_MASK)); + DELAY(62); + + /* Turn off Int1 and Int2 sources & clear the Alarm count */ + mv_rtc_reg_write(sc, RTC_IRQ_1_CONFIG, 0); + mv_rtc_reg_write(sc, RTC_IRQ_2_CONFIG, 0); + mv_rtc_reg_write(sc, RTC_ALARM_1, 0); + mv_rtc_reg_write(sc, RTC_ALARM_2, 0); + + /* Setup nominal register access 
timing */ + mv_rtc_reg_write(sc, RTC_CLOCK_CORR, RTC_NOMINAL_TIMING); + + /* Reset Time register */ + mv_rtc_reg_write(sc, RTC_TIME, 0); + DELAY(10); + + /* Reset Status register */ + mv_rtc_reg_write(sc, RTC_STATUS, (RTC_STATUS_ALARM1_MASK | RTC_STATUS_ALARM2_MASK)); + DELAY(50); +} + static int mv_rtc_probe(device_t dev) { @@ -131,14 +195,16 @@ mv_rtc_attach(device_t dev) clock_register(dev, RTC_RES_US); - mtx_init(&sc->mutex, device_get_nameunit(dev), NULL, MTX_DEF); + mtx_init(&sc->mutex, device_get_nameunit(dev), NULL, MTX_SPIN); + + ret = bus_alloc_resources(dev, res_spec, sc->res); - ret = bus_alloc_resources(dev, res_spec, &sc->res); if (ret != 0) { device_printf(dev, "could not allocate resources\n"); mtx_destroy(&sc->mutex); return (ENXIO); } + mv_rtc_configure_bus(sc); return (0); } @@ -152,7 +218,7 @@ mv_rtc_detach(device_t dev) mtx_destroy(&sc->mutex); - bus_release_resources(dev, res_spec, &sc->res); + bus_release_resources(dev, res_spec, sc->res); return (0); } @@ -198,6 +264,12 @@ mv_rtc_settime(device_t dev, struct timespec *ts) MV_RTC_LOCK(sc); + if ((mv_rtc_reg_read(sc, RTC_CLOCK_CORR) & RTC_NOMINAL_TIMING_MASK) != + RTC_NOMINAL_TIMING) { + /* RTC was not resetted yet */ + mv_rtc_reset(dev); + } + /* * According to errata FE-3124064, Write to RTC TIME register * may fail. 
As a workaround, before writing to RTC TIME register, @@ -212,11 +284,11 @@ mv_rtc_settime(device_t dev, struct timespec *ts) return (0); } -static uint32_t +static inline uint32_t mv_rtc_reg_read(struct mv_rtc_softc *sc, bus_size_t off) { - return (bus_read_4(sc->res, off)); + return (bus_read_4(sc->res[RTC_RES], off)); } /* @@ -224,12 +296,24 @@ mv_rtc_reg_read(struct mv_rtc_softc *sc, bus_size_t off) * register write to the RTC hard macro so that the required update * can occur without holding off the system bus */ -static int +static inline int mv_rtc_reg_write(struct mv_rtc_softc *sc, bus_size_t off, uint32_t val) { - bus_write_4(sc->res, off, val); + bus_write_4(sc->res[RTC_RES], off, val); DELAY(5); return (0); } + +static inline void +mv_rtc_configure_bus(struct mv_rtc_softc *sc) +{ + int val; + + val = bus_read_4(sc->res[RTC_SOC_RES], RTC_BRIDGE_TIMING_CTRL); + val &= ~(RTC_WRCLK_PERIOD_MASK | RTC_READ_OUTPUT_DELAY_MASK); + val |= RTC_WRCLK_PERIOD_MAX << RTC_WRCLK_PERIOD_SHIFT; + val |= RTC_READ_OUTPUT_DELAY_MAX << RTC_READ_OUTPUT_DELAY_SHIFT; + bus_write_4(sc->res[RTC_SOC_RES], RTC_BRIDGE_TIMING_CTRL, val); +} diff --git a/sys/arm/mv/armadaxp/armadaxp.c b/sys/arm/mv/armadaxp/armadaxp.c index 172d3d7..bef84c8 100644 --- a/sys/arm/mv/armadaxp/armadaxp.c +++ b/sys/arm/mv/armadaxp/armadaxp.c @@ -137,6 +137,13 @@ get_tclk(void) return (TCLK_200MHZ); } +uint32_t +get_cpu_freq(void) +{ + + return (0); +} + static uint32_t count_l2clk(void) { diff --git a/sys/arm/mv/discovery/discovery.c b/sys/arm/mv/discovery/discovery.c index 3968b78..96878a2 100644 --- a/sys/arm/mv/discovery/discovery.c +++ b/sys/arm/mv/discovery/discovery.c @@ -109,3 +109,10 @@ get_tclk(void) panic("Unknown TCLK settings!"); } } + +uint32_t +get_cpu_freq(void) +{ + + return (0); +} diff --git a/sys/arm/mv/files.mv b/sys/arm/mv/files.mv index 8c303c6..b5508e9 100644 --- a/sys/arm/mv/files.mv +++ b/sys/arm/mv/files.mv @@ -12,22 +12,27 @@ # - JTAG/ICE # - Vector Floating Point (VFP) unit # 
-arm/mv/gpio.c optional gpio +#arm/mv/gpio.c optional gpio arm/mv/mv_common.c standard arm/mv/mv_localbus.c standard arm/mv/mv_machdep.c standard +arm/mv/mv_pci_ctrl.c optional pci | fdt arm/mv/mv_pci.c optional pci +arm/mv/mv_spi.c optional mv_spi spibus arm/mv/mv_ts.c standard -arm/mv/timer.c standard +arm/mv/timer.c optional !soc_mv_armada38x dev/cesa/cesa.c optional cesa dev/iicbus/twsi/mv_twsi.c optional twsi dev/mge/if_mge.c optional mge +dev/neta/if_mvneta_fdt.c optional neta fdt +dev/neta/if_mvneta.c optional neta mdio mii dev/nand/nfc_mv.c optional nand dev/mvs/mvs_soc.c optional mvs dev/uart/uart_dev_ns8250.c optional uart dev/uart/uart_dev_snps.c optional uart dev/usb/controller/ehci_mv.c optional ehci dev/usb/controller/xhci_mv.c optional xhci +dev/ahci/ahci_mv_fdt.c optional ahci kern/kern_clocksource.c standard diff --git a/sys/arm/mv/gpio.c b/sys/arm/mv/gpio.c index 6708167..70d3d06b 100644 --- a/sys/arm/mv/gpio.c +++ b/sys/arm/mv/gpio.c @@ -641,7 +641,7 @@ mv_gpio_init(void) */ ctrl = OF_node_from_xref(gpios[0]); - if (fdt_is_compatible(ctrl, e->compat)) + if (ofw_bus_node_is_compatible(ctrl, e->compat)) /* Call a handler. */ if ((rv = e->handler(ctrl, (pcell_t *)&gpios, len))) diff --git a/sys/arm/mv/kirkwood/kirkwood.c b/sys/arm/mv/kirkwood/kirkwood.c index e6fd442..b8e9f1d 100644 --- a/sys/arm/mv/kirkwood/kirkwood.c +++ b/sys/arm/mv/kirkwood/kirkwood.c @@ -79,3 +79,10 @@ get_tclk(void) return (TCLK_166MHZ); } + +uint32_t +get_cpu_freq(void) +{ + + return (0); +} diff --git a/sys/arm/mv/mpic.c b/sys/arm/mv/mpic.c index 3fc06d7..c16bf83 100644 --- a/sys/arm/mv/mpic.c +++ b/sys/arm/mv/mpic.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/rman.h> #include <sys/proc.h> +#include <sys/smp.h> #include <machine/bus.h> #include <machine/intr.h> @@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$"); #define debugf(fmt, args...) 
#endif +#define MPIC_INT_LOCAL 3 #define MPIC_INT_ERR 4 #define MPIC_INT_MSI 96 @@ -93,7 +95,9 @@ __FBSDID("$FreeBSD$"); #define MPIC_IIACK 0x44 #define MPIC_ISM 0x48 #define MPIC_ICM 0x4c -#define MPIC_ERR_MASK 0xe50 +#define MPIC_ERR_MASK 0x50 +#define MPIC_LOCAL_MASK 0x54 +#define MPIC_CPU(n) (n) * 0x100 #define MPIC_PPI 32 @@ -148,12 +152,11 @@ static void mpic_unmask_irq(uintptr_t nb); static void mpic_mask_irq(uintptr_t nb); static void mpic_mask_irq_err(uintptr_t nb); static void mpic_unmask_irq_err(uintptr_t nb); +static boolean_t mpic_irq_is_percpu(uintptr_t); +#ifdef INTRNG static int mpic_intr(void *arg); -static void mpic_unmask_msi(void); -#ifndef INTRNG -static void arm_mask_irq_err(uintptr_t); -static void arm_unmask_irq_err(uintptr_t); #endif +static void mpic_unmask_msi(void); #define MPIC_WRITE(softc, reg, val) \ bus_space_write_4((softc)->mpic_bst, (softc)->mpic_bsh, (reg), (val)) @@ -224,6 +227,7 @@ mv_mpic_attach(device_t dev) struct mv_mpic_softc *sc; int error; uint32_t val; + int cpu; sc = (struct mv_mpic_softc *)device_get_softc(dev); @@ -260,8 +264,7 @@ mv_mpic_attach(device_t dev) sc->drbl_bsh = rman_get_bushandle(sc->mpic_res[2]); } - bus_space_write_4(mv_mpic_sc->mpic_bst, mv_mpic_sc->mpic_bsh, - MPIC_CTRL, 1); + MPIC_WRITE(mv_mpic_sc, MPIC_CTRL, 1); MPIC_CPU_WRITE(mv_mpic_sc, MPIC_CTP, 0); val = MPIC_READ(mv_mpic_sc, MPIC_CTRL); @@ -273,6 +276,9 @@ mv_mpic_attach(device_t dev) bus_release_resources(dev, mv_mpic_spec, sc->mpic_res); return (ENXIO); } + + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + if (intr_pic_register(dev, OF_xref_from_device(dev)) == NULL) { device_printf(dev, "could not register PIC\n"); bus_release_resources(dev, mv_mpic_spec, sc->mpic_res); @@ -282,6 +288,12 @@ mv_mpic_attach(device_t dev) mpic_unmask_msi(); + /* Unmask CPU performance counters overflow irq */ + for (cpu = 0; cpu < mp_ncpus; cpu++) + MPIC_CPU_WRITE(mv_mpic_sc, MPIC_CPU(cpu) + MPIC_LOCAL_MASK, + (1 << cpu) | 
MPIC_CPU_READ(mv_mpic_sc, + MPIC_CPU(cpu) + MPIC_LOCAL_MASK)); + return (0); } @@ -398,7 +410,7 @@ static driver_t mv_mpic_driver = { static devclass_t mv_mpic_devclass; EARLY_DRIVER_MODULE(mpic, simplebus, mv_mpic_driver, mv_mpic_devclass, 0, 0, - BUS_PASS_INTERRUPT); + BUS_PASS_INTERRUPT + BUS_PASS_ORDER_LATE); #ifndef INTRNG int @@ -432,27 +444,12 @@ arm_mask_irq(uintptr_t nb) mpic_mask_irq(nb); } - -static void -arm_mask_irq_err(uintptr_t nb) -{ - - mpic_mask_irq_err(nb); -} - void arm_unmask_irq(uintptr_t nb) { mpic_unmask_irq(nb); } - -void -arm_unmask_irq_err(uintptr_t nb) -{ - - mpic_unmask_irq_err(nb); -} #endif static void @@ -468,8 +465,7 @@ mpic_unmask_irq_err(uintptr_t nb) uint32_t mask; uint8_t bit_off; - bus_space_write_4(mv_mpic_sc->mpic_bst, mv_mpic_sc->mpic_bsh, - MPIC_ISE, MPIC_INT_ERR); + MPIC_WRITE(mv_mpic_sc, MPIC_ISE, MPIC_INT_ERR); MPIC_CPU_WRITE(mv_mpic_sc, MPIC_ICM, MPIC_INT_ERR); bit_off = nb - ERR_IRQ; @@ -490,15 +486,34 @@ mpic_mask_irq_err(uintptr_t nb) MPIC_CPU_WRITE(mv_mpic_sc, MPIC_ERR_MASK, mask); } +static boolean_t +mpic_irq_is_percpu(uintptr_t nb) +{ + if (nb < MPIC_PPI) + return TRUE; + + return FALSE; +} + static void mpic_unmask_irq(uintptr_t nb) { - if (nb < ERR_IRQ) { - bus_space_write_4(mv_mpic_sc->mpic_bst, mv_mpic_sc->mpic_bsh, - MPIC_ISE, nb); +#ifdef SMP + int cpu; + + if (nb == MPIC_INT_LOCAL) { + for (cpu = 0; cpu < mp_ncpus; cpu++) + MPIC_CPU_WRITE(mv_mpic_sc, + MPIC_CPU(cpu) + MPIC_ICM, nb); + return; + } +#endif + if (mpic_irq_is_percpu(nb)) MPIC_CPU_WRITE(mv_mpic_sc, MPIC_ICM, nb); - } else if (nb < MSI_IRQ) + else if (nb < ERR_IRQ) + MPIC_WRITE(mv_mpic_sc, MPIC_ISE, nb); + else if (nb < MSI_IRQ) mpic_unmask_irq_err(nb); if (nb == 0) @@ -509,11 +524,21 @@ static void mpic_mask_irq(uintptr_t nb) { - if (nb < ERR_IRQ) { - bus_space_write_4(mv_mpic_sc->mpic_bst, mv_mpic_sc->mpic_bsh, - MPIC_ICE, nb); +#ifdef SMP + int cpu; + + if (nb == MPIC_INT_LOCAL) { + for (cpu = 0; cpu < mp_ncpus; cpu++) + 
MPIC_CPU_WRITE(mv_mpic_sc, + MPIC_CPU(cpu) + MPIC_ISM, nb); + return; + } +#endif + if (mpic_irq_is_percpu(nb)) MPIC_CPU_WRITE(mv_mpic_sc, MPIC_ISM, nb); - } else if (nb < MSI_IRQ) + else if (nb < ERR_IRQ) + MPIC_WRITE(mv_mpic_sc, MPIC_ICE, nb); + else if (nb < MSI_IRQ) mpic_mask_irq_err(nb); } @@ -530,8 +555,7 @@ mv_mpic_get_cause_err(void) uint32_t err_cause; uint8_t bit_off; - err_cause = bus_space_read_4(mv_mpic_sc->mpic_bst, - mv_mpic_sc->mpic_bsh, MPIC_ERR_CAUSE); + err_cause = MPIC_READ(mv_mpic_sc, MPIC_ERR_CAUSE); if (err_cause) bit_off = ffs(err_cause) - 1; @@ -612,8 +636,7 @@ pic_ipi_send(cpuset_t cpus, u_int ipi) if (CPU_ISSET(i, &cpus)) val |= (1 << (8 + i)); val |= ipi; - bus_space_write_4(mv_mpic_sc->mpic_bst, mv_mpic_sc->mpic_bsh, - MPIC_SOFT_INT, val); + MPIC_WRITE(mv_mpic_sc, MPIC_SOFT_INT, val); } int diff --git a/sys/arm/mv/mv_common.c b/sys/arm/mv/mv_common.c index 497860f..826f99f 100644 --- a/sys/arm/mv/mv_common.c +++ b/sys/arm/mv/mv_common.c @@ -39,9 +39,11 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/kdb.h> #include <sys/reboot.h> +#include <sys/sysctl.h> #include <dev/fdt/fdt_common.h> #include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus_subr.h> #include <machine/bus.h> #include <machine/fdt.h> @@ -73,39 +75,51 @@ MALLOC_DEFINE(M_IDMA, "idma", "idma dma test memory"); #define MV_DUMP_WIN 0 #endif +static char mv_soc_model[128]; + +SYSCTL_STRING(_hw, OID_AUTO, mv_soc_model, CTLFLAG_RD | CTLFLAG_MPSAFE, + mv_soc_model, 0, "Marvell SoC model"); + static int win_eth_can_remap(int i); -#ifndef SOC_MV_FREY +static int decode_win_cesa_valid(void); static int decode_win_cpu_valid(void); -#endif static int decode_win_usb_valid(void); static int decode_win_usb3_valid(void); static int decode_win_eth_valid(void); static int decode_win_pcie_valid(void); static int decode_win_sata_valid(void); +static int decode_win_sdhci_valid(void); static int decode_win_idma_valid(void); static int decode_win_xor_valid(void); -#ifndef 
SOC_MV_FREY static void decode_win_cpu_setup(void); -#endif #ifdef SOC_MV_ARMADAXP static int decode_win_sdram_fixup(void); #endif +static void decode_win_cesa_setup(u_long); static void decode_win_usb_setup(u_long); static void decode_win_usb3_setup(u_long); static void decode_win_eth_setup(u_long); +static void decode_win_neta_setup(u_long); static void decode_win_sata_setup(u_long); +static void decode_win_ahci_setup(u_long); +static void decode_win_sdhci_setup(u_long); static void decode_win_idma_setup(u_long); static void decode_win_xor_setup(u_long); +static void decode_win_cesa_dump(u_long); static void decode_win_usb_dump(u_long); static void decode_win_usb3_dump(u_long); static void decode_win_eth_dump(u_long base); +static void decode_win_neta_dump(u_long base); static void decode_win_idma_dump(u_long base); static void decode_win_xor_dump(u_long base); +static void decode_win_ahci_dump(u_long base); +static void decode_win_sdhci_dump(u_long); +static void decode_win_pcie_dump(u_long); static int fdt_get_ranges(const char *, void *, int, int *, int *); #ifdef SOC_MV_ARMADA38X @@ -120,6 +134,7 @@ static uint32_t dev_mask = 0; static int cpu_wins_no = 0; static int eth_port = 0; static int usb_port = 0; +static boolean_t platform_io_coherent = false; static struct decode_win cpu_win_tbl[MAX_CPU_WIN]; @@ -128,6 +143,16 @@ const struct decode_win *cpu_wins = cpu_win_tbl; typedef void (*decode_win_setup_t)(u_long); typedef void (*dump_win_t)(u_long); +/* + * The power status of device feature is only supported on + * Kirkwood and Discovery SoCs. 
+ */ +#if defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY) +#define SOC_MV_POWER_STAT_SUPPORTED 1 +#else +#define SOC_MV_POWER_STAT_SUPPORTED 0 +#endif + struct soc_node_spec { const char *compat; decode_win_setup_t decode_handler; @@ -136,16 +161,26 @@ struct soc_node_spec { static struct soc_node_spec soc_nodes[] = { { "mrvl,ge", &decode_win_eth_setup, &decode_win_eth_dump }, + { "marvell,armada-370-neta", &decode_win_neta_setup, &decode_win_neta_dump }, { "mrvl,usb-ehci", &decode_win_usb_setup, &decode_win_usb_dump }, + { "marvell,orion-ehci", &decode_win_usb_setup, &decode_win_usb_dump }, { "marvell,armada-380-xhci", &decode_win_usb3_setup, &decode_win_usb3_dump }, + { "marvell,armada-380-ahci", &decode_win_ahci_setup, &decode_win_ahci_dump }, + { "marvell,armada-380-sdhci", &decode_win_sdhci_setup, &decode_win_sdhci_dump }, { "mrvl,sata", &decode_win_sata_setup, NULL }, { "mrvl,xor", &decode_win_xor_setup, &decode_win_xor_dump }, { "mrvl,idma", &decode_win_idma_setup, &decode_win_idma_dump }, - { "mrvl,pcie", &decode_win_pcie_setup, NULL }, + { "mrvl,cesa", &decode_win_cesa_setup, &decode_win_cesa_dump }, + { "mrvl,pcie", &decode_win_pcie_setup, &decode_win_pcie_dump }, { NULL, NULL, NULL }, }; -struct fdt_pm_mask_entry fdt_pm_mask_table[] = { +struct fdt_pm_mask_entry { + char *compat; + uint32_t mask; +}; + +static struct fdt_pm_mask_entry fdt_pm_mask_table[] = { { "mrvl,ge", CPU_PM_CTRL_GE(0) }, { "mrvl,ge", CPU_PM_CTRL_GE(1) }, { "mrvl,usb-ehci", CPU_PM_CTRL_USB(0) }, @@ -160,10 +195,10 @@ struct fdt_pm_mask_entry fdt_pm_mask_table[] = { static __inline int pm_is_disabled(uint32_t mask) { -#if defined(SOC_MV_KIRKWOOD) - return (soc_power_ctrl_get(mask) == mask); -#else +#if SOC_MV_POWER_STAT_SUPPORTED return (soc_power_ctrl_get(mask) == mask ? 
0 : 1); +#else + return (0); #endif } @@ -230,7 +265,8 @@ fdt_pm(phandle_t node) if (dev_mask & (1 << i)) continue; - compat = fdt_is_compatible(node, fdt_pm_mask_table[i].compat); + compat = ofw_bus_node_is_compatible(node, + fdt_pm_mask_table[i].compat); #if defined(SOC_MV_KIRKWOOD) if (compat && (cpu_pm_ctrl & fdt_pm_mask_table[i].mask)) { dev_mask |= (1 << i); @@ -349,7 +385,7 @@ uint32_t soc_power_ctrl_get(uint32_t mask) { -#if !defined(SOC_MV_ORION) && !defined(SOC_MV_LOKIPLUS) && !defined(SOC_MV_FREY) +#if SOC_MV_POWER_STAT_SUPPORTED if (mask != CPU_PM_CTRL_NONE) mask &= read_cpu_ctrl(CPU_PM_CTRL); @@ -367,7 +403,7 @@ void soc_power_ctrl_set(uint32_t mask) { -#if !defined(SOC_MV_ORION) && !defined(SOC_MV_LOKIPLUS) +#if !defined(SOC_MV_ORION) if (mask != CPU_PM_CTRL_NONE) write_cpu_ctrl(CPU_PM_CTRL, mask); #endif @@ -390,7 +426,8 @@ soc_id(uint32_t *dev, uint32_t *rev) static void soc_identify(void) { - uint32_t d, r, size, mode; + uint32_t d, r, size, mode, freq; + char tmp[128]; const char *dev; const char *rev; @@ -480,10 +517,27 @@ soc_identify(void) break; } + memset(tmp, 0, sizeof(tmp)); + memset(mv_soc_model, 0, sizeof(mv_soc_model)); + snprintf(mv_soc_model, sizeof(mv_soc_model) - 1, "SOC: %s", dev); printf("%s", dev); - if (*rev != '\0') + if (*rev != '\0') { printf(" rev %s", rev); - printf(", TClock %dMHz\n", get_tclk() / 1000 / 1000); + snprintf(tmp, sizeof(tmp) - 1, " rev %s", rev); + strlcat(mv_soc_model, tmp, sizeof(mv_soc_model)); + } + printf(", TClock %dMHz", get_tclk() / 1000 / 1000); + snprintf(tmp, sizeof(tmp) - 1, ", TClock %dMHz", + get_tclk() / 1000 / 1000); + strlcat(mv_soc_model, tmp, sizeof(mv_soc_model)); + freq = get_cpu_freq(); + if (freq != 0) { + printf(", Frequency %dMHz", freq / 1000 / 1000); + snprintf(tmp, sizeof(tmp) - 1, ", Frequency %dMHz", + freq / 1000 / 1000); + strlcat(mv_soc_model, tmp, sizeof(mv_soc_model)); + } + printf("\n"); mode = read_cpu_ctrl(CPU_CONFIG); printf(" Instruction cache prefetch %s, data cache 
prefetch %s\n", @@ -559,21 +613,14 @@ soc_decode_win(void) return(err); #endif -#ifndef SOC_MV_FREY if (!decode_win_cpu_valid() || !decode_win_usb_valid() || !decode_win_eth_valid() || !decode_win_idma_valid() || !decode_win_pcie_valid() || !decode_win_sata_valid() || - !decode_win_xor_valid() || !decode_win_usb3_valid()) + !decode_win_xor_valid() || !decode_win_usb3_valid() || + !decode_win_sdhci_valid() || !decode_win_cesa_valid()) return (EINVAL); decode_win_cpu_setup(); -#else - if (!decode_win_usb_valid() || - !decode_win_eth_valid() || !decode_win_idma_valid() || - !decode_win_pcie_valid() || !decode_win_sata_valid() || - !decode_win_xor_valid() || !decode_win_usb3_valid()) - return (EINVAL); -#endif if (MV_DUMP_WIN) soc_dump_decode_win(); @@ -588,7 +635,6 @@ soc_decode_win(void) /************************************************************************** * Decode windows registers accessors **************************************************************************/ -#if !defined(SOC_MV_FREY) WIN_REG_IDX_RD(win_cpu, cr, MV_WIN_CPU_CTRL, MV_MBUS_BRIDGE_BASE) WIN_REG_IDX_RD(win_cpu, br, MV_WIN_CPU_BASE, MV_MBUS_BRIDGE_BASE) WIN_REG_IDX_RD(win_cpu, remap_l, MV_WIN_CPU_REMAP_LO, MV_MBUS_BRIDGE_BASE) @@ -597,7 +643,11 @@ WIN_REG_IDX_WR(win_cpu, cr, MV_WIN_CPU_CTRL, MV_MBUS_BRIDGE_BASE) WIN_REG_IDX_WR(win_cpu, br, MV_WIN_CPU_BASE, MV_MBUS_BRIDGE_BASE) WIN_REG_IDX_WR(win_cpu, remap_l, MV_WIN_CPU_REMAP_LO, MV_MBUS_BRIDGE_BASE) WIN_REG_IDX_WR(win_cpu, remap_h, MV_WIN_CPU_REMAP_HI, MV_MBUS_BRIDGE_BASE) -#endif + +WIN_REG_BASE_IDX_RD(win_cesa, cr, MV_WIN_CESA_CTRL) +WIN_REG_BASE_IDX_RD(win_cesa, br, MV_WIN_CESA_BASE) +WIN_REG_BASE_IDX_WR(win_cesa, cr, MV_WIN_CESA_CTRL) +WIN_REG_BASE_IDX_WR(win_cesa, br, MV_WIN_CESA_BASE) WIN_REG_BASE_IDX_RD(win_usb, cr, MV_WIN_USB_CTRL) WIN_REG_BASE_IDX_RD(win_usb, br, MV_WIN_USB_BASE) @@ -639,6 +689,8 @@ WIN_REG_BASE_IDX_WR(win_pcie, cr, MV_WIN_PCIE_CTRL); WIN_REG_BASE_IDX_WR(win_pcie, br, MV_WIN_PCIE_BASE); 
WIN_REG_BASE_IDX_WR(win_pcie, remap, MV_WIN_PCIE_REMAP); WIN_REG_BASE_IDX_RD(pcie_bar, br, MV_PCIE_BAR_BASE); +WIN_REG_BASE_IDX_RD(pcie_bar, brh, MV_PCIE_BAR_BASE_H); +WIN_REG_BASE_IDX_RD(pcie_bar, cr, MV_PCIE_BAR_CTRL); WIN_REG_BASE_IDX_WR(pcie_bar, br, MV_PCIE_BAR_BASE); WIN_REG_BASE_IDX_WR(pcie_bar, brh, MV_PCIE_BAR_BASE_H); WIN_REG_BASE_IDX_WR(pcie_bar, cr, MV_PCIE_BAR_CTRL); @@ -658,6 +710,16 @@ WIN_REG_BASE_IDX_RD(win_sata, cr, MV_WIN_SATA_CTRL); WIN_REG_BASE_IDX_RD(win_sata, br, MV_WIN_SATA_BASE); WIN_REG_BASE_IDX_WR(win_sata, cr, MV_WIN_SATA_CTRL); WIN_REG_BASE_IDX_WR(win_sata, br, MV_WIN_SATA_BASE); +#if defined(SOC_MV_ARMADA38X) +WIN_REG_BASE_IDX_RD(win_sata, sz, MV_WIN_SATA_SIZE); +WIN_REG_BASE_IDX_WR(win_sata, sz, MV_WIN_SATA_SIZE); +#endif + +WIN_REG_BASE_IDX_RD(win_sdhci, cr, MV_WIN_SDHCI_CTRL); +WIN_REG_BASE_IDX_RD(win_sdhci, br, MV_WIN_SDHCI_BASE); +WIN_REG_BASE_IDX_WR(win_sdhci, cr, MV_WIN_SDHCI_CTRL); +WIN_REG_BASE_IDX_WR(win_sdhci, br, MV_WIN_SDHCI_BASE); + #ifndef SOC_MV_DOVE WIN_REG_IDX_RD(ddr, br, MV_WIN_DDR_BASE, MV_DDR_CADR_BASE) WIN_REG_IDX_RD(ddr, sz, MV_WIN_DDR_SIZE, MV_DDR_CADR_BASE) @@ -697,7 +759,6 @@ static inline uint32_t ddr_sz_read(int i) } #endif -#if !defined(SOC_MV_FREY) /************************************************************************** * Decode windows helper routines **************************************************************************/ @@ -920,7 +981,6 @@ decode_win_cpu_setup(void) cpu_wins[i].size, cpu_wins[i].remap); } -#endif #ifdef SOC_MV_ARMADAXP static int @@ -1025,7 +1085,7 @@ ddr_size(int i) uint32_t ddr_attr(int i) { - uint32_t dev, rev; + uint32_t dev, rev, attr; soc_id(&dev, &rev); if (dev == MV_DEV_88RC8180) @@ -1033,10 +1093,14 @@ ddr_attr(int i) if (dev == MV_DEV_88F6781) return (0); - return (i == 0 ? 0xe : + attr = (i == 0 ? 0xe : (i == 1 ? 0xd : (i == 2 ? 0xb : (i == 3 ? 
0x7 : 0xff)))); + if (platform_io_coherent) + attr |= 0x10; + + return (attr); } uint32_t @@ -1061,6 +1125,78 @@ ddr_target(int i) } /************************************************************************** + * CESA windows routines + **************************************************************************/ +static int +decode_win_cesa_valid(void) +{ + + return (decode_win_can_cover_ddr(MV_WIN_CESA_MAX)); +} + +static void +decode_win_cesa_dump(u_long base) +{ + int i; + + for (i = 0; i < MV_WIN_CESA_MAX; i++) + printf("CESA window#%d: c 0x%08x, b 0x%08x\n", i, + win_cesa_cr_read(base, i), win_cesa_br_read(base, i)); +} + +/* + * Set CESA decode windows. + */ +static void +decode_win_cesa_setup(u_long base) +{ + uint32_t br, cr; + uint64_t size; + int i, j; + + for (i = 0; i < MV_WIN_CESA_MAX; i++) { + win_cesa_cr_write(base, i, 0); + win_cesa_br_write(base, i, 0); + } + + /* Only access to active DRAM banks is required */ + for (i = 0; i < MV_WIN_DDR_MAX; i++) { + if (ddr_is_active(i)) { + br = ddr_base(i); + + size = ddr_size(i); +#ifdef SOC_MV_ARMADA38X + /* + * Armada 38x SoC's equipped with 4GB DRAM + * suffer freeze during CESA operation, if + * MBUS window opened at given DRAM CS reaches + * end of the address space. Apply a workaround + * by setting the window size to the closest possible + * value, i.e. divide it by 2. 
+ */ + if (size + ddr_base(i) == 0x100000000ULL) + size /= 2; +#endif + + cr = (((size - 1) & 0xffff0000) | + (ddr_attr(i) << IO_WIN_ATTR_SHIFT) | + (ddr_target(i) << IO_WIN_TGT_SHIFT) | + IO_WIN_ENA_MASK); + + /* Set the first free CESA window */ + for (j = 0; j < MV_WIN_CESA_MAX; j++) { + if (win_cesa_cr_read(base, j) & 0x1) + continue; + + win_cesa_br_write(base, j, br); + win_cesa_cr_write(base, j, cr); + break; + } + } + } +} + +/************************************************************************** * USB windows routines **************************************************************************/ static int @@ -1092,7 +1228,6 @@ decode_win_usb_setup(u_long base) uint32_t br, cr; int i, j; - if (pm_is_disabled(CPU_PM_CTRL_USB(usb_port))) return; @@ -1279,11 +1414,7 @@ decode_win_eth_dump(u_long base) win_eth_epap_read(base)); } -#if defined(SOC_MV_LOKIPLUS) -#define MV_WIN_ETH_DDR_TRGT(n) 0 -#else #define MV_WIN_ETH_DDR_TRGT(n) ddr_target(n) -#endif static void decode_win_eth_setup(u_long base) @@ -1334,6 +1465,20 @@ decode_win_eth_setup(u_long base) } } +static void +decode_win_neta_dump(u_long base) +{ + + decode_win_eth_dump(base + MV_WIN_NETA_OFFSET); +} + +static void +decode_win_neta_setup(u_long base) +{ + + decode_win_eth_setup(base + MV_WIN_NETA_OFFSET); +} + static int decode_win_eth_valid(void) { @@ -1344,6 +1489,22 @@ decode_win_eth_valid(void) /************************************************************************** * PCIE windows routines **************************************************************************/ +static void +decode_win_pcie_dump(u_long base) +{ + int i; + + printf("PCIE windows base 0x%08lx\n", base); + for (i = 0; i < MV_WIN_PCIE_MAX; i++) + printf("PCIE window#%d: cr 0x%08x br 0x%08x remap 0x%08x\n", + i, win_pcie_cr_read(base, i), + win_pcie_br_read(base, i), win_pcie_remap_read(base, i)); + + for (i = 0; i < MV_PCIE_BAR_MAX; i++) + printf("PCIE bar#%d: cr 0x%08x br 0x%08x brh 0x%08x\n", + i, pcie_bar_cr_read(base, i), 
+ pcie_bar_br_read(base, i), pcie_bar_brh_read(base, i)); +} void decode_win_pcie_setup(u_long base) @@ -1997,6 +2158,75 @@ decode_win_sata_setup(u_long base) } } +#ifdef SOC_MV_ARMADA38X +/* + * Configure AHCI decoding windows + */ +static void +decode_win_ahci_setup(u_long base) +{ + uint32_t br, cr, sz; + int i, j; + + for (i = 0; i < MV_WIN_SATA_MAX; i++) { + win_sata_cr_write(base, i, 0); + win_sata_br_write(base, i, 0); + win_sata_sz_write(base, i, 0); + } + + for (i = 0; i < MV_WIN_DDR_MAX; i++) { + if (ddr_is_active(i)) { + cr = (ddr_attr(i) << IO_WIN_ATTR_SHIFT) | + (ddr_target(i) << IO_WIN_TGT_SHIFT) | + IO_WIN_ENA_MASK; + br = ddr_base(i); + sz = (ddr_size(i) - 1) & + (IO_WIN_SIZE_MASK << IO_WIN_SIZE_SHIFT); + + /* Use first available SATA window */ + for (j = 0; j < MV_WIN_SATA_MAX; j++) { + if (win_sata_cr_read(base, j) & IO_WIN_ENA_MASK) + continue; + + /* BASE is set to DRAM base (0x00000000) */ + win_sata_br_write(base, j, br); + /* CTRL targets DRAM ctrl with 0x0E or 0x0D */ + win_sata_cr_write(base, j, cr); + /* SIZE is set to 16MB - max value */ + win_sata_sz_write(base, j, sz); + break; + } + } + } +} + +static void +decode_win_ahci_dump(u_long base) +{ + int i; + + for (i = 0; i < MV_WIN_SATA_MAX; i++) + printf("SATA window#%d: cr 0x%08x, br 0x%08x, sz 0x%08x\n", i, + win_sata_cr_read(base, i), win_sata_br_read(base, i), + win_sata_sz_read(base,i)); +} + +#else +/* + * Provide dummy functions to satisfy the build + * for SoC's not equipped with AHCI controller + */ +static void +decode_win_ahci_setup(u_long base) +{ +} + +static void +decode_win_ahci_dump(u_long base) +{ +} +#endif + static int decode_win_sata_valid(void) { @@ -2009,6 +2239,60 @@ decode_win_sata_valid(void) return (decode_win_can_cover_ddr(MV_WIN_SATA_MAX)); } +static void +decode_win_sdhci_setup(u_long base) +{ + uint32_t cr, br; + int i, j; + + for (i = 0; i < MV_WIN_SDHCI_MAX; i++) { + win_sdhci_cr_write(base, i, 0); + win_sdhci_br_write(base, i, 0); + } + + for (i = 0; i < 
MV_WIN_DDR_MAX; i++) + if (ddr_is_active(i)) { + br = ddr_base(i); + cr = (((ddr_size(i) - 1) & + (IO_WIN_SIZE_MASK << IO_WIN_SIZE_SHIFT)) | + (ddr_attr(i) << IO_WIN_ATTR_SHIFT) | + (ddr_target(i) << IO_WIN_TGT_SHIFT) | + IO_WIN_ENA_MASK); + + /* Use the first available SDHCI window */ + for (j = 0; j < MV_WIN_SDHCI_MAX; j++) { + if (win_sdhci_cr_read(base, j) & IO_WIN_ENA_MASK) + continue; + + win_sdhci_cr_write(base, j, cr); + win_sdhci_br_write(base, j, br); + break; + } + } +} + +static void +decode_win_sdhci_dump(u_long base) +{ + int i; + + for (i = 0; i < MV_WIN_SDHCI_MAX; i++) + printf("SDHCI window#%d: c 0x%08x, b 0x%08x\n", i, + win_sdhci_cr_read(base, i), win_sdhci_br_read(base, i)); +} + +static int +decode_win_sdhci_valid(void) +{ + +#ifdef SOC_MV_ARMADA38X + return (decode_win_can_cover_ddr(MV_WIN_SDHCI_MAX)); +#endif + + /* Satisfy platforms not equipped with this controller. */ + return (1); +} + /************************************************************************** * FDT parsing routines. **************************************************************************/ @@ -2074,6 +2358,12 @@ win_cpu_from_dt(void) entry_size = tuple_size / sizeof(pcell_t); cpu_wins_no = tuples; + /* Check range */ + if (tuples > nitems(cpu_win_tbl)) { + debugf("too many tuples to fit into cpu_win_tbl\n"); + return (ENOMEM); + } + for (i = 0, t = 0; t < tuples; i += entry_size, t++) { cpu_win_tbl[t].target = 1; cpu_win_tbl[t].attr = fdt32_to_cpu(ranges[i + 1]); @@ -2092,7 +2382,7 @@ win_cpu_from_dt(void) * Retrieve CESA SRAM data. 
*/ if ((node = OF_finddevice("sram")) != -1) - if (fdt_is_compatible(node, "mrvl,cesa-sram")) + if (ofw_bus_node_is_compatible(node, "mrvl,cesa-sram")) goto moveon; if ((node = OF_finddevice("/")) == 0) @@ -2106,6 +2396,12 @@ moveon: if (fdt_regsize(node, &sram_base, &sram_size) != 0) return (EINVAL); + /* Check range */ + if (t >= nitems(cpu_win_tbl)) { + debugf("cannot fit CESA tuple into cpu_win_tbl\n"); + return (ENOMEM); + } + cpu_win_tbl[t].target = MV_WIN_CESA_TARGET; #ifdef SOC_MV_ARMADA38X cpu_win_tbl[t].attr = MV_WIN_CESA_ATTR(0); @@ -2120,7 +2416,7 @@ moveon: /* Check if there is a second CESA node */ while ((node = OF_peer(node)) != 0) { - if (fdt_is_compatible(node, "mrvl,cesa-sram")) { + if (ofw_bus_node_is_compatible(node, "mrvl,cesa-sram")) { if (fdt_regsize(node, &sram_base, &sram_size) != 0) return (EINVAL); break; @@ -2149,42 +2445,90 @@ moveon: } static int -fdt_win_setup(void) +fdt_win_process(phandle_t child) { - phandle_t node, child; + int i; struct soc_node_spec *soc_node; + int addr_cells, size_cells; + pcell_t reg[8]; u_long size, base; - int err, i; + for (i = 0; soc_nodes[i].compat != NULL; i++) { + + soc_node = &soc_nodes[i]; + + /* Setup only for enabled devices */ + if (ofw_bus_node_status_okay(child) == 0) + continue; + + if (!ofw_bus_node_is_compatible(child, soc_node->compat)) + continue; + + if (fdt_addrsize_cells(OF_parent(child), &addr_cells, + &size_cells)) + return (ENXIO); + + if ((sizeof(pcell_t) * (addr_cells + size_cells)) > sizeof(reg)) + return (ENOMEM); + + if (OF_getprop(child, "reg", ®, sizeof(reg)) <= 0) + return (EINVAL); + + if (addr_cells <= 2) + base = fdt_data_get(®[0], addr_cells); + else + base = fdt_data_get(®[addr_cells - 2], 2); + size = fdt_data_get(®[addr_cells], size_cells); + + base = (base & 0x000fffff) | fdt_immr_va; + if (soc_node->decode_handler != NULL) + soc_node->decode_handler(base); + else + return (ENXIO); + + if (MV_DUMP_WIN && (soc_node->dump_handler != NULL)) + 
soc_node->dump_handler(base); + } + + return (0); +} +static int +fdt_win_setup(void) +{ + phandle_t node, child, sb; + phandle_t child_pci; + int err; + + sb = 0; node = OF_finddevice("/"); if (node == -1) panic("fdt_win_setup: no root node"); + /* Allow for coherent transactions on the A38x MBUS */ + if (ofw_bus_node_is_compatible(node, "marvell,armada380")) + platform_io_coherent = true; + /* * Traverse through all children of root and simple-bus nodes. * For each found device retrieve decode windows data (if applicable). */ child = OF_child(node); while (child != 0) { - for (i = 0; soc_nodes[i].compat != NULL; i++) { - - soc_node = &soc_nodes[i]; - - if (!fdt_is_compatible(child, soc_node->compat)) - continue; - - err = fdt_regsize(child, &base, &size); - if (err != 0) - return (err); - - base = (base & 0x000fffff) | fdt_immr_va; - if (soc_node->decode_handler != NULL) - soc_node->decode_handler(base); - else - return (ENXIO); - - if (MV_DUMP_WIN && (soc_node->dump_handler != NULL)) - soc_node->dump_handler(base); + /* Lookup for callback and run */ + err = fdt_win_process(child); + if (err != 0) + return (err); + + /* Process Marvell Armada-XP/38x PCIe controllers */ + if (ofw_bus_node_is_compatible(child, "marvell,armada-370-pcie")) { + child_pci = OF_child(child); + while (child_pci != 0) { + err = fdt_win_process(child_pci); + if (err != 0) + return (err); + + child_pci = OF_peer(child_pci); + } } /* @@ -2193,7 +2537,7 @@ fdt_win_setup(void) */ child = OF_peer(child); if ((child == 0) && (node == OF_finddevice("/"))) { - node = fdt_find_compatible(node, "simple-bus", 0); + sb = node = fdt_find_compatible(node, "simple-bus", 0); if (node == 0) return (ENXIO); child = OF_child(node); @@ -2203,7 +2547,7 @@ fdt_win_setup(void) * it is present) and its children. This node also have * "simple-bus" compatible. 
*/ - if ((child == 0) && (node == OF_finddevice("simple-bus"))) { + if ((child == 0) && (node == sb)) { node = fdt_find_compatible(node, "simple-bus", 0); if (node == 0) return (0); @@ -2319,8 +2663,8 @@ fdt_pic_decode_ic(phandle_t node, pcell_t *intr, int *interrupt, int *trig, int *pol) { - if (!fdt_is_compatible(node, "mrvl,pic") && - !fdt_is_compatible(node, "mrvl,mpic")) + if (!ofw_bus_node_is_compatible(node, "mrvl,pic") && + !ofw_bus_node_is_compatible(node, "mrvl,mpic")) return (ENXIO); *interrupt = fdt32_to_cpu(intr[0]); diff --git a/sys/arm/mv/mv_machdep.c b/sys/arm/mv/mv_machdep.c index 782fcf0..174394a 100644 --- a/sys/arm/mv/mv_machdep.c +++ b/sys/arm/mv/mv_machdep.c @@ -46,10 +46,14 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/bus.h> #include <sys/devmap.h> +#include <sys/kernel.h> #include <vm/vm.h> #include <vm/pmap.h> +#include <arm/arm/mpcore_timervar.h> +#include <arm/arm/nexusvar.h> + #include <machine/bus.h> #include <machine/fdt.h> #include <machine/machdep.h> @@ -59,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu-v4.h> #else #include <machine/cpu-v6.h> +#include <machine/pte-v6.h> #endif #include <arm/mv/mvreg.h> /* XXX */ @@ -66,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include <arm/mv/mvwin.h> #include <dev/fdt/fdt_common.h> +#include <dev/ofw/ofw_bus_subr.h> static int platform_mpp_init(void); #if defined(SOC_MV_ARMADAXP) @@ -76,6 +82,7 @@ void armadaxp_l2_init(void); int armada38x_win_set_iosync_barrier(void); int armada38x_scu_enable(void); int armada38x_open_bootrom_win(void); +int armada38x_mbus_optimization(void); #endif #define MPP_PIN_MAX 68 @@ -84,6 +91,39 @@ int armada38x_open_bootrom_win(void); #define MPP_SEL(pin,func) (((func) & 0xf) << \ (((pin) % MPP_PINS_PER_REG) * 4)) +static void +mv_busdma_tag_init(void *arg __unused) +{ + phandle_t node; + bus_dma_tag_t dmat; + + /* + * If this platform has coherent DMA, create the parent DMA tag to pass + * down the coherent flag to all busses and devices on the 
platform, + * otherwise return without doing anything. By default create tag + * for all A38x-based platforms only. + */ + if ((node = OF_finddevice("/")) == -1) + return; + if (ofw_bus_node_is_compatible(node, "marvell,armada380") == 0) + return; + + bus_dma_tag_create(NULL, /* No parent tag */ + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + BUS_SPACE_MAXSIZE, /* maxsize */ + BUS_SPACE_UNRESTRICTED, /* nsegments */ + BUS_SPACE_MAXSIZE, /* maxsegsize */ + BUS_DMA_COHERENT, /* flags */ + NULL, NULL, /* lockfunc, lockarg */ + &dmat); + + nexus_set_dma_tag(dmat); +} +SYSINIT(mv_busdma_tag, SI_SUB_DRIVERS, SI_ORDER_ANY, mv_busdma_tag_init, NULL); + static int platform_mpp_init(void) { @@ -103,7 +143,7 @@ platform_mpp_init(void) * Try to access the MPP node directly i.e. through /aliases/mpp. */ if ((node = OF_finddevice("mpp")) != -1) - if (fdt_is_compatible(node, "mrvl,mpp")) + if (ofw_bus_node_is_compatible(node, "mrvl,mpp")) goto moveon; /* * Find the node the long way. @@ -243,14 +283,9 @@ platform_late_init(void) /* * Re-initialise decode windows */ -#if !defined(SOC_MV_FREY) if (soc_decode_win() != 0) printf("WARNING: could not re-initialise decode windows! " "Running with existing settings...\n"); -#else - /* Disable watchdog and timers */ - write_cpu_ctrl(CPU_TIMERS_BASE + CPU_TIMER_CONTROL, 0); -#endif #if defined(SOC_MV_ARMADAXP) #if !defined(SMP) /* For SMP case it should be initialized after APs are booted */ @@ -260,9 +295,23 @@ platform_late_init(void) #endif #if defined(SOC_MV_ARMADA38X) + /* Configure timers' base frequency */ + arm_tmr_change_frequency(get_cpu_freq() / 2); + + /* + * Workaround for Marvell Armada38X family HW issue + * between Cortex-A9 CPUs and on-chip devices that may + * cause hang on heavy load. + * To avoid that, map all registers including PCIe IO + * as strongly ordered instead of device memory. 
+ */ + pmap_remap_vm_attr(VM_MEMATTR_DEVICE, VM_MEMATTR_SO); + /* Set IO Sync Barrier bit for all Mbus devices */ if (armada38x_win_set_iosync_barrier() != 0) printf("WARNING: could not map CPU Subsystem registers\n"); + if (armada38x_mbus_optimization() != 0) + printf("WARNING: could not enable mbus optimization\n"); if (armada38x_scu_enable() != 0) printf("WARNING: could not enable SCU\n"); #ifdef SMP @@ -288,8 +337,8 @@ platform_sram_devmap(struct devmap_entry *map) * SRAM range. */ if ((child = OF_finddevice("/sram")) != 0) - if (fdt_is_compatible(child, "mrvl,cesa-sram") || - fdt_is_compatible(child, "mrvl,scratchpad")) + if (ofw_bus_node_is_compatible(child, "mrvl,cesa-sram") || + ofw_bus_node_is_compatible(child, "mrvl,scratchpad")) goto moveon; if ((root = OF_finddevice("/")) == 0) @@ -404,7 +453,7 @@ platform_devmap_init(void) i += 2; } - if (fdt_is_compatible(child, "mrvl,lbc")) { + if (ofw_bus_node_is_compatible(child, "mrvl,lbc")) { /* Check available space */ if (OF_getencprop(child, "bank-count", &bank_count, sizeof(bank_count)) <= 0) diff --git a/sys/arm/mv/mv_pci.c b/sys/arm/mv/mv_pci.c index 55d3f15..0fb528a 100644 --- a/sys/arm/mv/mv_pci.c +++ b/sys/arm/mv/mv_pci.c @@ -394,6 +394,7 @@ static driver_t mv_pcib_driver = { devclass_t pcib_devclass; DRIVER_MODULE(pcib, ofwbus, mv_pcib_driver, pcib_devclass, 0, 0); +DRIVER_MODULE(pcib, pcib_ctrl, mv_pcib_driver, pcib_devclass, 0, 0); static struct mtx pcicfg_mtx; @@ -419,22 +420,30 @@ mv_pcib_attach(device_t self) { struct mv_pcib_softc *sc; phandle_t node, parnode; - uint32_t val, unit; - int err; + uint32_t val, reg0; + int err, bus, devfn, port_id; sc = device_get_softc(self); sc->sc_dev = self; - unit = fdt_get_unit(self); - node = ofw_bus_get_node(self); parnode = OF_parent(node); - if (fdt_is_compatible(node, "mrvl,pcie")) { + + if (OF_getencprop(node, "marvell,pcie-port", &(port_id), + sizeof(port_id)) <= 0) { + /* If port ID does not exist in the FDT set value to 0 */ + if (!OF_hasprop(node, 
"marvell,pcie-port")) + port_id = 0; + else + return(ENXIO); + } + + if (ofw_bus_node_is_compatible(node, "mrvl,pcie")) { sc->sc_type = MV_TYPE_PCIE; - sc->sc_win_target = MV_WIN_PCIE_TARGET(unit); - sc->sc_mem_win_attr = MV_WIN_PCIE_MEM_ATTR(unit); - sc->sc_io_win_attr = MV_WIN_PCIE_IO_ATTR(unit); - } else if (fdt_is_compatible(node, "mrvl,pci")) { + sc->sc_win_target = MV_WIN_PCIE_TARGET(port_id); + sc->sc_mem_win_attr = MV_WIN_PCIE_MEM_ATTR(port_id); + sc->sc_io_win_attr = MV_WIN_PCIE_IO_ATTR(port_id); + } else if (ofw_bus_node_is_compatible(node, "mrvl,pci")) { sc->sc_type = MV_TYPE_PCI; sc->sc_win_target = MV_WIN_PCI_TARGET; sc->sc_mem_win_attr = MV_WIN_PCI_MEM_ATTR; @@ -476,7 +485,7 @@ mv_pcib_attach(device_t self) /* * Enable PCIE device. */ - mv_pcib_enable(sc, unit); + mv_pcib_enable(sc, port_id); /* * Memory management. @@ -485,6 +494,22 @@ mv_pcib_attach(device_t self) if (err) return (err); + /* + * Preliminary bus enumeration to find first linked devices and set + * appropriate bus number from which should start the actual enumeration + */ + for (bus = 0; bus < PCI_BUSMAX; bus++) { + for (devfn = 0; devfn < mv_pcib_maxslots(self); devfn++) { + reg0 = mv_pcib_read_config(self, bus, devfn, devfn & 0x7, 0x0, 4); + if (reg0 == (~0U)) + continue; /* no device */ + else { + sc->sc_busnr = bus; /* update bus number */ + break; + } + } + } + if (sc->sc_mode == MV_MODE_ROOT) { err = mv_pcib_init(sc, sc->sc_busnr, mv_pcib_maxslots(sc->sc_dev)); @@ -918,7 +943,7 @@ static inline void pcib_write_irq_mask(struct mv_pcib_softc *sc, uint32_t mask) { - if (sc->sc_type != MV_TYPE_PCI) + if (sc->sc_type != MV_TYPE_PCIE) return; bus_space_write_4(sc->sc_bst, sc->sc_bsh, PCIE_REG_IRQ_MASK, mask); diff --git a/sys/arm/mv/mv_pci_ctrl.c b/sys/arm/mv/mv_pci_ctrl.c new file mode 100644 index 0000000..c3d0def --- /dev/null +++ b/sys/arm/mv/mv_pci_ctrl.c @@ -0,0 +1,333 @@ +/*- + * Copyright (c) 2016 Stormshield + * Copyright (c) 2016 Semihalf + * All rights reserved. 
+ * + * Developed by Semihalf. + * + * Portions of this software were developed by Semihalf + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of MARVELL nor the names of contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Marvell integrated PCI/PCI-Express Bus Controller Driver. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/rman.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +static int mv_pcib_ctrl_probe(device_t); +static int mv_pcib_ctrl_attach(device_t); +static device_t mv_pcib_ctrl_add_child(device_t, u_int, const char *, int); +static const struct ofw_bus_devinfo * mv_pcib_ctrl_get_devinfo(device_t, device_t); +static struct resource * mv_pcib_ctrl_alloc_resource(device_t, device_t, int, + int *, rman_res_t, rman_res_t, rman_res_t, u_int); +void mv_pcib_ctrl_init(device_t, phandle_t); +static int mv_pcib_ofw_bus_attach(device_t); + +struct mv_pcib_ctrl_range { + uint64_t bus; + uint64_t host; + uint64_t size; +}; + +struct mv_pcib_ctrl_softc { + pcell_t addr_cells; + pcell_t size_cells; + int nranges; + struct mv_pcib_ctrl_range *ranges; +}; + +struct mv_pcib_ctrl_devinfo { + struct ofw_bus_devinfo di_dinfo; + struct resource_list di_rl; +}; + +static int mv_pcib_ctrl_fill_ranges(phandle_t, struct mv_pcib_ctrl_softc *); + +/* + * Bus interface definitions + */ +static device_method_t mv_pcib_ctrl_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, mv_pcib_ctrl_probe), + DEVMETHOD(device_attach, mv_pcib_ctrl_attach), + + /* Bus interface */ + DEVMETHOD(bus_add_child, mv_pcib_ctrl_add_child), + DEVMETHOD(bus_alloc_resource, mv_pcib_ctrl_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_devinfo, mv_pcib_ctrl_get_devinfo), + DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), + DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), + DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), + 
DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), + DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), + + DEVMETHOD_END +}; + +static driver_t mv_pcib_ctrl_driver = { + "pcib_ctrl", + mv_pcib_ctrl_methods, + sizeof(struct mv_pcib_ctrl_softc), +}; + +devclass_t pcib_ctrl_devclass; + +DRIVER_MODULE(pcib_ctrl, simplebus, mv_pcib_ctrl_driver, pcib_ctrl_devclass, 0, 0); + +MALLOC_DEFINE(M_PCIB_CTRL, "PCIe Bus Controller", + "Marvell Integrated PCIe Bus Controller"); + +static int +mv_pcib_ctrl_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "mrvl,pcie-ctrl") && + !ofw_bus_is_compatible(dev, "marvell,armada-370-pcie")) + return (ENXIO); + + device_set_desc(dev, "Marvell Integrated PCIe Bus Controller"); + return (BUS_PROBE_DEFAULT); +} + +static int +mv_pcib_ctrl_attach(device_t dev) +{ + int err; + + err = mv_pcib_ofw_bus_attach(dev); + if (err != 0) + return (err); + + return (bus_generic_attach(dev)); +} + +static int +mv_pcib_ofw_bus_attach(device_t dev) +{ + struct mv_pcib_ctrl_devinfo *di; + struct mv_pcib_ctrl_softc *sc; + device_t child; + phandle_t parent, node; + + parent = ofw_bus_get_node(dev); + sc = device_get_softc(dev); + if (parent > 0) { + sc->addr_cells = 1; + if (OF_getencprop(parent, "#address-cells", &(sc->addr_cells), + sizeof(sc->addr_cells)) <= 0) + return(ENXIO); + + sc->size_cells = 1; + if (OF_getencprop(parent, "#size-cells", &(sc->size_cells), + sizeof(sc->size_cells)) <= 0) + return(ENXIO); + + for (node = OF_child(parent); node > 0; node = OF_peer(node)) { + di = malloc(sizeof(*di), M_PCIB_CTRL, M_WAITOK | M_ZERO); + if (ofw_bus_gen_setup_devinfo(&di->di_dinfo, node)) { + if (bootverbose) { + device_printf(dev, + "Could not set up devinfo for PCI\n"); + } + free(di, M_PCIB_CTRL); + continue; + } + + child = device_add_child(dev, NULL, -1); + if (child == NULL) { + if (bootverbose) { + device_printf(dev, + "Could not add child: %s\n", + di->di_dinfo.obd_name); + } + ofw_bus_gen_destroy_devinfo(&di->di_dinfo); + free(di, 
M_PCIB_CTRL); + continue; + } + + resource_list_init(&di->di_rl); + ofw_bus_reg_to_rl(child, node, sc->addr_cells, + sc->size_cells, &di->di_rl); + + device_set_ivars(child, di); + } + } + + if (mv_pcib_ctrl_fill_ranges(parent, sc) < 0) { + device_printf(dev, "could not get ranges\n"); + return (ENXIO); + } + + return (0); +} + +static device_t +mv_pcib_ctrl_add_child(device_t dev, u_int order, const char *name, int unit) +{ + device_t cdev; + struct mv_pcib_ctrl_devinfo *di; + + cdev = device_add_child_ordered(dev, order, name, unit); + if (cdev == NULL) + return (NULL); + + di = malloc(sizeof(*di), M_DEVBUF, M_WAITOK | M_ZERO); + di->di_dinfo.obd_node = -1; + resource_list_init(&di->di_rl); + device_set_ivars(cdev, di); + + return (cdev); +} + +static struct resource * +mv_pcib_ctrl_alloc_resource(device_t bus, device_t child, int type, int *rid, + rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) +{ + struct mv_pcib_ctrl_devinfo *di; + struct resource_list_entry *rle; + struct mv_pcib_ctrl_softc *sc; + int i; + + if (RMAN_IS_DEFAULT_RANGE(start, end)) { + + if ((di = device_get_ivars(child)) == NULL) + return (NULL); + if (type != SYS_RES_MEMORY) + return (NULL); + + /* Find defaults for this rid */ + rle = resource_list_find(&di->di_rl, type, *rid); + + if (rle == NULL) + return (NULL); + + start = rle->start; + end = rle->end; + count = rle->count; + } + + sc = device_get_softc(bus); + if (type == SYS_RES_MEMORY) { + /* Remap through ranges property */ + for (i = 0; i < sc->nranges; i++) { + if (start >= sc->ranges[i].bus && end < + sc->ranges[i].bus + sc->ranges[i].size) { + start -= sc->ranges[i].bus; + start += sc->ranges[i].host; + end -= sc->ranges[i].bus; + end += sc->ranges[i].host; + break; + } + } + + if (i == sc->nranges && sc->nranges != 0) { + device_printf(bus, "Could not map resource " + "%#llx-%#llx\n", start, end); + return (NULL); + } + } + + return (bus_generic_alloc_resource(bus, child, type, rid, start, end, + count, flags)); 
+} + +static int +mv_pcib_ctrl_fill_ranges(phandle_t node, struct mv_pcib_ctrl_softc *sc) +{ + int host_address_cells; + cell_t *base_ranges; + ssize_t nbase_ranges; + int err; + int i, j, k; + + err = OF_searchencprop(OF_parent(node), "#address-cells", + &host_address_cells, sizeof(host_address_cells)); + if (err <= 0) + return (-1); + + nbase_ranges = OF_getproplen(node, "ranges"); + if (nbase_ranges < 0) + return (-1); + sc->nranges = nbase_ranges / sizeof(cell_t) / + (sc->addr_cells + host_address_cells + sc->size_cells); + if (sc->nranges == 0) + return (0); + + sc->ranges = malloc(sc->nranges * sizeof(sc->ranges[0]), + M_DEVBUF, M_WAITOK); + base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); + OF_getencprop(node, "ranges", base_ranges, nbase_ranges); + + for (i = 0, j = 0; i < sc->nranges; i++) { + sc->ranges[i].bus = 0; + for (k = 0; k < sc->addr_cells; k++) { + sc->ranges[i].bus <<= 32; + sc->ranges[i].bus |= base_ranges[j++]; + } + sc->ranges[i].host = 0; + for (k = 0; k < host_address_cells; k++) { + sc->ranges[i].host <<= 32; + sc->ranges[i].host |= base_ranges[j++]; + } + sc->ranges[i].size = 0; + for (k = 0; k < sc->size_cells; k++) { + sc->ranges[i].size <<= 32; + sc->ranges[i].size |= base_ranges[j++]; + } + } + + free(base_ranges, M_DEVBUF); + return (sc->nranges); +} + +static const struct ofw_bus_devinfo * +mv_pcib_ctrl_get_devinfo(device_t bus __unused, device_t child) +{ + struct mv_pcib_ctrl_devinfo *di; + + di = device_get_ivars(child); + return (&di->di_dinfo); +} diff --git a/sys/arm/mv/mv_spi.c b/sys/arm/mv/mv_spi.c new file mode 100644 index 0000000..a230a89 --- /dev/null +++ b/sys/arm/mv/mv_spi.c @@ -0,0 +1,351 @@ +/*- + * Copyright (c) 2017 Rubicon Communications, LLC (Netgate) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> + +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/rman.h> + +#include <machine/bus.h> +#include <machine/resource.h> +#include <machine/intr.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/spibus/spi.h> +#include <dev/spibus/spibusvar.h> + +#include "spibus_if.h" + +struct mv_spi_softc { + device_t sc_dev; + struct mtx sc_mtx; + struct resource *sc_mem_res; + struct resource *sc_irq_res; + struct spi_command *sc_cmd; + bus_space_tag_t sc_bst; + bus_space_handle_t sc_bsh; + uint32_t sc_len; + uint32_t sc_read; + uint32_t sc_flags; + uint32_t sc_written; + void *sc_intrhand; +}; + +#define MV_SPI_BUSY 0x1 +#define MV_SPI_WRITE(_sc, _off, _val) \ + bus_space_write_4((_sc)->sc_bst, (_sc)->sc_bsh, (_off), (_val)) +#define MV_SPI_READ(_sc, _off) \ + bus_space_read_4((_sc)->sc_bst, (_sc)->sc_bsh, (_off)) +#define MV_SPI_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) +#define MV_SPI_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) + +#define MV_SPI_CONTROL 0 +#define MV_SPI_CTRL_CS_SHIFT 2 +#define MV_SPI_CTRL_SMEMREADY (1 << 1) +#define MV_SPI_CTRL_CS_ACTIVE (1 << 0) +#define MV_SPI_CONF 0x4 +#define MV_SPI_CONF_BYTELEN (1 << 5) +#define MV_SPI_DATAOUT 0x8 +#define MV_SPI_DATAIN 0xc +#define MV_SPI_INTR_STAT 0x10 +#define MV_SPI_INTR_MASK 0x14 +#define MV_SPI_INTR_SMEMREADY (1 << 0) + +static struct ofw_compat_data compat_data[] = { + {"marvell,armada-380-spi", 1}, + {NULL, 0} +}; + +static void mv_spi_intr(void *); + +static int +mv_spi_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) + return (ENXIO); + + device_set_desc(dev, "Marvell SPI controller"); + + return (BUS_PROBE_DEFAULT); +} + +static int +mv_spi_attach(device_t dev) +{ + struct mv_spi_softc *sc; + int rid; + uint32_t reg; + + sc = device_get_softc(dev); + 
sc->sc_dev = dev; + + rid = 0; + sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, + RF_ACTIVE); + if (!sc->sc_mem_res) { + device_printf(dev, "cannot allocate memory window\n"); + return (ENXIO); + } + + sc->sc_bst = rman_get_bustag(sc->sc_mem_res); + sc->sc_bsh = rman_get_bushandle(sc->sc_mem_res); + + rid = 0; + sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_ACTIVE); + if (!sc->sc_irq_res) { + bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); + device_printf(dev, "cannot allocate interrupt\n"); + return (ENXIO); + } + + /* Deactivate the bus - just in case... */ + reg = MV_SPI_READ(sc, MV_SPI_CONTROL); + MV_SPI_WRITE(sc, MV_SPI_CONTROL, reg & ~MV_SPI_CTRL_CS_ACTIVE); + + /* Disable the two bytes FIFO. */ + reg = MV_SPI_READ(sc, MV_SPI_CONF); + MV_SPI_WRITE(sc, MV_SPI_CONF, reg & ~MV_SPI_CONF_BYTELEN); + + /* Clear and disable interrupts. */ + MV_SPI_WRITE(sc, MV_SPI_INTR_MASK, 0); + MV_SPI_WRITE(sc, MV_SPI_INTR_STAT, 0); + + /* Hook up our interrupt handler. */ + if (bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, + NULL, mv_spi_intr, sc, &sc->sc_intrhand)) { + bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq_res); + bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); + device_printf(dev, "cannot setup the interrupt handler\n"); + return (ENXIO); + } + + mtx_init(&sc->sc_mtx, "mv_spi", NULL, MTX_DEF); + + device_add_child(dev, "spibus", -1); + + /* Probe and attach the spibus when interrupts are available. 
*/ + config_intrhook_oneshot((ich_func_t)bus_generic_attach, dev); + + return (0); +} + +static int +mv_spi_detach(device_t dev) +{ + struct mv_spi_softc *sc; + + bus_generic_detach(dev); + + sc = device_get_softc(dev); + mtx_destroy(&sc->sc_mtx); + if (sc->sc_intrhand) + bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_intrhand); + if (sc->sc_irq_res) + bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq_res); + if (sc->sc_mem_res) + bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); + + return (0); +} + +static __inline void +mv_spi_rx_byte(struct mv_spi_softc *sc) +{ + struct spi_command *cmd; + uint32_t read; + uint8_t *p; + + cmd = sc->sc_cmd; + p = (uint8_t *)cmd->rx_cmd; + read = sc->sc_read++; + if (read >= cmd->rx_cmd_sz) { + p = (uint8_t *)cmd->rx_data; + read -= cmd->rx_cmd_sz; + } + p[read] = MV_SPI_READ(sc, MV_SPI_DATAIN) & 0xff; +} + +static __inline void +mv_spi_tx_byte(struct mv_spi_softc *sc) +{ + struct spi_command *cmd; + uint32_t written; + uint8_t *p; + + cmd = sc->sc_cmd; + p = (uint8_t *)cmd->tx_cmd; + written = sc->sc_written++; + if (written >= cmd->tx_cmd_sz) { + p = (uint8_t *)cmd->tx_data; + written -= cmd->tx_cmd_sz; + } + MV_SPI_WRITE(sc, MV_SPI_DATAOUT, p[written]); +} + +static void +mv_spi_intr(void *arg) +{ + struct mv_spi_softc *sc; + + sc = (struct mv_spi_softc *)arg; + MV_SPI_LOCK(sc); + + /* Filter stray interrupts. */ + if ((sc->sc_flags & MV_SPI_BUSY) == 0) { + MV_SPI_UNLOCK(sc); + return; + } + + /* RX */ + mv_spi_rx_byte(sc); + + /* TX */ + mv_spi_tx_byte(sc); + + /* Check for end of transfer. 
*/ + if (sc->sc_written == sc->sc_len && sc->sc_read == sc->sc_len) + wakeup(sc->sc_dev); + + MV_SPI_UNLOCK(sc); +} + +static int +mv_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) +{ + struct mv_spi_softc *sc; + uint32_t cs, reg; + int resid, timeout; + + KASSERT(cmd->tx_cmd_sz == cmd->rx_cmd_sz, + ("TX/RX command sizes should be equal")); + KASSERT(cmd->tx_data_sz == cmd->rx_data_sz, + ("TX/RX data sizes should be equal")); + + /* Get the proper chip select for this child. */ + spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + + sc = device_get_softc(dev); + MV_SPI_LOCK(sc); + + /* Wait until the controller is free. */ + while (sc->sc_flags & MV_SPI_BUSY) + mtx_sleep(dev, &sc->sc_mtx, 0, "mv_spi", 0); + + /* Now we have control over SPI controller. */ + sc->sc_flags = MV_SPI_BUSY; + + /* Save a pointer to the SPI command. */ + sc->sc_cmd = cmd; + sc->sc_read = 0; + sc->sc_written = 0; + sc->sc_len = cmd->tx_cmd_sz + cmd->tx_data_sz; + + MV_SPI_WRITE(sc, MV_SPI_CONTROL, cs << MV_SPI_CTRL_CS_SHIFT); + reg = MV_SPI_READ(sc, MV_SPI_CONTROL); + MV_SPI_WRITE(sc, MV_SPI_CONTROL, reg | MV_SPI_CTRL_CS_ACTIVE); + + while ((resid = sc->sc_len - sc->sc_written) > 0) { + + MV_SPI_WRITE(sc, MV_SPI_INTR_STAT, 0); + + /* + * Write to start the transmission and read the byte + * back when ready. + */ + mv_spi_tx_byte(sc); + timeout = 1000; + while (--timeout > 0) { + reg = MV_SPI_READ(sc, MV_SPI_CONTROL); + if (reg & MV_SPI_CTRL_SMEMREADY) + break; + DELAY(1); + } + if (timeout == 0) + break; + mv_spi_rx_byte(sc); + } + + /* Stop the controller. */ + reg = MV_SPI_READ(sc, MV_SPI_CONTROL); + MV_SPI_WRITE(sc, MV_SPI_CONTROL, reg & ~MV_SPI_CTRL_CS_ACTIVE); + MV_SPI_WRITE(sc, MV_SPI_INTR_MASK, 0); + MV_SPI_WRITE(sc, MV_SPI_INTR_STAT, 0); + + /* Release the controller and wakeup the next thread waiting for it. */ + sc->sc_flags = 0; + wakeup_one(dev); + MV_SPI_UNLOCK(sc); + + /* + * Check for transfer timeout. The SPI controller doesn't + * return errors. 
+ */ + return ((timeout == 0) ? EIO : 0); +} + +static phandle_t +mv_spi_get_node(device_t bus, device_t dev) +{ + + return (ofw_bus_get_node(bus)); +} + +static device_method_t mv_spi_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, mv_spi_probe), + DEVMETHOD(device_attach, mv_spi_attach), + DEVMETHOD(device_detach, mv_spi_detach), + + /* SPI interface */ + DEVMETHOD(spibus_transfer, mv_spi_transfer), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_node, mv_spi_get_node), + + DEVMETHOD_END +}; + +static devclass_t mv_spi_devclass; + +static driver_t mv_spi_driver = { + "spi", + mv_spi_methods, + sizeof(struct mv_spi_softc), +}; + +DRIVER_MODULE(mv_spi, simplebus, mv_spi_driver, mv_spi_devclass, 0, 0); diff --git a/sys/arm/mv/mvreg.h b/sys/arm/mv/mvreg.h index abfefc1..dfc07d1 100644 --- a/sys/arm/mv/mvreg.h +++ b/sys/arm/mv/mvreg.h @@ -52,18 +52,6 @@ #define ENDPOINT_IRQ_MASK(n) 0x30 #define ENDPOINT_IRQ_MASK_HI(n) 0x34 #define ENDPOINT_IRQ_CAUSE_SELECT 0x38 -#elif defined (SOC_MV_LOKIPLUS) || defined (SOC_MV_FREY) -#define IRQ_CAUSE 0x0 -#define IRQ_MASK 0x4 -#define FIQ_MASK 0x8 -#define ENDPOINT_IRQ_MASK(n) (0xC + (n) * 4) -#define IRQ_CAUSE_HI (-1) /* Fake defines for unified */ -#define IRQ_MASK_HI (-1) /* interrupt controller code */ -#define FIQ_MASK_HI (-1) -#define ENDPOINT_IRQ_MASK_HI(n) (-1) -#define ENDPOINT_IRQ_MASK_ERROR(n) (-1) -#define IRQ_CAUSE_ERROR (-1) -#define IRQ_MASK_ERROR (-1) #elif defined (SOC_MV_ARMADAXP) #define IRQ_CAUSE 0x18 #define IRQ_MASK 0x30 @@ -84,17 +72,7 @@ #define IRQ_MASK_ERROR (-1) /* interrupt controller code */ #endif -#if defined(SOC_MV_FREY) -#define BRIDGE_IRQ_CAUSE 0x118 -#define IRQ_TIMER0 0x00000002 -#define IRQ_TIMER1 0x00000004 -#define IRQ_TIMER_WD 0x00000008 - -#define BRIDGE_IRQ_MASK 0x11c -#define IRQ_TIMER0_MASK 0x00000002 -#define IRQ_TIMER1_MASK 0x00000004 -#define IRQ_TIMER_WD_MASK 0x00000008 -#elif defined(SOC_MV_ARMADAXP) +#if defined(SOC_MV_ARMADAXP) #define BRIDGE_IRQ_CAUSE 0x68 
#define IRQ_TIMER0 0x00000001 #define IRQ_TIMER1 0x00000002 @@ -113,17 +91,10 @@ #define IRQ_TIMER_WD_MASK 0x00000008 #endif -#if defined(SOC_MV_LOKIPLUS) || defined(SOC_MV_FREY) -#define IRQ_CPU_SELF_CLR IRQ_CPU_SELF -#define IRQ_TIMER0_CLR IRQ_TIMER0 -#define IRQ_TIMER1_CLR IRQ_TIMER1 -#define IRQ_TIMER_WD_CLR IRQ_TIMER_WD -#else #define IRQ_CPU_SELF_CLR (~IRQ_CPU_SELF) #define IRQ_TIMER0_CLR (~IRQ_TIMER0) #define IRQ_TIMER1_CLR (~IRQ_TIMER1) #define IRQ_TIMER_WD_CLR (~IRQ_TIMER_WD) -#endif /* * System reset @@ -348,22 +319,14 @@ #define GPIO2IRQ(gpio) ((gpio) + NIRQ) #define IRQ2GPIO(irq) ((irq) - NIRQ) -#if defined(SOC_MV_ORION) || defined(SOC_MV_LOKIPLUS) +#if defined(SOC_MV_ORION) #define SAMPLE_AT_RESET 0x10 #elif defined(SOC_MV_KIRKWOOD) #define SAMPLE_AT_RESET 0x30 -#elif defined(SOC_MV_FREY) -#define SAMPLE_AT_RESET 0x100 #elif defined(SOC_MV_ARMADA38X) #define SAMPLE_AT_RESET 0x400 #endif -#if defined(SOC_MV_DISCOVERY) -#define SAMPLE_AT_RESET_LO 0x30 -#define SAMPLE_AT_RESET_HI 0x34 -#elif defined(SOC_MV_DOVE) -#define SAMPLE_AT_RESET_LO 0x14 -#define SAMPLE_AT_RESET_HI 0x18 -#elif defined(SOC_MV_ARMADAXP) +#if defined(SOC_MV_DISCOVERY) || defined(SOC_MV_ARMADAXP) #define SAMPLE_AT_RESET_LO 0x30 #define SAMPLE_AT_RESET_HI 0x34 #endif @@ -377,9 +340,6 @@ #elif defined(SOC_MV_DISCOVERY) #define TCLK_MASK 0x00000180 #define TCLK_SHIFT 0x07 -#elif defined(SOC_MV_LOKIPLUS) -#define TCLK_MASK 0x0000F000 -#define TCLK_SHIFT 0x0C #elif defined(SOC_MV_ARMADA38X) #define TCLK_MASK 0x00008000 #define TCLK_SHIFT 15 @@ -395,6 +355,9 @@ #define TCLK_300MHZ 300000000 #define TCLK_667MHZ 667000000 +#define A38X_CPU_DDR_CLK_MASK 0x00007c00 +#define A38X_CPU_DDR_CLK_SHIFT 10 + /* * CPU Cache Configuration */ @@ -452,15 +415,9 @@ #define MV_DRBL_PCIE_TO_CPU 0 #define MV_DRBL_CPU_TO_PCIE 1 -#if defined(SOC_MV_FREY) -#define MV_DRBL_CAUSE(d,u) (0x60 + 0x20 * (d) + 0x8 * (u)) -#define MV_DRBL_MASK(d,u) (0x60 + 0x20 * (d) + 0x8 * (u) + 0x4) -#define MV_DRBL_MSG(m,d,u) (0x8 * 
(u) + 0x20 * (d) + 0x4 * (m)) -#else #define MV_DRBL_CAUSE(d,u) (0x10 * (u) + 0x8 * (d)) #define MV_DRBL_MASK(d,u) (0x10 * (u) + 0x8 * (d) + 0x4) #define MV_DRBL_MSG(m,d,u) (0x10 * (u) + 0x8 * (d) + 0x4 * (m) + 0x30) -#endif /* * SCU @@ -470,7 +427,8 @@ #define MV_SCU_REGS_LEN 0x100 #define MV_SCU_REG_CTRL 0x00 #define MV_SCU_REG_CONFIG 0x04 -#define MV_SCU_ENABLE 1 +#define MV_SCU_ENABLE (1 << 0) +#define MV_SCU_SL_L2_ENABLE (1 << 3) #define SCU_CFG_REG_NCPU_MASK 0x3 #endif @@ -493,4 +451,9 @@ #define CPU_RESET_ASSERT 0x1 #endif +#if defined(SOC_MV_ARMADA38X) +#define MV_MBUS_CTRL_BASE (MV_BASE + 0x20420) +#define MV_MBUS_CTRL_REGS_LEN 0x10 +#endif + #endif /* _MVREG_H_ */ diff --git a/sys/arm/mv/mvvar.h b/sys/arm/mv/mvvar.h index c08064f..1356ae2 100644 --- a/sys/arm/mv/mvvar.h +++ b/sys/arm/mv/mvvar.h @@ -104,6 +104,7 @@ uint32_t ddr_target(int i); uint32_t cpu_extra_feat(void); uint32_t get_tclk(void); +uint32_t get_cpu_freq(void); uint32_t get_l2clk(void); uint32_t read_cpu_ctrl(uint32_t); void write_cpu_ctrl(uint32_t, uint32_t); diff --git a/sys/arm/mv/mvwin.h b/sys/arm/mv/mvwin.h index 7e773a9..b8c36b3 100644 --- a/sys/arm/mv/mvwin.h +++ b/sys/arm/mv/mvwin.h @@ -52,25 +52,16 @@ /* SRAM */ #define MV_CESA_SRAM_BASE 0xF1100000 -/* AXI Regs */ -#ifdef SOC_MV_DOVE -#define MV_AXI_PHYS_BASE 0xF1800000 -#define MV_AXI_BASE MV_AXI_PHYS_BASE -#define MV_AXI_SIZE (16 * 1024 * 1024) /* 16 MB */ -#endif - /* * External devices: 0x80000000, 1 GB (VA == PA) * Includes Device Bus, PCI and PCIE. 
*/ #if defined(SOC_MV_ORION) #define MV_PCI_PORTS 2 /* 1x PCI + 1x PCIE */ -#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_FREY) +#elif defined(SOC_MV_KIRKWOOD) #define MV_PCI_PORTS 1 /* 1x PCIE */ #elif defined(SOC_MV_DISCOVERY) #define MV_PCI_PORTS 8 /* 8x PCIE */ -#elif defined(SOC_MV_DOVE) || defined(SOC_MV_LOKIPLUS) -#define MV_PCI_PORTS 2 /* 2x PCIE */ #elif defined(SOC_MV_ARMADAXP) #define MV_PCI_PORTS 3 /* 3x PCIE */ #elif defined(SOC_MV_ARMADA38X) @@ -93,11 +84,7 @@ #define MV_PCI_IO_SLICE_SIZE (MV_PCI_IO_SIZE / MV_PCI_PORTS) #define MV_PCI_IO_SLICE(n) (MV_PCI_IO_BASE + ((n) * MV_PCI_IO_SLICE_SIZE)) -#if defined(SOC_MV_FREY) -#define MV_PCI_VA_MEM_BASE MV_PCI_MEM_BASE -#else #define MV_PCI_VA_MEM_BASE 0 -#endif #define MV_PCI_VA_IO_BASE 0 /* @@ -120,11 +107,7 @@ * Integrated SoC peripherals addresses */ #define MV_BASE MV_PHYS_BASE /* VA == PA mapping */ -#if defined(SOC_MV_DOVE) -#define MV_DDR_CADR_BASE (MV_AXI_BASE + 0x100) -#elif defined(SOC_MV_LOKIPLUS) -#define MV_DDR_CADR_BASE (MV_BASE + 0xF1500) -#elif defined(SOC_MV_ARMADAXP) || defined(SOC_MV_ARMADA38X) +#if defined(SOC_MV_ARMADAXP) || defined(SOC_MV_ARMADA38X) #define MV_DDR_CADR_BASE (MV_BASE + 0x20180) #else #define MV_DDR_CADR_BASE (MV_BASE + 0x1500) @@ -137,20 +120,16 @@ #define MV_INTREGS_BASE (MV_MBUS_BRIDGE_BASE + 0x80) #define MV_MP_CLOCKS_BASE (MV_MBUS_BRIDGE_BASE + 0x700) #define MV_CPU_CONTROL_BASE (MV_MBUS_BRIDGE_BASE + 0x1800) -#elif !defined(SOC_MV_FREY) +#else #define MV_MBUS_BRIDGE_BASE (MV_BASE + 0x20000) #define MV_INTREGS_BASE (MV_MBUS_BRIDGE_BASE + 0x80) #define MV_CPU_CONTROL_BASE (MV_MBUS_BRIDGE_BASE + 0x100) -#else -#define MV_CPU_CONTROL_BASE (MV_BASE + 0x10000) #endif #define MV_PCI_BASE (MV_BASE + 0x30000) #define MV_PCI_SIZE 0x2000 -#if defined(SOC_MV_FREY) -#define MV_PCIE_BASE (MV_BASE + 0x8000) -#elif defined(SOC_MV_ARMADA38X) +#if defined(SOC_MV_ARMADA38X) #define MV_PCIE_BASE (MV_BASE + 0x80000) #else #define MV_PCIE_BASE (MV_BASE + 0x40000) @@ -193,26 +172,12 
@@ #endif #define MV_WIN_CPU_ATTR_SHIFT 8 -#if defined(SOC_MV_LOKIPLUS) -#define MV_WIN_CPU_TARGET_SHIFT 0 -#define MV_WIN_CPU_ENABLE_BIT (1 << 5) -#else #define MV_WIN_CPU_TARGET_SHIFT 4 #define MV_WIN_CPU_ENABLE_BIT 1 -#endif -#if defined(SOC_MV_DOVE) -#define MV_WIN_DDR_MAX 2 -#else /* SOC_MV_DOVE */ -#if defined(SOC_MV_LOKIPLUS) -#define MV_WIN_DDR_BASE(n) (0xc * (n) + 0x4) -#define MV_WIN_DDR_SIZE(n) (0xc * (n) + 0x0) -#else /* SOC_MV_LOKIPLUS */ #define MV_WIN_DDR_BASE(n) (0x8 * (n) + 0x0) #define MV_WIN_DDR_SIZE(n) (0x8 * (n) + 0x4) -#endif /* SOC_MV_LOKIPLUS */ #define MV_WIN_DDR_MAX 4 -#endif /* SOC_MV_DOVE */ /* * These values are valid only for peripherals decoding windows @@ -251,14 +216,22 @@ #define MV_WIN_CESA_ATTR(eng_sel) 0 #endif +/* CESA TDMA address decoding registers */ +#define MV_WIN_CESA_CTRL(n) (0x8 * (n) + 0xA04) +#define MV_WIN_CESA_BASE(n) (0x8 * (n) + 0xA00) +#define MV_WIN_CESA_MAX 4 + #define MV_WIN_USB_CTRL(n) (0x10 * (n) + 0x320) #define MV_WIN_USB_BASE(n) (0x10 * (n) + 0x324) #define MV_WIN_USB_MAX 4 -#define MV_WIN_USB3_CTRL(n) (0x8 * (n)) -#define MV_WIN_USB3_BASE(n) (0x8 * (n) + 0x4) +#define MV_WIN_USB3_CTRL(n) (0x8 * (n) + 0x4000) +#define MV_WIN_USB3_BASE(n) (0x8 * (n) + 0x4004) #define MV_WIN_USB3_MAX 8 +#define MV_WIN_NETA_OFFSET 0x2000 +#define MV_WIN_NETA_BASE(n) MV_WIN_ETH_BASE(n) + MV_WIN_NETA_OFFSET + #define MV_WIN_ETH_BASE(n) (0x8 * (n) + 0x200) #define MV_WIN_ETH_SIZE(n) (0x8 * (n) + 0x204) #define MV_WIN_ETH_REMAP(n) (0x4 * (n) + 0x280) @@ -280,7 +253,7 @@ #define MV_XOR_CHAN_MAX 2 #define MV_XOR_NON_REMAP 4 -#if defined(SOC_MV_DISCOVERY) || defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DOVE) +#if defined(SOC_MV_DISCOVERY) || defined(SOC_MV_KIRKWOOD) #define MV_WIN_PCIE_TARGET(n) 4 #define MV_WIN_PCIE_MEM_ATTR(n) 0xE8 #define MV_WIN_PCIE_IO_ATTR(n) 0xE0 @@ -296,10 +269,6 @@ #define MV_WIN_PCIE_TARGET(n) 4 #define MV_WIN_PCIE_MEM_ATTR(n) 0x59 #define MV_WIN_PCIE_IO_ATTR(n) 0x51 -#elif defined(SOC_MV_LOKIPLUS) -#define 
MV_WIN_PCIE_TARGET(n) (3 + (n)) -#define MV_WIN_PCIE_MEM_ATTR(n) 0x59 -#define MV_WIN_PCIE_IO_ATTR(n) 0x51 #endif #define MV_WIN_PCI_TARGET 3 @@ -324,27 +293,26 @@ #define MV_PCIE_CONTROL (0x1a00) #define MV_PCIE_ROOT_CMPLX (1 << 1) +#if defined(SOC_MV_ARMADA38X) +#define MV_WIN_SATA_CTRL(n) (0x10 * (n) + 0x60) +#define MV_WIN_SATA_BASE(n) (0x10 * (n) + 0x64) +#define MV_WIN_SATA_SIZE(n) (0x10 * (n) + 0x68) +#define MV_WIN_SATA_MAX 4 +#else #define MV_WIN_SATA_CTRL(n) (0x10 * (n) + 0x30) #define MV_WIN_SATA_BASE(n) (0x10 * (n) + 0x34) #define MV_WIN_SATA_MAX 4 +#endif + +#define MV_WIN_SDHCI_CTRL(n) (0x8 * (n) + 0x4080) +#define MV_WIN_SDHCI_BASE(n) (0x8 * (n) + 0x4084) +#define MV_WIN_SDHCI_MAX 8 #if defined(SOC_MV_ARMADA38X) #define MV_BOOTROM_MEM_ADDR 0xFFF00000 #define MV_BOOTROM_WIN_SIZE 0xF #define MV_CPU_SUBSYS_REGS_LEN 0x100 -/* IO Window Control Register fields */ -#define IO_WIN_SIZE_SHIFT 16 -#define IO_WIN_SIZE_MASK 0xFFFF -#define IO_WIN_ATTR_SHIFT 8 -#define IO_WIN_ATTR_MASK 0xFF -#define IO_WIN_TGT_SHIFT 4 -#define IO_WIN_TGT_MASK 0xF -#define IO_WIN_SYNC_SHIFT 1 -#define IO_WIN_SYNC_MASK 0x1 -#define IO_WIN_ENA_SHIFT 0 -#define IO_WIN_ENA_MASK 0x1 - #define IO_WIN_9_CTRL_OFFSET 0x98 #define IO_WIN_9_BASE_OFFSET 0x9C @@ -357,6 +325,19 @@ #define MV_SYNC_BARRIER_CTRL_ALL 0xFFFF #endif +/* IO Window Control Register fields */ +#define IO_WIN_SIZE_SHIFT 16 +#define IO_WIN_SIZE_MASK 0xFFFF +#define IO_WIN_COH_ATTR_MASK (0xF << 12) +#define IO_WIN_ATTR_SHIFT 8 +#define IO_WIN_ATTR_MASK 0xFF +#define IO_WIN_TGT_SHIFT 4 +#define IO_WIN_TGT_MASK 0xF +#define IO_WIN_SYNC_SHIFT 1 +#define IO_WIN_SYNC_MASK 0x1 +#define IO_WIN_ENA_SHIFT 0 +#define IO_WIN_ENA_MASK 0x1 + #define WIN_REG_IDX_RD(pre,reg,off,base) \ static __inline uint32_t \ pre ## _ ## reg ## _read(int i) \ diff --git a/sys/arm/mv/orion/orion.c b/sys/arm/mv/orion/orion.c index d4c3a16..2d1774e 100644 --- a/sys/arm/mv/orion/orion.c +++ b/sys/arm/mv/orion/orion.c @@ -100,3 +100,10 @@ get_tclk(void) 
panic("Unknown TCLK settings!"); } } + +uint32_t +get_cpu_freq(void) +{ + + return (0); +} diff --git a/sys/arm/rockchip/rk30xx_gpio.c b/sys/arm/rockchip/rk30xx_gpio.c index 60dd57b..75bfd00 100644 --- a/sys/arm/rockchip/rk30xx_gpio.c +++ b/sys/arm/rockchip/rk30xx_gpio.c @@ -612,7 +612,7 @@ rk30_gpio_init(void) */ ctrl = OF_node_from_xref(gpios[0]); - if (fdt_is_compatible(ctrl, e->compat)) + if (ofw_bus_node_is_compatible(ctrl, e->compat)) /* Call a handler. */ if ((rv = e->handler(ctrl, (pcell_t *)&gpios, len))) diff --git a/sys/arm/samsung/exynos/exynos5_spi.c b/sys/arm/samsung/exynos/exynos5_spi.c index 2a574b8..de25b42 100644 --- a/sys/arm/samsung/exynos/exynos5_spi.c +++ b/sys/arm/samsung/exynos/exynos5_spi.c @@ -204,6 +204,8 @@ spi_transfer(device_t dev, device_t child, struct spi_command *cmd) /* get the proper chip select */ spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + /* Command */ spi_txrx(sc, cmd->tx_cmd, cmd->rx_cmd, cmd->tx_cmd_sz, cs); diff --git a/sys/arm/ti/aintc.c b/sys/arm/ti/aintc.c index ae5bad6..522276b 100644 --- a/sys/arm/ti/aintc.c +++ b/sys/arm/ti/aintc.c @@ -136,8 +136,9 @@ ti_aintc_intr(void *arg) /* Get active interrupt */ irq = aintc_read_4(sc, INTC_SIR_IRQ); if ((irq & INTC_SIR_SPURIOUS_MASK) != 0) { - device_printf(sc->sc_dev, - "Spurious interrupt detected (0x%08x)\n", irq); + if (bootverbose) + device_printf(sc->sc_dev, + "Spurious interrupt detected (0x%08x)\n", irq); ti_aintc_irq_eoi(sc); return (FILTER_HANDLED); } @@ -351,8 +352,9 @@ arm_get_next_irq(int last_irq) /* Check for spurious interrupt */ if ((active_irq & 0xffffff80)) { - device_printf(sc->sc_dev, - "Spurious interrupt detected (0x%08x)\n", active_irq); + if (bootverbose) + device_printf(sc->sc_dev, + "Spurious interrupt detected (0x%08x)\n", active_irq); aintc_write_4(sc, INTC_SIR_IRQ, 0); return -1; } diff --git a/sys/arm/ti/am335x/am335x_ecap.c b/sys/arm/ti/am335x/am335x_ecap.c index eba5f72..ea05a70 100644 --- a/sys/arm/ti/am335x/am335x_ecap.c +++ 
b/sys/arm/ti/am335x/am335x_ecap.c @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD: head/sys/arm/ti/am335x/am335x_ecap.c 283276 2015-05-22 03:16:18Z gonzo $"); #include <sys/param.h> #include <sys/systm.h> @@ -53,10 +53,27 @@ __FBSDID("$FreeBSD$"); #define ECAP_CAP2 0x0C #define ECAP_CAP3 0x10 #define ECAP_CAP4 0x14 +#define ECAP_ECCTL1 0x28 +#define ECCTL1_CAPLDEN (1 << 8) +#define ECCTL1_CTRRST4 (1 << 7) +#define ECCTL1_CTRRST3 (1 << 5) +#define ECCTL1_CTRRST2 (1 << 3) +#define ECCTL1_CTRRST1 (1 << 1) #define ECAP_ECCTL2 0x2A #define ECCTL2_MODE_APWM (1 << 9) #define ECCTL2_SYNCO_SEL (3 << 6) #define ECCTL2_TSCTRSTOP_FREERUN (1 << 4) +#define ECCTL2_REARM (1 << 3) +#define ECCTL2_STOP_WRAP_EVENT1 (0 << 1) +#define ECCTL2_STOP_WRAP_EVENT2 (1 << 1) +#define ECCTL2_STOP_WRAP_EVENT3 (2 << 1) +#define ECCTL2_STOP_WRAP_EVENT4 (3 << 1) +#define ECCTL2_CONT_ONESHT (1 << 0) +#define ECAP_ECEINT 0x2C +#define ECEINT_CEVT4 (1 << 4) +#define ECAP_ECFLG 0x2E +#define ECAP_ECCLR 0x30 +#define ECCLR_MASK 0xff #define ECAP_READ2(_sc, reg) bus_read_2((_sc)->sc_mem_res, reg); #define ECAP_WRITE2(_sc, reg, value) \ @@ -78,8 +95,13 @@ static device_detach_t am335x_ecap_detach; struct am335x_ecap_softc { device_t sc_dev; struct mtx sc_mtx; + struct resource *sc_irq_res; struct resource *sc_mem_res; + int sc_ecap_mode; + int sc_irq_rid; int sc_mem_rid; + uint32_t sc_period; + void *sc_intrhand; }; static device_method_t am335x_ecap_methods[] = { @@ -119,6 +141,9 @@ am335x_pwm_config_ecap(int unit, int period, int duty) return (EINVAL); sc = device_get_softc(dev); + if (sc->sc_ecap_mode) + return (EINVAL); + PWM_LOCK(sc); reg = ECAP_READ2(sc, ECAP_ECCTL2); @@ -138,6 +163,31 @@ am335x_pwm_config_ecap(int unit, int period, int duty) return (0); } +static void +am335x_ecap_intr(void *arg) +{ + struct am335x_ecap_softc *sc; + uint16_t reg; + uint64_t v; + + sc = (struct am335x_ecap_softc *)arg; + PWM_LOCK(sc); + v = ECAP_READ4(sc, ECAP_CAP1); + v += 
ECAP_READ4(sc, ECAP_CAP2); + v += ECAP_READ4(sc, ECAP_CAP3); + v += ECAP_READ4(sc, ECAP_CAP4); + v /= 4; + sc->sc_period = (uint32_t)v; + + reg = ECAP_READ2(sc, ECAP_ECFLG); + ECAP_WRITE2(sc, ECAP_ECCLR, ECCLR_MASK); + + reg = ECAP_READ2(sc, ECAP_ECCTL2); + reg |= ECCTL2_REARM; + ECAP_WRITE2(sc, ECAP_ECCTL2, reg); + PWM_UNLOCK(sc); +} + static int am335x_ecap_probe(device_t dev) { @@ -157,9 +207,11 @@ static int am335x_ecap_attach(device_t dev) { struct am335x_ecap_softc *sc; + uint16_t reg; sc = device_get_softc(dev); sc->sc_dev = dev; + sc->sc_ecap_mode = 1; PWM_LOCK_INIT(sc); @@ -169,6 +221,45 @@ am335x_ecap_attach(device_t dev) device_printf(dev, "cannot allocate memory resources\n"); goto fail; } + sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &sc->sc_irq_rid, RF_ACTIVE); + if (sc->sc_irq_res == NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_mem_rid, + sc->sc_mem_res); + device_printf(dev, "cannot allocate interrupt\n"); + return (ENXIO); + } + + /* Hook up our interrupt handler. */ + if (bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, + NULL, am335x_ecap_intr, sc, &sc->sc_intrhand)) { + bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid, + sc->sc_irq_res); + bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_mem_rid, + sc->sc_mem_res); + device_printf(dev, "cannot setup the interrupt handler\n"); + return (ENXIO); + } + + SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->sc_dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(sc->sc_dev)), + OID_AUTO, "period", CTLFLAG_RD, &sc->sc_period, 0, "eCMP period"); + + /* One shot, wrap counter after read event 4, no sync, stopped. */ + ECAP_WRITE2(sc, ECAP_ECCTL2, ECCTL2_SYNCO_SEL | + ECCTL2_STOP_WRAP_EVENT4 | ECCTL2_CONT_ONESHT); + /* Delta mode, rising edge. */ + ECAP_WRITE2(sc, ECAP_ECCTL1, ECCTL1_CAPLDEN | ECCTL1_CTRRST1 | + ECCTL1_CTRRST2 | ECCTL1_CTRRST3 | ECCTL1_CTRRST4); + /* Restart counter */ + ECAP_WRITE4(sc, ECAP_TSCTR, 0); + /* Enable overflow interrupt. 
*/ + ECAP_WRITE2(sc, ECAP_ECCLR, ECCLR_MASK); + ECAP_WRITE2(sc, ECAP_ECEINT, ECEINT_CEVT4); + /* Start count. */ + reg = ECAP_READ2(sc, ECAP_ECCTL2); + reg |= ECCTL2_TSCTRSTOP_FREERUN; + ECAP_WRITE2(sc, ECAP_ECCTL2, reg); return (0); @@ -185,6 +276,13 @@ am335x_ecap_detach(device_t dev) sc = device_get_softc(dev); PWM_LOCK(sc); + ECAP_WRITE2(sc, ECAP_ECEINT, 0); + ECAP_WRITE2(sc, ECAP_ECCLR, ECCLR_MASK); + if (sc->sc_intrhand) + bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_intrhand); + if (sc->sc_irq_res) + bus_release_resource(dev, SYS_RES_IRQ, + sc->sc_irq_rid, sc->sc_irq_res); if (sc->sc_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_mem_rid, sc->sc_mem_res); diff --git a/sys/arm/ti/am335x/am335x_lcd_syscons.c b/sys/arm/ti/am335x/am335x_lcd_syscons.c index e8ab873..f3bbe43 100644 --- a/sys/arm/ti/am335x/am335x_lcd_syscons.c +++ b/sys/arm/ti/am335x/am335x_lcd_syscons.c @@ -351,7 +351,7 @@ am335x_syscons_find_panel_node(phandle_t start) phandle_t result; for (child = OF_child(start); child != 0; child = OF_peer(child)) { - if (fdt_is_compatible(child, "ti,am335x-lcd")) + if (ofw_bus_node_is_compatible(child, "ti,am335x-lcd")) return (child); if ((result = am335x_syscons_find_panel_node(child))) return (result); diff --git a/sys/arm/ti/cpsw/if_cpsw.c b/sys/arm/ti/cpsw/if_cpsw.c index 2e722e1..a887561 100644 --- a/sys/arm/ti/cpsw/if_cpsw.c +++ b/sys/arm/ti/cpsw/if_cpsw.c @@ -92,6 +92,9 @@ __FBSDID("$FreeBSD$"); #include "miibus_if.h" +static int cpsw_ntxd = 128; +TUNABLE_INT("hw.cpsw.txd", &cpsw_ntxd); + /* Device probe/attach/detach. */ static int cpsw_probe(device_t); static int cpsw_attach(device_t); @@ -869,9 +872,11 @@ cpsw_attach(device_t dev) STAILQ_INIT(&sc->rx.active); STAILQ_INIT(&sc->tx.avail); STAILQ_INIT(&sc->tx.active); - // For now: 128 slots to TX, rest to RX. - // XXX TODO: start with 32/64 and grow dynamically based on demand. 
- if (cpsw_add_slots(sc, &sc->tx, 128) || + if (cpsw_ntxd < 32) + cpsw_ntxd = 32; + if (cpsw_ntxd > 128) + cpsw_ntxd = 128; + if (cpsw_add_slots(sc, &sc->tx, cpsw_ntxd) || cpsw_add_slots(sc, &sc->rx, -1)) { device_printf(dev, "failed to allocate dmamaps\n"); cpsw_detach(dev); @@ -1135,7 +1140,7 @@ cpswp_init_locked(void *arg) #endif struct cpswp_softc *sc = arg; struct ifnet *ifp; - uint32_t reg; + uint32_t ports, reg; CPSW_DEBUGF(sc->swsc, ("")); CPSW_PORT_LOCK_ASSERT(sc); @@ -1172,10 +1177,13 @@ cpswp_init_locked(void *arg) /* Set Port VID. */ cpsw_write_4(sc->swsc, CPSW_PORT_P_VLAN(sc->unit + 1), sc->vlan & 0xfff); + /* Default to port + CPU port. */ + ports = (1 << (sc->unit + 1)) | (1 << 0); cpsw_ale_update_vlan_table(sc->swsc, sc->vlan, - (1 << (sc->unit + 1)) | (1 << 0), /* Member list */ - (1 << (sc->unit + 1)) | (1 << 0), /* Untagged egress */ - (1 << (sc->unit + 1)) | (1 << 0), 0); /* mcast reg flood */ + ports, /* Member list */ + ports, /* Untagged egress */ + ports, /* Mcast reg flood */ + (sc->swsc->allmulti ? ports : 0)); /* Mcast unreg flood */ #ifdef CPSW_ETHERSWITCH for (i = 0; i < CPSW_VLANS; i++) { if (cpsw_vgroups[i].vid != -1) @@ -1366,9 +1374,8 @@ cpsw_set_promisc(struct cpswp_softc *sc, int set) static void cpsw_set_allmulti(struct cpswp_softc *sc, int set) { - if (set) { - printf("All-multicast mode unimplemented\n"); - } + sc->swsc->allmulti = (set) ? 
true : false; + cpswp_ale_update_addresses(sc, 1); } static int @@ -1832,7 +1839,7 @@ cpswp_tx_enqueue(struct cpswp_softc *sc) first_new_slot = NULL; last_old_slot = STAILQ_LAST(&sc->swsc->tx.active, cpsw_slot, next); while ((slot = STAILQ_FIRST(&sc->swsc->tx.avail)) != NULL) { - IF_DEQUEUE(&sc->ifp->if_snd, m0); + IFQ_DEQUEUE(&sc->ifp->if_snd, m0); if (m0 == NULL) break; @@ -2949,7 +2956,8 @@ cpsw_setvgroup(device_t dev, etherswitch_vlangroup_t *vg) cpsw_vgroups[vg->es_vlangroup].vid = vg->es_vid; cpsw_ale_update_vlan_table(sc, vg->es_vid, vg->es_member_ports, - vg->es_untagged_ports, vg->es_member_ports, 0); + vg->es_untagged_ports, vg->es_member_ports, + (sc->allmulti ? vg->es_member_ports : 0)); return (0); } @@ -2973,16 +2981,36 @@ cpsw_writereg(device_t dev, int addr, int value) static int cpsw_readphy(device_t dev, int phy, int reg) { + int i; + struct cpsw_softc *sc; - /* Not supported. */ - return (0); + sc = device_get_softc(dev); + if (!sc->dualemac) + return (0); + for (i = 0; i < CPSW_PORTS; i++) + if (sc->port[i].phy == phy) + break; + if (i >= CPSW_PORTS) + return (0); + + return (cpswp_miibus_readreg(sc->port[i].dev, phy, reg)); } static int cpsw_writephy(device_t dev, int phy, int reg, int data) { + int i; + struct cpsw_softc *sc; - /* Not supported. */ - return (0); + sc = device_get_softc(dev); + if (!sc->dualemac) + return (0); + for (i = 0; i < CPSW_PORTS; i++) + if (sc->port[i].phy == phy) + break; + if (i >= CPSW_PORTS) + return (0); + + return (cpswp_miibus_writereg(sc->port[i].dev, phy, reg, data)); } #endif diff --git a/sys/arm/ti/cpsw/if_cpswvar.h b/sys/arm/ti/cpsw/if_cpswvar.h index 003af22..1333565 100644 --- a/sys/arm/ti/cpsw/if_cpswvar.h +++ b/sys/arm/ti/cpsw/if_cpswvar.h @@ -88,7 +88,8 @@ struct cpsw_softc { device_t dev; int active_slave; int debug; - int dualemac; + bool dualemac; + bool allmulti; phandle_t node; struct bintime attach_uptime; /* system uptime when attach happened. 
*/ struct cpsw_port port[2]; diff --git a/sys/arm/ti/ti_cpuid.c b/sys/arm/ti/ti_cpuid.c index 7048efe..d7cefd1 100644 --- a/sys/arm/ti/ti_cpuid.c +++ b/sys/arm/ti/ti_cpuid.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rman.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/sysctl.h> #include <machine/bus.h> #include <machine/fdt.h> @@ -60,6 +61,10 @@ __FBSDID("$FreeBSD$"); #define OMAP3_ID_CODE 0xA204 static uint32_t chip_revision = 0xffffffff; +static char ti_soc_model[128]; + +SYSCTL_STRING(_hw, OID_AUTO, ti_soc_model, CTLFLAG_RD | CTLFLAG_MPSAFE, + ti_soc_model, 0, "TI SoC model"); /** * ti_revision - Returns the revision number of the device @@ -230,8 +235,11 @@ am335x_get_revision(void) cpu_last_char='x'; } - printf("Texas Instruments AM335%c Processor, Revision ES1.%u\n", + memset(ti_soc_model, 0, sizeof(ti_soc_model)); + snprintf(ti_soc_model, sizeof(ti_soc_model) - 1, + "Texas Instruments AM335%c Processor, Revision ES1.%u", cpu_last_char, AM335X_DEVREV(chip_revision)); + printf("%s\n", ti_soc_model); } /** diff --git a/sys/arm/ti/ti_spi.c b/sys/arm/ti/ti_spi.c index 68805a8..199bc06 100644 --- a/sys/arm/ti/ti_spi.c +++ b/sys/arm/ti/ti_spi.c @@ -458,6 +458,9 @@ ti_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) /* Get the proper chip select for this child. 
*/ spibus_get_cs(child, &cs); + + cs &= ~SPIBUS_CS_HIGH; + if (cs > sc->sc_numcs) { device_printf(dev, "Invalid chip select %d requested by %s\n", cs, device_get_nameunit(child)); diff --git a/sys/arm/versatile/versatile_common.c b/sys/arm/versatile/versatile_common.c index 983b3ce..54d6192 100644 --- a/sys/arm/versatile/versatile_common.c +++ b/sys/arm/versatile/versatile_common.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <dev/fdt/fdt_common.h> #include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus_subr.h> #include <machine/bus.h> #include <machine/vmparam.h> @@ -52,7 +53,7 @@ fdt_intc_decode_ic(phandle_t node, pcell_t *intr, int *interrupt, int *trig, int *pol) { - if (!fdt_is_compatible(node, "arm,versatile-vic")) + if (!ofw_bus_node_is_compatible(node, "arm,versatile-vic")) return (ENXIO); *interrupt = fdt32_to_cpu(intr[0]); diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c index d7209de1..bb9fe75 100644 --- a/sys/arm64/arm64/trap.c +++ b/sys/arm64/arm64/trap.c @@ -92,15 +92,17 @@ call_trapsignal(struct thread *td, int sig, int code, void *addr) } int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; + struct syscall_args *sa; int nap; nap = 8; p = td->td_proc; ap = td->td_frame->tf_x; + sa = &td->td_sa; sa->code = td->td_frame->tf_x[8]; @@ -132,12 +134,11 @@ cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) static void svc_handler(struct thread *td, struct trapframe *frame) { - struct syscall_args sa; int error; if ((frame->tf_esr & ESR_ELx_ISS_MASK) == 0) { - error = syscallenter(td, &sa); - syscallret(td, error, &sa); + error = syscallenter(td); + syscallret(td, error); } else { call_trapsignal(td, SIGILL, ILL_ILLOPN, (void *)frame->tf_elr); userret(td, frame); diff --git a/sys/arm64/arm64/uma_machdep.c b/sys/arm64/arm64/uma_machdep.c index 9b9df5c..34dc675 100644 --- a/sys/arm64/arm64/uma_machdep.c +++ 
b/sys/arm64/arm64/uma_machdep.c @@ -46,20 +46,12 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) vm_page_t m; vm_paddr_t pa; void *va; - int pflags; *flags = UMA_SLAB_PRIV; - pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; - for (;;) { - m = vm_page_alloc(NULL, 0, pflags); - if (m == NULL) { - if (wait & M_NOWAIT) - return (NULL); - else - VM_WAIT; - } else - break; - } + m = vm_page_alloc(NULL, 0, + malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + if (m == NULL) + return (NULL); pa = m->phys_addr; if ((wait & M_NODUMP) == 0) dump_add_page(pa); diff --git a/sys/arm64/cloudabi64/cloudabi64_sysvec.c b/sys/arm64/cloudabi64/cloudabi64_sysvec.c index fbad0ba..f6e75f5 100644 --- a/sys/arm64/cloudabi64/cloudabi64_sysvec.c +++ b/sys/arm64/cloudabi64/cloudabi64_sysvec.c @@ -67,11 +67,15 @@ cloudabi64_proc_setregs(struct thread *td, struct image_params *imgp, } static int -cloudabi64_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cloudabi64_fetch_syscall_args(struct thread *td) { - struct trapframe *frame = td->td_frame; + struct trapframe *frame; + struct syscall_args *sa; int i; + frame = td->td_frame; + sa = &td->td_sa; + /* Obtain system call number. 
*/ sa->code = frame->tf_x[8]; if (sa->code >= CLOUDABI64_SYS_MAXSYSCALL) diff --git a/sys/arm64/conf/GENERIC b/sys/arm64/conf/GENERIC index 986d726..6bedef2 100644 --- a/sys/arm64/conf/GENERIC +++ b/sys/arm64/conf/GENERIC @@ -29,6 +29,7 @@ options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h index 94260f7..86dedbf 100644 --- a/sys/arm64/include/proc.h +++ b/sys/arm64/include/proc.h @@ -45,8 +45,6 @@ struct mdproc { #define KINFO_PROC_SIZE 1088 -#ifdef _KERNEL - #define MAXARGS 8 struct syscall_args { u_int code; @@ -55,6 +53,4 @@ struct syscall_args { int narg; }; -#endif - #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/boot/arm/uboot/start.S b/sys/boot/arm/uboot/start.S index 1ef21f3..3f7764f 100644 --- a/sys/boot/arm/uboot/start.S +++ b/sys/boot/arm/uboot/start.S @@ -45,6 +45,13 @@ _start: orr ip, ip, #(CPU_CONTROL_AFLT_ENABLE) mcr p15, 0, ip, c1, c0, 0 #endif + + /* + * Save r0 and r1 (argc and argv passed from u-boot), and lr (trashed + * by the call to self_reloc below) until we're ready to call main(). + */ + push {r0, r1, lr} + /* * Do self-relocation when the weak external symbol _DYNAMIC is non-NULL. * When linked as a dynamic relocatable file, the linker automatically @@ -71,9 +78,11 @@ _start: str r9, [ip, #4] /* + * First restore argc, argv, and the u-boot return address, then * Start loader. This is basically a tail-recursion call; if main() * returns, it returns to u-boot (which reports the value returned r0). 
*/ + pop {r0, r1, lr} b main /* diff --git a/sys/boot/common/part.c b/sys/boot/common/part.c index 51cceca5b..f77897e 100644 --- a/sys/boot/common/part.c +++ b/sys/boot/common/part.c @@ -486,7 +486,7 @@ ptable_bsdread(struct ptable *table, void *dev, diskread_t dread) break; entry->part.start = le32toh(part->p_offset) - raw_offset; entry->part.end = entry->part.start + - le32toh(part->p_size) + 1; + le32toh(part->p_size) - 1; entry->part.type = bsd_parttype(part->p_fstype); entry->part.index = i; /* starts from zero */ entry->type.bsd = part->p_fstype; diff --git a/sys/boot/fdt/dts/arm/armada-380.dtsi b/sys/boot/fdt/dts/arm/armada-380.dtsi index 876bae4..20a236a 100644 --- a/sys/boot/fdt/dts/arm/armada-380.dtsi +++ b/sys/boot/fdt/dts/arm/armada-380.dtsi @@ -88,26 +88,29 @@ <0x82000000 0 0x80000 MBUS_ID(0xf0, 0x01) 0x80000 0 0x00002000 0x82000000 0 0x40000 MBUS_ID(0xf0, 0x01) 0x40000 0 0x00002000 0x82000000 0 0x44000 MBUS_ID(0xf0, 0x01) 0x44000 0 0x00002000 - 0x82000000 0 0x48000 MBUS_ID(0xf0, 0x01) 0x48000 0 0x00002000 - 0x82000000 0x1 0 MBUS_ID(0x08, 0xe8) 0 1 0 /* Port 0 MEM */ - 0x81000000 0x1 0 MBUS_ID(0x08, 0xe0) 0 1 0 /* Port 0 IO */ - 0x82000000 0x2 0 MBUS_ID(0x04, 0xe8) 0 1 0 /* Port 1 MEM */ - 0x81000000 0x2 0 MBUS_ID(0x04, 0xe0) 0 1 0 /* Port 1 IO */ - 0x82000000 0x3 0 MBUS_ID(0x04, 0xd8) 0 1 0 /* Port 2 MEM */ - 0x81000000 0x3 0 MBUS_ID(0x04, 0xd0) 0 1 0 /* Port 2 IO */>; + 0x82000000 0x0 0xf1200000 MBUS_ID(0x08, 0xe8) 0xf1200000 0 0x00100000 /* Port 0 MEM */ + 0x81000000 0x0 0xf1300000 MBUS_ID(0x08, 0xe0) 0xf1300000 0 0x00100000 /* Port 0 IO */ + 0x82000000 0x0 0xf1400000 MBUS_ID(0x04, 0xe8) 0xf1400000 0 0x00100000 /* Port 1 MEM */ + 0x81000000 0x0 0xf1500000 MBUS_ID(0x04, 0xe0) 0xf1500000 0 0x00100000 /* Port 1 IO */ + 0x82000000 0x0 0xf1600000 MBUS_ID(0x04, 0xd8) 0xf1600000 0 0x00100000 /* Port 2 MEM */ + 0x81000000 0x0 0xf1700000 MBUS_ID(0x04, 0xd0) 0xf1700000 0 0x00100000 /* Port 2 IO */ + >; /* x1 port */ pcie@1,0 { + compatible = "mrvl,pcie"; 
device_type = "pci"; assigned-addresses = <0x82000800 0 0x80000 0 0x2000>; - reg = <0x0800 0 0 0 0>; + reg = <0x0 0x0 0x80000 0x0 0x2000>; #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; - ranges = <0x82000000 0 0 0x82000000 0x1 0 1 0 - 0x81000000 0 0 0x81000000 0x1 0 1 0>; + #interrupt-cells = <3>; + bus-range = <0 255>; + ranges = <0x82000000 0x0 0x0 0x82000000 0x0 0xf1200000 0x0 0x00100000 + 0x81000000 0x0 0x0 0x81000000 0x0 0xf1300000 0x0 0x00100000>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gic>; marvell,pcie-port = <0>; marvell,pcie-lane = <0>; clocks = <&gateclk 8>; @@ -116,16 +119,19 @@ /* x1 port */ pcie@2,0 { + compatible = "mrvl,pcie"; device_type = "pci"; assigned-addresses = <0x82000800 0 0x40000 0 0x2000>; - reg = <0x1000 0 0 0 0>; + reg = <0x0 0x0 0x40000 0x0 0x2000>; #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; - ranges = <0x82000000 0 0 0x82000000 0x2 0 1 0 - 0x81000000 0 0 0x81000000 0x2 0 1 0>; + #interrupt-cells = <3>; + bus-range = <0 255>; + ranges = <0x82000000 0x0 0x0 0x82000000 0x0 0xf1400000 0x0 0x00100000 + 0x81000000 0x0 0x0 0x81000000 0x0 0xf1500000 0x0 0x00100000>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gic>; marvell,pcie-port = <1>; marvell,pcie-lane = <0>; clocks = <&gateclk 5>; @@ -134,16 +140,19 @@ /* x1 port */ pcie@3,0 { + compatible = "mrvl,pcie"; device_type = "pci"; assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; - reg = <0x1800 0 0 0 0>; + reg = <0x0 0x0 0x44000 0x0 0x2000>; #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; - ranges = <0x82000000 0 0 0x82000000 0x3 0 1 0 - 0x81000000 0 0 0x81000000 0x3 0 1 0>; + #interrupt-cells = <3>; + bus-range = <0 255>; + ranges = <0x82000000 0x0 0x0 0x82000000 0x0 0xf1600000 0x0 0x00100000 + 0x81000000 0x0 0x0 0x81000000 0x0 0xf1700000 0x0 0x00100000>; interrupt-map-mask 
= <0 0 0 0>; interrupt-map = <0 0 0 0 &gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gic>; marvell,pcie-port = <2>; marvell,pcie-lane = <0>; clocks = <&gateclk 6>; diff --git a/sys/boot/fdt/dts/arm/armada-385-80300-0148-G00-X100.dts b/sys/boot/fdt/dts/arm/armada-385-80300-0148-G00-X100.dts new file mode 100644 index 0000000..3406a7e --- /dev/null +++ b/sys/boot/fdt/dts/arm/armada-385-80300-0148-G00-X100.dts @@ -0,0 +1,453 @@ +/* + * Device Tree file for Marvell Armada 385 Access Point Development board + * (DB-88F6820-AP) + * + * Copyright (C) 2014 Marvell + * + * Nadav Haklai <nadavh@marvell.com> + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without + * any warranty of any kind, whether express or implied. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/dts-v1/; +#include "armada-385.dtsi" + +/* For Rogue-1 SERDES layout is + * + * | PHY Lane | Host Controller | Rate (Gbps) | + * | SRD0 | SATA 0 | 3.0 | + * | SRD1 | PCIe0 RC | 5.0 | + * | SRD2 | SATA 1 | 3.0 | + * | SRD3 | SGMII 2 | 3.125 | + * | SRD4 | PCIe1 RC | 5.0 | + * | SRD5 | USB3 1 | 5.0 | + */ + +#include <dt-bindings/gpio/gpio.h> + +/ { + model = "Netgate Rogue-1"; + compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada380"; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + /* This needs to be defined per variant depending on memory population */ + memory { + device_type = "memory"; + reg = <0x00000000 0x40000000>; /* 1GB */ + }; + + soc { + /* range definitions from armada-38x.dtsi + * <0 MBUS_ID(0x01, 0x2f) 0 0xffffffff> devbus-bootcs + * <0 MBUS_ID(0x01, 0x3e) 0 0xffffffff> devbus-cs0 + * <0 MBUS_ID(0x01, 0x3d) 0 0xffffffff> devbus-cs1 + * <0 MBUS_ID(0x01, 0x3b) 0 0xffffffff> devbus-cs2 + * <0 MBUS_ID(0x01, 0x37) 0 0xffffffff> devbus-cs3 + * <0 MBUS_ID(0xf0, 0x01) 0 0x100000> internal-regs @ 0xf100_0000 + * <0 MBUS_ID(0x09, 0x19) 0 0x800> crypto-sram0 + * <0 MBUS_ID(0x09, 0x15) 0 0x800> crypto-sram1 + * <0 MBUS_ID(0x0c, 0x04) 0 0x100000> bm_bppi + */ + ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000 + MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000 + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000 + MBUS_ID(0x0c, 0x04) 0 0xf1200000 0x100000>; + + internal-regs { + /* Rogue one uses the following pin mapping: + * MPP[22] SPI0_MOSI + * MPP[23] SPI0_SCK + * MPP[24] SPI0_MISO + * MPP[25] SPI0_CSn[0] + * MPP[26] SPI0_CSn[1] + */ + spi0: spi@10600 { + pinctrl-names = "default"; + /* TODO - Double check pin mappings */ + 
pinctrl-0 = <&spi0_pins>; + status = "okay"; + + spi-flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "st,m25p128", "jedec,spi-nor"; + reg = <0>; /* Chip select 0 */ + spi-max-frequency = <54000000>; + }; + }; + + /* Rogue one uses the following pin mapping: + * MPP[2] I2C0_SCK + * MPP[3] I2C0_SDA + */ + i2c0: i2c@11000 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + status = "okay"; + + /* + * This bus is wired to two EEPROM + * sockets, one of which holding the + * board ID used by the bootloader. + * Erasing this EEPROM's content will + * brick the board. + * Use this bus with caution. + */ + + pca95520 { + compatible = "nxp,pca9552"; + gpio-controller; + #gpio-cells = <2>; + reg = <0x60>; + status = "okay"; + }; + + is31fl3199 { + compatible = "issi,is31fl3199"; + gpio-controller; + #gpio-cells = <2>; + reg = <0x67>; + status = "okay"; + }; + }; + + /* TODO - Add definitions for the I2C LED/GPIO parts */ + + /* Rogue one uses the following pin mapping: + * MPP[4] SMI_MDC + * MPP[5] SMI_MDIO + */ + mdio@72004 { + pinctrl-names = "default"; + pinctrl-0 = <&mdio_pins>; + + /* Marvell 1510 @ address 0x00 */ + phy0: ethernet-phy@0 { + reg = <0>; + }; + + /* Marvell 1510 @ address 0x01 */ + phy1: ethernet-phy@1 { + reg = <1>; + }; + + /* Marvell 88E6141 @ address 0x10-x01f, + * see the DSA entry below for switch setup + */ + }; + + /* UART0 is exposed through miniUSB socket */ + /* Rogue one uses the following pin mapping: + * MPP[0] UA0_RXD + * MPP[1] UA0_TXD + */ + uart0: serial@12000 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_pins>; + status = "okay"; + }; + + /* UART1 is run to Mikrobus & debug headers */ + /* Rogue one uses the following pin mapping: + * MPP[19] UA1_RXD + * MPP[20] UA1_TXD + */ + uart1: serial@12100 { + pinctrl-names = "default"; + pinctrl-0 = <&uart1_pins>; + status = "okay"; + }; + + /* TODO - Any other GPIO that have functions we need + * to export here? 
(USB OC, resets, etc) + */ + pinctrl@18000 { + xhci1_vbus_pins: xhci1-vbus-pins { + marvell,pins = "mpp33"; + marvell,function = "gpio"; + }; + }; + + /* Ethernet 1/2.5Gbps MAC Port 1 */ + /* Can operate as: SGMII/HS-SGMII/RGMII + * This MAC shares an MBUS unit with MAC 2 + * + * Rogue one uses the following pin mapping: + * MPP[21] GE1_RXD[0] + * MPP[27] GE1_TXCLKOUT + * MPP[28] GE1_TXD[0] + * MPP[29] GE1_TXD[1] + * MPP[30] GE1_TXD[2] + * MPP[31] GE1_TXD[3] + * MPP[32] GE1_TXCTL + * MPP[37] GE1_RXCLK + * MPP[38] GE1_RXD[1] + * MPP[39] GE1_RXD[2] + * MPP[40] GE1_RXD[3] + * MPP[41] GE1_RXCTL + */ + ethernet@30000 { + pinctrl-names = "default"; + + /* + * The Reference Clock 0 is used to + * provide a clock to the PHY + */ + pinctrl-0 = <&ge1_rgmii_pins>; + status = "okay"; + phy = <&phy1>; + phy-mode = "rgmii-id"; + buffer-manager = <&bm>; + bm,pool-long = <1>; + bm,pool-short = <3>; + }; + + /* Ethernet 1/2.5Gbps MAC Port 2 */ + /* Can operate as: SGMII/HS-SGMII/RGMII + * This MAC shares an MBUS unit with MAC 2 + * + * Rogue one uses the following pin mapping: + * SRD3 as HS-SGMII + */ + ethernet@34000 { + status = "okay"; + phy-mode = "sgmii"; + buffer-manager = <&bm>; + bm,pool-long = <2>; + bm,pool-short = <3>; + fixed-link { + speed = <2500>; + full-duplex; + }; + }; + + /* Ethernet 1/2.5Gbps MAC Port 0 + * Can operate as: SGMII/HS-SGMII/RGMII/MII + * This MAC shares an MBUS unit with the SMI controller + */ + ethernet@70000 { + pinctrl-names = "default"; + + /* + * The Reference Clock 0 is used to + * provide a clock to the PHY + */ + pinctrl-0 = <&ge0_rgmii_pins>; + status = "okay"; + phy = <&phy0>; + phy-mode = "rgmii-id"; + buffer-manager = <&bm>; + bm,pool-long = <0>; + bm,pool-short = <3>; + }; + + /* Rogue-1 prototypes connect a SD card to the bottom 3 DAT + * lines, and CD# is run to MPP51 (as GPIO) + * Rogue one uses the following pin mapping: + * MPP[48] GE1_RXD[0] + * MPP[49] GE1_TXCLKOUT + * MPP[50] GE1_TXD[0] + * MPP[52] GE1_TXD[1] + * MPP[53] 
GE1_TXD[2] + * MPP[54] GE1_TXD[3] + * MPP[55] GE1_TXCTL + * MPP[57] GE1_RXCLK + * MPP[58] GE1_RXD[1] + * MPP[59] GE1_RXD[2] + */ + sdhci@d8000 { + pinctrl-names = "default"; + pinctrl-0 = <&sdhci_pins>; + no-1-8-v; + /* + * A388-GP board v1.5 and higher replace + * hitherto card detection method based on GPIO + * with the one using DAT3 pin. As they are + * incompatible, software-based polling is + * enabled with 'broken-cd' property. For boards + * older than v1.5 it can be replaced with: + * 'cd-gpios = <&expander0 5 GPIO_ACTIVE_LOW>;', + * whereas for the newer ones following can be + * used instead: + * 'dat3-cd;' + * 'cd-inverted;' + */ + dat3-cd; + cd-inverted; + wp-inverted; + bus-width = <8>; + status = "okay"; + }; + + /* SATA links to M.2 connectors J10 & J11, sataX_pins are + * for activity LED MPP muxing to LD17 & LD18 + */ + sata@a8000 { + pinctrl-names = "default"; + pinctrl-0 = <&sata0_pins>, <&sata1_pins>; + status = "okay"; + #address-cells = <1>; + #size-cells = <0>; + + sata0: sata-port@0 { + reg = <0>; + }; + + sata1: sata-port@1 { + reg = <1>; + }; + }; + + /* TODO - check buffer manage offset */ + bm@c8000 { + status = "okay"; + }; + + /* TODO - Any PHY or VBUS setup for EHCI host? */ + usb@58000 { + status = "okay"; + }; + + /* USB 3.0 Device 0 is @ 0x50000 + * USB 3.0 Host 0 is @ 0xf0000 + * USB 3.0 Host 1 is @ 0xf8000 */ + usb3@f8000 { + status = "okay"; + usb-phy = <&usb3_phy>; + }; + + crypto@90000 { + status = "okay"; + }; + + crypto@92000 { + status = "okay"; + }; + }; + + /* TODO - Check buffer manager... */ + bm-bppi { + status = "okay"; + }; + + pcie-controller { + status = "okay"; + + /* + * The three PCIe units are accessible through + * standard mini-PCIe slots on the board. 
+ */ + pcie@1,0 { + /* Port 0, Lane 0 */ + status = "okay"; + }; + + pcie@2,0 { + /* Port 1, Lane 0 */ + status = "okay"; + }; + }; + }; + + /* This is a 88E6141 switch operating in the "Single Chip Mode" + * addressing mode where it claims addresses 0x10-0x1f + */ + dsa@0 { + compatible = "marvell,dsa"; + #address-cells = <2>; + #size-cells = <0>; + + dsa,ethernet = <ð2>; + dsa,mii-bus = <&mdio>; + + switch@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0>; + + port@1 { + reg = <1>; + label = "lan1"; + vlangroup = <0>; + }; + + port@2 { + reg = <2>; + label = "lan2"; + vlangroup = <0>; + }; + + port@3 { + reg = <3>; + label = "lan3"; + vlangroup = <0>; + }; + + port@4 { + reg = <4>; + label = "lan4"; + vlangroup = <0>; + }; + + port@5 { + reg = <5>; + label = "cpu"; + vlangroup = <0>; + fixed-link { + speed = <2500>; + full-duplex; + }; + }; + }; + }; + + usb3_phy: usb3_phy { + compatible = "usb-nop-xceiv"; + vcc-supply = <®_xhci1_vbus>; + }; + + reg_xhci1_vbus: xhci1-vbus { + compatible = "regulator-fixed"; + pinctrl-names = "default"; + pinctrl-0 = <&xhci1_vbus_pins>; + regulator-name = "xhci1-vbus"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + enable-active-high; + gpio = <&gpio1 1 GPIO_ACTIVE_HIGH>; + }; +}; diff --git a/sys/boot/fdt/dts/arm/armada-385-db-ap.dts b/sys/boot/fdt/dts/arm/armada-385-db-ap.dts new file mode 100644 index 0000000..77bd350 --- /dev/null +++ b/sys/boot/fdt/dts/arm/armada-385-db-ap.dts @@ -0,0 +1,275 @@ +/* + * Device Tree file for Marvell Armada 385 Access Point Development board + * (DB-88F6820-AP) + * + * Copyright (C) 2014 Marvell + * + * Nadav Haklai <nadavh@marvell.com> + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. 
+ * + * a) This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without + * any warranty of any kind, whether express or implied. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * $FreeBSD$ + */ + +/dts-v1/; +#include "armada-385.dtsi" + +#include <dt-bindings/gpio/gpio.h> + +/ { + model = "Marvell Armada 385 Access Point Development Board"; + compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada380"; + + chosen { + stdout-path = "serial1"; + }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x80000000>; /* 2GB */ + }; + + soc { + ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000 + MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000 + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000 + MBUS_ID(0x0c, 0x04) 0 0xf1200000 0x100000>; + + internal-regs { + i2c0: i2c@11000 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + status = "okay"; + + /* + * This bus is wired to two EEPROM + * sockets, one of which holding the + * board ID used by the bootloader. + * Erasing this EEPROM's content will + * brick the board. + * Use this bus with caution. + */ + }; + + mdio@72004 { + pinctrl-names = "default"; + pinctrl-0 = <&mdio_pins>; + + phy0: ethernet-phy@1 { + reg = <1>; + }; + + phy1: ethernet-phy@4 { + reg = <4>; + }; + + phy2: ethernet-phy@6 { + reg = <6>; + }; + }; + + /* UART0 is exposed through the JP8 connector */ + uart0: serial@12000 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_pins>; + status = "okay"; + }; + + /* + * UART1 is exposed through a FTDI chip + * wired to the mini-USB connector + */ + uart1: serial@12100 { + pinctrl-names = "default"; + pinctrl-0 = <&uart1_pins>; + status = "okay"; + }; + + pinctrl@18000 { + xhci0_vbus_pins: xhci0-vbus-pins { + marvell,pins = "mpp44"; + marvell,function = "gpio"; + }; + }; + + /* CON3 */ + ethernet@30000 { + status = "okay"; + phy = <&phy2>; + phy-mode = "sgmii"; + buffer-manager = <&bm>; + bm,pool-long = <1>; + bm,pool-short = <3>; + }; + + /* CON2 */ + ethernet@34000 { + status = "okay"; + phy = <&phy1>; + phy-mode = "sgmii"; + buffer-manager = <&bm>; + bm,pool-long = <2>; + bm,pool-short = <3>; + }; + + usb@58000 { 
+ status = "okay"; + }; + + /* CON4 */ + ethernet@70000 { + pinctrl-names = "default"; + + /* + * The Reference Clock 0 is used to + * provide a clock to the PHY + */ + pinctrl-0 = <&ge0_rgmii_pins>, <&ref_clk0_pins>; + status = "okay"; + phy = <&phy0>; + phy-mode = "rgmii-id"; + buffer-manager = <&bm>; + bm,pool-long = <0>; + bm,pool-short = <3>; + }; + + crypto@90000 { + status = "okay"; + }; + + crypto@92000 { + status = "okay"; + }; + + bm@c8000 { + status = "okay"; + }; + + nfc: flash@d0000 { + status = "okay"; + num-cs = <1>; + nand-ecc-strength = <4>; + nand-ecc-step-size = <512>; + marvell,nand-keep-config; + marvell,nand-enable-arbiter; + nand-on-flash-bbt; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "U-Boot"; + reg = <0x00000000 0x00800000>; + read-only; + }; + + partition@800000 { + label = "uImage"; + reg = <0x00800000 0x00400000>; + read-only; + }; + + partition@c00000 { + label = "Root"; + reg = <0x00c00000 0x3f400000>; + }; + }; + }; + + usb3@f0000 { + status = "okay"; + usb-phy = <&usb3_phy>; + }; + }; + + bm-bppi { + status = "okay"; + }; + + pcie-controller { + status = "okay"; + + /* + * The three PCIe units are accessible through + * standard mini-PCIe slots on the board. 
+ */ + pcie@1,0 { + /* Port 0, Lane 0 */ + status = "okay"; + }; + + pcie@2,0 { + /* Port 1, Lane 0 */ + status = "okay"; + }; + + pcie@3,0 { + /* Port 2, Lane 0 */ + status = "okay"; + }; + }; + }; + + usb3_phy: usb3_phy { + compatible = "usb-nop-xceiv"; + vcc-supply = <®_xhci0_vbus>; + }; + + reg_xhci0_vbus: xhci0-vbus { + compatible = "regulator-fixed"; + pinctrl-names = "default"; + pinctrl-0 = <&xhci0_vbus_pins>; + regulator-name = "xhci0-vbus"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + enable-active-high; + gpio = <&gpio1 12 GPIO_ACTIVE_HIGH>; + }; +}; + +&spi1 { + pinctrl-names = "default"; + pinctrl-0 = <&spi1_pins>; + status = "okay"; + + spi-flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "st,m25p128", "jedec,spi-nor"; + reg = <0>; /* Chip select 0 */ + spi-max-frequency = <54000000>; + }; +}; diff --git a/sys/boot/fdt/dts/arm/armada-385.dtsi b/sys/boot/fdt/dts/arm/armada-385.dtsi index 8dc6202..b29cd06 100644 --- a/sys/boot/fdt/dts/arm/armada-385.dtsi +++ b/sys/boot/fdt/dts/arm/armada-385.dtsi @@ -77,5 +77,124 @@ compatible = "marvell,mv88f6820-pinctrl"; }; }; + + pcie-controller { + compatible = "marvell,armada-370-pcie"; + status = "disabled"; + device_type = "pci"; + + #address-cells = <3>; + #size-cells = <2>; + + msi-parent = <&mpic>; + bus-range = <0x00 0xff>; + + ranges = + <0x82000000 0 0x80000 MBUS_ID(0xf0, 0x01) 0x80000 0 0x00002000 + 0x82000000 0 0x40000 MBUS_ID(0xf0, 0x01) 0x40000 0 0x00002000 + 0x82000000 0 0x44000 MBUS_ID(0xf0, 0x01) 0x44000 0 0x00002000 + 0x82000000 0 0x48000 MBUS_ID(0xf0, 0x01) 0x48000 0 0x00002000 + 0x82000000 0x0 0xf1200000 MBUS_ID(0x08, 0xe8) 0xf1200000 0 0x00100000 /* Port 0 MEM */ + 0x81000000 0x0 0xf1300000 MBUS_ID(0x08, 0xe0) 0xf1300000 0 0x00100000 /* Port 0 IO */ + 0x82000000 0x0 0xf1400000 MBUS_ID(0x04, 0xe8) 0xf1400000 0 0x00100000 /* Port 1 MEM */ + 0x81000000 0x0 0xf1500000 MBUS_ID(0x04, 0xe0) 0xf1500000 0 0x00100000 /* Port 1 IO */ + 0x82000000 0x0 
0xf1600000 MBUS_ID(0x04, 0xd8) 0xf1600000 0 0x00100000 /* Port 2 MEM */ + 0x81000000 0x0 0xf1700000 MBUS_ID(0x04, 0xd0) 0xf1700000 0 0x00100000 /* Port 2 IO */ + 0x82000000 0x0 0xf1800000 MBUS_ID(0x04, 0xb8) 0xf1800000 0 0x00100000 /* Port 3 MEM */ + 0x81000000 0x0 0xf1900000 MBUS_ID(0x04, 0xb0) 0xf1900000 0 0x00100000 /* Port 3 IO */ + >; + + /* + * This port can be either x4 or x1. When + * configured in x4 by the bootloader, then + * pcie@4,0 is not available. + */ + pcie@1,0 { + compatible = "mrvl,pcie"; + device_type = "pci"; + assigned-addresses = <0x82000800 0 0x80000 0 0x2000>; + reg = <0x0 0x0 0x80000 0x0 0x2000>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <3>; + bus-range = <0 255>; + ranges = <0x82000000 0x0 0x0 0x82000000 0x0 0xf1200000 0x0 0x00100000 + 0x81000000 0x0 0x0 0x81000000 0x0 0xf1300000 0x0 0x00100000>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gic>; + marvell,pcie-port = <0>; + marvell,pcie-lane = <0>; + clocks = <&gateclk 8>; + status = "disabled"; + }; + + /* x1 port */ + pcie@2,0 { + compatible = "mrvl,pcie"; + device_type = "pci"; + assigned-addresses = <0x82000800 0 0x40000 0 0x2000>; + reg = <0x0 0x0 0x40000 0x0 0x2000>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <3>; + bus-range = <0 255>; + ranges = <0x82000000 0x0 0x0 0x82000000 0x0 0xf1400000 0x0 0x00100000 + 0x81000000 0x0 0x0 0x81000000 0x0 0xf1500000 0x0 0x00100000>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gic>; + marvell,pcie-port = <1>; + marvell,pcie-lane = <0>; + clocks = <&gateclk 5>; + status = "disabled"; + }; + + /* x1 port */ + pcie@3,0 { + compatible = "mrvl,pcie"; + device_type = "pci"; + assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; + reg = <0x0 0x0 0x44000 0x0 0x2000>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <3>; + bus-range = 
<0 255>; + ranges = <0x82000000 0x0 0x0 0x82000000 0x0 0xf1600000 0x0 0x00100000 + 0x81000000 0x0 0x0 0x81000000 0x0 0xf1700000 0x0 0x00100000>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gic>; + marvell,pcie-port = <2>; + marvell,pcie-lane = <0>; + clocks = <&gateclk 6>; + status = "disabled"; + }; + + /* + * x1 port only available when pcie@1,0 is + * configured as a x1 port + */ + pcie@4,0 { + compatible = "mrvl,pcie"; + device_type = "pci"; + assigned-addresses = <0x82000800 0 0x48000 0 0x2000>; + reg = <0x0 0x0 0x48000 0x0 0x2000>; + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <3>; + bus-range = <0 255>; + ranges = <0x82000000 0x0 0x0 0x82000000 0x0 0xf1800000 0x0 0x00100000 + 0x81000000 0x0 0x0 0x81000000 0x0 0xf1900000 0x0 0x00100000>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gic>; + marvell,pcie-port = <3>; + marvell,pcie-lane = <0>; + clocks = <&gateclk 7>; + status = "disabled"; + }; + }; }; + }; diff --git a/sys/boot/fdt/dts/arm/armada-388-clearfog.dts b/sys/boot/fdt/dts/arm/armada-388-clearfog.dts new file mode 100644 index 0000000..78c8007 --- /dev/null +++ b/sys/boot/fdt/dts/arm/armada-388-clearfog.dts @@ -0,0 +1,459 @@ +/* + * Device Tree file for SolidRun Clearfog revision A1 rev 2.0 (88F6828) + * + * Copyright (C) 2015 Russell King + * + * This board is in development; the contents of this file work with + * the A1 rev 2.0 of the board, which does not represent final + * production board. Things will change, don't expect this file to + * remain compatible info the future. + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. 
+ *
+ * a) This file is free software; you can redistribute it and/or
+ *    modify it under the terms of the GNU General Public License
+ *    version 2 as published by the Free Software Foundation.
+ *
+ *    This file is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *    GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ * b) Permission is hereby granted, free of charge, to any person
+ *    obtaining a copy of this software and associated documentation
+ *    files (the "Software"), to deal in the Software without
+ *    restriction, including without limitation the rights to use,
+ *    copy, modify, merge, publish, distribute, sublicense, and/or
+ *    sell copies of the Software, and to permit persons to whom the
+ *    Software is furnished to do so, subject to the following
+ *    conditions:
+ *
+ *    The above copyright notice and this permission notice shall be
+ *    included in all copies or substantial portions of the Software.
+ *
+ *    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *    OTHER DEALINGS IN THE SOFTWARE.
+ * + * $FreeBSD$ + */ + +/dts-v1/; +#include "armada-388.dtsi" +#include "armada-38x-solidrun-microsom.dtsi" + +/ { + model = "SolidRun Clearfog A1"; + compatible = "solidrun,clearfog-a1", "marvell,armada388", + "marvell,armada385", "marvell,armada380"; + + aliases { + /* So that mvebu u-boot can update the MAC addresses */ + ethernet1 = ð0; + ethernet2 = ð1; + ethernet3 = ð2; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + reg_3p3v: regulator-3p3v { + compatible = "regulator-fixed"; + regulator-name = "3P3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + soc { + internal-regs { + ethernet@30000 { + phy-mode = "sgmii"; + buffer-manager = <&bm>; + bm,pool-long = <2>; + bm,pool-short = <1>; + status = "okay"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + ethernet@34000 { + phy-mode = "sgmii"; + buffer-manager = <&bm>; + bm,pool-long = <3>; + bm,pool-short = <1>; + status = "okay"; + managed = "in-band-status"; + }; + + i2c@11000 { + /* Is there anything on this? */ + clock-frequency = <100000>; + pinctrl-0 = <&i2c0_pins>; + pinctrl-names = "default"; + status = "okay"; + + /* + * PCA9655 GPIO expander, up to 1MHz clock. + * 0-CON3 CLKREQ# + * 1-CON3 PERST# + * 2-CON2 PERST# + * 3-CON3 W_DISABLE + * 4-CON2 CLKREQ# + * 5-USB3 overcurrent + * 6-USB3 power + * 7-CON2 W_DISABLE + * 8-JP4 P1 + * 9-JP4 P4 + * 10-JP4 P5 + * 11-m.2 DEVSLP + * 12-SFP_LOS + * 13-SFP_TX_FAULT + * 14-SFP_TX_DISABLE + * 15-SFP_MOD_DEF0 + */ + expander0: gpio-expander@20 { + /* + * This is how it should be: + * compatible = "onnn,pca9655", + * "nxp,pca9555"; + * but you can't do this because of + * the way I2C works. 
+ */ + compatible = "nxp,pca9555"; + gpio-controller; + #gpio-cells = <2>; + reg = <0x20>; + + pcie1_0_clkreq { + gpio-hog; + gpios = <0 GPIO_ACTIVE_LOW>; + input; + line-name = "pcie1.0-clkreq"; + }; + pcie1_0_w_disable { + gpio-hog; + gpios = <3 GPIO_ACTIVE_LOW>; + output-low; + line-name = "pcie1.0-w-disable"; + }; + pcie2_0_clkreq { + gpio-hog; + gpios = <4 GPIO_ACTIVE_LOW>; + input; + line-name = "pcie2.0-clkreq"; + }; + pcie2_0_w_disable { + gpio-hog; + gpios = <7 GPIO_ACTIVE_LOW>; + output-low; + line-name = "pcie2.0-w-disable"; + }; + usb3_ilimit { + gpio-hog; + gpios = <5 GPIO_ACTIVE_LOW>; + input; + line-name = "usb3-current-limit"; + }; + usb3_power { + gpio-hog; + gpios = <6 GPIO_ACTIVE_HIGH>; + output-high; + line-name = "usb3-power"; + }; + m2_devslp { + gpio-hog; + gpios = <11 GPIO_ACTIVE_HIGH>; + output-low; + line-name = "m.2 devslp"; + }; + sfp_los { + /* SFP loss of signal */ + gpio-hog; + gpios = <12 GPIO_ACTIVE_HIGH>; + input; + line-name = "sfp-los"; + }; + sfp_tx_fault { + /* SFP laser fault */ + gpio-hog; + gpios = <13 GPIO_ACTIVE_HIGH>; + input; + line-name = "sfp-tx-fault"; + }; + sfp_tx_disable { + /* SFP transmit disable */ + gpio-hog; + gpios = <14 GPIO_ACTIVE_HIGH>; + output-low; + line-name = "sfp-tx-disable"; + }; + sfp_mod_def0 { + /* SFP module present */ + gpio-hog; + gpios = <15 GPIO_ACTIVE_LOW>; + input; + line-name = "sfp-mod-def0"; + }; + }; + + /* The MCP3021 is 100kHz clock only */ + mikrobus_adc: mcp3021@4c { + compatible = "microchip,mcp3021"; + reg = <0x4c>; + }; + + /* Also something at 0x64 */ + }; + + i2c@11100 { + /* + * Routed to SFP, mikrobus, and PCIe. + * SFP limits this to 100kHz, and requires + * an AT24C01A/02/04 with address pins tied + * low, which takes addresses 0x50 and 0x51. + * Mikrobus doesn't specify beyond an I2C + * bus being present. + * PCIe uses ARP to assign addresses, or + * 0x63-0x64. 
+ */ + clock-frequency = <100000>; + pinctrl-0 = <&clearfog_i2c1_pins>; + pinctrl-names = "default"; + status = "okay"; + }; + + pinctrl@18000 { + clearfog_dsa0_clk_pins: clearfog-dsa0-clk-pins { + marvell,pins = "mpp46"; + marvell,function = "ref"; + }; + clearfog_dsa0_pins: clearfog-dsa0-pins { + marvell,pins = "mpp23", "mpp41"; + marvell,function = "gpio"; + }; + clearfog_i2c1_pins: i2c1-pins { + /* SFP, PCIe, mSATA, mikrobus */ + marvell,pins = "mpp26", "mpp27"; + marvell,function = "i2c1"; + }; + clearfog_sdhci_cd_pins: clearfog-sdhci-cd-pins { + marvell,pins = "mpp20"; + marvell,function = "gpio"; + }; + clearfog_sdhci_pins: clearfog-sdhci-pins { + marvell,pins = "mpp21", "mpp28", + "mpp37", "mpp38", + "mpp39", "mpp40"; + marvell,function = "sd0"; + }; + clearfog_spi1_cs_pins: spi1-cs-pins { + marvell,pins = "mpp55"; + marvell,function = "spi1"; + }; + mikro_pins: mikro-pins { + /* int: mpp22 rst: mpp29 */ + marvell,pins = "mpp22", "mpp29"; + marvell,function = "gpio"; + }; + mikro_spi_pins: mikro-spi-pins { + marvell,pins = "mpp43"; + marvell,function = "spi1"; + }; + mikro_uart_pins: mikro-uart-pins { + marvell,pins = "mpp24", "mpp25"; + marvell,function = "ua1"; + }; + rear_button_pins: rear-button-pins { + marvell,pins = "mpp34"; + marvell,function = "gpio"; + }; + }; + + sata@a8000 { + /* pinctrl? */ + status = "okay"; + }; + + sata@e0000 { + /* pinctrl? */ + status = "okay"; + }; + + sdhci@d8000 { + bus-width = <4>; + cd-gpios = <&gpio0 20 GPIO_ACTIVE_LOW>; + no-1-8-v; + pinctrl-0 = <&clearfog_sdhci_pins + &clearfog_sdhci_cd_pins>; + pinctrl-names = "default"; + status = "okay"; + vmmc = <®_3p3v>; + wp-inverted; + }; + + serial@12100 { + /* mikrobus uart */ + pinctrl-0 = <&mikro_uart_pins>; + pinctrl-names = "default"; + status = "okay"; + }; + + usb@58000 { + /* CON3, nearest power. */ + status = "okay"; + }; + + crypto@90000 { + status = "okay"; + }; + + crypto@92000 { + status = "okay"; + }; + + usb3@f0000 { + /* CON2, nearest CPU, USB2 only. 
*/ + status = "okay"; + }; + + usb3@f8000 { + /* CON7 */ + status = "okay"; + }; + }; + + pcie-controller { + status = "okay"; + /* + * The two PCIe units are accessible through + * the mini-PCIe connectors on the board. + */ + pcie@2,0 { + /* Port 1, Lane 0. CON3, nearest power. */ + reset-gpios = <&expander0 1 GPIO_ACTIVE_LOW>; + status = "okay"; + }; + pcie@3,0 { + /* Port 2, Lane 0. CON2, nearest CPU. */ + reset-gpios = <&expander0 2 GPIO_ACTIVE_LOW>; + status = "okay"; + }; + }; + }; + + dsa@0 { + compatible = "marvell,dsa"; + dsa,ethernet = <ð1>; + dsa,mii-bus = <&mdio>; + pinctrl-0 = <&clearfog_dsa0_clk_pins &clearfog_dsa0_pins>; + pinctrl-names = "default"; + #address-cells = <2>; + #size-cells = <0>; + + switch@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <4 0>; + + port@0 { + reg = <0>; + label = "lan5"; + vlangroup = <0>; + }; + + port@1 { + reg = <1>; + label = "lan4"; + vlangroup = <0>; + }; + + port@2 { + reg = <2>; + label = "lan3"; + vlangroup = <0>; + }; + + port@3 { + reg = <3>; + label = "lan2"; + vlangroup = <0>; + }; + + port@4 { + reg = <4>; + label = "lan1"; + vlangroup = <0>; + }; + + port@5 { + reg = <5>; + label = "cpu"; + vlangroup = <0>; + }; + + port@6 { + /* 88E1512 external phy */ + reg = <6>; + label = "lan6"; + vlangroup = <0>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + + gpio-keys { + compatible = "gpio-keys"; + pinctrl-0 = <&rear_button_pins>; + pinctrl-names = "default"; + + button_0 { + /* The rear SW3 button */ + label = "Rear Button"; + gpios = <&gpio1 2 GPIO_ACTIVE_LOW>; + linux,can-disable; + linux,code = <BTN_0>; + }; + }; +}; + +&spi1 { + /* + * We don't seem to have the W25Q32 on the + * A1 Rev 2.0 boards, so disable SPI. 
+ * CS0: W25Q32 (doesn't appear to be present) + * CS1: + * CS2: mikrobus + */ + pinctrl-0 = <&spi1_pins + &clearfog_spi1_cs_pins + &mikro_spi_pins>; + pinctrl-names = "default"; + status = "okay"; + + spi-flash@0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "w25q32", "jedec,spi-nor"; + reg = <0>; /* Chip select 0 */ + spi-max-frequency = <3000000>; + status = "disabled"; + }; +}; diff --git a/sys/boot/fdt/dts/arm/armada-388-gp.dts b/sys/boot/fdt/dts/arm/armada-388-gp.dts index 44ff1cc..2079247 100644 --- a/sys/boot/fdt/dts/arm/armada-388-gp.dts +++ b/sys/boot/fdt/dts/arm/armada-388-gp.dts @@ -59,7 +59,11 @@ }; soc { - ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000>; + ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000 + MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000 + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000 + MBUS_ID(0x0c, 0x04) 0 0xf1200000 0x100000>; internal-regs { crypto@90000 { @@ -239,12 +243,33 @@ gpio-fan,speed-map = < 0 0 3000 1>; }; - }; + pcie-controller { + status = "okay"; + /* + * One PCIe units is accessible through + * standard PCIe slot on the board. + */ + pcie@1,0 { + /* Port 0, Lane 0 */ + status = "okay"; + }; - pci0: pcie@f1080000 { - status = "okay"; + /* + * The two other PCIe units are accessible + * through mini PCIe slot on the board. 
+ */ + pcie@2,0 { + /* Port 1, Lane 0 */ + status = "okay"; + }; + pcie@3,0 { + /* Port 2, Lane 0 */ + status = "okay"; + }; + }; }; + reg_usb3_vbus: usb3-vbus { compatible = "regulator-fixed"; regulator-name = "usb3-vbus"; diff --git a/sys/boot/fdt/dts/arm/armada-38x-solidrun-microsom.dtsi b/sys/boot/fdt/dts/arm/armada-38x-solidrun-microsom.dtsi new file mode 100644 index 0000000..2595eae --- /dev/null +++ b/sys/boot/fdt/dts/arm/armada-38x-solidrun-microsom.dtsi @@ -0,0 +1,130 @@ +/* + * Device Tree file for SolidRun Armada 38x Microsom + * + * Copyright (C) 2015 Russell King + * + * This board is in development; the contents of this file work with + * the A1 rev 2.0 of the board, which does not represent final + * production board. Things will change, don't expect this file to + * remain compatible info the future. + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Or, alternatively + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * $FreeBSD$ + */ +#include <dt-bindings/input/input.h> +#include <dt-bindings/gpio/gpio.h> + +/ { + memory { + device_type = "memory"; + reg = <0x00000000 0x10000000>; /* 256 MB */ + }; + + soc { + ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000 + MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000 + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000 + MBUS_ID(0x0c, 0x04) 0 0xf1200000 0x100000>; + + internal-regs { + ethernet@70000 { + pinctrl-0 = <&ge0_rgmii_pins>; + pinctrl-names = "default"; + phy = <&phy_dedicated>; + phy-mode = "rgmii-id"; + buffer-manager = <&bm>; + bm,pool-long = <0>; + bm,pool-short = <1>; + status = "okay"; + }; + + mdio@72004 { + /* + * Add the phy clock here, so the phy can be + * accessed to read its IDs prior to binding + * with the driver. 
+ */ + pinctrl-0 = <&mdio_pins µsom_phy_clk_pins>; + pinctrl-names = "default"; + + phy_dedicated: ethernet-phy@0 { + /* + * Annoyingly, the marvell phy driver + * configures the LED register, rather + * than preserving reset-loaded setting. + * We undo that rubbish here. + */ + marvell,reg-init = <3 16 0 0x101e>; + reg = <0>; + }; + }; + + pinctrl@18000 { + microsom_phy_clk_pins: microsom-phy-clk-pins { + marvell,pins = "mpp45"; + marvell,function = "ref"; + }; + }; + + rtc@a3800 { + /* + * If the rtc doesn't work, run "date reset" + * twice in u-boot. + */ + status = "okay"; + }; + + serial@12000 { + pinctrl-0 = <&uart0_pins>; + pinctrl-names = "default"; + status = "okay"; + }; + + bm@c8000 { + status = "okay"; + }; + }; + + bm-bppi { + status = "okay"; + }; + + }; +}; diff --git a/sys/boot/fdt/dts/arm/armada-38x.dtsi b/sys/boot/fdt/dts/arm/armada-38x.dtsi index 736d41a..8f07899 100644 --- a/sys/boot/fdt/dts/arm/armada-38x.dtsi +++ b/sys/boot/fdt/dts/arm/armada-38x.dtsi @@ -154,7 +154,8 @@ crypto@90000 { compatible = "mrvl,cesa"; - reg = <0x90000 0x10000>; + reg = <0x90000 0x1000 /* tdma base reg chan 0 */ + 0x9D000 0x1000>; /* cesa base reg chan 0 */ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; sram-handle = <&SRAM0>; @@ -176,6 +177,10 @@ reg = <0x8000 0x1000>; cache-unified; cache-level = <2>; + arm,double-linefill-incr = <1>; + arm,double-linefill-wrap = <0>; + arm,double-linefill = <1>; + prefetch-data = <1>; }; scu@c000 { @@ -187,7 +192,6 @@ compatible = "arm,cortex-a9-global-timer"; reg = <0xc200 0x20>; interrupts = <GIC_PPI 11 (IRQ_TYPE_EDGE_RISING | GIC_CPU_MASK_SIMPLE(2))>; - clock-frequency = <800000000>; clocks = <&coreclk 2>; }; @@ -195,7 +199,6 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0xc600 0x20>; interrupts = <GIC_PPI 13 (IRQ_TYPE_EDGE_RISING | GIC_CPU_MASK_SIMPLE(2))>; - clock-frequency = <800000000>; clocks = <&coreclk 2>; }; @@ -418,7 +421,7 @@ mpic: interrupt-controller@20a00 { compatible = "marvell,mpic"; 
- reg = <0x20a00 0x2d0>, <0x21070 0x58>; + reg = <0x20a00 0x2d0>, <0x21870 0x300>; #interrupt-cells = <1>; #size-cells = <1>; interrupt-controller; @@ -562,6 +565,14 @@ status = "disabled"; }; + bm: bm@c8000 { + compatible = "marvell,armada-380-neta-bm"; + reg = <0xc8000 0xac>; + clocks = <&gateclk 13>; + internal-mem = <&bm_bppi>; + status = "disabled"; + }; + sata@e0000 { compatible = "marvell,armada-380-ahci"; reg = <0xe0000 0x2000>; @@ -622,25 +633,17 @@ status = "disabled"; }; }; - }; - pci0: pcie@f1080000 { - compatible = "mrvl,pcie"; - status = "disabled"; - device_type = "pci"; - #interrupt-cells = <3>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0xf1080000 0x2000>; - bus-range = <0 255>; - ranges = <0x42000000 0x0 0xf1200000 0xf1200000 0x0 0x00100000 - 0x41000000 0x0 0x00000000 0xf1300000 0x0 0x00100000>; - interrupt-parent = <&gic>; - interrupts = <GIC_SPI 91 0>; - interrupt-map-mask = <0xf800 0x0 0x0 0x7>; - interrupt-map = < - 0x0000 0x0 0x0 0x1 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH - >; + bm_bppi: bm-bppi { + compatible = "mmio-sram"; + reg = <MBUS_ID(0x0c, 0x04) 0 0x100000>; + ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x100000>; + #address-cells = <1>; + #size-cells = <1>; + clocks = <&gateclk 13>; + no-memory-wc; + status = "disabled"; + }; }; clocks { diff --git a/sys/boot/fdt/dts/arm/ubmc.dts b/sys/boot/fdt/dts/arm/ubmc.dts new file mode 100644 index 0000000..e33498c --- /dev/null +++ b/sys/boot/fdt/dts/arm/ubmc.dts @@ -0,0 +1,159 @@ +/*- + * Copyright (c) 2016 Rubicon Communications (Netgate) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/dts-v1/; + +#include "am33xx.dtsi" +#include "ubmc.dtsi" + +/ { + model = "AM335x uBMC"; + compatible = "ti,am335x-ubmc", "ti,am33xx"; +}; + +&mmc1 { + status = "okay"; +}; + +&mmc2 { + vmmc-supply = <&vmmcsd_fixed>; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_pins>; + bus-width = <8>; + ti,dual-volt; + non-removable; + status = "okay"; +}; + +&i2c0 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + + status = "okay"; + + lm750 { + compatible = "national,lm75"; + i2c-address = <0x48>; + }; +}; + +&i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; + + status = "okay"; + + eeprom1 { + compatible = "atmel,24c256"; + i2c-address = <0x50>; + }; +}; + +&spi0 { + pinctrl-names = "default"; + pinctrl-0 = <&spi0_pins>; + status = "okay"; + + flash1: m25p64@1 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "st,m25p64", "st,m25p"; + spi-max-frequency = <20000000>; + reg = <0>; + }; +}; + +&spi1 { + pinctrl-names = "default"; + 
pinctrl-0 = <&spi1_pins>; + status = "okay"; + + flash2: m25p64@2 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "st,m25p64", "st,m25p"; + spi-max-frequency = <20000000>; + reg = <0>; + }; +}; + +&tscadc { + status = "okay"; + + adc { + #io-channel-cells = <0x1>; + compatible = "ti,am3359-adc"; + ti,adc-channels = <0x00 0x01 0x02 0x03 0x4 0x5 0x6 0x7>; + }; +}; + +&epwmss0 { + status = "okay"; +}; + +&ecap0 { + pinctrl-names = "default"; + pinctrl-0 = <&ecap0_pins>; + status = "okay"; +}; + +&ehrpwm0 { + status = "okay"; +}; + +&epwmss1 { + status = "okay"; +}; + +&ecap1 { + pinctrl-names = "default"; + pinctrl-0 = <&ecap1_pins>; + status = "okay"; +}; + +&ehrpwm1 { + pinctrl-names = "default"; + pinctrl-0 = <&ehrpwm1_pins>; + status = "okay"; +}; + +&epwmss2 { + status = "okay"; +}; + +&ecap2 { + pinctrl-names = "default"; + pinctrl-0 = <&ecap2_pins>; + status = "okay"; +}; + +&ehrpwm2 { + pinctrl-names = "default"; + pinctrl-0 = <&ehrpwm2_pins>; + status = "okay"; +}; diff --git a/sys/boot/fdt/dts/arm/ubmc.dtsi b/sys/boot/fdt/dts/arm/ubmc.dtsi new file mode 100644 index 0000000..3d7f9bc --- /dev/null +++ b/sys/boot/fdt/dts/arm/ubmc.dtsi @@ -0,0 +1,313 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/ { + memory { + device_type = "memory"; + reg = <0x80000000 0x10000000>; /* 256 MB */ + }; + + vmmcsd_fixed: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "vmmcsd_fixed"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; +}; + +&am33xx_pinmux { + pinctrl-names = "default"; + pinctrl-0 = <&clkout2_pin>; + + i2c0_pins: pinmux_i2c0_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x988, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */ + AM33XX_IOPAD(0x98c, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */ + >; + }; + + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x968, PIN_INPUT_PULLUP | MUX_MODE3) /* uart0_ctsn.i2c1_sda */ + AM33XX_IOPAD(0x96c, PIN_INPUT_PULLUP | MUX_MODE3) /* uart0_rtsn.i2c1_scl */ + >; + }; + + spi0_pins: pinmux_spi0_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x950, PIN_INPUT_PULLDOWN | MUX_MODE0) /* spi0_sclk.spi0_sclk */ + AM33XX_IOPAD(0x954, PIN_INPUT_PULLDOWN | MUX_MODE0) /* spi0_d0.spi0_miso */ + AM33XX_IOPAD(0x958, PIN_INPUT_PULLUP | MUX_MODE0) /* spi0_d1.spi0_mosi */ + AM33XX_IOPAD(0x95c, PIN_INPUT_PULLUP | MUX_MODE0) /* spi0_cs0.spi0_cs0 */ + >; + }; + + spi1_pins: pinmux_spi1_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x908, PIN_INPUT_PULLDOWN | MUX_MODE2) /* mii1_col.spi1_sclk */ + AM33XX_IOPAD(0x90c, PIN_INPUT_PULLDOWN | MUX_MODE2) /* mii1_crs.spi1_miso */ + AM33XX_IOPAD(0x910, PIN_INPUT_PULLUP | MUX_MODE2) /* mii1_rx_er.spi1_mosi */ + AM33XX_IOPAD(0x944, PIN_INPUT_PULLUP | MUX_MODE2) /* rmii1_ref_clk.spi1_cs0 */ + >; + }; + + uart0_pins: pinmux_uart0_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x970, PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */ + AM33XX_IOPAD(0x974, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */ + >; + }; + + clkout2_pin: pinmux_clkout2_pin { + pinctrl-single,pins = < + AM33XX_IOPAD(0x9b4, PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */ + >; + }; + + cpsw_default: 
cpsw_default { + pinctrl-single,pins = < + /* Slave 1 */ + AM33XX_IOPAD(0x914, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mii1_txen.rgmii_1_txen */ + AM33XX_IOPAD(0x918, PIN_INPUT_PULLUP | MUX_MODE2) /* mii1_rxdv.rgmii_1_rxdv */ + AM33XX_IOPAD(0x91c, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mii1_txd3.rgmii_1_txd3 */ + AM33XX_IOPAD(0x920, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mii1_txd2.rgmii_1_txd2 */ + AM33XX_IOPAD(0x924, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mii1_txd1.rgmii_1_txd1 */ + AM33XX_IOPAD(0x928, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mii1_txd0.rgmii_1_txd0 */ + AM33XX_IOPAD(0x92c, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mii1_txclk.rgmii_1_txclk */ + AM33XX_IOPAD(0x930, PIN_INPUT_PULLUP | MUX_MODE2) /* mii1_rxclk.rgmii_1_rxclk */ + AM33XX_IOPAD(0x934, PIN_INPUT_PULLUP | MUX_MODE2) /* mii1_rxd3.rgmii_1_rxd3 */ + AM33XX_IOPAD(0x938, PIN_INPUT_PULLUP | MUX_MODE2) /* mii1_rxd2.rgmii_1_rxd2 */ + AM33XX_IOPAD(0x93c, PIN_INPUT_PULLUP | MUX_MODE2) /* mii1_rxd1.rgmii_1_rxd1 */ + AM33XX_IOPAD(0x940, PIN_INPUT_PULLUP | MUX_MODE2) /* mii1_rxd0.rgmii_1_rxd0 */ + + /* Slave 2 */ + AM33XX_IOPAD(0x840, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* gmpc_a0.rgmii_2_txen */ + AM33XX_IOPAD(0x844, PIN_INPUT_PULLUP | MUX_MODE2) /* gmpc_a1.rgmii_2_rxdv */ + AM33XX_IOPAD(0x848, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* gmpc_a2.rgmii_2_txd3 */ + AM33XX_IOPAD(0x84c, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* gmpc_a3.rgmii_2_txd2 */ + AM33XX_IOPAD(0x850, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* gmpc_a4.rgmii_2_txd1 */ + AM33XX_IOPAD(0x854, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* gmpc_a5.rgmii_2_txd0 */ + AM33XX_IOPAD(0x858, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* gmpc_a6.rgmii_2_txclk */ + AM33XX_IOPAD(0x85c, PIN_INPUT_PULLUP | MUX_MODE2) /* gmpc_a7.rgmii_2_rxclk */ + AM33XX_IOPAD(0x860, PIN_INPUT_PULLUP | MUX_MODE2) /* gmpc_a8.rgmii_2_rxd3 */ + AM33XX_IOPAD(0x864, PIN_INPUT_PULLUP | MUX_MODE2) /* gmpc_a9.rgmii_2_rxd2 */ + AM33XX_IOPAD(0x868, PIN_INPUT_PULLUP | MUX_MODE2) /* gmpc_a10.rgmii_2_rxd1 */ + AM33XX_IOPAD(0x86c, PIN_INPUT_PULLUP | 
MUX_MODE2) /* gmpc_a11.rgmii_2_rxd0 */ + >; + }; + + cpsw_sleep: cpsw_sleep { + pinctrl-single,pins = < + /* Slave 1 reset value */ + AM33XX_IOPAD(0x914, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x918, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x91c, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x920, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x924, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x928, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x92c, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x930, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x934, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x938, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x93c, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x940, PIN_INPUT_PULLDOWN | MUX_MODE7) + + /* Slave 2 reset value */ + AM33XX_IOPAD(0x840, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x844, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x848, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x84c, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x850, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x854, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x858, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x85c, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x860, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x864, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x868, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x86c, PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + + davinci_mdio_default: davinci_mdio_default { + pinctrl-single,pins = < + /* MDIO */ + AM33XX_IOPAD(0x948, PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */ + AM33XX_IOPAD(0x94c, PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */ + >; + }; + + davinci_mdio_sleep: davinci_mdio_sleep { + pinctrl-single,pins = < + /* MDIO reset value */ + AM33XX_IOPAD(0x948, PIN_INPUT_PULLDOWN | MUX_MODE7) + AM33XX_IOPAD(0x94c, PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + + mmc1_pins: pinmux_mmc1_pins { + 
pinctrl-single,pins = < + AM33XX_IOPAD(0x8f0, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_dat3.mmc0_dat3 */ + AM33XX_IOPAD(0x8f4, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_dat2.mmc0_dat2 */ + AM33XX_IOPAD(0x8f8, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_dat1.mmc0_dat1 */ + AM33XX_IOPAD(0x8fc, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_dat0.mmc0_dat0 */ + AM33XX_IOPAD(0x900, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_clk.mmc0_clk */ + AM33XX_IOPAD(0x904, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_cmd.mmc0_cmd */ + AM33XX_IOPAD(0x960, PIN_INPUT_PULLUP | MUX_MODE5) /* spi0_cs1.mmc0_cd */ + >; + }; + + emmc_pins: pinmux_emmc_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x994, PIN_INPUT_PULLUP | MUX_MODE4) /* mcasp0_fsx.mmc1_cd */ + AM33XX_IOPAD(0x880, PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn1.mmc1_clk */ + AM33XX_IOPAD(0x884, PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn2.mmc1_cmd */ + AM33XX_IOPAD(0x800, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad0.mmc1_dat0 */ + AM33XX_IOPAD(0x804, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad1.mmc1_dat1 */ + AM33XX_IOPAD(0x808, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad2.mmc1_dat2 */ + AM33XX_IOPAD(0x80c, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad3.mmc1_dat3 */ + AM33XX_IOPAD(0x810, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad4.mmc1_dat4 */ + AM33XX_IOPAD(0x814, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad5.mmc1_dat5 */ + AM33XX_IOPAD(0x818, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad6.mmc1_dat6 */ + AM33XX_IOPAD(0x81c, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad7.mmc1_dat7 */ + >; + }; + + ecap0_pins: pinmux_ecap0_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x964, PIN_INPUT | MUX_MODE0) /* ecap0_in_pwm0_out.ecap0_in_pwm0_out */ + >; + }; + + ecap1_pins: pinmux_ecap1_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE2) /* spi0_cs1.ecap1_in_pwm1_out */ + >; + }; + + ehrpwm1_pins: pinmux_ehrpwm1_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE2) /* lcd_data10.ehrpwm1a */ + >; + }; + + ecap2_pins: pinmux_ecap2_pins { + 
pinctrl-single,pins = < + AM33XX_IOPAD(0x99c, PIN_INPUT | MUX_MODE4) /* mcasp0_ahclkr.ecap2_in_pwm2_out */ + >; + }; + + ehrpwm2_pins: pinmux_ehrpwm2_pins { + pinctrl-single,pins = < + AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE3) /* lcd_data0.ehrpwm2a */ + >; + }; +}; + +&uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_pins>; + + status = "okay"; +}; + +&usb { + status = "okay"; +}; + +&usb_ctrl_mod { + status = "okay"; +}; + +&usb0_phy { + status = "okay"; +}; + +&usb1_phy { + status = "okay"; +}; + +&usb0 { + status = "okay"; + dr_mode = "host"; +}; + +&usb1 { + status = "okay"; + dr_mode = "host"; +}; + +&cppi41dma { + status = "okay"; +}; + +&i2c0 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + + status = "okay"; + clock-frequency = <400000>; + + baseboard_eeprom: baseboard_eeprom@50 { + compatible = "atmel,24c02"; + reg = <0x50>; + + #address-cells = <1>; + #size-cells = <1>; + baseboard_data: baseboard_data@0 { + reg = <0 0x100>; + }; + }; +}; + +&cpsw_emac0 { + phy_id = <&davinci_mdio>, <1>; + phy-mode = "rgmii"; + dual_emac_res_vlan = <1>; +}; + +&cpsw_emac1 { + phy_id = <&davinci_mdio>, <2>; + phy-mode = "rgmii"; + dual_emac_res_vlan = <2>; +}; + +&mac { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&cpsw_default>; + pinctrl-1 = <&cpsw_sleep>; + active_slave = <1>; + status = "okay"; + dual_emac; + txen-skew-ps = <0>; + rxdv-skew-ps = <1400>; + rxd0-skew-ps = <1400>; + rxd1-skew-ps = <1400>; + rxd2-skew-ps = <1400>; + rxd3-skew-ps = <1400>; + txd0-skew-ps = <0>; + txd1-skew-ps = <0>; + txd2-skew-ps = <0>; + txd3-skew-ps = <0>; + rxc-skew-ps = <4400>; + txc-skew-ps = <6200>; +}; + +&davinci_mdio { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&davinci_mdio_default>; + pinctrl-1 = <&davinci_mdio_sleep>; + status = "okay"; +}; + +&aes { + status = "okay"; +}; + +&sham { + status = "okay"; +}; diff --git a/sys/boot/forth/Makefile.inc b/sys/boot/forth/Makefile.inc index 97ab433..7c82ac4 100644 --- 
a/sys/boot/forth/Makefile.inc +++ b/sys/boot/forth/Makefile.inc @@ -23,3 +23,7 @@ FILES+= shortcuts.4th FILES+= support.4th FILES+= version.4th FILESDIR_loader.conf= /boot/defaults + +# pfSense +FILES+= logo-pfSensebw.4th +FILES+= brand-pfSense.4th diff --git a/sys/boot/forth/brand-pfSense.4th b/sys/boot/forth/brand-pfSense.4th new file mode 100644 index 0000000..7c8696e --- /dev/null +++ b/sys/boot/forth/brand-pfSense.4th @@ -0,0 +1,46 @@ +\ Copyright (c) 2004-2015 Electric Sheep Fencing LLC +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. +\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +2 brandX ! 1 brandY ! 
\ Initialize brand placement defaults + +: brand+ ( x y c-addr/u -- x y' ) + 2swap 2dup at-xy 2swap \ position the cursor + type \ print to the screen + 1+ \ increase y for next time we're called +; + +: brand ( x y -- ) \ "pfSense" [wide] logo in B/W (7 rows x 42 columns) + + s" __ " brand+ + s" _ __ / _|___ ___ _ __ ___ ___ " brand+ + s" | '_ \| |_/ __|/ _ \ '_ \/ __|/ _ \ " brand+ + s" | |_) | _\__ \ __/ | | \__ \ __/ " brand+ + s" | .__/|_| |___/\___|_| |_|___/\___| " brand+ + s" |_| " brand+ + s" " brand+ + + 2drop +; diff --git a/sys/boot/forth/loader.conf b/sys/boot/forth/loader.conf index 3af56d3..46b49ec 100644 --- a/sys/boot/forth/loader.conf +++ b/sys/boot/forth/loader.conf @@ -573,3 +573,10 @@ mac_seeotheruids_load="NO" # UID visbility MAC policy #module_before="cmd" # executes "cmd" before loading the module #module_after="cmd" # executes "cmd" after loading the module #module_error="cmd" # executes "cmd" if load fails + +# pfSense specific default values +loader_color="NO" +loader_logo="pfSensebw" +loader_brand="pfSense" +hw.usb.no_pf="1" +net.isr.maxthreads="-1" diff --git a/sys/boot/forth/logo-pfSensebw.4th b/sys/boot/forth/logo-pfSensebw.4th new file mode 100644 index 0000000..4804011 --- /dev/null +++ b/sys/boot/forth/logo-pfSensebw.4th @@ -0,0 +1,54 @@ +\ Copyright (c) 2004-2015 Electric Sheep Fencing LLC +\ All rights reserved. +\ +\ Redistribution and use in source and binary forms, with or without +\ modification, are permitted provided that the following conditions +\ are met: +\ 1. Redistributions of source code must retain the above copyright +\ notice, this list of conditions and the following disclaimer. +\ 2. Redistributions in binary form must reproduce the above copyright +\ notice, this list of conditions and the following disclaimer in the +\ documentation and/or other materials provided with the distribution. 
+\ +\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +\ SUCH DAMAGE. +\ +\ $FreeBSD$ + +46 logoX ! 7 logoY ! \ Initialize logo placement defaults + +: logo+ ( x y c-addr/u -- x y' ) + 2swap 2dup at-xy 2swap \ position the cursor + type \ print to the screen + 1+ \ increase y for next time we're called +; + +: logo ( x y -- ) \ B/W pfSense logo (15 rows x 32 columns) + + s" " logo+ + s" " logo+ + s" __________________________ " logo+ + s" / ___\ " logo+ + s" | /` " logo+ + s" | / :-|" logo+ + s" | _________ ___/ /_ |" logo+ + s" | /` ____ / /__ ___/ |" logo+ + s" | / / / / / / |" logo+ + s" | / /___/ / / / |" logo+ + s" | / ______/ / / |" logo+ + s" |/ / / / |" logo+ + s" / /___/ |" logo+ + s" / |" logo+ + s" /_________________________/ " logo+ + + 2drop +; diff --git a/sys/boot/forth/menu-commands.4th b/sys/boot/forth/menu-commands.4th index 9adf30a..5c6350a 100644 --- a/sys/boot/forth/menu-commands.4th +++ b/sys/boot/forth/menu-commands.4th @@ -253,7 +253,7 @@ also menu-namespace also menu-command-helpers cr ." To get back to the menu, type `menu' and press ENTER" cr - ." or type `boot' and press ENTER to start FreeBSD." cr + ." or type `boot' and press ENTER to start pfSense." 
cr cr FALSE \ exit the menu diff --git a/sys/boot/forth/menu.4th b/sys/boot/forth/menu.4th index e3fe0f7..fb4eb41 100644 --- a/sys/boot/forth/menu.4th +++ b/sys/boot/forth/menu.4th @@ -470,7 +470,7 @@ also menu-infrastructure definitions \ Print the frame caption at (x,y) s" loader_menu_title" getenv dup -1 = if - drop s" Welcome to FreeBSD" + drop s" Welcome to pfSense" then TRUE ( use default alignment ) s" loader_menu_title_align" getenv dup -1 <> if diff --git a/sys/boot/i386/boot0/boot0.S b/sys/boot/i386/boot0/boot0.S index 708f093..2c6ed04 100644 --- a/sys/boot/i386/boot0/boot0.S +++ b/sys/boot/i386/boot0/boot0.S @@ -647,8 +647,8 @@ os_dos: #endif os_win: .ascii "Wi"; .byte 'n'|0x80 os_linux: .ascii "Linu"; .byte 'x'|0x80 -os_freebsd: .ascii "Free" -os_bsd: .ascii "BS"; .byte 'D'|0x80 +os_freebsd: .ascii "pfSe" +os_bsd: .ascii "ns"; .byte 'e'|0x80 #ifndef SAVE_MORE_MEMORY os_ext: .ascii "EX"; .byte 'T'|0x80 #endif diff --git a/sys/boot/uboot/common/main.c b/sys/boot/uboot/common/main.c index 3b97a6e..c4efb1f 100644 --- a/sys/boot/uboot/common/main.c +++ b/sys/boot/uboot/common/main.c @@ -416,7 +416,9 @@ main(int argc, char **argv) /* * Initialise the heap as early as possible. Once this is done, - * alloc() is usable. The stack is buried inside us, so this is safe. + * alloc() is usable. We are using the stack u-boot set up near the top + * of physical ram; hopefully there is sufficient space between the end + * of our bss and the bottom of the u-boot stack to avoid overlap. 
*/ uboot_heap_start = round_page((uintptr_t)end); uboot_heap_end = uboot_heap_start + 512 * 1024; diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h b/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h index aa84f36..ee6e552 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/sysmacros.h @@ -388,7 +388,7 @@ highbit(ulong_t i) #if defined(__FreeBSD__) && defined(_KERNEL) && defined(HAVE_INLINE_FLSL) return (flsl(i)); #else - register int h = 1; + int h = 1; if (i == 0) return (0); diff --git a/sys/compat/ia32/ia32_util.h b/sys/compat/ia32/ia32_util.h index 6b5f9a1..41aad4e 100644 --- a/sys/compat/ia32/ia32_util.h +++ b/sys/compat/ia32/ia32_util.h @@ -50,7 +50,7 @@ #define IA32_MAXVMEM 0 /* Unlimited */ struct syscall_args; -int ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa); +int ia32_fetch_syscall_args(struct thread *td); void ia32_set_syscall_retval(struct thread *, int); void ia32_fixlimit(struct rlimit *rl, int which); diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c index 971006d..930be5d 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -306,6 +306,7 @@ linux_clone_thread(struct thread *td, struct linux_clone_args *args) __rangeof(struct thread, td_startzero, td_endzero)); bcopy(&td->td_startcopy, &newtd->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); + newtd->td_sa = td->td_sa; newtd->td_proc = p; thread_cow_get(newtd, td); diff --git a/sys/conf/files b/sys/conf/files index e4da191..6b14143 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1581,6 +1581,7 @@ dev/etherswitch/ip17x/ip17x_phy.c optional ip17x dev/etherswitch/ip17x/ip17x_vlans.c optional ip17x dev/etherswitch/miiproxy.c optional miiproxy dev/etherswitch/rtl8366/rtl8366rb.c optional rtl8366rb +dev/etherswitch/e6000sw/e6000sw.c optional e6000sw dev/etherswitch/ukswitch/ukswitch.c optional ukswitch 
dev/evdev/cdev.c optional evdev dev/evdev/evdev.c optional evdev @@ -1698,8 +1699,10 @@ dev/iicbus/iiconf.c optional iicbus dev/iicbus/iicsmb.c optional iicsmb \ dependency "iicbus_if.h" dev/iicbus/iicoc.c optional iicoc +dev/iicbus/is31fl319x.c optional is31fl319x gpio dev/iicbus/lm75.c optional lm75 dev/iicbus/ofw_iicbus.c optional fdt iicbus +dev/iicbus/pca9552.c optional pca9552 gpio dev/iicbus/pcf8563.c optional pcf8563 dev/iicbus/s35390a.c optional s35390a dev/iir/iir.c optional iir diff --git a/sys/conf/options b/sys/conf/options index f52b390..b0d1b2e 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -877,6 +877,10 @@ MWL_DIAGAPI opt_mwl.h MWL_AGGR_SIZE opt_mwl.h MWL_TX_NODROP opt_mwl.h +# Options for the Marvell NETA driver +MVNETA_MULTIQUEUE opt_mvneta.h +MVNETA_KTR opt_mvneta.h + # Options for the Intel 802.11ac wireless driver IWM_DEBUG opt_iwm.h diff --git a/sys/conf/options.arm b/sys/conf/options.arm index d46f921..4e6aa74 100644 --- a/sys/conf/options.arm +++ b/sys/conf/options.arm @@ -3,6 +3,7 @@ ARMV6 opt_global.h ARM_CACHE_LOCK_ENABLE opt_global.h ARM_KERN_DIRECTMAP opt_vm.h ARM_L2_PIPT opt_global.h +ARM_L2_PREFETCH opt_global.h ARM_MANY_BOARD opt_global.h NKPT2PG opt_pmap.h ARM_WANT_TP_ADDRESS opt_global.h @@ -56,10 +57,7 @@ SOC_IMX6 opt_global.h SOC_MV_ARMADAXP opt_global.h SOC_MV_ARMADA38X opt_global.h SOC_MV_DISCOVERY opt_global.h -SOC_MV_DOVE opt_global.h -SOC_MV_FREY opt_global.h SOC_MV_KIRKWOOD opt_global.h -SOC_MV_LOKIPLUS opt_global.h SOC_MV_ORION opt_global.h SOC_OMAP3 opt_global.h SOC_OMAP4 opt_global.h diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index fa8b673..41ba932 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -1609,6 +1609,14 @@ ahci_execute_transaction(struct ahci_slot *slot) } /* + * Some Marvell controllers require additional time + * after soft reset to work properly. Setup delay + * to 50ms after soft reset. 
+ */ + if (ch->quirks & AHCI_Q_MRVL_SR_DEL) + DELAY(50000); + + /* * Marvell HBAs with non-RAID firmware do not wait for * readiness after soft reset, so we have to wait here. * Marvell RAIDs do not have this problem, but instead diff --git a/sys/dev/ahci/ahci.h b/sys/dev/ahci/ahci.h index 0dd9a4d..b8b908f 100644 --- a/sys/dev/ahci/ahci.h +++ b/sys/dev/ahci/ahci.h @@ -598,6 +598,7 @@ enum ahci_err_type { #define AHCI_Q_FORCE_PI 0x00040000 #define AHCI_Q_RESTORE_CAP 0x00080000 #define AHCI_Q_NOMSIX 0x00100000 +#define AHCI_Q_MRVL_SR_DEL 0x00200000 #define AHCI_Q_NOCCS 0x00400000 #define AHCI_Q_NOAUX 0x00800000 @@ -624,6 +625,7 @@ enum ahci_err_type { "\023FORCE_PI" \ "\024RESTORE_CAP" \ "\025NOMSIX" \ + "\026MRVL_SR_DEL" \ "\027NOCCS" \ "\030NOAUX" diff --git a/sys/dev/ahci/ahci_mv_fdt.c b/sys/dev/ahci/ahci_mv_fdt.c new file mode 100644 index 0000000..cf5b853 --- /dev/null +++ b/sys/dev/ahci/ahci_mv_fdt.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2017 Semihalf. + * Copyright (c) 2017 Stormshield. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/stdint.h> +#include <sys/stddef.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/sysctl.h> +#include <sys/rman.h> +#include <sys/unistd.h> + +#include <machine/bus.h> +#include <machine/resource.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <dev/ahci/ahci.h> + +#define AHCI_VENDOR_SPECIFIC_0_ADDR 0xa0 +#define AHCI_VENDOR_SPECIFIC_0_DATA 0xa4 + +#define AHCI_HC_DEVSTR "Marvell AHCI Controller" +#define AHCI_HC_VENDOR "Marvell" + +static device_attach_t ahci_mv_fdt_attach; + +static struct ofw_compat_data compatible_data[] = { + {"marvell,armada-380-ahci", true}, + {NULL, false} +}; + +static void +ahci_mv_regret_config(struct ahci_controller *ctlr) +{ + + /* + * Enable the regret bit to allow the SATA unit to regret + * a request that didn't receive an acknowledge + * and a avoid deadlock + */ + ATA_OUTL(ctlr->r_mem, AHCI_VENDOR_SPECIFIC_0_ADDR, 0x4); + ATA_OUTL(ctlr->r_mem, AHCI_VENDOR_SPECIFIC_0_DATA, 0x80); +} + +static int +ahci_mv_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, compatible_data)->ocd_data) + return (ENXIO); + + device_set_desc(dev, AHCI_HC_DEVSTR); + + return (BUS_PROBE_DEFAULT); +} + +static int +ahci_mv_fdt_attach(device_t dev) +{ + struct ahci_controller 
*ctlr; + int rc; + + ctlr = device_get_softc(dev); + ctlr->dev = dev; + ctlr->r_rid = 0; + ctlr->quirks = AHCI_Q_2CH; + ctlr->numirqs = 1; + + if (ofw_bus_is_compatible(dev, "marvell,armada-380-ahci")) + ctlr->quirks |= AHCI_Q_MRVL_SR_DEL; + + /* Allocate memory for controller */ + ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &ctlr->r_rid, RF_ACTIVE | RF_SHAREABLE); + if (ctlr->r_mem == NULL) { + device_printf(dev, "Failed to alloc memory for controller\n"); + return (ENOMEM); + } + + /* Reset controller */ + rc = ahci_ctlr_reset(dev); + if (rc != 0) { + device_printf(dev, "Failed to reset controller\n"); + bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); + return (ENXIO); + } + + ahci_mv_regret_config(ctlr); + + rc = ahci_attach(dev); + if (rc != 0) { + device_printf(dev, "Failed to initialize AHCI, with error %d\n", rc); + return (ENXIO); + } + + return (0); +} + +static device_method_t ahci_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ahci_mv_fdt_probe), + DEVMETHOD(device_attach, ahci_mv_fdt_attach), + DEVMETHOD(device_detach, ahci_detach), + DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), + DEVMETHOD(bus_release_resource, ahci_release_resource), + DEVMETHOD(bus_setup_intr, ahci_setup_intr), + DEVMETHOD(bus_teardown_intr, ahci_teardown_intr), + DEVMETHOD(bus_print_child, ahci_print_child), + DEVMETHOD(bus_child_location_str, ahci_child_location_str), + DEVMETHOD(bus_get_dma_tag, ahci_get_dma_tag), + DEVMETHOD_END +}; + +static devclass_t ahci_devclass; +static driver_t ahci_driver = { + "ahci", + ahci_methods, + sizeof(struct ahci_controller) +}; + +DRIVER_MODULE(ahci, simplebus, ahci_driver, ahci_devclass, NULL, NULL); +DRIVER_MODULE(ahci, ofwbus, ahci_driver, ahci_devclass, NULL, NULL); diff --git a/sys/dev/cesa/cesa.c b/sys/dev/cesa/cesa.c index 5b3b06c..3b74d70 100644 --- a/sys/dev/cesa/cesa.c +++ b/sys/dev/cesa/cesa.c @@ -69,7 +69,6 @@ __FBSDID("$FreeBSD$"); #include "cryptodev_if.h" #include 
<arm/mv/mvreg.h> -#include <arm/mv/mvwin.h> #include <arm/mv/mvvar.h> #include "cesa.h" @@ -80,7 +79,6 @@ static void cesa_intr(void *); static int cesa_newsession(device_t, u_int32_t *, struct cryptoini *); static int cesa_freesession(device_t, u_int64_t); static int cesa_process(device_t, struct cryptop *, int); -static int decode_win_cesa_setup(struct cesa_softc *sc); static struct resource_spec cesa_res_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, @@ -953,11 +951,13 @@ cesa_execute(struct cesa_softc *sc) ctd = STAILQ_FIRST(&cr->cr_tdesc); CESA_TDMA_WRITE(sc, CESA_TDMA_ND, ctd->ctd_cthd_paddr); -#if defined (SOC_MV_ARMADA38X) - CESA_REG_WRITE(sc, CESA_SA_CMD, CESA_SA_CMD_ACTVATE | CESA_SA_CMD_SHA2); -#else - CESA_REG_WRITE(sc, CESA_SA_CMD, CESA_SA_CMD_ACTVATE); -#endif + + if (sc->sc_soc_id == MV_DEV_88F6828 || + sc->sc_soc_id == MV_DEV_88F6820 || + sc->sc_soc_id == MV_DEV_88F6810) + CESA_REG_WRITE(sc, CESA_SA_CMD, CESA_SA_CMD_ACTVATE | CESA_SA_CMD_SHA2); + else + CESA_REG_WRITE(sc, CESA_SA_CMD, CESA_SA_CMD_ACTVATE); CESA_UNLOCK(sc, requests); } @@ -968,6 +968,7 @@ cesa_setup_sram(struct cesa_softc *sc) phandle_t sram_node; ihandle_t sram_ihandle; pcell_t sram_handle, sram_reg[2]; + void *sram_va; int rv; rv = OF_getencprop(ofw_bus_get_node(sc->sc_dev), "sram-handle", @@ -986,15 +987,17 @@ cesa_setup_sram(struct cesa_softc *sc) /* Store SRAM size to be able to unmap in detach() */ sc->sc_sram_size = sram_reg[1]; -#if defined(SOC_MV_ARMADA38X) - void *sram_va; + if (sc->sc_soc_id != MV_DEV_88F6828 && + sc->sc_soc_id != MV_DEV_88F6820 && + sc->sc_soc_id != MV_DEV_88F6810) + return (0); /* SRAM memory was not mapped in platform_sram_devmap(), map it now */ sram_va = pmap_mapdev(sc->sc_sram_base_pa, sc->sc_sram_size); if (sram_va == NULL) return (ENOMEM); sc->sc_sram_base_va = (vm_offset_t)sram_va; -#endif + return (0); } @@ -1018,7 +1021,7 @@ static int cesa_attach(device_t dev) { struct cesa_softc *sc; - uint32_t d, r; + uint32_t d, r, val; int error; int i; @@ 
-1027,34 +1030,40 @@ cesa_attach(device_t dev) sc->sc_error = 0; sc->sc_dev = dev; - /* Check if CESA peripheral device has power turned on */ -#if defined(SOC_MV_KIRKWOOD) - if (soc_power_ctrl_get(CPU_PM_CTRL_CRYPTO) == CPU_PM_CTRL_CRYPTO) { - device_printf(dev, "not powered on\n"); - return (ENXIO); - } -#else - if (soc_power_ctrl_get(CPU_PM_CTRL_CRYPTO) != CPU_PM_CTRL_CRYPTO) { - device_printf(dev, "not powered on\n"); - return (ENXIO); - } -#endif soc_id(&d, &r); switch (d) { case MV_DEV_88F6281: case MV_DEV_88F6282: + /* Check if CESA peripheral device has power turned on */ + if (soc_power_ctrl_get(CPU_PM_CTRL_CRYPTO) == + CPU_PM_CTRL_CRYPTO) { + device_printf(dev, "not powered on\n"); + return (ENXIO); + } + sc->sc_tperr = 0; + break; case MV_DEV_88F6828: + case MV_DEV_88F6820: + case MV_DEV_88F6810: sc->sc_tperr = 0; break; case MV_DEV_MV78100: case MV_DEV_MV78100_Z0: + /* Check if CESA peripheral device has power turned on */ + if (soc_power_ctrl_get(CPU_PM_CTRL_CRYPTO) != + CPU_PM_CTRL_CRYPTO) { + device_printf(dev, "not powered on\n"); + return (ENXIO); + } sc->sc_tperr = CESA_ICR_TPERR; break; default: return (ENXIO); } + sc->sc_soc_id = d; + /* Initialize mutexes */ mtx_init(&sc->sc_sc_lock, device_get_nameunit(dev), "CESA Shared Data", MTX_DEF); @@ -1074,13 +1083,6 @@ cesa_attach(device_t dev) goto err0; } - /* Setup CESA decoding windows */ - error = decode_win_cesa_setup(sc); - if (error) { - device_printf(dev, "could not setup decoding windows\n"); - goto err1; - } - /* Acquire SRAM base address */ error = cesa_setup_sram(sc); if (error) { @@ -1189,12 +1191,15 @@ cesa_attach(device_t dev) * - Outstanding reads enabled, * - No byte-swap. 
*/ - CESA_TDMA_WRITE(sc, CESA_TDMA_CR, CESA_TDMA_CR_DBL128 | - CESA_TDMA_CR_SBL128 | CESA_TDMA_CR_ORDEN | CESA_TDMA_CR_NBS | -#if defined (SOC_MV_ARMADA38X) - CESA_TDMA_NUM_OUTSTAND | -#endif - CESA_TDMA_CR_ENABLE); + val = CESA_TDMA_CR_DBL128 | CESA_TDMA_CR_SBL128 | + CESA_TDMA_CR_ORDEN | CESA_TDMA_CR_NBS | CESA_TDMA_CR_ENABLE; + + if (sc->sc_soc_id == MV_DEV_88F6828 || + sc->sc_soc_id == MV_DEV_88F6820 || + sc->sc_soc_id == MV_DEV_88F6810) + val |= CESA_TDMA_NUM_OUTSTAND; + + CESA_TDMA_WRITE(sc, CESA_TDMA_CR, val); /* * Initialize SA: @@ -1228,7 +1233,10 @@ cesa_attach(device_t dev) crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA1, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0); - crypto_register(sc->sc_cid, CRYPTO_SHA2_256_HMAC, 0, 0); + if (sc->sc_soc_id == MV_DEV_88F6828 || + sc->sc_soc_id == MV_DEV_88F6820 || + sc->sc_soc_id == MV_DEV_88F6810) + crypto_register(sc->sc_cid, CRYPTO_SHA2_256_HMAC, 0, 0); return (0); err8: @@ -1246,9 +1254,10 @@ err4: err3: bus_teardown_intr(dev, sc->sc_res[RES_CESA_IRQ], sc->sc_icookie); err2: -#if defined(SOC_MV_ARMADA38X) - pmap_unmapdev(sc->sc_sram_base_va, sc->sc_sram_size); -#endif + if (sc->sc_soc_id == MV_DEV_88F6828 || + sc->sc_soc_id == MV_DEV_88F6820 || + sc->sc_soc_id == MV_DEV_88F6810) + pmap_unmapdev(sc->sc_sram_base_va, sc->sc_sram_size); err1: bus_release_resources(dev, cesa_res_spec, sc->sc_res); err0: @@ -1296,10 +1305,12 @@ cesa_detach(device_t dev) /* Relase I/O and IRQ resources */ bus_release_resources(dev, cesa_res_spec, sc->sc_res); -#if defined(SOC_MV_ARMADA38X) /* Unmap SRAM memory */ - pmap_unmapdev(sc->sc_sram_base_va, sc->sc_sram_size); -#endif + if (sc->sc_soc_id == MV_DEV_88F6828 || + sc->sc_soc_id == MV_DEV_88F6820 || + sc->sc_soc_id == MV_DEV_88F6810) + pmap_unmapdev(sc->sc_sram_base_va, sc->sc_sram_size); + /* Destroy mutexes */ mtx_destroy(&sc->sc_sessions_lock); mtx_destroy(&sc->sc_requests_lock); @@ -1686,50 +1697,3 @@ 
cesa_process(device_t dev, struct cryptop *crp, int hint) return (0); } - -/* - * Set CESA TDMA decode windows. - */ -static int -decode_win_cesa_setup(struct cesa_softc *sc) -{ - struct mem_region availmem_regions[FDT_MEM_REGIONS]; - int availmem_regions_sz; - uint32_t br, cr, i; - - /* Grab physical memory regions information from DTS */ - if (fdt_get_mem_regions(availmem_regions, &availmem_regions_sz, - NULL) != 0) - return (ENXIO); - - if (availmem_regions_sz > MV_WIN_CESA_MAX) { - device_printf(sc->sc_dev, "Too much memory regions, cannot " - " set CESA windows to cover whole DRAM \n"); - return (ENXIO); - } - - /* Disable and clear all CESA windows */ - for (i = 0; i < MV_WIN_CESA_MAX; i++) { - CESA_TDMA_WRITE(sc, MV_WIN_CESA_BASE(i), 0); - CESA_TDMA_WRITE(sc, MV_WIN_CESA_CTRL(i), 0); - } - - /* Fill CESA TDMA decoding windows with information acquired from DTS */ - for (i = 0; i < availmem_regions_sz; i++) { - br = availmem_regions[i].mr_start; - cr = availmem_regions[i].mr_size; - - /* Don't add entries with size lower than 64KB */ - if (cr & 0xffff0000) { - cr = (((cr - 1) & 0xffff0000) | - (MV_WIN_DDR_ATTR(i) << MV_WIN_CPU_ATTR_SHIFT) | - (MV_WIN_DDR_TARGET << MV_WIN_CPU_TARGET_SHIFT) | - MV_WIN_CPU_ENABLE_BIT); - CESA_TDMA_WRITE(sc, MV_WIN_CESA_BASE(i), br); - CESA_TDMA_WRITE(sc, MV_WIN_CESA_CTRL(i), cr); - } - } - - return (0); -} - diff --git a/sys/dev/cesa/cesa.h b/sys/dev/cesa/cesa.h index e8f6372..eb1342a 100644 --- a/sys/dev/cesa/cesa.h +++ b/sys/dev/cesa/cesa.h @@ -61,8 +61,8 @@ */ /* Values below are optimized for requests containing about 1.5 kB of data */ -#define CESA_SA_DESC_PER_REQ 2 -#define CESA_TDMA_DESC_PER_REQ 8 +#define CESA_SA_DESC_PER_REQ 8 +#define CESA_TDMA_DESC_PER_REQ 32 #define CESA_SA_DESCRIPTORS (CESA_SA_DESC_PER_REQ * CESA_REQUESTS) #define CESA_TDMA_DESCRIPTORS (CESA_TDMA_DESC_PER_REQ * CESA_REQUESTS) @@ -231,6 +231,7 @@ struct cesa_packet { struct cesa_softc { device_t sc_dev; int32_t sc_cid; + uint32_t sc_soc_id; struct 
resource *sc_res[RES_CESA_NUM]; void *sc_icookie; bus_dma_tag_t sc_data_dtag; @@ -335,10 +336,7 @@ struct cesa_chain_info { #define CESA_TDMA_CR_ENABLE (1 << 12) #define CESA_TDMA_CR_FETCHND (1 << 13) #define CESA_TDMA_CR_ACTIVE (1 << 14) - -#if defined (SOC_MV_ARMADA38X) #define CESA_TDMA_NUM_OUTSTAND (2 << 16) -#endif #define CESA_TDMA_ECR 0x08C8 #define CESA_TDMA_ECR_MISS (1 << 0) @@ -352,18 +350,10 @@ struct cesa_chain_info { #define CESA_TDMA_EMR_BOTH_HIT CESA_TDMA_ECR_BOTH_HIT #define CESA_TDMA_EMR_DATA_ERROR CESA_TDMA_ECR_DATA_ERROR -/* CESA TDMA address decoding registers */ -#define MV_WIN_CESA_CTRL(n) (0x8 * (n) + 0xA04) -#define MV_WIN_CESA_BASE(n) (0x8 * (n) + 0xA00) -#define MV_WIN_CESA_MAX 4 - /* CESA SA registers definitions */ #define CESA_SA_CMD 0x0E00 #define CESA_SA_CMD_ACTVATE (1 << 0) - -#if defined (SOC_MV_ARMADA38X) #define CESA_SA_CMD_SHA2 (1 << 31) -#endif #define CESA_SA_DPR 0x0E04 diff --git a/sys/dev/etherswitch/arswitch/arswitch.c b/sys/dev/etherswitch/arswitch/arswitch.c index 8396f8a..82f22ad 100644 --- a/sys/dev/etherswitch/arswitch/arswitch.c +++ b/sys/dev/etherswitch/arswitch/arswitch.c @@ -77,6 +77,14 @@ static SYSCTL_NODE(_debug, OID_AUTO, arswitch, CTLFLAG_RD, 0, "arswitch"); #endif +/* Map ETHERSWITCH_PORT_LED_* to Atheros pattern codes */ +static int led_pattern_table[] = { + [ETHERSWITCH_PORT_LED_DEFAULT] = 0x3, + [ETHERSWITCH_PORT_LED_ON] = 0x2, + [ETHERSWITCH_PORT_LED_OFF] = 0x0, + [ETHERSWITCH_PORT_LED_BLINK] = 0x1 +}; + static inline int arswitch_portforphy(int phy); static void arswitch_tick(void *arg); static int arswitch_ifmedia_upd(struct ifnet *); @@ -85,6 +93,8 @@ static int ar8xxx_port_vlan_setup(struct arswitch_softc *sc, etherswitch_port_t *p); static int ar8xxx_port_vlan_get(struct arswitch_softc *sc, etherswitch_port_t *p); +static int arswitch_setled(struct arswitch_softc *sc, int phy, int led, + int style); static int arswitch_probe(device_t dev) @@ -188,9 +198,23 @@ arswitch_attach_phys(struct arswitch_softc 
*sc) device_printf(sc->sc_dev, "attaching PHY %d failed\n", phy); + return (err); + } + + if (AR8X16_IS_SWITCH(sc, AR8327)) { + int led; + char ledname[IFNAMSIZ+4]; + + for (led = 0; led < 3; led++) { + sprintf(ledname, "%s%dled%d", name, + arswitch_portforphy(phy), led+1); + sc->dev_led[phy][led].sc = sc; + sc->dev_led[phy][led].phy = phy; + sc->dev_led[phy][led].lednum = led; + } } } - return (err); + return (0); } static int @@ -683,6 +707,38 @@ arswitch_getport(device_t dev, etherswitch_port_t *p) } else { return (ENXIO); } + + if (!arswitch_is_cpuport(sc, p->es_port) && + AR8X16_IS_SWITCH(sc, AR8327)) { + int led; + p->es_nleds = 3; + + for (led = 0; led < p->es_nleds; led++) + { + int style; + uint32_t val; + + /* Find the right style enum for our pattern */ + val = arswitch_readreg(dev, + ar8327_led_mapping[p->es_port-1][led].reg); + val = (val>>ar8327_led_mapping[p->es_port-1][led].shift)&0x03; + + for (style = 0; style < ETHERSWITCH_PORT_LED_MAX; style++) + { + if (led_pattern_table[style] == val) break; + } + + /* can't happen */ + if (style == ETHERSWITCH_PORT_LED_MAX) + style = ETHERSWITCH_PORT_LED_DEFAULT; + + p->es_led[led] = style; + } + } else + { + p->es_nleds = 0; + } + return (0); } @@ -727,7 +783,7 @@ ar8xxx_port_vlan_setup(struct arswitch_softc *sc, etherswitch_port_t *p) static int arswitch_setport(device_t dev, etherswitch_port_t *p) { - int err; + int err, i; struct arswitch_softc *sc; struct ifmedia *ifm; struct mii_data *mii; @@ -744,9 +800,20 @@ arswitch_setport(device_t dev, etherswitch_port_t *p) return (err); } - /* Do not allow media changes on CPU port. */ + /* Do not allow media or led changes on CPU port. 
*/ if (arswitch_is_cpuport(sc, p->es_port)) return (0); + + if (AR8X16_IS_SWITCH(sc, AR8327)) + { + for (i = 0; i < 3; i++) + { + int err; + err = arswitch_setled(sc, p->es_port-1, i, p->es_led[i]); + if (err) + return (err); + } + } mii = arswitch_miiforport(sc, p->es_port); if (mii == NULL) @@ -758,6 +825,23 @@ arswitch_setport(device_t dev, etherswitch_port_t *p) return (ifmedia_ioctl(ifp, &p->es_ifr, ifm, SIOCSIFMEDIA)); } +static int +arswitch_setled(struct arswitch_softc *sc, int phy, int led, int style) +{ + int shift; + + if (phy < 0 || phy > sc->numphys) + return EINVAL; + + if (style < 0 || style > ETHERSWITCH_PORT_LED_MAX) + return (EINVAL); + + shift = ar8327_led_mapping[phy][led].shift; + return (arswitch_modifyreg(sc->sc_dev, + ar8327_led_mapping[phy][led].reg, + 0x03 << shift, led_pattern_table[style] << shift)); +} + static void arswitch_statchg(device_t dev) { diff --git a/sys/dev/etherswitch/arswitch/arswitch_8327.c b/sys/dev/etherswitch/arswitch/arswitch_8327.c index 92e44fc..b0b2541 100644 --- a/sys/dev/etherswitch/arswitch/arswitch_8327.c +++ b/sys/dev/etherswitch/arswitch/arswitch_8327.c @@ -75,6 +75,36 @@ * lead to traffic storms/loops. 
*/ +/* Map port+led to register+shift */ +struct ar8327_led_mapping ar8327_led_mapping[AR8327_NUM_PHYS][ETHERSWITCH_PORT_MAX_LEDS] = +{ + { /* PHY0 */ + {AR8327_REG_LED_CTRL0, 14 }, + {AR8327_REG_LED_CTRL1, 14 }, + {AR8327_REG_LED_CTRL2, 14 } + }, + { /* PHY1 */ + {AR8327_REG_LED_CTRL3, 8 }, + {AR8327_REG_LED_CTRL3, 10 }, + {AR8327_REG_LED_CTRL3, 12 } + }, + { /* PHY2 */ + {AR8327_REG_LED_CTRL3, 14 }, + {AR8327_REG_LED_CTRL3, 16 }, + {AR8327_REG_LED_CTRL3, 18 } + }, + { /* PHY3 */ + {AR8327_REG_LED_CTRL3, 20 }, + {AR8327_REG_LED_CTRL3, 22 }, + {AR8327_REG_LED_CTRL3, 24 } + }, + { /* PHY4 */ + {AR8327_REG_LED_CTRL0, 30 }, + {AR8327_REG_LED_CTRL1, 30 }, + {AR8327_REG_LED_CTRL2, 30 } + } +}; + static int ar8327_vlan_op(struct arswitch_softc *sc, uint32_t op, uint32_t vid, uint32_t data) diff --git a/sys/dev/etherswitch/arswitch/arswitch_8327.h b/sys/dev/etherswitch/arswitch/arswitch_8327.h index 1f35d96..e2a5f5e 100644 --- a/sys/dev/etherswitch/arswitch/arswitch_8327.h +++ b/sys/dev/etherswitch/arswitch/arswitch_8327.h @@ -85,6 +85,11 @@ struct ar8327_port_cfg { uint32_t rxpause; }; +extern struct ar8327_led_mapping { + int reg; + int shift; +} ar8327_led_mapping[AR8327_NUM_PHYS][ETHERSWITCH_PORT_MAX_LEDS]; + extern void ar8327_attach(struct arswitch_softc *sc); #endif /* __ARSWITCH_8327_H__ */ diff --git a/sys/dev/etherswitch/arswitch/arswitchvar.h b/sys/dev/etherswitch/arswitch/arswitchvar.h index a322a4f..dc08799 100644 --- a/sys/dev/etherswitch/arswitch/arswitchvar.h +++ b/sys/dev/etherswitch/arswitch/arswitchvar.h @@ -48,6 +48,15 @@ typedef enum { #define ARSWITCH_NUM_PORTS MAX(AR8327_NUM_PORTS, AR8X16_NUM_PORTS) #define ARSWITCH_NUM_PHYS MAX(AR8327_NUM_PHYS, AR8X16_NUM_PHYS) +#define ARSWITCH_NUM_LEDS 3 + +struct arswitch_dev_led { + struct arswitch_softc *sc; + struct cdev *led; + int phy; + int lednum; +}; + struct arswitch_softc { struct mtx sc_mtx; /* serialize access to softc */ device_t sc_dev; @@ -66,6 +75,7 @@ struct arswitch_softc { char 
*ifname[ARSWITCH_NUM_PHYS]; device_t miibus[ARSWITCH_NUM_PHYS]; struct ifnet *ifp[ARSWITCH_NUM_PHYS]; + struct arswitch_dev_led dev_led[ARSWITCH_NUM_PHYS][ARSWITCH_NUM_LEDS]; struct callout callout_tick; etherswitch_info_t info; diff --git a/sys/dev/etherswitch/e6000sw/e6000sw.c b/sys/dev/etherswitch/e6000sw/e6000sw.c index ae552b8..2739dd9 100644 --- a/sys/dev/etherswitch/e6000sw/e6000sw.c +++ b/sys/dev/etherswitch/e6000sw/e6000sw.c @@ -28,36 +28,32 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> +#include "opt_platform.h" + #include <sys/param.h> -#include <sys/systm.h> -#include <sys/sockio.h> +#include <sys/bus.h> +#include <sys/errno.h> #include <sys/kernel.h> #include <sys/kthread.h> -#include <sys/socket.h> #include <sys/module.h> -#include <sys/errno.h> -#include <sys/bus.h> -#include <sys/conf.h> -#include <sys/uio.h> -#include <sys/fcntl.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/sysctl.h> #include <net/if.h> #include <net/if_media.h> #include <net/if_types.h> -#include <machine/bus.h> -#include <machine/resource.h> - -#include <arm/mv/mvwin.h> -#include <arm/mv/mvreg.h> -#include <arm/mv/mvvar.h> - #include <dev/etherswitch/etherswitch.h> -#include <dev/mdio/mdio.h> #include <dev/mii/mii.h> #include <dev/mii/miivar.h> -#include <dev/mge/if_mgevar.h> + +#ifdef FDT +#include <dev/fdt/fdt_common.h> +#include <dev/ofw/ofw_bus.h> +#else +#include <machine/stdarg.h> +#endif #include "e6000swreg.h" #include "etherswitch_if.h" @@ -67,78 +63,116 @@ __FBSDID("$FreeBSD$"); MALLOC_DECLARE(M_E6000SW); MALLOC_DEFINE(M_E6000SW, "e6000sw", "e6000sw switch"); -#define E6000SW_LOCK(_sc) \ - sx_xlock(&(_sc)->sx) -#define E6000SW_UNLOCK(_sc) \ - sx_unlock(&(_sc)->sx) -#define E6000SW_LOCK_ASSERT(_sc, _what) \ - sx_assert(&(_sc)->sx, (_what)) -#define E6000SW_TRYLOCK(_sc) \ - sx_tryxlock(&(_sc)->sx) +#define E6000SW_LOCK(_sc) sx_xlock(&(_sc)->sx) +#define E6000SW_UNLOCK(_sc) sx_unlock(&(_sc)->sx) +#define 
E6000SW_LOCK_ASSERT(_sc, _what) sx_assert(&(_sc)->sx, (_what)) +#define E6000SW_TRYLOCK(_sc) sx_tryxlock(&(_sc)->sx) +#define E6000SW_MULTICHIP(_sc) (((_sc)->sw_addr != 0) ? 1 : 0) +#define E6000SW_WAITREADY(_sc, _reg, _bit) \ + e6000sw_waitready((_sc), REG_GLOBAL, (_reg), (_bit)) +#define E6000SW_WAITREADY2(_sc, _reg, _bit) \ + e6000sw_waitready((_sc), REG_GLOBAL2, (_reg), (_bit)) +#define MDIO_READ(dev, addr, reg) \ + MDIO_READREG(device_get_parent(dev), (addr), (reg)) +#define MDIO_WRITE(dev, addr, reg, val) \ + MDIO_WRITEREG(device_get_parent(dev), (addr), (reg), (val)) typedef struct e6000sw_softc { device_t dev; +#ifdef FDT + phandle_t node; +#endif struct sx sx; - struct ifnet *ifp[E6000SW_NUM_PHYS]; - char *ifname[E6000SW_NUM_PHYS]; - device_t miibus[E6000SW_NUM_PHYS]; - struct mii_data *mii[E6000SW_NUM_PHYS]; - struct callout tick_callout; - + struct ifnet *ifp[E6000SW_MAX_PORTS]; + char *ifname[E6000SW_MAX_PORTS]; + device_t miibus[E6000SW_MAX_PORTS]; + struct proc *kproc; + + int vlans[E6000SW_NUM_VLANS]; + uint32_t swid; + uint32_t vlan_mode; uint32_t cpuports_mask; - - int vid[E6000SW_NUM_VGROUPS]; - int members[E6000SW_NUM_VGROUPS]; - int vgroup[E6000SW_NUM_PORTS]; + uint32_t fixed_mask; + uint32_t fixed25_mask; + uint32_t used_mask; + uint32_t ports_mask; + int phy_base; /* SMI base addr of PHY regs */ + int port_base; /* SMI base addr of port regs */ + int sw_addr; + int num_laggs; + int num_ports; + + ssize_t iosize; + void *iobuf; } e6000sw_softc_t; static etherswitch_info_t etherswitch_info = { - .es_nports = E6000SW_NUM_PORTS, - .es_nvlangroups = E6000SW_NUM_VGROUPS, + .es_nports = 0, + .es_nlaggroups = 0, + .es_nvlangroups = 0, + .es_vlan_caps = ETHERSWITCH_VLAN_PORT | ETHERSWITCH_VLAN_DOT1Q, + .es_switch_caps = ETHERSWITCH_CAPS_PORTS_MASK | + ETHERSWITCH_CAPS_PSTATE | ETHERSWITCH_CAPS_LAGG, .es_name = "Marvell 6000 series switch" }; -static void e6000sw_identify(driver_t *driver, device_t parent); -static int e6000sw_probe(device_t dev); 
-static int e6000sw_attach(device_t dev); -static int e6000sw_detach(device_t dev); -static int e6000sw_readphy(device_t dev, int phy, int reg); -static int e6000sw_writephy(device_t dev, int phy, int reg, int data); -static etherswitch_info_t* e6000sw_getinfo(device_t dev); -static void e6000sw_lock(device_t dev); -static void e6000sw_unlock(device_t dev); -static int e6000sw_getport(device_t dev, etherswitch_port_t *p); -static int e6000sw_setport(device_t dev, etherswitch_port_t *p); -static int e6000sw_readreg_wrapper(device_t dev, int addr_reg); -static int e6000sw_writereg_wrapper(device_t dev, int addr_reg, int val); -static int e6000sw_readphy_wrapper(device_t dev, int phy, int reg); -static int e6000sw_writephy_wrapper(device_t dev, int phy, int reg, int data); -static int e6000sw_getvgroup_wrapper(device_t dev, etherswitch_vlangroup_t *vg); -static int e6000sw_setvgroup_wrapper(device_t dev, etherswitch_vlangroup_t *vg); -static int e6000sw_setvgroup(device_t dev, etherswitch_vlangroup_t *vg); -static int e6000sw_getvgroup(device_t dev, etherswitch_vlangroup_t *vg); -static void e6000sw_setup(device_t dev, e6000sw_softc_t *sc); -static void e6000sw_port_vlan_conf(e6000sw_softc_t *sc); -static void e6000sw_tick(void *arg); -static void e6000sw_set_atustat(device_t dev, e6000sw_softc_t *sc, int bin, - int flag); -static int e6000sw_atu_flush(device_t dev, e6000sw_softc_t *sc, int flag); -static __inline void e6000sw_writereg(e6000sw_softc_t *sc, int addr, int reg, - int val); -static __inline uint32_t e6000sw_readreg(e6000sw_softc_t *sc, int addr, - int reg); -static int e6000sw_ifmedia_upd(struct ifnet *ifp); -static void e6000sw_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); -static int e6000sw_atu_mac_table(device_t dev, e6000sw_softc_t *sc, struct - atu_opt *atu, int flag); -static int e6000sw_get_pvid(e6000sw_softc_t *sc, int port, int *pvid); -static int e6000sw_set_pvid(e6000sw_softc_t *sc, int port, int pvid); -static __inline int 
e6000sw_cpuport(e6000sw_softc_t *sc, int port); -static __inline struct mii_data *e6000sw_miiforphy(e6000sw_softc_t *sc, - unsigned int phy); - -static struct proc *e6000sw_kproc; +static void e6000sw_identify(driver_t *, device_t); +static int e6000sw_probe(device_t); +static int e6000sw_attach(device_t); +static int e6000sw_detach(device_t); +static int e6000sw_read_xmdio(device_t, int, int, int); +static int e6000sw_write_xmdio(device_t, int, int, int, int); +static int e6000sw_readphy(device_t, int, int); +static int e6000sw_writephy(device_t, int, int, int); +static etherswitch_info_t* e6000sw_getinfo(device_t); +static int e6000sw_getconf(device_t, etherswitch_conf_t *); +static int e6000sw_setconf(device_t, etherswitch_conf_t *); +static void e6000sw_lock(device_t); +static void e6000sw_unlock(device_t); +static int e6000sw_getport(device_t, etherswitch_port_t *); +static int e6000sw_setport(device_t, etherswitch_port_t *); +static int e6000sw_set_vlan_mode(e6000sw_softc_t *, uint32_t); +static int e6000sw_readreg_wrapper(device_t, int); +static int e6000sw_writereg_wrapper(device_t, int, int); +static int e6000sw_readphy_wrapper(device_t, int, int); +static int e6000sw_writephy_wrapper(device_t, int, int, int); +static int e6000sw_getvgroup_wrapper(device_t, etherswitch_vlangroup_t *); +static int e6000sw_setvgroup_wrapper(device_t, etherswitch_vlangroup_t *); +static int e6000sw_setvgroup(device_t, etherswitch_vlangroup_t *); +static int e6000sw_getvgroup(device_t, etherswitch_vlangroup_t *); +static int e6000sw_resetlagg(e6000sw_softc_t *); +static int e6000sw_getlaggroup_wrapper(device_t, etherswitch_laggroup_t *); +static int e6000sw_setlaggroup_wrapper(device_t, etherswitch_laggroup_t *); +static int e6000sw_setlaggroup(device_t, etherswitch_laggroup_t *); +static int e6000sw_getlaggroup(device_t, etherswitch_laggroup_t *); +static ssize_t e6000sw_getiosize(device_t); +static ssize_t e6000sw_getioblksize(device_t); +static void 
*e6000sw_getiobuf(device_t); +static int e6000sw_ioread(device_t, off_t, ssize_t); +static int e6000sw_iowrite(device_t, off_t, ssize_t); +static void e6000sw_setup(device_t, e6000sw_softc_t *); +static void e6000sw_tick(void *); +static void e6000sw_set_atustat(device_t, e6000sw_softc_t *, int, int); +static int e6000sw_atu_flush(device_t, e6000sw_softc_t *, int); +static __inline void e6000sw_writereg(e6000sw_softc_t *, int, int, int); +static __inline uint32_t e6000sw_readreg(e6000sw_softc_t *, int, int); +static int e6000sw_ifmedia_upd(struct ifnet *); +static void e6000sw_ifmedia_sts(struct ifnet *, struct ifmediareq *); +static int e6000sw_atu_mac_table(device_t, e6000sw_softc_t *, struct atu_opt *, + int); +static int e6000sw_vtu_flush(e6000sw_softc_t *); +static int e6000sw_vtu_update(e6000sw_softc_t *, int, int, int, int, int); +static int e6000sw_waitready(e6000sw_softc_t *, uint32_t, uint32_t, uint32_t); +static void e6000sw_get_pvid(e6000sw_softc_t *, int, int *); +static void e6000sw_set_pvid(e6000sw_softc_t *, int, int); +static __inline bool e6000sw_is_cpuport(e6000sw_softc_t *, int); +static __inline bool e6000sw_is_fixedport(e6000sw_softc_t *, int); +static __inline bool e6000sw_is_fixed25port(e6000sw_softc_t *, int); +static __inline bool e6000sw_is_phyport(e6000sw_softc_t *, int); +static __inline bool e6000sw_is_portenabled(e6000sw_softc_t *, int); +static __inline struct mii_data *e6000sw_miiforphy(e6000sw_softc_t *, + unsigned int); static device_method_t e6000sw_methods[] = { /* device interface */ @@ -156,6 +190,8 @@ static device_method_t e6000sw_methods[] = { /* etherswitch interface */ DEVMETHOD(etherswitch_getinfo, e6000sw_getinfo), + DEVMETHOD(etherswitch_getconf, e6000sw_getconf), + DEVMETHOD(etherswitch_setconf, e6000sw_setconf), DEVMETHOD(etherswitch_lock, e6000sw_lock), DEVMETHOD(etherswitch_unlock, e6000sw_unlock), DEVMETHOD(etherswitch_getport, e6000sw_getport), @@ -166,6 +202,13 @@ static device_method_t e6000sw_methods[] = { 
DEVMETHOD(etherswitch_writephyreg, e6000sw_writephy_wrapper), DEVMETHOD(etherswitch_setvgroup, e6000sw_setvgroup_wrapper), DEVMETHOD(etherswitch_getvgroup, e6000sw_getvgroup_wrapper), + DEVMETHOD(etherswitch_setlaggroup, e6000sw_setlaggroup_wrapper), + DEVMETHOD(etherswitch_getlaggroup, e6000sw_getlaggroup_wrapper), + DEVMETHOD(etherswitch_getioblksize, e6000sw_getioblksize), + DEVMETHOD(etherswitch_getiosize, e6000sw_getiosize), + DEVMETHOD(etherswitch_getiobuf, e6000sw_getiobuf), + DEVMETHOD(etherswitch_ioread, e6000sw_ioread), + DEVMETHOD(etherswitch_iowrite, e6000sw_iowrite), DEVMETHOD_END }; @@ -181,6 +224,145 @@ DRIVER_MODULE(etherswitch, e6000sw, etherswitch_driver, etherswitch_devclass, 0, DRIVER_MODULE(miibus, e6000sw, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(e6000sw, mdio, 1, 1, 1); +static SYSCTL_NODE(_hw, OID_AUTO, e6000sw, CTLFLAG_RD, 0, + "Marvell E6000 series Switch Parameters"); + +static int e6000sw_eeprom_wp = TRUE; +SYSCTL_INT(_hw_e6000sw, OID_AUTO, eeprom_wp, CTLFLAG_RDTUN, &e6000sw_eeprom_wp, + 0, "Enable eeprom write protect."); +static int e6000sw_default_disabled = FALSE; +SYSCTL_INT(_hw_e6000sw, OID_AUTO, default_disabled, CTLFLAG_RDTUN, + &e6000sw_default_disabled, 0, "Keep ports disabled at boot."); + +#undef E6000SW_DEBUG +#if defined(E6000SW_DEBUG) +static void +e6000sw_atu_dump(e6000sw_softc_t *sc, int fid) +{ + uint16_t data, mac1, mac2, mac3, reg; + + if (E6000SW_WAITREADY(sc, ATU_OPERATION, ATU_UNIT_BUSY)) { + device_printf(sc->dev, "ATU unit is busy, cannot access\n"); + return; + } + + /* Set the start MAC address and FID. 
*/ + e6000sw_writereg(sc, REG_GLOBAL, ATU_FID, fid); + e6000sw_writereg(sc, REG_GLOBAL, ATU_DATA, 0); + e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR01, 0); + e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR23, 0); + e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR45, 0); + reg = e6000sw_readreg(sc, REG_GLOBAL, ATU_OPERATION) & ~ATU_OP_MASK; + e6000sw_writereg(sc, REG_GLOBAL, ATU_OPERATION, reg | GET_NEXT_IN_FIB); + for (;;) { + reg = e6000sw_readreg(sc, REG_GLOBAL, ATU_OPERATION); + if ((reg & VTU_OP_MASK) != GET_NEXT_IN_FIB) { + device_printf(sc->dev, "Out of sync!\n"); + return; + } + e6000sw_writereg(sc, REG_GLOBAL, ATU_OPERATION, + reg | ATU_UNIT_BUSY); + if (E6000SW_WAITREADY(sc, ATU_OPERATION, ATU_UNIT_BUSY)) { + device_printf(sc->dev, "Timeout while reading\n"); + return; + } + data = e6000sw_readreg(sc, REG_GLOBAL, ATU_DATA); + if ((data & ATU_STATE_MASK) == 0) + return; + + mac1 = e6000sw_readreg(sc, REG_GLOBAL, ATU_MAC_ADDR01); + mac2 = e6000sw_readreg(sc, REG_GLOBAL, ATU_MAC_ADDR23); + mac3 = e6000sw_readreg(sc, REG_GLOBAL, ATU_MAC_ADDR45); + if (data & ATU_DATA_LAG) + device_printf(sc->dev, "fid: %4d lag: %3d ", fid, + (data & ATU_LAG_MASK) >> ATU_LAG_SHIFT); + else + device_printf(sc->dev, "fid: %4d port: %2d ", fid, + ffs((data & ATU_PORT_MASK(sc)) >> ATU_PORT_SHIFT) - 1); + printf("MAC: %02x:%02x:%02x:%02x:%02x:%02x (%#x)\n", + (mac1 >> 8) & 0xff, mac1 & 0xff, + (mac2 >> 8) & 0xff, mac2 & 0xff, + (mac3 >> 8) & 0xff, mac3 & 0xff, data); + } +} + +#define E6000SW_BUFSZ 32 + +static void +e6000sw_vtu_dump(e6000sw_softc_t *sc) +{ + char *buf, discard[E6000SW_BUFSZ], tagged[E6000SW_BUFSZ]; + char unmodified[E6000SW_BUFSZ], untagged[E6000SW_BUFSZ]; + char tmp[E6000SW_BUFSZ]; + int i, port, vlan; + uint32_t reg; + + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "VTU unit is busy, cannot access\n"); + return; + } + + /* Start at VID 1. 
*/ + e6000sw_writereg(sc, REG_GLOBAL, VTU_VID, 0); + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_OPERATION) & ~VTU_OP_MASK; + e6000sw_writereg(sc, REG_GLOBAL, VTU_OPERATION, reg | VTU_GET_NEXT); + for (;;) { + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_OPERATION); + if ((reg & VTU_OP_MASK) != VTU_GET_NEXT) { + device_printf(sc->dev, "Out of sync!\n"); + return; + } + e6000sw_writereg(sc, REG_GLOBAL, VTU_OPERATION, reg | VTU_BUSY); + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "Timeout while reading\n"); + return; + } + + vlan = e6000sw_readreg(sc, REG_GLOBAL, VTU_VID); + if (vlan == VTU_VID_MASK || (vlan & VTU_VID_VALID) == 0) + return; + + memset(discard, 0, sizeof(discard)); + memset(tagged, 0, sizeof(tagged)); + memset(unmodified, 0, sizeof(unmodified)); + memset(untagged, 0, sizeof(untagged)); + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_DATA); + for (i = 0; i < sc->num_ports; i++) { + if (i == VTU_PPREG(sc)) + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_DATA2); + port = (reg >> VTU_PORT(sc, i)) & VTU_PORT_MASK; + if (port == VTU_PORT_UNMODIFIED) + buf = unmodified; + else if (port == VTU_PORT_UNTAGGED) + buf = untagged; + else if (port == VTU_PORT_TAGGED) + buf = tagged; + else if (port == VTU_PORT_DISCARD) + buf = discard; + else + buf = NULL; + memset(tmp, 0, sizeof(tmp)); + snprintf(tmp, sizeof(tmp), "%d", i); + if (buf != NULL) { + if (strlen(buf) > 0) + strlcat(buf, ",", E6000SW_BUFSZ); + strlcat(buf, tmp, E6000SW_BUFSZ); + } + } + + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_FID); + device_printf(sc->dev, + "fid: %4d%s vlan: %4d discard: %22s tagged: %22s untagged: %22s unmodified: %22s\n", + reg & VTU_FID_MASK(sc), (reg & VTU_FID_POLICY) ? "*" : "", + vlan & VTU_VID_MASK, + strlen(discard) > 0 ? discard : "none", + strlen(tagged) > 0 ? tagged : "none", + strlen(untagged) > 0 ? untagged : "none", + strlen(unmodified) > 0 ? 
unmodified : "none"); + } +} +#endif + static void e6000sw_identify(driver_t *driver, device_t parent) { @@ -192,108 +374,484 @@ e6000sw_identify(driver_t *driver, device_t parent) static int e6000sw_probe(device_t dev) { + int is_8190; e6000sw_softc_t *sc; const char *description; - unsigned int id; +#ifdef FDT + phandle_t dsa_node, switch_node; +#endif + is_8190 = 0; sc = device_get_softc(dev); - bzero(sc, sizeof(e6000sw_softc_t)); sc->dev = dev; - /* Lock is necessary due to assertions. */ - sx_init(&sc->sx, "e6000sw"); - E6000SW_LOCK(sc); - id = e6000sw_readreg(sc, REG_PORT(0), SWITCH_ID); + /* Do not set iosize until iobuf is ready. */ + sc->iosize = -1; + sc->iobuf = NULL; + +#ifdef FDT + dsa_node = fdt_find_compatible(OF_finddevice("/"), + "marvell,dsa", 0); + switch_node = OF_child(dsa_node); + if (switch_node == 0) + return (ENXIO); + sc->node = switch_node; + + if (OF_getencprop(sc->node, "reg", &sc->sw_addr, + sizeof(sc->sw_addr)) < 0) + return (ENXIO); +#else + if (resource_int_value(device_get_name(sc->dev), + device_get_unit(sc->dev), "addr", &sc->sw_addr) != 0) + return (ENXIO); + resource_int_value(device_get_name(sc->dev), + device_get_unit(sc->dev), "is8190", &is_8190); +#endif + if (sc->sw_addr < 0 || sc->sw_addr > 32) + return (ENXIO); + + /* Set defaults for 88E6XXX family. */ + sc->ports_mask = 0x7f; + sc->port_base = 0x10; + + /* 88E6190 with 11 ports uses a different mapping. */ + if (is_8190 != 0) { + sc->port_base = 0; + sc->ports_mask = 0x7ff; + } - switch (id & 0xfff0) { - case 0x3520: + /* + * Create temporary lock, just to satisfy assertions, + * when obtaining the switch ID. Destroy immediately afterwards. 
+ */ + sx_init(&sc->sx, "e6000sw_tmp"); + E6000SW_LOCK(sc); + sc->swid = e6000sw_readreg(sc, REG_PORT(sc, 0), SWITCH_ID) & 0xfff0; + E6000SW_UNLOCK(sc); + sx_destroy(&sc->sx); + + sc->num_laggs = 16; + switch (sc->swid) { + case MV88E6141: + description = "Marvell 88E6141"; + sc->phy_base = 0x10; + sc->num_ports = 6; + break; + case MV88E6341: + description = "Marvell 88E6341"; + sc->phy_base = 0x10; + sc->num_ports = 6; + break; + case MV88E6352: description = "Marvell 88E6352"; + sc->num_ports = 7; break; - case 0x1720: + case MV88E6172: description = "Marvell 88E6172"; + sc->num_ports = 7; break; - case 0x1760: + case MV88E6176: description = "Marvell 88E6176"; + sc->num_ports = 7; + break; + case MV88E6190: + description = "Marvell 88E6190"; + //sc->num_laggs = 32; /* Only 16 LAGGs for now. */ + sc->num_ports = 11; break; default: - E6000SW_UNLOCK(sc); - sx_destroy(&sc->sx); - device_printf(dev, "Unrecognized device.\n"); + device_printf(dev, "Unrecognized device, id 0x%x.\n", sc->swid); return (ENXIO); } device_set_desc(dev, description); - E6000SW_UNLOCK(sc); - return (BUS_PROBE_DEFAULT); } +#ifdef FDT +static int +e6000sw_parse_child_fdt(e6000sw_softc_t *sc, phandle_t child, int *pport) +{ + char *name, *portlabel; + int speed; + phandle_t fixed_link; + uint32_t port; + + if (pport == NULL) + return (ENXIO); + + if (OF_getencprop(child, "reg", (void *)&port, sizeof(port)) < 0) + return (ENXIO); + if (port >= sc->num_ports) + return (ENXIO); + *pport = port; + + if (OF_getprop_alloc(child, "label", 1, (void **)&portlabel) > 0) { + if (strncmp(portlabel, "cpu", 3) == 0) { + if (bootverbose) + device_printf(sc->dev, "CPU port at %d\n", port); + sc->cpuports_mask |= (1 << port); + sc->fixed_mask |= (1 << port); + } + free(portlabel, M_OFWPROP); + } + + fixed_link = OF_child(child); + if (fixed_link != 0 && + OF_getprop_alloc(fixed_link, "name", 1, (void **)&name) > 0) { + if (strncmp(name, "fixed-link", 10) == 0) { + /* Assume defaults: 1g - full-duplex. 
*/ + sc->fixed_mask |= (1 << port); + if (OF_getencprop(fixed_link, "speed", &speed, + sizeof(speed)) > 0) { + if (speed == 2500 && + (MVSWITCH(sc, MV88E6141) || + MVSWITCH(sc, MV88E6341))) { + sc->fixed25_mask |= (1 << port); + } + } + } + free(name, M_OFWPROP); + } + if (bootverbose) { + if ((sc->fixed_mask & (1 << port)) != 0) + device_printf(sc->dev, "fixed port at %d\n", port); + else + device_printf(sc->dev, "PHY at port %d\n", port); + } + + return (0); +} +#else + +static int +e6000sw_check_hint_val(device_t dev, int *val, char *fmt, ...) +{ + char *resname; + int err, len; + va_list ap; + + len = min(strlen(fmt) * 2, 128); + if (len == 0) + return (-1); + resname = malloc(len, M_E6000SW, M_WAITOK); + memset(resname, 0, len); + va_start(ap, fmt); + vsnprintf(resname, len - 1, fmt, ap); + va_end(ap); + err = resource_int_value(device_get_name(dev), device_get_unit(dev), + resname, val); + free(resname, M_E6000SW); + + return (err); +} + +static int +e6000sw_parse_hinted_port(e6000sw_softc_t *sc, int port) +{ + int err, val; + + err = e6000sw_check_hint_val(sc->dev, &val, "port%ddisabled", port); + if (err == 0 && val != 0) + return (1); + + err = e6000sw_check_hint_val(sc->dev, &val, "port%dcpu", port); + if (err == 0 && val != 0) { + sc->cpuports_mask |= (1 << port); + sc->fixed_mask |= (1 << port); + if (bootverbose) + device_printf(sc->dev, "CPU port at %d\n", port); + } + err = e6000sw_check_hint_val(sc->dev, &val, "port%dspeed", port); + if (err == 0 && val != 0) { + sc->fixed_mask |= (1 << port); + if (val == 2500) + sc->fixed25_mask |= (1 << port); + } + + if (bootverbose) { + if ((sc->fixed_mask & (1 << port)) != 0) + device_printf(sc->dev, "fixed port at %d\n", port); + else + device_printf(sc->dev, "PHY at port %d\n", port); + } + + return (0); +} +#endif + +static int +e6000sw_init_interface(e6000sw_softc_t *sc, int port) +{ + char name[IFNAMSIZ]; + + snprintf(name, IFNAMSIZ, "%sport", device_get_nameunit(sc->dev)); + + sc->ifp[port] = 
if_alloc(IFT_ETHER); + if (sc->ifp[port] == NULL) + return (ENOMEM); + sc->ifp[port]->if_softc = sc; + sc->ifp[port]->if_flags |= IFF_UP | IFF_BROADCAST | + IFF_DRV_RUNNING | IFF_SIMPLEX; + sc->ifname[port] = malloc(strlen(name) + 1, M_E6000SW, M_NOWAIT); + if (sc->ifname[port] == NULL) { + if_free(sc->ifp[port]); + return (ENOMEM); + } + memcpy(sc->ifname[port], name, strlen(name) + 1); + if_initname(sc->ifp[port], sc->ifname[port], port); + + return (0); +} + +static int +e6000sw_attach_miibus(e6000sw_softc_t *sc, int port) +{ + int err; + + err = mii_attach(sc->dev, &sc->miibus[port], sc->ifp[port], + e6000sw_ifmedia_upd, e6000sw_ifmedia_sts, BMSR_DEFCAPMASK, + port + sc->phy_base, MII_OFFSET_ANY, 0); + if (err != 0) + return (err); + + return (0); +} + +static void +e6000sw_serdes_power(device_t dev, int port, bool sgmii) +{ + uint32_t reg; + + /* SGMII */ + reg = e6000sw_read_xmdio(dev, port, E6000SW_SERDES_DEV, + E6000SW_SERDES_SGMII_CTL); + if (sgmii) + reg &= ~E6000SW_SERDES_PDOWN; + else + reg |= E6000SW_SERDES_PDOWN; + e6000sw_write_xmdio(dev, port, E6000SW_SERDES_DEV, + E6000SW_SERDES_SGMII_CTL, reg); + + /* 10GBASE-R/10GBASE-X4/X2 */ + reg = e6000sw_read_xmdio(dev, port, E6000SW_SERDES_DEV, + E6000SW_SERDES_PCS_CTL1); + if (sgmii) + reg |= E6000SW_SERDES_PDOWN; + else + reg &= ~E6000SW_SERDES_PDOWN; + e6000sw_write_xmdio(dev, port, E6000SW_SERDES_DEV, + E6000SW_SERDES_PCS_CTL1, reg); +} + static int e6000sw_attach(device_t dev) { + bool sgmii; e6000sw_softc_t *sc; - int phy, err, port; - char name[IFNAMSIZ]; +#ifdef FDT + phandle_t child; +#endif + int err, port; + uint32_t reg; err = 0; sc = device_get_softc(dev); + + if (E6000SW_MULTICHIP(sc)) + device_printf(dev, "multi-chip addressing mode (%#x)\n", + sc->sw_addr); + else + device_printf(dev, "single-chip addressing mode\n"); + + sx_init(&sc->sx, "e6000sw"); + sc->iobuf = malloc(E6000SW_IOBUF_BLKSIZE, M_E6000SW, M_WAITOK); + sc->iosize = E6000SW_IOBUF_SIZE; + E6000SW_LOCK(sc); - sc->cpuports_mask = 
E6000SW_CPUPORTS_MASK; - for (port = 0; port < E6000SW_NUM_PORTS; port++) - sc->vgroup[port] = E6000SW_PORT_NO_VGROUP; e6000sw_setup(dev, sc); - snprintf(name, IFNAMSIZ, "%sport", device_get_nameunit(sc->dev)); - for (phy = 0; phy < E6000SW_NUM_PHYS; phy++) { - sc->ifp[phy] = if_alloc(IFT_ETHER); - if (sc->ifp[phy] == NULL) +#ifdef FDT + for (child = OF_child(sc->node); child != 0; child = OF_peer(child)) { + err = e6000sw_parse_child_fdt(sc, child, &port); + if (err != 0) { + device_printf(sc->dev, "failed to parse DTS\n"); goto out_fail; - sc->ifp[phy]->if_softc = sc; - sc->ifp[phy]->if_flags |= IFF_UP | IFF_BROADCAST | - IFF_DRV_RUNNING | IFF_SIMPLEX; - sc->ifname[phy] = malloc(strlen(name) + 1, M_E6000SW, M_WAITOK); - if (sc->ifname[phy] == NULL) + } +#else + for (port = 0; port < sc->num_ports; port++) { + err = e6000sw_parse_hinted_port(sc, port); + if (err != 0) + continue; +#endif + + /* Port is in use. */ + sc->used_mask |= (1 << port); + + err = e6000sw_init_interface(sc, port); + if (err != 0) { + device_printf(sc->dev, "failed to init interface\n"); goto out_fail; - bcopy(name, sc->ifname[phy], strlen(name) + 1); - if_initname(sc->ifp[phy], sc->ifname[phy], phy); - err = mii_attach(sc->dev, &sc->miibus[phy], sc->ifp[phy], - e6000sw_ifmedia_upd, e6000sw_ifmedia_sts, BMSR_DEFCAPMASK, - phy, MII_OFFSET_ANY, 0); + } + + if (e6000sw_is_fixedport(sc, port)) { + /* Link must be down to change speed force value. */ + reg = e6000sw_readreg(sc, REG_PORT(sc, port), + PSC_CONTROL); + reg &= ~PSC_CONTROL_LINK_UP; + reg |= PSC_CONTROL_FORCED_LINK; + e6000sw_writereg(sc, REG_PORT(sc, port), PSC_CONTROL, + reg); + + /* + * Force speed, full-duplex, EEE off and flow-control + * on. 
+ */ + reg &= ~(PSC_CONTROL_SPD2500 | PSC_CONTROL_ALT_SPD | + PSC_CONTROL_FORCED_FC | PSC_CONTROL_FC_ON | + PSC_CONTROL_FORCED_EEE); + if (e6000sw_is_fixed25port(sc, port)) + reg |= PSC_CONTROL_SPD2500; + else + reg |= PSC_CONTROL_SPD1000; + if (MVSWITCH(sc, MV88E6190) && + e6000sw_is_fixed25port(sc, port)) + reg |= PSC_CONTROL_ALT_SPD; + reg |= PSC_CONTROL_FORCED_DPX | PSC_CONTROL_FULLDPX | + PSC_CONTROL_FORCED_LINK | PSC_CONTROL_LINK_UP | + PSC_CONTROL_FORCED_SPD; + if (!MVSWITCH(sc, MV88E6190)) + reg |= PSC_CONTROL_FORCED_FC | PSC_CONTROL_FC_ON; + if (MVSWITCH(sc, MV88E6141) || + MVSWITCH(sc, MV88E6341) || + MVSWITCH(sc, MV88E6190)) + reg |= PSC_CONTROL_FORCED_EEE; + e6000sw_writereg(sc, REG_PORT(sc, port), PSC_CONTROL, + reg); + /* Power on the SERDES interfaces. */ + if (MVSWITCH(sc, MV88E6190) && + (port == 9 || port == 10)) { + if (e6000sw_is_fixed25port(sc, port)) + sgmii = false; + else + sgmii = true; + e6000sw_serdes_power(sc->dev, port, sgmii); + } + } + + /* Don't attach miibus at CPU/fixed ports */ + if (!e6000sw_is_phyport(sc, port)) + continue; + + err = e6000sw_attach_miibus(sc, port); if (err != 0) { - device_printf(sc->dev, - "attaching PHY %d failed\n", - phy); + device_printf(sc->dev, "failed to attach miibus\n"); goto out_fail; } - sc->mii[phy] = device_get_softc(sc->miibus[phy]); } + + etherswitch_info.es_nports = sc->num_ports; + etherswitch_info.es_nlaggroups = sc->num_laggs; + etherswitch_info.es_ports_mask[0] = sc->used_mask; + + /* Reset LAGG settings. */ + e6000sw_resetlagg(sc); + + /* Default to port vlan. 
*/ + e6000sw_set_vlan_mode(sc, ETHERSWITCH_VLAN_PORT); + + reg = e6000sw_readreg(sc, REG_GLOBAL, SWITCH_GLOBAL_STATUS); + if (reg & SWITCH_GLOBAL_STATUS_IR) + device_printf(dev, "switch is ready.\n"); E6000SW_UNLOCK(sc); bus_generic_probe(dev); - bus_enumerate_hinted_children(dev); bus_generic_attach(dev); - kproc_create(e6000sw_tick, sc, &e6000sw_kproc, 0, 0, - "e6000sw tick kproc"); + kproc_create(e6000sw_tick, sc, &sc->kproc, 0, 0, "e6000sw tick kproc"); return (0); out_fail: + E6000SW_UNLOCK(sc); e6000sw_detach(dev); - return (ENXIO); + return (err); } -static __inline void -e6000sw_poll_done(e6000sw_softc_t *sc) +/* XMDIO/Clause 45 access. */ +static int +e6000sw_read_xmdio(device_t dev, int phy, int devaddr, int devreg) { + e6000sw_softc_t *sc; + uint32_t reg; + + sc = device_get_softc(dev); + E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } - while (e6000sw_readreg(sc, REG_GLOBAL2, PHY_CMD) & - (1 << PHY_CMD_SMI_BUSY)) - continue; + reg = devaddr & SMI_CMD_REG_ADDR_MASK; + reg |= (phy << SMI_CMD_DEV_ADDR) & SMI_CMD_DEV_ADDR_MASK; + + /* Load C45 register address. */ + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_DATA_REG, devreg); + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, + reg | SMI_CMD_OP_C45_ADDR); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } + + /* Start C45 read operation. */ + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, + reg | SMI_CMD_OP_C45_READ); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } + + /* Read C45 data. 
*/ + reg = e6000sw_readreg(sc, REG_GLOBAL2, SMI_PHY_DATA_REG); + + return (reg & PHY_DATA_MASK); } +static int +e6000sw_write_xmdio(device_t dev, int phy, int devaddr, int devreg, int val) +{ + e6000sw_softc_t *sc; + uint32_t reg; + + sc = device_get_softc(dev); + E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } + + reg = devaddr & SMI_CMD_REG_ADDR_MASK; + reg |= (phy << SMI_CMD_DEV_ADDR) & SMI_CMD_DEV_ADDR_MASK; + + /* Load C45 register address. */ + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_DATA_REG, devreg); + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, + reg | SMI_CMD_OP_C45_ADDR); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } + + /* Load data and start the C45 write operation. */ + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_DATA_REG, devreg); + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, + reg | SMI_CMD_OP_C45_WRITE); + + return (0); +} /* * PHY registers are paged. 
Put page index in reg 22 (accessible from every @@ -306,55 +864,52 @@ e6000sw_readphy(device_t dev, int phy, int reg) uint32_t val; sc = device_get_softc(dev); - val = 0; - - if (phy >= E6000SW_NUM_PHYS || reg >= E6000SW_NUM_PHY_REGS) { + if (!e6000sw_is_phyport(sc, phy) || reg >= E6000SW_NUM_PHY_REGS) { device_printf(dev, "Wrong register address.\n"); return (EINVAL); } E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } - e6000sw_poll_done(sc); - val |= 1 << PHY_CMD_SMI_BUSY; - val |= PHY_CMD_MODE_MDIO << PHY_CMD_MODE; - val |= PHY_CMD_OPCODE_READ << PHY_CMD_OPCODE; - val |= (reg << PHY_CMD_REG_ADDR) & PHY_CMD_REG_ADDR_MASK; - val |= (phy << PHY_CMD_DEV_ADDR) & PHY_CMD_DEV_ADDR_MASK; - e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, val); - e6000sw_poll_done(sc); - val = e6000sw_readreg(sc, REG_GLOBAL2, SMI_PHY_DATA_REG) - & PHY_DATA_MASK; + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, + SMI_CMD_OP_C22_READ | (reg & SMI_CMD_REG_ADDR_MASK) | + ((phy << SMI_CMD_DEV_ADDR) & SMI_CMD_DEV_ADDR_MASK)); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } - return (val); + val = e6000sw_readreg(sc, REG_GLOBAL2, SMI_PHY_DATA_REG); + + return (val & PHY_DATA_MASK); } static int e6000sw_writephy(device_t dev, int phy, int reg, int data) { e6000sw_softc_t *sc; - uint32_t val; sc = device_get_softc(dev); - val = 0; - - if (phy >= E6000SW_NUM_PHYS || reg >= E6000SW_NUM_PHY_REGS) { + if (!e6000sw_is_phyport(sc, phy) || reg >= E6000SW_NUM_PHY_REGS) { device_printf(dev, "Wrong register address.\n"); return (EINVAL); } E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); + if (E6000SW_WAITREADY2(sc, SMI_PHY_CMD_REG, SMI_CMD_BUSY)) { + device_printf(dev, "Timeout while waiting for switch\n"); + return (ETIMEDOUT); + } - e6000sw_poll_done(sc); - val |= 
PHY_CMD_MODE_MDIO << PHY_CMD_MODE; - val |= 1 << PHY_CMD_SMI_BUSY; - val |= PHY_CMD_OPCODE_WRITE << PHY_CMD_OPCODE; - val |= (reg << PHY_CMD_REG_ADDR) & PHY_CMD_REG_ADDR_MASK; - val |= (phy << PHY_CMD_DEV_ADDR) & PHY_CMD_DEV_ADDR_MASK; e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_DATA_REG, - data & PHY_DATA_MASK); - e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, val); - e6000sw_poll_done(sc); + data & PHY_DATA_MASK); + e6000sw_writereg(sc, REG_GLOBAL2, SMI_PHY_CMD_REG, + SMI_CMD_OP_C22_WRITE | (reg & SMI_CMD_REG_ADDR_MASK) | + ((phy << SMI_CMD_DEV_ADDR) & SMI_CMD_DEV_ADDR_MASK)); return (0); } @@ -367,8 +922,10 @@ e6000sw_detach(device_t dev) sc = device_get_softc(dev); bus_generic_detach(dev); + if (sc->iobuf != NULL) + free(sc->iobuf, M_E6000SW); sx_destroy(&sc->sx); - for (phy = 0; phy < E6000SW_NUM_PHYS; phy++) { + for (phy = 0; phy < sc->num_ports; phy++) { if (sc->miibus[phy] != NULL) device_delete_child(dev, sc->miibus[phy]); if (sc->ifp[phy] != NULL) @@ -383,10 +940,54 @@ e6000sw_detach(device_t dev) static etherswitch_info_t* e6000sw_getinfo(device_t dev) { +#if defined(E6000SW_DEBUG) + int i; + struct e6000sw_softc *sc; + + sc = device_get_softc(dev); + E6000SW_LOCK(sc); + if (sc->vlan_mode == ETHERSWITCH_VLAN_DOT1Q) { + e6000sw_vtu_dump(sc); + for (i = 0; i < etherswitch_info.es_nvlangroups; i++) + if (sc->vlans[i] != 0) + e6000sw_atu_dump(sc, i + 1); + } else + e6000sw_atu_dump(sc, 0); + E6000SW_UNLOCK(sc); +#endif return (ðerswitch_info); } +static int +e6000sw_getconf(device_t dev, etherswitch_conf_t *conf) +{ + struct e6000sw_softc *sc; + + /* Return the VLAN mode. */ + sc = device_get_softc(dev); + conf->cmd = ETHERSWITCH_CONF_VLAN_MODE; + conf->vlan_mode = sc->vlan_mode; + + return (0); +} + +static int +e6000sw_setconf(device_t dev, etherswitch_conf_t *conf) +{ + struct e6000sw_softc *sc; + + /* Set the VLAN mode. 
*/ + sc = device_get_softc(dev); + if (conf->cmd & ETHERSWITCH_CONF_VLAN_MODE) { + E6000SW_LOCK(sc); + e6000sw_set_vlan_mode(sc, conf->vlan_mode); + E6000SW_UNLOCK(sc); + } + + return (0); +} + static void e6000sw_lock(device_t dev) { @@ -412,40 +1013,66 @@ e6000sw_unlock(device_t dev) static int e6000sw_getport(device_t dev, etherswitch_port_t *p) { - struct mii_data *mii; int err; struct ifmediareq *ifmr; + struct mii_data *mii; + uint16_t reg; - err = 0; e6000sw_softc_t *sc = device_get_softc(dev); E6000SW_LOCK_ASSERT(sc, SA_UNLOCKED); + if (p->es_port >= sc->num_ports || p->es_port < 0) + return (EINVAL); + if (!e6000sw_is_portenabled(sc, p->es_port)) + return (0); + E6000SW_LOCK(sc); + e6000sw_get_pvid(sc, p->es_port, &p->es_pvid); - if (p->es_port >= E6000SW_NUM_PORTS || - p->es_port < 0) { - err = EINVAL; - goto out; + /* Port state. */ + reg = e6000sw_readreg(sc, REG_PORT(sc, p->es_port), PORT_CONTROL); + switch (reg & PORT_CONTROL_ENABLE) { + case PORT_CONTROL_BLOCKING: + p->es_state = ETHERSWITCH_PSTATE_BLOCKING; + break; + case PORT_CONTROL_LEARNING: + p->es_state = ETHERSWITCH_PSTATE_LEARNING; + break; + case PORT_CONTROL_FORWARDING: + p->es_state = ETHERSWITCH_PSTATE_FORWARDING; + break; + default: + p->es_state = ETHERSWITCH_PSTATE_DISABLED; } - e6000sw_get_pvid(sc, p->es_port, &p->es_pvid); + /* Port flags. 
*/ + reg = e6000sw_readreg(sc, REG_PORT(sc, p->es_port), PORT_CONTROL2); + if (reg & PORT_CONTROL2_DISC_TAGGED) + p->es_flags |= ETHERSWITCH_PORT_DROPTAGGED; + if (reg & PORT_CONTROL2_DISC_UNTAGGED) + p->es_flags |= ETHERSWITCH_PORT_DROPUNTAGGED; - if (e6000sw_cpuport(sc, p->es_port)) { - p->es_flags |= ETHERSWITCH_PORT_CPU; + err = 0; + if (e6000sw_is_fixedport(sc, p->es_port)) { + if (e6000sw_is_cpuport(sc, p->es_port)) + p->es_flags |= ETHERSWITCH_PORT_CPU; ifmr = &p->es_ifmr; ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID; ifmr->ifm_count = 0; - ifmr->ifm_current = ifmr->ifm_active = - IFM_ETHER | IFM_1000_T | IFM_FDX; + if (e6000sw_is_fixed25port(sc, p->es_port)) + ifmr->ifm_active = IFM_2500_KX; /* IFM_2500_T */ + else + ifmr->ifm_active = IFM_1000_T; + ifmr->ifm_active |= IFM_ETHER | IFM_FDX; + ifmr->ifm_current = ifmr->ifm_active; ifmr->ifm_mask = 0; } else { mii = e6000sw_miiforphy(sc, p->es_port); err = ifmedia_ioctl(mii->mii_ifp, &p->es_ifr, &mii->mii_media, SIOCGIFMEDIA); } - -out: E6000SW_UNLOCK(sc); + return (err); } @@ -455,32 +1082,194 @@ e6000sw_setport(device_t dev, etherswitch_port_t *p) e6000sw_softc_t *sc; int err; struct mii_data *mii; + uint16_t reg; - err = 0; sc = device_get_softc(dev); E6000SW_LOCK_ASSERT(sc, SA_UNLOCKED); + if (p->es_port >= sc->num_ports || p->es_port < 0) + return (EINVAL); + if (!e6000sw_is_portenabled(sc, p->es_port)) + return (0); + + err = 0; E6000SW_LOCK(sc); - if (p->es_port >= E6000SW_NUM_PORTS || - p->es_port < 0) { - err = EINVAL; - goto out; + /* Port state. 
*/ + reg = e6000sw_readreg(sc, REG_PORT(sc, p->es_port), PORT_CONTROL); + reg &= ~PORT_CONTROL_ENABLE; + switch (p->es_state) { + case ETHERSWITCH_PSTATE_BLOCKING: + reg |= PORT_CONTROL_BLOCKING; + break; + case ETHERSWITCH_PSTATE_LEARNING: + reg |= PORT_CONTROL_LEARNING; + break; + case ETHERSWITCH_PSTATE_FORWARDING: + reg |= PORT_CONTROL_FORWARDING; + break; + default: + reg |= PORT_CONTROL_DISABLED; } - + e6000sw_writereg(sc, REG_PORT(sc, p->es_port), PORT_CONTROL, reg); + + /* Port flags. */ + reg = e6000sw_readreg(sc, REG_PORT(sc, p->es_port), PORT_CONTROL2); + if (p->es_flags & ETHERSWITCH_PORT_DROPTAGGED) + reg |= PORT_CONTROL2_DISC_TAGGED; + else + reg &= ~PORT_CONTROL2_DISC_TAGGED; + if (p->es_flags & ETHERSWITCH_PORT_DROPUNTAGGED) + reg |= PORT_CONTROL2_DISC_UNTAGGED; + else + reg &= ~PORT_CONTROL2_DISC_UNTAGGED; + e6000sw_writereg(sc, REG_PORT(sc, p->es_port), PORT_CONTROL2, reg); if (p->es_pvid != 0) e6000sw_set_pvid(sc, p->es_port, p->es_pvid); - if (!e6000sw_cpuport(sc, p->es_port)) { + if (e6000sw_is_phyport(sc, p->es_port)) { mii = e6000sw_miiforphy(sc, p->es_port); err = ifmedia_ioctl(mii->mii_ifp, &p->es_ifr, &mii->mii_media, SIOCSIFMEDIA); } - -out: E6000SW_UNLOCK(sc); + return (err); } +static __inline void +e6000sw_port_vlan_assign(e6000sw_softc_t *sc, int port, uint32_t fid, + uint32_t members) +{ + uint32_t reg; + + reg = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_VLAN_MAP); + reg &= ~sc->ports_mask; + reg &= ~PORT_VLAN_MAP_FID_MASK; + reg |= members & sc->ports_mask & ~(1 << port); + reg |= (fid << PORT_VLAN_MAP_FID) & PORT_VLAN_MAP_FID_MASK; + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_VLAN_MAP, reg); + reg = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_CONTROL1); + reg &= ~PORT_CONTROL1_FID_MASK; + reg |= (fid >> 4) & PORT_CONTROL1_FID_MASK; + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_CONTROL1, reg); +} + +static int +e6000sw_init_vlan(struct e6000sw_softc *sc) +{ + int i, port, ret; + uint32_t members; + +#if 
defined(E6000SW_DEBUG) + e6000sw_vtu_dump(sc); +#endif + + /* Disable all ports */ + for (port = 0; port < sc->num_ports; port++) { + ret = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_CONTROL); + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_CONTROL, + (ret & ~PORT_CONTROL_ENABLE)); + } + + /* Flush VTU. */ + e6000sw_vtu_flush(sc); + + for (port = 0; port < sc->num_ports; port++) { + /* Reset the egress and frame mode. */ + ret = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_CONTROL); + ret &= ~(PORT_CONTROL_EGRESS | PORT_CONTROL_FRAME); + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_CONTROL, ret); + + /* Set the the 802.1q mode. */ + ret = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_CONTROL2); + ret &= ~PORT_CONTROL2_DOT1Q; + if (sc->vlan_mode == ETHERSWITCH_VLAN_DOT1Q) + ret |= PORT_CONTROL2_DOT1Q; + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_CONTROL2, ret); + } + + for (port = 0; port < sc->num_ports; port++) { + if (!e6000sw_is_portenabled(sc, port)) + continue; + + ret = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_VID); + + /* Set port priority */ + ret &= ~PORT_VID_PRIORITY_MASK; + + /* Set VID map */ + ret &= ~PORT_VID_DEF_VID_MASK; + if (sc->vlan_mode == ETHERSWITCH_VLAN_DOT1Q) + ret |= 1; + else + ret |= (port + 1); + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_VID, ret); + } + + /* Assign the member ports to each origin port. */ + for (port = 0; port < sc->num_ports; port++) { + members = 0; + if (e6000sw_is_portenabled(sc, port)) { + for (i = 0; i < sc->num_ports; i++) { + if (i == port || !e6000sw_is_portenabled(sc, i)) + continue; + members |= (1 << i); + } + } + /* Default to FID 0. */ + e6000sw_port_vlan_assign(sc, port, 0, members); + } + + /* Reset internal VLAN table. */ + for (i = 0; i < nitems(sc->vlans); i++) + sc->vlans[i] = 0; + + /* Create default VLAN (1). 
*/ + if (sc->vlan_mode == ETHERSWITCH_VLAN_DOT1Q) { + sc->vlans[0] = 1; + e6000sw_vtu_update(sc, 0, sc->vlans[0], 1, 0, sc->used_mask); + } + + if (e6000sw_default_disabled == false) { + /* Enable all ports */ + for (port = 0; port < sc->num_ports; port++) { + if (!e6000sw_is_portenabled(sc, port)) + continue; + ret = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_CONTROL); + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_CONTROL, + (ret | PORT_CONTROL_ENABLE)); + } + } + +#if defined(E6000SW_DEBUG) + if (sc->vlan_mode == ETHERSWITCH_VLAN_DOT1Q) + e6000sw_vtu_dump(sc); +#endif + + return (0); +} + +static int +e6000sw_set_vlan_mode(struct e6000sw_softc *sc, uint32_t mode) +{ + + E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); + switch (mode) { + case ETHERSWITCH_VLAN_PORT: + sc->vlan_mode = ETHERSWITCH_VLAN_PORT; + etherswitch_info.es_nvlangroups = sc->num_ports; + return (e6000sw_init_vlan(sc)); + break; + case ETHERSWITCH_VLAN_DOT1Q: + sc->vlan_mode = ETHERSWITCH_VLAN_DOT1Q; + etherswitch_info.es_nvlangroups = E6000SW_NUM_VLANS; + return (e6000sw_init_vlan(sc)); + break; + default: + return (EINVAL); + } +} + /* * Registers in this switch are divided into sections, specified in * documentation. 
So as to access any of them, section index and reg index @@ -490,9 +1279,11 @@ out: static int e6000sw_readreg_wrapper(device_t dev, int addr_reg) { - + e6000sw_softc_t *sc; + + sc = device_get_softc(dev); if ((addr_reg > (REG_GLOBAL2 * 32 + REG_NUM_MAX)) || - (addr_reg < (REG_PORT(0) * 32))) { + (addr_reg < (REG_PORT(sc, 0) * 32))) { device_printf(dev, "Wrong register address.\n"); return (EINVAL); } @@ -504,9 +1295,11 @@ e6000sw_readreg_wrapper(device_t dev, int addr_reg) static int e6000sw_writereg_wrapper(device_t dev, int addr_reg, int val) { - + e6000sw_softc_t *sc; + + sc = device_get_softc(dev); if ((addr_reg > (REG_GLOBAL2 * 32 + REG_NUM_MAX)) || - (addr_reg < (REG_PORT(0) * 32))) { + (addr_reg < (REG_PORT(sc, 0) * 32))) { device_printf(dev, "Wrong register address.\n"); return (EINVAL); } @@ -588,98 +1381,365 @@ e6000sw_getvgroup_wrapper(device_t dev, etherswitch_vlangroup_t *vg) return (ret); } -static __inline void -e6000sw_flush_port(e6000sw_softc_t *sc, int port) +static int +e6000sw_setlaggroup_wrapper(device_t dev, etherswitch_laggroup_t *lag) { + e6000sw_softc_t *sc; + int ret; + + sc = device_get_softc(dev); + E6000SW_LOCK_ASSERT(sc, SA_UNLOCKED); + + E6000SW_LOCK(sc); + ret = e6000sw_setlaggroup(dev, lag); + E6000SW_UNLOCK(sc); + + return (ret); +} + +static int +e6000sw_getlaggroup_wrapper(device_t dev, etherswitch_laggroup_t *lag) +{ + e6000sw_softc_t *sc; + int ret; + + sc = device_get_softc(dev); + E6000SW_LOCK_ASSERT(sc, SA_UNLOCKED); + + E6000SW_LOCK(sc); + ret = e6000sw_getlaggroup(dev, lag); + E6000SW_UNLOCK(sc); + + return (ret); +} + +static int +e6000sw_resetlagg(e6000sw_softc_t *sc) +{ + int i; uint32_t reg; - reg = e6000sw_readreg(sc, REG_PORT(port), - PORT_VLAN_MAP); - reg &= ~PORT_VLAN_MAP_TABLE_MASK; - reg &= ~PORT_VLAN_MAP_FID_MASK; - e6000sw_writereg(sc, REG_PORT(port), - PORT_VLAN_MAP, reg); - if (sc->vgroup[port] != E6000SW_PORT_NO_VGROUP) { - /* - * If port belonged somewhere, owner-group - * should have its entry removed. 
- */ - sc->members[sc->vgroup[port]] &= ~(1 << port); - sc->vgroup[port] = E6000SW_PORT_NO_VGROUP; + for (i = 0; i < sc->num_ports; i++) { + if (!e6000sw_is_portenabled(sc, i)) + continue; + reg = e6000sw_readreg(sc, REG_PORT(sc, i), PORT_CONTROL1); + if (reg & PORT_CONTROL1_LAG_PORT) { + /* Disable LAG on port. */ + reg &= ~PORT_CONTROL1_LAG_PORT; + reg &= ~(PORT_CONTROL1_LAG_ID_MASK << + PORT_CONTROL1_LAG_ID_SHIFT); + e6000sw_writereg(sc, REG_PORT(sc, i), + PORT_CONTROL1, reg); + } } + for (i = 0; i < sc->num_laggs; i++) + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MAPPING, + i << LAGID_SHIFT | LAG_UPDATE); + for (i = 0; i < E6000SW_NUM_LAGMASK; i++) + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MASK, + i << LAG_MASKNUM_SHIFT | LAG_UPDATE | sc->ports_mask); + + return (0); } -static __inline void -e6000sw_port_assign_vgroup(e6000sw_softc_t *sc, int port, int fid, int vgroup, - int members) +static int +e6000sw_setlaggmask(e6000sw_softc_t *sc) { + int count, cycle, i, m, mask, port; + struct lagg_map { + int cycle; + int lag; + int pcount; + uint32_t ports; + } *map; uint32_t reg; - reg = e6000sw_readreg(sc, REG_PORT(port), - PORT_VLAN_MAP); - reg &= ~PORT_VLAN_MAP_TABLE_MASK; - reg &= ~PORT_VLAN_MAP_FID_MASK; - reg |= members & ~(1 << port); - reg |= (fid << PORT_VLAN_MAP_FID) & PORT_VLAN_MAP_FID_MASK; - e6000sw_writereg(sc, REG_PORT(port), PORT_VLAN_MAP, - reg); - sc->vgroup[port] = vgroup; + map = malloc(sizeof(*map) * sc->num_laggs, M_E6000SW, M_WAITOK); + for (i = 0; i < sc->num_laggs; i++) { + map[i].lag = 0; + map[i].cycle = 0; + map[i].ports = 0; + map[i].pcount = 0; + } + count = 0; + for (i = 0; i < sc->num_laggs; i++) { + + /* Read the LAGG ports. 
*/ + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MAPPING, + i << LAGID_SHIFT); + reg = e6000sw_readreg(sc, REG_GLOBAL2, LAG_MAPPING); + if ((reg & sc->ports_mask) == 0) + continue; + map[count].lag = i; + map[count].ports = reg & sc->ports_mask; + for (port = 0; port < sc->num_ports; port++) { + if ((map[count].ports & (1 << port)) == 0) + continue; + map[count].pcount++; + } + ++count; + } + + for (mask = 0; mask < E6000SW_NUM_LAGMASK; mask++) { + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MASK, + mask << LAG_MASKNUM_SHIFT); + reg = e6000sw_readreg(sc, REG_GLOBAL2, LAG_MASK); + reg |= sc->ports_mask; + for (port = 0; port < sc->num_ports; port++) { + + for (m = 0; m < count; m++) { + cycle = mask % map[m].pcount; + if ((map[m].ports & (1 << port)) == 0) + continue; + if (map[m].cycle != cycle) + reg &= ~(1 << port); + map[m].cycle = ++map[m].cycle % map[m].pcount; + } + } + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MASK, reg | LAG_UPDATE); + } + + free(map, M_E6000SW); + + return (0); } static int -e6000sw_setvgroup(device_t dev, etherswitch_vlangroup_t *vg) +e6000sw_setlaggroup(device_t dev, etherswitch_laggroup_t *lag) { e6000sw_softc_t *sc; - int port, fid; + int i, laggid; + uint32_t laggports, reg; sc = device_get_softc(dev); + E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); - if (vg->es_vlangroup >= E6000SW_NUM_VGROUPS) + laggports = 0; + for (i = 0; i < sc->num_ports; i++) { + if (!e6000sw_is_portenabled(sc, i)) + continue; + reg = e6000sw_readreg(sc, REG_PORT(sc, i), PORT_CONTROL1); + laggid = reg >> PORT_CONTROL1_LAG_ID_SHIFT; + laggid &= PORT_CONTROL1_LAG_ID_MASK; + if ((lag->es_untagged_ports & (1 << i)) == 0) { + if ((reg & PORT_CONTROL1_LAG_PORT) != 0 && + laggid == lag->es_laggroup) { + /* Disable LAG on port. 
*/ + reg &= ~PORT_CONTROL1_LAG_PORT; + reg &= ~(PORT_CONTROL1_LAG_ID_MASK << + PORT_CONTROL1_LAG_ID_SHIFT); + e6000sw_writereg(sc, REG_PORT(sc, i), + PORT_CONTROL1, reg); + } + continue; + } + reg |= PORT_CONTROL1_LAG_PORT; + laggid = lag->es_laggroup & PORT_CONTROL1_LAG_ID_MASK; + reg |= laggid << PORT_CONTROL1_LAG_ID_SHIFT; + e6000sw_writereg(sc, REG_PORT(sc, i), PORT_CONTROL1, reg); + + laggports |= (1 << i); + } + + /* Update LAG mapping. */ + reg = (lag->es_laggroup & PORT_CONTROL1_LAG_ID_MASK) << LAGID_SHIFT; + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MAPPING, reg); + reg = e6000sw_readreg(sc, REG_GLOBAL2, LAG_MAPPING); + reg &= ~sc->ports_mask; + reg |= laggports | LAG_UPDATE; + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MAPPING, reg); + + lag->es_lagg_valid = 1; + + return (e6000sw_setlaggmask(sc)); +} + +static int +e6000sw_set_port_vlan(e6000sw_softc_t *sc, etherswitch_vlangroup_t *vg) +{ + uint32_t port; + + port = vg->es_vlangroup; + if (port > sc->num_ports) return (EINVAL); + if (vg->es_member_ports != vg->es_untagged_ports) { - device_printf(dev, "Tagged ports not supported.\n"); + device_printf(sc->dev, "Tagged ports not supported.\n"); return (EINVAL); } - vg->es_untagged_ports &= PORT_VLAN_MAP_TABLE_MASK; - fid = vg->es_vlangroup + 1; - for (port = 0; port < E6000SW_NUM_PORTS; port++) { - if ((sc->members[vg->es_vlangroup] & (1 << port)) || - (vg->es_untagged_ports & (1 << port))) - e6000sw_flush_port(sc, port); - if (vg->es_untagged_ports & (1 << port)) - e6000sw_port_assign_vgroup(sc, port, fid, - vg->es_vlangroup, vg->es_untagged_ports); + e6000sw_port_vlan_assign(sc, port, 0, vg->es_untagged_ports); + vg->es_vid = port | ETHERSWITCH_VID_VALID; + + return (0); +} + +static int +e6000sw_set_dot1q_vlan(e6000sw_softc_t *sc, etherswitch_vlangroup_t *vg) +{ + int i, vlan; + + vlan = vg->es_vid & ETHERSWITCH_VID_MASK; + + /* Set VLAN to '0' removes it from table. 
*/ + if (vlan == 0) { + e6000sw_vtu_update(sc, VTU_PURGE, + sc->vlans[vg->es_vlangroup], 0, 0, 0); + sc->vlans[vg->es_vlangroup] = 0; + return (0); } - sc->vid[vg->es_vlangroup] = vg->es_vid; - sc->members[vg->es_vlangroup] = vg->es_untagged_ports; + + /* Is this VLAN already in table ? */ + for (i = 0; i < etherswitch_info.es_nvlangroups; i++) + if (i != vg->es_vlangroup && vlan == sc->vlans[i]) + return (EINVAL); + + sc->vlans[vg->es_vlangroup] = vlan; + e6000sw_vtu_update(sc, 0, vlan, vg->es_vlangroup + 1, + vg->es_member_ports & sc->used_mask, + vg->es_untagged_ports & sc->used_mask); return (0); } static int -e6000sw_getvgroup(device_t dev, etherswitch_vlangroup_t *vg) +e6000sw_setvgroup(device_t dev, etherswitch_vlangroup_t *vg) { e6000sw_softc_t *sc; sc = device_get_softc(dev); E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); - if (vg->es_vlangroup >= E6000SW_NUM_VGROUPS) + if (sc->vlan_mode == ETHERSWITCH_VLAN_PORT) + return (e6000sw_set_port_vlan(sc, vg)); + else if (sc->vlan_mode == ETHERSWITCH_VLAN_DOT1Q) + return (e6000sw_set_dot1q_vlan(sc, vg)); + + return (EINVAL); +} + +static int +e6000sw_getlaggroup(device_t dev, etherswitch_laggroup_t *lag) +{ + e6000sw_softc_t *sc; + int laggid; + uint32_t reg; + + sc = device_get_softc(dev); + E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); + + lag->es_lagg_valid = 0; + lag->es_member_ports = lag->es_untagged_ports = 0; + /* Read the LAGG ports. */ + laggid = lag->es_laggroup & PORT_CONTROL1_LAG_ID_MASK; + e6000sw_writereg(sc, REG_GLOBAL2, LAG_MAPPING, laggid << LAGID_SHIFT); + reg = e6000sw_readreg(sc, REG_GLOBAL2, LAG_MAPPING); + lag->es_member_ports = reg & sc->ports_mask; + lag->es_untagged_ports = reg & sc->ports_mask; + + /* Is this LAG group in use ? 
*/ + if (lag->es_untagged_ports != 0) + lag->es_lagg_valid = 1; + + return (0); +} + +static int +e6000sw_get_port_vlan(e6000sw_softc_t *sc, etherswitch_vlangroup_t *vg) +{ + uint32_t port, reg; + + port = vg->es_vlangroup; + if (port > sc->num_ports) + return (EINVAL); + + if (!e6000sw_is_portenabled(sc, port)) { + vg->es_vid = port; + return (0); + } + + reg = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_VLAN_MAP); + vg->es_untagged_ports = vg->es_member_ports = reg & sc->ports_mask; + vg->es_vid = port | ETHERSWITCH_VID_VALID; + vg->es_fid = (reg & PORT_VLAN_MAP_FID_MASK) >> PORT_VLAN_MAP_FID; + reg = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_CONTROL1); + vg->es_fid |= (reg & PORT_CONTROL1_FID_MASK) << 4; + + return (0); +} + +static int +e6000sw_get_dot1q_vlan(e6000sw_softc_t *sc, etherswitch_vlangroup_t *vg) +{ + int i, port; + uint32_t reg; + + vg->es_fid = 0; + vg->es_vid = sc->vlans[vg->es_vlangroup]; + vg->es_untagged_ports = vg->es_member_ports = 0; + if (vg->es_vid == 0) + return (0); + + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "VTU unit is busy, cannot access\n"); + return (EBUSY); + } + + e6000sw_writereg(sc, REG_GLOBAL, VTU_VID, vg->es_vid - 1); + + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_OPERATION); + reg &= ~VTU_OP_MASK; + reg |= VTU_GET_NEXT | VTU_BUSY; + e6000sw_writereg(sc, REG_GLOBAL, VTU_OPERATION, reg); + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "Timeout while reading\n"); + return (EBUSY); + } + + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_VID); + if (reg == VTU_VID_MASK || (reg & VTU_VID_VALID) == 0) + return (EINVAL); + if ((reg & VTU_VID_MASK) != vg->es_vid) return (EINVAL); - vg->es_untagged_ports = vg->es_member_ports = - sc->members[vg->es_vlangroup]; - vg->es_vid = ETHERSWITCH_VID_VALID; + + vg->es_vid |= ETHERSWITCH_VID_VALID; + reg = e6000sw_readreg(sc, REG_GLOBAL, VTU_DATA); + for (i = 0; i < sc->num_ports; i++) { + if (i == VTU_PPREG(sc)) + reg = 
e6000sw_readreg(sc, REG_GLOBAL, VTU_DATA2); + port = (reg >> VTU_PORT(sc, i)) & VTU_PORT_MASK; + if (port == VTU_PORT_UNTAGGED) { + vg->es_untagged_ports |= (1 << i); + vg->es_member_ports |= (1 << i); + } else if (port == VTU_PORT_TAGGED) + vg->es_member_ports |= (1 << i); + } return (0); } +static int +e6000sw_getvgroup(device_t dev, etherswitch_vlangroup_t *vg) +{ + e6000sw_softc_t *sc; + + sc = device_get_softc(dev); + E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); + + if (sc->vlan_mode == ETHERSWITCH_VLAN_PORT) + return (e6000sw_get_port_vlan(sc, vg)); + else if (sc->vlan_mode == ETHERSWITCH_VLAN_DOT1Q) + return (e6000sw_get_dot1q_vlan(sc, vg)); + + return (EINVAL); +} + static __inline struct mii_data* e6000sw_miiforphy(e6000sw_softc_t *sc, unsigned int phy) { - if (phy >= E6000SW_NUM_PHYS) + if (!e6000sw_is_phyport(sc, phy)) return (NULL); return (device_get_softc(sc->miibus[phy])); @@ -717,13 +1777,42 @@ e6000sw_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) ifmr->ifm_status = mii->mii_media_status; } +static int +e6000sw_smi_waitready(e6000sw_softc_t *sc, int phy) +{ + int i; + + for (i = 0; i < E6000SW_SMI_TIMEOUT; i++) { + if ((MDIO_READ(sc->dev, phy, SMI_CMD) & SMI_CMD_BUSY) == 0) + return (0); + DELAY(1); + } + + return (1); +} + static __inline uint32_t e6000sw_readreg(e6000sw_softc_t *sc, int addr, int reg) { E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); - return (MDIO_READREG(device_get_parent(sc->dev), addr, reg)); + if (!E6000SW_MULTICHIP(sc)) + return (MDIO_READ(sc->dev, addr, reg) & 0xffff); + + if (e6000sw_smi_waitready(sc, sc->sw_addr)) { + printf("e6000sw: readreg timeout\n"); + return (0xffff); + } + MDIO_WRITE(sc->dev, sc->sw_addr, SMI_CMD, + SMI_CMD_OP_C22_READ | (reg & SMI_CMD_REG_ADDR_MASK) | + ((addr << SMI_CMD_DEV_ADDR) & SMI_CMD_DEV_ADDR_MASK)); + if (e6000sw_smi_waitready(sc, sc->sw_addr)) { + printf("e6000sw: readreg timeout\n"); + return (0xffff); + } + + return (MDIO_READ(sc->dev, sc->sw_addr, SMI_DATA) & 0xffff); } static __inline void @@ 
-732,55 +1821,147 @@ e6000sw_writereg(e6000sw_softc_t *sc, int addr, int reg, int val) E6000SW_LOCK_ASSERT(sc, SA_XLOCKED); - MDIO_WRITEREG(device_get_parent(sc->dev), addr, reg, val); + if (!E6000SW_MULTICHIP(sc)) { + MDIO_WRITE(sc->dev, addr, reg, val); + return; + } + + if (e6000sw_smi_waitready(sc, sc->sw_addr)) { + printf("e6000sw: readreg timeout\n"); + return; + } + MDIO_WRITE(sc->dev, sc->sw_addr, SMI_DATA, val); + MDIO_WRITE(sc->dev, sc->sw_addr, SMI_CMD, + SMI_CMD_OP_C22_WRITE | (reg & SMI_CMD_REG_ADDR_MASK) | + ((addr << SMI_CMD_DEV_ADDR) & SMI_CMD_DEV_ADDR_MASK)); +} + +static __inline bool +e6000sw_is_cpuport(e6000sw_softc_t *sc, int port) +{ + + return ((sc->cpuports_mask & (1 << port)) ? true : false); } -static __inline int -e6000sw_cpuport(e6000sw_softc_t *sc, int port) +static __inline bool +e6000sw_is_fixedport(e6000sw_softc_t *sc, int port) { - return (sc->cpuports_mask & (1 << port)); + return ((sc->fixed_mask & (1 << port)) ? true : false); } -static __inline int -e6000sw_set_pvid(e6000sw_softc_t *sc, int port, int pvid) +static __inline bool +e6000sw_is_fixed25port(e6000sw_softc_t *sc, int port) { - e6000sw_writereg(sc, REG_PORT(port), PORT_VID, pvid & - PORT_VID_DEF_VID_MASK); + return ((sc->fixed25_mask & (1 << port)) ? true : false); +} - return (0); +static __inline bool +e6000sw_is_phyport(e6000sw_softc_t *sc, int port) +{ + uint32_t phy_mask; + phy_mask = ~(sc->fixed_mask | sc->cpuports_mask); + + return ((phy_mask & (1 << port)) ? true : false); +} + +static __inline bool +e6000sw_is_portenabled(e6000sw_softc_t *sc, int port) +{ + + return ((sc->used_mask & (1 << port)) ? 
true : false); +} + +static __inline void +e6000sw_set_pvid(e6000sw_softc_t *sc, int port, int pvid) +{ + uint32_t data; + + data = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_VID); + data &= ~PORT_VID_DEF_VID_MASK; + data |= (pvid & PORT_VID_DEF_VID_MASK); + e6000sw_writereg(sc, REG_PORT(sc, port), PORT_VID, data); } -static __inline int +static __inline void e6000sw_get_pvid(e6000sw_softc_t *sc, int port, int *pvid) { if (pvid == NULL) - return (ENXIO); + return; - *pvid = e6000sw_readreg(sc, REG_PORT(port), PORT_VID) & + *pvid = e6000sw_readreg(sc, REG_PORT(sc, port), PORT_VID) & PORT_VID_DEF_VID_MASK; +} - return (0); +/* + * Convert port status to ifmedia. + */ +static void +e6000sw_update_ifmedia(uint16_t portstatus, u_int *media_status, u_int *media_active) +{ + *media_active = IFM_ETHER; + *media_status = IFM_AVALID; + + if ((portstatus & PORT_STATUS_LINK_MASK) != 0) + *media_status |= IFM_ACTIVE; + else { + *media_active |= IFM_NONE; + return; + } + + switch (portstatus & PORT_STATUS_SPEED_MASK) { + case PORT_STATUS_SPEED_10: + *media_active |= IFM_10_T; + break; + case PORT_STATUS_SPEED_100: + *media_active |= IFM_100_TX; + break; + case PORT_STATUS_SPEED_1000: + *media_active |= IFM_1000_T; + break; + } + + if ((portstatus & PORT_STATUS_DUPLEX_MASK) == 0) + *media_active |= IFM_FDX; + else + *media_active |= IFM_HDX; } static void -e6000sw_tick (void *arg) +e6000sw_tick(void *arg) { e6000sw_softc_t *sc; + struct mii_data *mii; struct mii_softc *miisc; - int i; + uint16_t portstatus; + int port; sc = arg; E6000SW_LOCK_ASSERT(sc, SA_UNLOCKED); + for (;;) { E6000SW_LOCK(sc); - for (i = 0; i < E6000SW_NUM_PHYS; i++) { - mii_tick(sc->mii[i]); - LIST_FOREACH(miisc, &sc->mii[i]->mii_phys, mii_list) { - if (IFM_INST(sc->mii[i]->mii_media.ifm_cur->ifm_media) + for (port = 0; port < sc->num_ports; port++) { + /* Tick only on PHY ports */ + if (!e6000sw_is_portenabled(sc, port) || + !e6000sw_is_phyport(sc, port)) + continue; + + mii = e6000sw_miiforphy(sc, port); 
+ if (mii == NULL) + continue; + + portstatus = e6000sw_readreg(sc, REG_PORT(sc, port), + PORT_STATUS); + + e6000sw_update_ifmedia(portstatus, + &mii->mii_media_status, &mii->mii_media_active); + + LIST_FOREACH(miisc, &mii->mii_phys, mii_list) { + if (IFM_INST(mii->mii_media.ifm_cur->ifm_media) != miisc->mii_inst) continue; mii_phy_update(miisc, MII_POLLSTAT); @@ -794,12 +1975,13 @@ e6000sw_tick (void *arg) static void e6000sw_setup(device_t dev, e6000sw_softc_t *sc) { - uint16_t atu_ctrl, atu_age; + uint16_t atu_ctrl; - /* Set aging time */ - e6000sw_writereg(sc, REG_GLOBAL, ATU_CONTROL, - (E6000SW_DEFAULT_AGETIME << ATU_CONTROL_AGETIME) | - (1 << ATU_CONTROL_LEARN2ALL)); + /* Set aging time. */ + atu_ctrl = e6000sw_readreg(sc, REG_GLOBAL, ATU_CONTROL); + atu_ctrl &= ~ATU_CONTROL_AGETIME_MASK; + atu_ctrl |= E6000SW_DEFAULT_AGETIME << ATU_CONTROL_AGETIME; + e6000sw_writereg(sc, REG_GLOBAL, ATU_CONTROL, atu_ctrl); /* Send all with specific mac address to cpu port */ e6000sw_writereg(sc, REG_GLOBAL2, MGMT_EN_2x, MGMT_EN_ALL); @@ -815,70 +1997,9 @@ e6000sw_setup(device_t dev, e6000sw_softc_t *sc) SWITCH_MGMT_FC_PRI_MASK | (1 << SWITCH_MGMT_FORCEFLOW)); - /* Set VLAN configuration */ - e6000sw_port_vlan_conf(sc); - e6000sw_atu_flush(dev, sc, NO_OPERATION); e6000sw_atu_mac_table(dev, sc, NULL, NO_OPERATION); e6000sw_set_atustat(dev, sc, 0, COUNT_ALL); - - /* Set ATU AgeTime to 15 seconds */ - atu_age = 1; - - atu_ctrl = e6000sw_readreg(sc, REG_GLOBAL, ATU_CONTROL); - - /* Set new AgeTime field */ - atu_ctrl &= ~ATU_CONTROL_AGETIME_MASK; - e6000sw_writereg(sc, REG_GLOBAL, ATU_CONTROL, atu_ctrl | - (atu_age << ATU_CONTROL_AGETIME)); -} - -static void -e6000sw_port_vlan_conf(e6000sw_softc_t *sc) -{ - int port, ret; - etherswitch_vlangroup_t vg; - device_t dev; - - dev = sc->dev; - /* Disable all ports */ - for (port = 0; port < E6000SW_NUM_PORTS; port++) { - ret = e6000sw_readreg(sc, REG_PORT(port), PORT_CONTROL); - e6000sw_writereg(sc, REG_PORT(port), PORT_CONTROL, - (ret 
& ~PORT_CONTROL_ENABLE)); - } - - /* Set port priority */ - for (port = 0; port < E6000SW_NUM_PORTS; port++) { - ret = e6000sw_readreg(sc, REG_PORT(port), PORT_VID); - ret &= ~PORT_VID_PRIORITY_MASK; - e6000sw_writereg(sc, REG_PORT(port), PORT_VID, ret); - } - - vg.es_vlangroup = 0; - vg.es_vid = 0; - vg.es_member_ports = vg.es_untagged_ports = E6000SW_DEF_VLANGROUP0; - e6000sw_setvgroup(dev, &vg); - vg.es_vlangroup = 1; - vg.es_vid = 1; - vg.es_member_ports = vg.es_untagged_ports = E6000SW_DEF_VLANGROUP1; - e6000sw_setvgroup(dev, &vg); - - device_printf(dev, "Default vlangroups set.\n"); - /* Set VID map */ - for (port = 0; port < E6000SW_NUM_PORTS; port++) { - ret = e6000sw_readreg(sc, REG_PORT(port), PORT_VID); - ret &= ~PORT_VID_DEF_VID_MASK; - ret |= (port + 1); - e6000sw_writereg(sc, REG_PORT(port), PORT_VID, ret); - } - - /* Enable all ports */ - for (port = 0; port < E6000SW_NUM_PORTS; port++) { - ret = e6000sw_readreg(sc, REG_PORT(port), PORT_CONTROL); - e6000sw_writereg(sc, REG_PORT(port), PORT_CONTROL, (ret | - PORT_CONTROL_ENABLE)); - } } static void @@ -897,7 +2018,6 @@ e6000sw_atu_mac_table(device_t dev, e6000sw_softc_t *sc, struct atu_opt *atu, { uint16_t ret_opt; uint16_t ret_data; - int retries; if (flag == NO_OPERATION) return (0); @@ -907,41 +2027,34 @@ e6000sw_atu_mac_table(device_t dev, e6000sw_softc_t *sc, struct atu_opt *atu, return (EINVAL); } - ret_opt = e6000sw_readreg(sc, REG_GLOBAL, ATU_OPERATION); - - if (ret_opt & ATU_UNIT_BUSY) { - device_printf(dev, "ATU unit is busy, cannot access" - "register\n"); + if (E6000SW_WAITREADY(sc, ATU_OPERATION, ATU_UNIT_BUSY)) { + device_printf(dev, "ATU unit is busy, cannot access\n"); return (EBUSY); - } else { - if(flag & LOAD_FROM_FIB) { - ret_data = e6000sw_readreg(sc, REG_GLOBAL, ATU_DATA); - e6000sw_writereg(sc, REG_GLOBAL2, ATU_DATA, (ret_data & - ~ENTRY_STATE)); - } - e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR01, atu->mac_01); - e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR23, atu->mac_23); 
- e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR45, atu->mac_45); - e6000sw_writereg(sc, REG_GLOBAL, ATU_FID, atu->fid); - - e6000sw_writereg(sc, REG_GLOBAL, ATU_OPERATION, (ret_opt | - ATU_UNIT_BUSY | flag)); - - retries = E6000SW_RETRIES; - while (--retries & (e6000sw_readreg(sc, REG_GLOBAL, - ATU_OPERATION) & ATU_UNIT_BUSY)) - DELAY(1); - - if (retries == 0) - device_printf(dev, "Timeout while flushing\n"); - else if (flag & GET_NEXT_IN_FIB) { - atu->mac_01 = e6000sw_readreg(sc, REG_GLOBAL, - ATU_MAC_ADDR01); - atu->mac_23 = e6000sw_readreg(sc, REG_GLOBAL, - ATU_MAC_ADDR23); - atu->mac_45 = e6000sw_readreg(sc, REG_GLOBAL, - ATU_MAC_ADDR45); - } + } + + ret_opt = e6000sw_readreg(sc, REG_GLOBAL, ATU_OPERATION); + if(flag & LOAD_FROM_FIB) { + ret_data = e6000sw_readreg(sc, REG_GLOBAL, ATU_DATA); + e6000sw_writereg(sc, REG_GLOBAL2, ATU_DATA, (ret_data & + ~ENTRY_STATE)); + } + e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR01, atu->mac_01); + e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR23, atu->mac_23); + e6000sw_writereg(sc, REG_GLOBAL, ATU_MAC_ADDR45, atu->mac_45); + e6000sw_writereg(sc, REG_GLOBAL, ATU_FID, atu->fid); + + e6000sw_writereg(sc, REG_GLOBAL, ATU_OPERATION, + (ret_opt | ATU_UNIT_BUSY | flag)); + + if (E6000SW_WAITREADY(sc, ATU_OPERATION, ATU_UNIT_BUSY)) + device_printf(dev, "Timeout while waiting ATU\n"); + else if (flag & GET_NEXT_IN_FIB) { + atu->mac_01 = e6000sw_readreg(sc, REG_GLOBAL, + ATU_MAC_ADDR01); + atu->mac_23 = e6000sw_readreg(sc, REG_GLOBAL, + ATU_MAC_ADDR23); + atu->mac_45 = e6000sw_readreg(sc, REG_GLOBAL, + ATU_MAC_ADDR45); } return (0); @@ -951,25 +2064,180 @@ static int e6000sw_atu_flush(device_t dev, e6000sw_softc_t *sc, int flag) { uint16_t ret; - int retries; if (flag == NO_OPERATION) return (0); + if (E6000SW_WAITREADY(sc, ATU_OPERATION, ATU_UNIT_BUSY)) { + device_printf(dev, "ATU unit is busy, cannot access\n"); + return (EBUSY); + } ret = e6000sw_readreg(sc, REG_GLOBAL, ATU_OPERATION); - if (ret & ATU_UNIT_BUSY) { - 
device_printf(dev, "Atu unit is busy, cannot flush\n"); + e6000sw_writereg(sc, REG_GLOBAL, ATU_OPERATION, + (ret | ATU_UNIT_BUSY | flag)); + if (E6000SW_WAITREADY(sc, ATU_OPERATION, ATU_UNIT_BUSY)) + device_printf(dev, "Timeout while flushing ATU\n"); + + return (0); +} + +static int +e6000sw_waitready(e6000sw_softc_t *sc, uint32_t phy, uint32_t reg, + uint32_t busybit) +{ + int i; + + for (i = 0; i < E6000SW_RETRIES; i++) { + if ((e6000sw_readreg(sc, phy, reg) & busybit) == 0) + return (0); + DELAY(1); + } + + return (1); +} + +static int +e6000sw_vtu_flush(e6000sw_softc_t *sc) +{ + + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "VTU unit is busy, cannot access\n"); return (EBUSY); - } else { - e6000sw_writereg(sc, REG_GLOBAL, ATU_OPERATION, (ret | - ATU_UNIT_BUSY | flag)); - retries = E6000SW_RETRIES; - while (--retries & (e6000sw_readreg(sc, REG_GLOBAL, - ATU_OPERATION) & ATU_UNIT_BUSY)) - DELAY(1); - - if (retries == 0) - device_printf(dev, "Timeout while flushing\n"); + } + + e6000sw_writereg(sc, REG_GLOBAL, VTU_OPERATION, VTU_FLUSH | VTU_BUSY); + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "Timeout while flushing VTU\n"); + return (ETIMEDOUT); + } + + return (0); +} + +static int +e6000sw_vtu_update(e6000sw_softc_t *sc, int purge, int vid, int fid, + int members, int untagged) +{ + int i, op; + uint32_t data[2]; + + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "VTU unit is busy, cannot access\n"); + return (EBUSY); + } + + *data = (vid & VTU_VID_MASK); + if (purge == 0) + *data |= VTU_VID_VALID; + e6000sw_writereg(sc, REG_GLOBAL, VTU_VID, *data); + + if (purge == 0) { + data[0] = 0; + data[1] = 0; + for (i = 0; i < sc->num_ports; i++) { + if ((untagged & (1 << i)) != 0) + data[i / VTU_PPREG(sc)] |= + VTU_PORT_UNTAGGED << VTU_PORT(sc, i); + else if ((members & (1 << i)) != 0) + data[i / VTU_PPREG(sc)] |= + VTU_PORT_TAGGED << VTU_PORT(sc, i); + else + data[i / 
VTU_PPREG(sc)] |= + VTU_PORT_DISCARD << VTU_PORT(sc, i); + } + e6000sw_writereg(sc, REG_GLOBAL, VTU_DATA, data[0]); + e6000sw_writereg(sc, REG_GLOBAL, VTU_DATA2, data[1]); + e6000sw_writereg(sc, REG_GLOBAL, VTU_FID, + fid & VTU_FID_MASK(sc)); + op = VTU_LOAD; + } else + op = VTU_PURGE; + + e6000sw_writereg(sc, REG_GLOBAL, VTU_OPERATION, op | VTU_BUSY); + if (E6000SW_WAITREADY(sc, VTU_OPERATION, VTU_BUSY)) { + device_printf(sc->dev, "Timeout while flushing VTU\n"); + return (ETIMEDOUT); + } + + return (0); +} + +static ssize_t +e6000sw_getiosize(device_t dev) +{ + e6000sw_softc_t *sc; + + sc = device_get_softc(dev); + + return (sc->iosize); +} + +static ssize_t +e6000sw_getioblksize(device_t dev __unused) +{ + + return (E6000SW_IOBUF_BLKSIZE); +} + +static void * +e6000sw_getiobuf(device_t dev) +{ + e6000sw_softc_t *sc; + + sc = device_get_softc(dev); + + return (sc->iobuf); +} + +static int +e6000sw_ioread(device_t dev, off_t off, ssize_t len) +{ + e6000sw_softc_t *sc; + ssize_t resid; + uint8_t *iobuf; + uint32_t reg; + + sc = device_get_softc(dev); + iobuf = (uint8_t *)sc->iobuf; + for (resid = 0; resid < len; resid++) { + if (E6000SW_WAITREADY2(sc, EEPROM_CMD, EEPROM_BUSY)) { + device_printf(sc->dev, "EEPROM is busy, cannot access\n"); + return (ETIMEDOUT); + } + e6000sw_writereg(sc, REG_GLOBAL2, EEPROM_ADDR, off + resid); + e6000sw_writereg(sc, REG_GLOBAL2, EEPROM_CMD, + EEPROM_READ_CMD | EEPROM_BUSY); + if (E6000SW_WAITREADY2(sc, EEPROM_CMD, EEPROM_BUSY)) { + device_printf(sc->dev, "EEPROM is busy, cannot access\n"); + return (ETIMEDOUT); + } + reg = e6000sw_readreg(sc, REG_GLOBAL2, EEPROM_CMD); + iobuf[resid] = reg & EEPROM_DATA_MASK; + } + + return (0); +} + +static int +e6000sw_iowrite(device_t dev, off_t off, ssize_t len) +{ + e6000sw_softc_t *sc; + ssize_t resid; + uint8_t *iobuf; + + if (e6000sw_eeprom_wp) + return (EPERM); + sc = device_get_softc(dev); + iobuf = (uint8_t *)sc->iobuf; + for (resid = 0; resid < len; resid++) { + if (E6000SW_WAITREADY2(sc, 
EEPROM_CMD, EEPROM_BUSY)) { + device_printf(sc->dev, "EEPROM is busy, cannot access\n"); + return (ETIMEDOUT); + } + e6000sw_writereg(sc, REG_GLOBAL2, EEPROM_ADDR, off + resid); + e6000sw_writereg(sc, REG_GLOBAL2, EEPROM_CMD, + EEPROM_BUSY | EEPROM_WRITE_CMD | EEPROM_WRITE_EN | + (iobuf[resid] & EEPROM_DATA_MASK)); } return (0); diff --git a/sys/dev/etherswitch/e6000sw/e6000swreg.h b/sys/dev/etherswitch/e6000sw/e6000swreg.h index b8cdfe3..98f8827 100644 --- a/sys/dev/etherswitch/e6000sw/e6000swreg.h +++ b/sys/dev/etherswitch/e6000sw/e6000swreg.h @@ -29,7 +29,7 @@ */ #ifndef _E6000SWREG_H_ -#define _E6000SWREG_H_ +#define _E6000SWREG_H_ struct atu_opt { uint16_t mac_01; @@ -42,144 +42,264 @@ struct atu_opt { * Definitions for the Marvell 88E6000 series Ethernet Switch. */ -#define CPU_PORT 0x5 +/* Switch IDs. */ +#define MV88E6141 0x3400 +#define MV88E6341 0x3410 +#define MV88E6352 0x3520 +#define MV88E6172 0x1720 +#define MV88E6176 0x1760 +#define MV88E6190 0x1900 + +#define MVSWITCH(_sc, id) ((_sc)->swid == (id)) /* * Switch Registers */ -#define REG_GLOBAL 0x1b -#define REG_GLOBAL2 0x1c -#define REG_PORT(p) (0x10 + (p)) +#define REG_GLOBAL 0x1b +#define REG_GLOBAL2 0x1c +#define REG_PORT(_sc, p) (((_sc)->port_base) + (p)) -#define REG_NUM_MAX 31 +#define REG_NUM_MAX 31 /* * Per-Port Switch Registers */ -#define PORT_STATUS 0x0 -#define PSC_CONTROL 0x1 -#define SWITCH_ID 0x3 -#define PORT_CONTROL 0x4 -#define PORT_CONTROL_1 0x5 -#define PORT_VLAN_MAP 0x6 -#define PORT_VID 0x7 -#define PORT_ASSOCIATION_VECTOR 0xb -#define PORT_ATU_CTRL 0xc -#define RX_COUNTER 0x12 -#define TX_COUNTER 0x13 - -#define PORT_VID_DEF_VID 0 -#define PORT_VID_DEF_VID_MASK 0xfff -#define PORT_VID_PRIORITY_MASK 0xc00 - -#define PORT_CONTROL_ENABLE 0x3 +#define PORT_STATUS 0x0 +#define PORT_STATUS_SPEED_MASK 0x300 +#define PORT_STATUS_SPEED_10 0 +#define PORT_STATUS_SPEED_100 1 +#define PORT_STATUS_SPEED_1000 2 +#define PORT_STATUS_DUPLEX_MASK (1 << 10) +#define PORT_STATUS_LINK_MASK (1 << 
11) +#define PORT_STATUS_PHY_DETECT_MASK (1 << 12) + +#define PSC_CONTROL 0x1 +#define PSC_CONTROL_FORCED_SPD (1 << 13) +#define PSC_CONTROL_ALT_SPD (1 << 12) +#define PSC_CONTROL_EEE_ON (1 << 9) +#define PSC_CONTROL_FORCED_EEE (1 << 8) +#define PSC_CONTROL_FC_ON (1 << 7) +#define PSC_CONTROL_FORCED_FC (1 << 6) +#define PSC_CONTROL_LINK_UP (1 << 5) +#define PSC_CONTROL_FORCED_LINK (1 << 4) +#define PSC_CONTROL_FULLDPX (1 << 3) +#define PSC_CONTROL_FORCED_DPX (1 << 2) +#define PSC_CONTROL_SPD10G 0x3 +#define PSC_CONTROL_SPD2500 PSC_CONTROL_SPD10G +#define PSC_CONTROL_SPD1000 0x2 +#define SWITCH_ID 0x3 +#define PORT_CONTROL 0x4 +#define PORT_CONTROL1 0x5 +#define PORT_CONTROL1_LAG_PORT (1 << 14) +#define PORT_CONTROL1_LAG_ID_MASK 0xf +#define PORT_CONTROL1_LAG_ID_SHIFT 8 +#define PORT_CONTROL1_FID_MASK 0xf +#define PORT_VLAN_MAP 0x6 +#define PORT_VID 0x7 +#define PORT_CONTROL2 0x8 +#define PORT_ASSOCIATION_VECTOR 0xb +#define PORT_ATU_CTRL 0xc +#define RX_COUNTER 0x12 +#define TX_COUNTER 0x13 + +#define PORT_VID_DEF_VID 0 +#define PORT_VID_DEF_VID_MASK 0xfff +#define PORT_VID_PRIORITY_MASK 0xc00 + +#define PORT_CONTROL_DISABLED 0 +#define PORT_CONTROL_BLOCKING 1 +#define PORT_CONTROL_LEARNING 2 +#define PORT_CONTROL_FORWARDING 3 +#define PORT_CONTROL_ENABLE 3 +#define PORT_CONTROL_FRAME 0x0300 +#define PORT_CONTROL_EGRESS 0x3000 +#define PORT_CONTROL2_DOT1Q 0x0c00 +#define PORT_CONTROL2_DISC_TAGGED (1 << 9) +#define PORT_CONTROL2_DISC_UNTAGGED (1 << 8) /* PORT_VLAN fields */ -#define PORT_VLAN_MAP_TABLE_MASK 0x7f -#define PORT_VLAN_MAP_FID 12 -#define PORT_VLAN_MAP_FID_MASK 0xf000 +#define PORT_VLAN_MAP_FID 12 +#define PORT_VLAN_MAP_FID_MASK 0xf000 + /* * Switch Global Register 1 accessed via REG_GLOBAL_ADDR */ -#define SWITCH_GLOBAL_STATUS 0 -#define SWITCH_GLOBAL_CONTROL 4 -#define SWITCH_GLOBAL_CONTROL2 28 +#define SWITCH_GLOBAL_STATUS 0 +#define SWITCH_GLOBAL_STATUS_IR (1 << 11) +#define SWITCH_GLOBAL_CONTROL 4 +#define SWITCH_GLOBAL_CONTROL2 28 -#define 
MONITOR_CONTROL 26 +#define MONITOR_CONTROL 26 + +/* VTU operation */ +#define VTU_FID 2 +#define VTU_OPERATION 5 +#define VTU_VID 6 +#define VTU_DATA 7 +#define VTU_DATA2 8 + +#define VTU_FID_MASK(_sc) (MVSWITCH((_sc), MV88E6190) ? 0xfff: 0xff) +#define VTU_FID_POLICY (1 << 12) +#define VTU_PORT_UNMODIFIED 0 +#define VTU_PORT_UNTAGGED 1 +#define VTU_PORT_TAGGED 2 +#define VTU_PORT_DISCARD 3 +#define VTU_PPREG(_sc) (MVSWITCH((_sc), MV88E6190) ? 8 : 4) +#define VTU_PORT(_sc, p) (((p) % VTU_PPREG(_sc)) * (16 / VTU_PPREG(_sc))) +#define VTU_PORT_MASK 3 +#define VTU_BUSY (1 << 15) +#define VTU_VID_VALID (1 << 12) +#define VTU_VID_MASK 0xfff + +/* VTU opcodes */ +#define VTU_OP_MASK (7 << 12) +#define VTU_NOP (0 << 12) +#define VTU_FLUSH (1 << 12) +#define VTU_LOAD (3 << 12) +#define VTU_PURGE (3 << 12) +#define VTU_GET_NEXT (4 << 12) +#define STU_LOAD (5 << 12) +#define STU_PURGE (5 << 12) +#define STU_GET_NEXT (6 << 12) +#define VTU_GET_VIOLATION_DATA (7 << 12) +#define VTU_CLEAR_VIOLATION_DATA (7 << 12) /* ATU operation */ -#define ATU_FID 1 -#define ATU_CONTROL 10 -#define ATU_OPERATION 11 -#define ATU_DATA 12 -#define ATU_MAC_ADDR01 13 -#define ATU_MAC_ADDR23 14 -#define ATU_MAC_ADDR45 15 +#define ATU_FID 1 +#define ATU_CONTROL 10 +#define ATU_OPERATION 11 +#define ATU_DATA 12 +#define ATU_MAC_ADDR01 13 +#define ATU_MAC_ADDR23 14 +#define ATU_MAC_ADDR45 15 -#define ATU_UNIT_BUSY (1 << 15) -#define ENTRY_STATE 0xf +#define ATU_DATA_LAG (1 << 15) +#define ATU_PORT_MASK(_sc) (MVSWITCH((_sc), MV88E6190) ? 
0xfff0: 0xff0) +#define ATU_PORT_SHIFT 4 +#define ATU_LAG_MASK 0xf0 +#define ATU_LAG_SHIFT 4 +#define ATU_STATE_MASK 0xf +#define ATU_UNIT_BUSY (1 << 15) +#define ENTRY_STATE 0xf /* ATU_CONTROL fields */ -#define ATU_CONTROL_AGETIME 4 -#define ATU_CONTROL_AGETIME_MASK 0xff0 -#define ATU_CONTROL_LEARN2ALL 3 +#define ATU_CONTROL_AGETIME 4 +#define ATU_CONTROL_AGETIME_MASK 0xff0 +#define ATU_CONTROL_LEARN2ALL 3 /* ATU opcode */ -#define NO_OPERATION (0 << 0) -#define FLUSH_ALL (1 << 0) -#define FLUSH_NON_STATIC (1 << 1) -#define LOAD_FROM_FIB (3 << 0) -#define PURGE_FROM_FIB (3 << 0) -#define GET_NEXT_IN_FIB (1 << 2) -#define FLUSH_ALL_IN_FIB (5 << 0) -#define FLUSH_NON_STATIC_IN_FIB (3 << 1) -#define GET_VIOLATION_DATA (7 << 0) -#define CLEAR_VIOLATION_DATA (7 << 0) +#define ATU_OP_MASK (7 << 12) +#define NO_OPERATION (0 << 12) +#define FLUSH_ALL (1 << 12) +#define FLUSH_NON_STATIC (2 << 12) +#define LOAD_FROM_FIB (3 << 12) +#define PURGE_FROM_FIB (3 << 12) +#define GET_NEXT_IN_FIB (4 << 12) +#define FLUSH_ALL_IN_FIB (5 << 12) +#define FLUSH_NON_STATIC_IN_FIB (6 << 12) +#define GET_VIOLATION_DATA (7 << 12) +#define CLEAR_VIOLATION_DATA (7 << 12) /* ATU Stats */ -#define COUNT_ALL (0 << 0) +#define COUNT_ALL (0 << 0) /* * Switch Global Register 2 accessed via REG_GLOBAL2_ADDR */ -#define MGMT_EN_2x 2 -#define MGMT_EN_0x 3 -#define SWITCH_MGMT 5 -#define ATU_STATS 14 +#define MGMT_EN_2x 2 +#define MGMT_EN_0x 3 +#define SWITCH_MGMT 5 +#define LAG_MASK 7 +#define LAG_MAPPING 8 +#define ATU_STATS 14 -#define MGMT_EN_ALL 0xffff +#define MGMT_EN_ALL 0xffff +#define LAG_UPDATE (1 << 15) +#define LAG_MASKNUM_SHIFT 12 +#define LAGID_SHIFT 11 /* SWITCH_MGMT fields */ -#define SWITCH_MGMT_PRI 0 -#define SWITCH_MGMT_PRI_MASK 7 +#define SWITCH_MGMT_PRI 0 +#define SWITCH_MGMT_PRI_MASK 7 #define SWITCH_MGMT_RSVD2CPU 3 -#define SWITCH_MGMT_FC_PRI 4 -#define SWITCH_MGMT_FC_PRI_MASK (7 << 4) -#define SWITCH_MGMT_FORCEFLOW 7 +#define SWITCH_MGMT_FC_PRI 4 +#define SWITCH_MGMT_FC_PRI_MASK 
(7 << 4) +#define SWITCH_MGMT_FORCEFLOW 7 /* ATU_STATS fields */ -#define ATU_STATS_BIN 14 -#define ATU_STATS_FLAG 12 +#define ATU_STATS_BIN 14 +#define ATU_STATS_FLAG 12 + +/* Offset of SMI registers in multi-chip setup. */ +#define SMI_CMD 0 +#define SMI_DATA 1 /* - * PHY registers accessed via 'Switch Global Registers' (REG_GLOBAL2). + * 'Switch Global Registers 2' (REG_GLOBAL2). */ -#define SMI_PHY_CMD_REG 0x18 -#define SMI_PHY_DATA_REG 0x19 - -#define PHY_CMD 0x18 -#define PHY_DATA 0x19 -#define PHY_DATA_MASK 0xffff - -#define PHY_CMD_SMI_BUSY 15 -#define PHY_CMD_MODE 12 -#define PHY_CMD_MODE_MDIO 1 -#define PHY_CMD_MODE_XMDIO 0 -#define PHY_CMD_OPCODE 10 -#define PHY_CMD_OPCODE_WRITE 1 -#define PHY_CMD_OPCODE_READ 2 -#define PHY_CMD_DEV_ADDR 5 -#define PHY_CMD_DEV_ADDR_MASK 0x3e0 -#define PHY_CMD_REG_ADDR 0 -#define PHY_CMD_REG_ADDR_MASK 0x1f - -#define PHY_PAGE_REG 22 - -#define E6000SW_NUM_PHYS 5 -#define E6000SW_NUM_PHY_REGS 29 -#define E6000SW_CPUPORTS_MASK ((1 << 5) | (1 << 6)) -#define E6000SW_NUM_VGROUPS 8 -#define E6000SW_NUM_PORTS 7 -#define E6000SW_PORT_NO_VGROUP -1 -#define E6000SW_DEFAULT_AGETIME 20 -#define E6000SW_RETRIES 100 - - -/* Default vlangroups */ -#define E6000SW_DEF_VLANGROUP0 (1 | (1 << 1) | (1 << 2) | (1 << 3) | \ - (1 << 6)) -#define E6000SW_DEF_VLANGROUP1 ((1 << 4) | (1 << 5)) + +/* EEPROM registers */ +#define EEPROM_CMD 0x14 +#define EEPROM_BUSY (1 << 15) +#define EEPROM_READ_CMD (4 << 12) +#define EEPROM_WRITE_CMD (3 << 12) +#define EEPROM_WRITE_EN (1 << 10) +#define EEPROM_DATA_MASK 0xff +#define EEPROM_ADDR 0x15 + +/* PHY registers */ +#define SMI_PHY_CMD_REG 0x18 +#define SMI_CMD_BUSY (1 << 15) +#define SMI_CMD_MODE_C22 (1 << 12) +#define SMI_CMD_C22_WRITE (1 << 10) +#define SMI_CMD_C22_READ (2 << 10) +#define SMI_CMD_OP_C22_WRITE \ + (SMI_CMD_C22_WRITE | SMI_CMD_BUSY | SMI_CMD_MODE_C22) +#define SMI_CMD_OP_C22_READ \ + (SMI_CMD_C22_READ | SMI_CMD_BUSY | SMI_CMD_MODE_C22) +#define SMI_CMD_C45 (0 << 12) +#define 
SMI_CMD_C45_ADDR (0 << 10) +#define SMI_CMD_C45_WRITE (1 << 10) +#define SMI_CMD_C45_READ (3 << 10) +#define SMI_CMD_OP_C45_ADDR \ + (SMI_CMD_C45_ADDR | SMI_CMD_BUSY | SMI_CMD_C45) +#define SMI_CMD_OP_C45_WRITE \ + (SMI_CMD_C45_WRITE | SMI_CMD_BUSY | SMI_CMD_C45) +#define SMI_CMD_OP_C45_READ \ + (SMI_CMD_C45_READ | SMI_CMD_BUSY | SMI_CMD_C45) +#define SMI_CMD_DEV_ADDR 5 +#define SMI_CMD_DEV_ADDR_MASK 0x3e0 +#define SMI_CMD_REG_ADDR_MASK 0x1f +#define SMI_PHY_DATA_REG 0x19 +#define PHY_DATA_MASK 0xffff + +#define PHY_PAGE_REG 22 + +/* + * Scratch and Misc register accessed via + * 'Switch Global Registers' (REG_GLOBAL2) + */ +#define SCR_AND_MISC_REG 0x1a + +#define SCR_AND_MISC_PTR_CFG 0x7000 +#define SCR_AND_MISC_DATA_CFG_MASK 0xf0 + +/* SERDES registers. */ +#define E6000SW_SERDES_DEV 4 +#define E6000SW_SERDES_PCS_CTL1 0x1000 +#define E6000SW_SERDES_SGMII_CTL 0x2000 +#define E6000SW_SERDES_PDOWN (1 << 11) + +#define E6000SW_NUM_VLANS 128 +#define E6000SW_NUM_LAGMASK 8 +#define E6000SW_NUM_PHY_REGS 29 +#define E6000SW_MAX_PORTS 11 +#define E6000SW_DEFAULT_AGETIME 20 +#define E6000SW_RETRIES 100 +#define E6000SW_SMI_TIMEOUT 16 +#define E6000SW_IOBUF_BLKSIZE (4 * 1024) /* 4 KiB block */ +#define E6000SW_IOBUF_SIZE (64 * 1024) /* 64 KiB max. 
*/ #endif /* _E6000SWREG_H_ */ diff --git a/sys/dev/etherswitch/etherswitch.c b/sys/dev/etherswitch/etherswitch.c index ee9b710..1f4b0dc 100644 --- a/sys/dev/etherswitch/etherswitch.c +++ b/sys/dev/etherswitch/etherswitch.c @@ -72,13 +72,16 @@ driver_t etherswitch_driver = { sizeof(struct etherswitch_softc), }; -static d_ioctl_t etherswitchioctl; +static d_ioctl_t etherswitchioctl; +static d_read_t etherswitchioread; +static d_write_t etherswitchiowrite; static struct cdevsw etherswitch_cdevsw = { .d_version = D_VERSION, - .d_flags = D_TRACKCLOSE, .d_ioctl = etherswitchioctl, .d_name = "etherswitch", + .d_read = etherswitchioread, + .d_write = etherswitchiowrite, }; static void @@ -180,6 +183,14 @@ etherswitchioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flags, struct error = ETHERSWITCH_SETVGROUP(etherswitch, (etherswitch_vlangroup_t *)data); break; + case IOETHERSWITCHGETLAGGROUP: + error = ETHERSWITCH_GETLAGGROUP(etherswitch, (etherswitch_laggroup_t *)data); + break; + + case IOETHERSWITCHSETLAGGROUP: + error = ETHERSWITCH_SETLAGGROUP(etherswitch, (etherswitch_laggroup_t *)data); + break; + case IOETHERSWITCHGETPHYREG: phyreg = (etherswitch_phyreg_t *)data; phyreg->val = ETHERSWITCH_READPHYREG(etherswitch, phyreg->phy, phyreg->reg); @@ -207,4 +218,84 @@ etherswitchioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flags, struct return (error); } +static int +etherswitchioread(struct cdev *cdev, struct uio *uio, int ioflag) +{ + device_t etherswitch; + int error; + ssize_t ioblksize, iosize, len; + struct etherswitch_softc *sc; + void *iobuf; + + sc = (struct etherswitch_softc *)cdev->si_drv1; + etherswitch = device_get_parent(sc->sc_dev); + ioblksize = ETHERSWITCH_GETIOBLKSIZE(etherswitch); + iosize = ETHERSWITCH_GETIOSIZE(etherswitch); + iobuf = ETHERSWITCH_GETIOBUF(etherswitch); + if (ioblksize == -1 || iosize == -1 || iobuf == NULL) + return (EINVAL); + if (uio->uio_offset == iosize) + return (0); + if (uio->uio_offset > iosize) + return (EIO); + 
if (uio->uio_resid > ioblksize) + return (EIO); + + error = 0; + while (uio->uio_resid > 0) { + if (uio->uio_offset >= iosize) + break; + len = MIN(ioblksize - (uio->uio_offset & (ioblksize - 1)), + uio->uio_resid); + error = ETHERSWITCH_IOREAD(etherswitch, uio->uio_offset, len); + if (error != 0) + break; + error = uiomove(iobuf, len, uio); + if (error != 0) + break; + } + + return (error); +} + +static int +etherswitchiowrite(struct cdev *cdev, struct uio *uio, int ioflag) +{ + device_t etherswitch; + int error; + off_t offset; + ssize_t ioblksize, iosize, len; + struct etherswitch_softc *sc; + void *iobuf; + + sc = (struct etherswitch_softc *)cdev->si_drv1; + etherswitch = device_get_parent(sc->sc_dev); + ioblksize = ETHERSWITCH_GETIOBLKSIZE(etherswitch); + iosize = ETHERSWITCH_GETIOSIZE(etherswitch); + iobuf = ETHERSWITCH_GETIOBUF(etherswitch); + if (ioblksize == -1 || iosize == -1 || iobuf == NULL) + return (EINVAL); + if (uio->uio_offset >= iosize) + return (EIO); + if (uio->uio_resid > ioblksize) + return (EIO); + + error = 0; + while (uio->uio_resid > 0) { + if (uio->uio_offset >= iosize) + break; + len = MIN(ioblksize - (uio->uio_offset & (ioblksize - 1)), + uio->uio_resid); + offset = uio->uio_offset; + error = uiomove(iobuf, len, uio); + if (error != 0) + break; + error = ETHERSWITCH_IOWRITE(etherswitch, offset, len); + if (error != 0) + break; + } + + return (error); +} + MODULE_VERSION(etherswitch, 1); diff --git a/sys/dev/etherswitch/etherswitch.h b/sys/dev/etherswitch/etherswitch.h index 2619019..c047c72 100644 --- a/sys/dev/etherswitch/etherswitch.h +++ b/sys/dev/etherswitch/etherswitch.h @@ -14,7 +14,7 @@ extern driver_t etherswitch_driver; struct etherswitch_reg { uint16_t reg; - uint16_t val; + uint32_t val; }; typedef struct etherswitch_reg etherswitch_reg_t; @@ -36,11 +36,23 @@ typedef struct etherswitch_phyreg etherswitch_phyreg_t; #define ETHERSWITCH_VLAN_CAPS_BITS \ "\020\1ISL\2PORT\3DOT1Q\4DOT1Q4K\5QinQ" +#define ETHERSWITCH_CAPS_PORTS_MASK 
(1 << 0) /* Ports mask */ +#define ETHERSWITCH_CAPS_LAGG (1 << 1) /* LAGG support */ +#define ETHERSWITCH_CAPS_PSTATE (1 << 2) /* Port state */ +#define ETHERSWITCH_CAPS_BITS \ +"\020\1PORTSMASK\2LAGG\3PSTATE" + +#define MAX_PORTS 1024 +#define MAX_PORTS_UINT32 (MAX_PORTS / sizeof(uint32_t)) + struct etherswitch_info { int es_nports; int es_nvlangroups; + int es_nlaggroups; char es_name[ETHERSWITCH_NAMEMAX]; uint32_t es_vlan_caps; + uint32_t es_switch_caps; + uint32_t es_ports_mask[MAX_PORTS_UINT32]; }; typedef struct etherswitch_info etherswitch_info_t; @@ -61,13 +73,36 @@ typedef struct etherswitch_conf etherswitch_conf_t; #define ETHERSWITCH_PORT_DROPUNTAGGED (1 << 4) #define ETHERSWITCH_PORT_DOUBLE_TAG (1 << 5) #define ETHERSWITCH_PORT_INGRESS (1 << 6) -#define ETHERSWITCH_PORT_FLAGS_BITS \ -"\020\1CPUPORT\2STRIPTAG\3ADDTAG\4FIRSTLOCK\5DROPUNTAGGED\6QinQ\7INGRESS" +#define ETHERSWITCH_PORT_DROPTAGGED (1 << 7) +#define ETHERSWITCH_PORT_FLAGS_BITS \ +"\020\1CPUPORT\2STRIPTAG\3ADDTAG\4FIRSTLOCK\5DROPUNTAGGED\6QinQ\7INGRESS" \ +"\10DROPTAGGED" + +#define ETHERSWITCH_PSTATE_DISABLED (1 << 0) +#define ETHERSWITCH_PSTATE_BLOCKING (1 << 1) +#define ETHERSWITCH_PSTATE_LEARNING (1 << 2) +#define ETHERSWITCH_PSTATE_FORWARDING (1 << 3) +#define ETHERSWITCH_PSTATE_BITS \ +"\020\1DISABLED\2BLOCKING\3LEARNING\4FORWARDING" + +#define ETHERSWITCH_PORT_MAX_LEDS 3 + +enum etherswitch_port_led { + ETHERSWITCH_PORT_LED_DEFAULT, + ETHERSWITCH_PORT_LED_ON, + ETHERSWITCH_PORT_LED_OFF, + ETHERSWITCH_PORT_LED_BLINK, + ETHERSWITCH_PORT_LED_MAX +}; +typedef enum etherswitch_port_led etherswitch_port_led_t; struct etherswitch_port { int es_port; int es_pvid; + int es_nleds; uint32_t es_flags; + uint32_t es_state; + etherswitch_port_led_t es_led[ETHERSWITCH_PORT_MAX_LEDS]; union { struct ifreq es_uifr; struct ifmediareq es_uifmr; @@ -77,6 +112,14 @@ struct etherswitch_port { }; typedef struct etherswitch_port etherswitch_port_t; +struct etherswitch_laggroup { + int es_lagg_valid; + int 
es_laggroup; + int es_member_ports; + int es_untagged_ports; +}; +typedef struct etherswitch_laggroup etherswitch_laggroup_t; + struct etherswitch_vlangroup { int es_vlangroup; int es_vid; @@ -99,5 +142,7 @@ typedef struct etherswitch_vlangroup etherswitch_vlangroup_t; #define IOETHERSWITCHSETPHYREG _IOW('i', 9, etherswitch_phyreg_t) #define IOETHERSWITCHGETCONF _IOR('i', 10, etherswitch_conf_t) #define IOETHERSWITCHSETCONF _IOW('i', 11, etherswitch_conf_t) +#define IOETHERSWITCHGETLAGGROUP _IOWR('i', 12, etherswitch_laggroup_t) +#define IOETHERSWITCHSETLAGGROUP _IOW('i', 13, etherswitch_laggroup_t) #endif diff --git a/sys/dev/etherswitch/etherswitch_if.m b/sys/dev/etherswitch/etherswitch_if.m index a2aea02..b684027 100644 --- a/sys/dev/etherswitch/etherswitch_if.m +++ b/sys/dev/etherswitch/etherswitch_if.m @@ -25,6 +25,18 @@ CODE { } static int + null_etherswitch_getlaggroup(device_t dev, etherswitch_laggroup_t *conf) + { + return (EINVAL); + } + + static int + null_etherswitch_setlaggroup(device_t dev, etherswitch_laggroup_t *conf) + { + return (EINVAL); + } + + static int null_etherswitch_getconf(device_t dev, etherswitch_conf_t *conf) { return (0); @@ -35,6 +47,36 @@ CODE { { return (0); } + + static ssize_t + null_etherswitch_getioblksize(device_t dev) + { + return (-1); + } + + static ssize_t + null_etherswitch_getiosize(device_t dev) + { + return (-1); + } + + static void * + null_etherswitch_getiobuf(device_t dev) + { + return (NULL); + } + + static int + null_etherswitch_ioread(device_t dev, off_t off, ssize_t len) + { + return (EIO); + } + + static int + null_etherswitch_iowrite(device_t dev, off_t off, ssize_t len) + { + return (EIO); + } }; # @@ -127,6 +169,22 @@ METHOD int setvgroup { } # +# Get LAGG configuration +# +METHOD int getlaggroup { + device_t dev; + etherswitch_laggroup_t *vg; +} DEFAULT null_etherswitch_getlaggroup; + +# +# Set LAGG configuration +# +METHOD int setlaggroup { + device_t dev; + etherswitch_laggroup_t *vg; +} DEFAULT 
null_etherswitch_setlaggroup; + +# # Get the Switch configuration # METHOD int getconf { @@ -141,3 +199,42 @@ METHOD int setconf { device_t dev; etherswitch_conf_t *conf; } DEFAULT null_etherswitch_setconf; + +# +# Get the IO buffer block size +# +METHOD ssize_t getioblksize { + device_t dev; +} DEFAULT null_etherswitch_getioblksize; + +# +# Get the IO buffer size +# +METHOD ssize_t getiosize { + device_t dev; +} DEFAULT null_etherswitch_getiosize; + +# +# Get the IO buffer +# +METHOD void * getiobuf { + device_t dev; +} DEFAULT null_etherswitch_getiobuf; + +# +# Perform a read operation and save data into IO buffer +# +METHOD int ioread { + device_t dev; + off_t off; + ssize_t len; +} DEFAULT null_etherswitch_ioread; + +# +# Perform a write operation (write the data in the IO buffer) +# +METHOD int iowrite { + device_t dev; + off_t off; + ssize_t len; +} DEFAULT null_etherswitch_iowrite; diff --git a/sys/dev/fdt/fdt_common.c b/sys/dev/fdt/fdt_common.c index 4e0d6e2..395b77b 100644 --- a/sys/dev/fdt/fdt_common.c +++ b/sys/dev/fdt/fdt_common.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #define FDT_TYPE_LEN 64 #define FDT_REG_CELLS 4 +#define FDT_RANGES_SIZE 48 vm_paddr_t fdt_immr_pa; vm_offset_t fdt_immr_va; @@ -144,7 +145,7 @@ fdt_get_range_by_busaddr(phandle_t node, u_long addr, u_long *base, int fdt_get_range(phandle_t node, int range_id, u_long *base, u_long *size) { - pcell_t ranges[6], *rangesptr; + pcell_t ranges[FDT_RANGES_SIZE], *rangesptr; pcell_t addr_cells, size_cells, par_addr_cells; u_long par_bus_addr, pbase, psize; int err, len, tuple_size, tuples; @@ -416,13 +417,13 @@ fdt_addrsize_cells(phandle_t node, int *addr_cells, int *size_cells) * Retrieve #{address,size}-cells. 
*/ cell_size = sizeof(cell); - if (OF_getprop(node, "#address-cells", &cell, cell_size) < cell_size) + if (OF_getencprop(node, "#address-cells", &cell, cell_size) < cell_size) cell = 2; - *addr_cells = fdt32_to_cpu((int)cell); + *addr_cells = (int)cell; - if (OF_getprop(node, "#size-cells", &cell, cell_size) < cell_size) + if (OF_getencprop(node, "#size-cells", &cell, cell_size) < cell_size) cell = 1; - *size_cells = fdt32_to_cpu((int)cell); + *size_cells = (int)cell; if (*addr_cells > 3 || *size_cells > 2) return (ERANGE); @@ -537,11 +538,11 @@ fdt_get_phyaddr(phandle_t node, device_t dev, int *phy_addr, void **phy_sc) phy_node = OF_node_from_xref(phy_handle); - if (OF_getprop(phy_node, "reg", (void *)&phy_reg, + if (OF_getencprop(phy_node, "reg", (void *)&phy_reg, sizeof(phy_reg)) <= 0) return (ENXIO); - *phy_addr = fdt32_to_cpu(phy_reg); + *phy_addr = phy_reg; /* * Search for softc used to communicate with phy. diff --git a/sys/dev/fdt/fdt_common.h b/sys/dev/fdt/fdt_common.h index 94f84ff..4d5e47b 100644 --- a/sys/dev/fdt/fdt_common.h +++ b/sys/dev/fdt/fdt_common.h @@ -70,12 +70,6 @@ extern vm_paddr_t fdt_immr_pa; extern vm_offset_t fdt_immr_va; extern vm_offset_t fdt_immr_size; -struct fdt_pm_mask_entry { - char *compat; - uint32_t mask; -}; -extern struct fdt_pm_mask_entry fdt_pm_mask_table[]; - #if defined(FDT_DTB_STATIC) extern u_char fdt_static_dtb; #endif diff --git a/sys/dev/flash/mx25l.c b/sys/dev/flash/mx25l.c index b41986f..b2983e9 100644 --- a/sys/dev/flash/mx25l.c +++ b/sys/dev/flash/mx25l.c @@ -112,6 +112,7 @@ struct mx25l_flash_ident flash_devices[] = { { "en25p64", 0x1c, 0x2017, 64 * 1024, 128, FL_NONE }, { "en25q64", 0x1c, 0x3017, 64 * 1024, 128, FL_ERASE_4K }, { "m25p64", 0x20, 0x2017, 64 * 1024, 128, FL_NONE }, + { "n25q128", 0x20, 0xba18, 64 * 1024, 256, FL_ERASE_4K }, { "mx25ll32", 0xc2, 0x2016, 64 * 1024, 64, FL_NONE }, { "mx25ll64", 0xc2, 0x2017, 64 * 1024, 128, FL_NONE }, { "mx25ll128", 0xc2, 0x2018, 64 * 1024, 256, FL_ERASE_4K | 
FL_ERASE_32K }, @@ -126,6 +127,7 @@ struct mx25l_flash_ident flash_devices[] = { { "w25x32", 0xef, 0x3016, 64 * 1024, 64, FL_ERASE_4K }, { "w25x64", 0xef, 0x3017, 64 * 1024, 128, FL_ERASE_4K }, { "w25q32", 0xef, 0x4016, 64 * 1024, 64, FL_ERASE_4K }, + { "w25q32jv", 0xef, 0x7016, 64 * 1024, 64, FL_ERASE_4K }, { "w25q64", 0xef, 0x4017, 64 * 1024, 128, FL_ERASE_4K }, { "w25q64bv", 0xef, 0x4017, 64 * 1024, 128, FL_ERASE_4K }, { "w25q128", 0xef, 0x4018, 64 * 1024, 256, FL_ERASE_4K }, @@ -396,6 +398,7 @@ mx25l_read(device_t dev, off_t offset, caddr_t data, off_t count) txBuf[4] = 0; } + memset(data, 0, count); cmd.tx_cmd = txBuf; cmd.rx_cmd = rxBuf; cmd.tx_data = data; diff --git a/sys/dev/gpio/gpio_if.m b/sys/dev/gpio/gpio_if.m index 6306f33..d25a0c7 100644 --- a/sys/dev/gpio/gpio_if.m +++ b/sys/dev/gpio/gpio_if.m @@ -56,6 +56,43 @@ CODE { return (0); } + + static int + gpio_default_pwm_getcaps(device_t dev __unused, int32_t pwm __unused, + uint32_t pini __unused, uint32_t *caps) + { + + *caps = 0; + + return (0); + } + + static int + gpio_default_pwm_max(device_t dev __unused, uint32_t *pwmmax) + { + + *pwmmax = 0; + + return (0); + } + + static int + gpio_default_pwm_get(device_t dev __unused, uint32_t pwm __unused, + uint32_t pin __unused, uint32_t reg __unused, + uint32_t *value __unused) + { + + return (EINVAL); + } + + static int + gpio_default_pwm_set(device_t dev __unused, uint32_t pwm __unused, + uint32_t pin __unused, uint32_t reg __unused, + uint32_t value __unused) + { + + return (EINVAL); + } }; HEADER { @@ -140,6 +177,46 @@ METHOD int pin_setflags { }; # +# Get maximum pwm number +# +METHOD int pwm_max { + device_t dev; + int *maxpwm; +} DEFAULT gpio_default_pwm_max; + +# +# Get pwm capabilities +# +METHOD int pwm_getcaps { + device_t dev; + int32_t pwm_num; + uint32_t pin_num; + uint32_t *caps; +} DEFAULT gpio_default_pwm_getcaps; + +# +# Get pwm settings of pin specifed by pin_num +# +METHOD int pwm_get { + device_t dev; + int32_t pwm_num; + uint32_t 
pin_num; + uint32_t pwm_reg; + uint32_t *pwm_value; +} DEFAULT gpio_default_pwm_get; + +# +# Set pwm settings of pin specifed by pin_num +# +METHOD int pwm_set { + device_t dev; + int32_t pwm_num; + uint32_t pin_num; + uint32_t pwm_reg; + uint32_t pwm_value; +} DEFAULT gpio_default_pwm_set; + +# # Allow the GPIO controller to map the gpio-specifier on its own. # METHOD int map_gpios { diff --git a/sys/dev/gpio/gpioc.c b/sys/dev/gpio/gpioc.c index a5a9d81..2a6895a 100644 --- a/sys/dev/gpio/gpioc.c +++ b/sys/dev/gpio/gpioc.c @@ -121,9 +121,10 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, struct thread *td) { device_t bus; - int max_pin, res; + int max_pin, max_pwm, res; struct gpioc_softc *sc = cdev->si_drv1; struct gpio_pin pin; + struct gpio_pwm_req pwmreq; struct gpio_req req; uint32_t caps; @@ -142,9 +143,16 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, res = GPIO_PIN_GETFLAGS(sc->sc_pdev, pin.gp_pin, &pin.gp_flags); /* Fail early */ - if (res) + if (res != 0) + break; + res = GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, + &pin.gp_caps); + if (res != 0) + break; + res = GPIO_PWM_GETCAPS(sc->sc_pdev, -1, pin.gp_pin, + &pin.gp_pwm_caps); + if (res != 0) break; - GPIO_PIN_GETCAPS(sc->sc_pdev, pin.gp_pin, &pin.gp_caps); GPIOBUS_PIN_GETNAME(bus, pin.gp_pin, pin.gp_name); bcopy(&pin, arg, sizeof(pin)); break; @@ -185,6 +193,39 @@ gpioc_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int fflag, res = GPIOBUS_PIN_SETNAME(bus, pin.gp_pin, pin.gp_name); break; + case GPIOMAXPWM: + max_pwm = -1; + res = GPIO_PWM_MAX(sc->sc_pdev, &max_pwm); + bcopy(&max_pwm, arg, sizeof(max_pwm)); + break; + case GPIOPWMGETCONFIG: + bcopy(arg, &pwmreq, sizeof(pwmreq)); + res = GPIO_PWM_GETCAPS(sc->sc_pdev, pwmreq.gp_pwm, + pwmreq.gp_pwm_pin, &pwmreq.gp_pwm_caps); + dprintf("pwm getcaps pwm %d pin %d -> caps %#x\n", + pwmreq.gp_pwm, pwmreq.gp_pwm_pin, + pwmreq.gp_pwm_caps); + bcopy(&pwmreq, arg, sizeof(pwmreq)); + break; + case GPIOPWMGET: + 
bcopy(arg, &pwmreq, sizeof(pwmreq)); + res = GPIO_PWM_GET(sc->sc_pdev, pwmreq.gp_pwm, + pwmreq.gp_pwm_pin, pwmreq.gp_pwm_reg, + &pwmreq.gp_pwm_value); + dprintf("pwm get pwm %d pin %d -> reg %#x %d\n", + pwmreq.gp_pwm, pwmreq.gp_pwm_pin, pwmreq.gp_pwm_reg, + pwmreq.gp_pwm_value); + bcopy(&pwmreq, arg, sizeof(pwmreq)); + break; + case GPIOPWMSET: + bcopy(arg, &pwmreq, sizeof(pwmreq)); + res = GPIO_PWM_SET(sc->sc_pdev, pwmreq.gp_pwm, + pwmreq.gp_pwm_pin, pwmreq.gp_pwm_reg, + pwmreq.gp_pwm_value); + dprintf("pwm set pwm %d pin %d -> reg %#x %d\n", + pwmreq.gp_pwm, pwmreq.gp_pwm_pin, pwmreq.gp_pwm_reg, + pwmreq.gp_pwm_value); + break; default: return (ENOTTY); break; diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h index f0161fc..d3536a1 100644 --- a/sys/dev/hyperv/include/hyperv.h +++ b/sys/dev/hyperv/include/hyperv.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. diff --git a/sys/dev/hyperv/netvsc/hn_nvs.c b/sys/dev/hyperv/netvsc/hn_nvs.c index a445224..5605b2d 100644 --- a/sys/dev/hyperv/netvsc/hn_nvs.c +++ b/sys/dev/hyperv/netvsc/hn_nvs.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. diff --git a/sys/dev/hyperv/netvsc/hn_nvs.h b/sys/dev/hyperv/netvsc/hn_nvs.h index f716abf..a14d7b7 100644 --- a/sys/dev/hyperv/netvsc/hn_nvs.h +++ b/sys/dev/hyperv/netvsc/hn_nvs.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. 
diff --git a/sys/dev/hyperv/netvsc/hn_rndis.c b/sys/dev/hyperv/netvsc/hn_rndis.c index 6e6e9cc..35138ea 100644 --- a/sys/dev/hyperv/netvsc/hn_rndis.c +++ b/sys/dev/hyperv/netvsc/hn_rndis.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. diff --git a/sys/dev/hyperv/netvsc/hn_rndis.h b/sys/dev/hyperv/netvsc/hn_rndis.h index 38238b8..c4cd952 100644 --- a/sys/dev/hyperv/netvsc/hn_rndis.h +++ b/sys/dev/hyperv/netvsc/hn_rndis.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c index 3e47c8b..8ddd922 100644 --- a/sys/dev/hyperv/netvsc/if_hn.c +++ b/sys/dev/hyperv/netvsc/if_hn.c @@ -1,6 +1,6 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. 
* @@ -464,7 +464,7 @@ SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, #ifdef HN_IFSTART_SUPPORT /* Use ifnet.if_start instead of ifnet.if_transmit */ -static int hn_use_if_start = 0; +static int hn_use_if_start = 1; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); #endif @@ -1345,7 +1345,7 @@ hn_attach(device_t dev) ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); - ifp->if_snd.ifq_drv_maxlen = qdepth - 1; + ifp->if_snd.ifq_drv_maxlen = 0; IFQ_SET_READY(&ifp->if_snd); } else #endif diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h index a964b4f..166f10e 100644 --- a/sys/dev/hyperv/netvsc/if_hnreg.h +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016 Microsoft Corp. + * Copyright (c) 2016-2017 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/hyperv/netvsc/if_hnvar.h b/sys/dev/hyperv/netvsc/if_hnvar.h index 42b885e..3e16963 100644 --- a/sys/dev/hyperv/netvsc/if_hnvar.h +++ b/sys/dev/hyperv/netvsc/if_hnvar.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016 Microsoft Corp. + * Copyright (c) 2016-2017 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/hyperv/pcib/vmbus_pcib.c b/sys/dev/hyperv/pcib/vmbus_pcib.c index b54c67b..0ef9187 100644 --- a/sys/dev/hyperv/pcib/vmbus_pcib.c +++ b/sys/dev/hyperv/pcib/vmbus_pcib.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016 Microsoft Corp. + * Copyright (c) 2016-2017 Microsoft Corp. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c index 5d6279e..1256f09 100644 --- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c +++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. diff --git a/sys/dev/hyperv/storvsc/hv_vstorage.h b/sys/dev/hyperv/storvsc/hv_vstorage.h index 95354bb..992cea2 100644 --- a/sys/dev/hyperv/storvsc/hv_vstorage.h +++ b/sys/dev/hyperv/storvsc/hv_vstorage.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c index e70c4c4..80b0cff 100644 --- a/sys/dev/hyperv/utilities/hv_kvp.c +++ b/sys/dev/hyperv/utilities/hv_kvp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014,2016 Microsoft Corp. + * Copyright (c) 2014,2016-2017 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/hyperv/utilities/vmbus_timesync.c b/sys/dev/hyperv/utilities/vmbus_timesync.c index 66b1e48..2a8d3a9 100644 --- a/sys/dev/hyperv/utilities/vmbus_timesync.c +++ b/sys/dev/hyperv/utilities/vmbus_timesync.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014,2016 Microsoft Corp. + * Copyright (c) 2014,2016-2017 Microsoft Corp. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c b/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c index d1c1b83..1c90d4b 100644 --- a/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c +++ b/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016 Microsoft Corp. + * Copyright (c) 2016-2017 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/hyperv/vmbus/hyperv.c b/sys/dev/hyperv/vmbus/hyperv.c index 0c03bdb..2d00b45 100644 --- a/sys/dev/hyperv/vmbus/hyperv.c +++ b/sys/dev/hyperv/vmbus/hyperv.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. diff --git a/sys/dev/hyperv/vmbus/vmbus.c b/sys/dev/hyperv/vmbus/vmbus.c index c0faada..b027752 100644 --- a/sys/dev/hyperv/vmbus/vmbus.c +++ b/sys/dev/hyperv/vmbus/vmbus.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2009-2012,2016-2017 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. 
@@ -129,6 +129,13 @@ static void vmbus_event_proc_dummy(struct vmbus_softc *, static struct vmbus_softc *vmbus_sc; +SYSCTL_NODE(_hw, OID_AUTO, vmbus, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, + "Hyper-V vmbus"); + +static int vmbus_pin_evttask = 1; +SYSCTL_INT(_hw_vmbus, OID_AUTO, pin_evttask, CTLFLAG_RDTUN, + &vmbus_pin_evttask, 0, "Pin event tasks to their respective CPU"); + extern inthand_t IDTVEC(vmbus_isr), IDTVEC(vmbus_isr_pti); static const uint32_t vmbus_version[] = { @@ -906,10 +913,16 @@ vmbus_intr_setup(struct vmbus_softc *sc) VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast( "hyperv event", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, event_tq, cpu)); - CPU_SETOF(cpu, &cpu_mask); - taskqueue_start_threads_cpuset( - VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask, - "hvevent%d", cpu); + if (vmbus_pin_evttask) { + CPU_SETOF(cpu, &cpu_mask); + taskqueue_start_threads_cpuset( + VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, + &cpu_mask, "hvevent%d", cpu); + } else { + taskqueue_start_threads( + VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, + "hvevent%d", cpu); + } /* * Setup tasks and taskqueues to handle messages. diff --git a/sys/dev/hyperv/vmbus/vmbus_et.c b/sys/dev/hyperv/vmbus/vmbus_et.c index 722e5bf..d9ab2a9 100644 --- a/sys/dev/hyperv/vmbus/vmbus_et.c +++ b/sys/dev/hyperv/vmbus/vmbus_et.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015,2016 Microsoft Corp. + * Copyright (c) 2015,2016-2017 Microsoft Corp. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/ichsmb/ichsmb_pci.c b/sys/dev/ichsmb/ichsmb_pci.c index 38d8d44..383db94 100644 --- a/sys/dev/ichsmb/ichsmb_pci.c +++ b/sys/dev/ichsmb/ichsmb_pci.c @@ -67,35 +67,89 @@ __FBSDID("$FreeBSD$"); #include <dev/ichsmb/ichsmb_reg.h> /* PCI unique identifiers */ -#define ID_82801AA 0x24138086 -#define ID_82801AB 0x24238086 -#define ID_82801BA 0x24438086 -#define ID_82801CA 0x24838086 -#define ID_82801DC 0x24C38086 -#define ID_82801EB 0x24D38086 -#define ID_82801FB 0x266A8086 -#define ID_82801GB 0x27da8086 -#define ID_82801H 0x283e8086 -#define ID_82801I 0x29308086 -#define ID_82801JI 0x3a308086 -#define ID_PCH 0x3b308086 -#define ID_6300ESB 0x25a48086 -#define ID_631xESB 0x269b8086 -#define ID_DH89XXCC 0x23308086 -#define ID_PATSBURG 0x1d228086 -#define ID_CPT 0x1c228086 -#define ID_PPT 0x1e228086 -#define ID_AVOTON 0x1f3c8086 -#define ID_COLETOCRK 0x23B08086 -#define ID_LPT 0x8c228086 -#define ID_LPTLP 0x9c228086 -#define ID_WCPT 0x8ca28086 -#define ID_WCPTLP 0x9ca28086 -#define ID_WELLSBURG 0x8d228086 -#define ID_SRPT 0xa1238086 -#define ID_SRPTLP 0x9d238086 +#define PCI_VENDOR_INTEL 0x8086 +#define ID_82801AA 0x2413 +#define ID_82801AB 0x2423 +#define ID_82801BA 0x2443 +#define ID_82801CA 0x2483 +#define ID_82801DC 0x24C3 +#define ID_82801EB 0x24D3 +#define ID_82801FB 0x266A +#define ID_82801GB 0x27da +#define ID_82801H 0x283e +#define ID_82801I 0x2930 +#define ID_EP80579 0x5032 +#define ID_82801JI 0x3a30 +#define ID_82801JD 0x3a60 +#define ID_PCH 0x3b30 +#define ID_6300ESB 0x25a4 +#define ID_631xESB 0x269b +#define ID_DH89XXCC 0x2330 +#define ID_PATSBURG 0x1d22 +#define ID_CPT 0x1c22 +#define ID_PPT 0x1e22 +#define ID_AVOTON 0x1f3c +#define ID_COLETOCRK 0x23B0 +#define ID_LPT 0x8c22 +#define ID_LPTLP 0x9c22 +#define ID_WCPT 0x8ca2 +#define ID_WCPTLP 0x9ca2 +#define ID_BAYTRAIL 0x0f12 +#define ID_BRASWELL 0x2292 +#define ID_WELLSBURG 0x8d22 +#define ID_SRPT 0xa123 +#define 
ID_SRPTLP 0x9d23 +#define ID_DENVERTON 0x19df +#define ID_BROXTON 0x5ad4 +#define ID_LEWISBURG 0xa1a3 +#define ID_LEWISBURG2 0xa223 +#define ID_KABYLAKE 0xa2a3 -#define PCIS_SERIALBUS_SMBUS_PROGIF 0x00 +static const struct ichsmb_device { + uint16_t id; + const char *name; +} ichsmb_devices[] = { + { ID_82801AA, "Intel 82801AA (ICH) SMBus controller" }, + { ID_82801AB, "Intel 82801AB (ICH0) SMBus controller" }, + { ID_82801BA, "Intel 82801BA (ICH2) SMBus controller" }, + { ID_82801CA, "Intel 82801CA (ICH3) SMBus controller" }, + { ID_82801DC, "Intel 82801DC (ICH4) SMBus controller" }, + { ID_82801EB, "Intel 82801EB (ICH5) SMBus controller" }, + { ID_82801FB, "Intel 82801FB (ICH6) SMBus controller" }, + { ID_82801GB, "Intel 82801GB (ICH7) SMBus controller" }, + { ID_82801H, "Intel 82801H (ICH8) SMBus controller" }, + { ID_82801I, "Intel 82801I (ICH9) SMBus controller" }, + { ID_82801GB, "Intel 82801GB (ICH7) SMBus controller" }, + { ID_82801H, "Intel 82801H (ICH8) SMBus controller" }, + { ID_82801I, "Intel 82801I (ICH9) SMBus controller" }, + { ID_EP80579, "Intel EP80579 SMBus controller" }, + { ID_82801JI, "Intel 82801JI (ICH10) SMBus controller" }, + { ID_82801JD, "Intel 82801JD (ICH10) SMBus controller" }, + { ID_PCH, "Intel PCH SMBus controller" }, + { ID_6300ESB, "Intel 6300ESB (ICH) SMBus controller" }, + { ID_631xESB, "Intel 631xESB/6321ESB (ESB2) SMBus controller" }, + { ID_DH89XXCC, "Intel DH89xxCC SMBus controller" }, + { ID_PATSBURG, "Intel Patsburg SMBus controller" }, + { ID_CPT, "Intel Cougar Point SMBus controller" }, + { ID_PPT, "Intel Panther Point SMBus controller" }, + { ID_AVOTON, "Intel Avoton SMBus controller" }, + { ID_LPT, "Intel Lynx Point SMBus controller" }, + { ID_LPTLP, "Intel Lynx Point-LP SMBus controller" }, + { ID_WCPT, "Intel Wildcat Point SMBus controller" }, + { ID_WCPTLP, "Intel Wildcat Point-LP SMBus controller" }, + { ID_BAYTRAIL, "Intel Baytrail SMBus controller" }, + { ID_BRASWELL, "Intel Braswell SMBus controller" }, + { 
ID_COLETOCRK, "Intel Coleto Creek SMBus controller" }, + { ID_WELLSBURG, "Intel Wellsburg SMBus controller" }, + { ID_SRPT, "Intel Sunrise Point-H SMBus controller" }, + { ID_SRPTLP, "Intel Sunrise Point-LP SMBus controller" }, + { ID_DENVERTON, "Intel Denverton SMBus controller" }, + { ID_BROXTON, "Intel Broxton SMBus controller" }, + { ID_LEWISBURG, "Intel Lewisburg SMBus controller" }, + { ID_LEWISBURG2,"Intel Lewisburg SMBus controller" }, + { ID_KABYLAKE, "Intel Kaby Lake SMBus controller" }, + { 0, NULL }, +}; /* Internal functions */ static int ichsmb_pci_probe(device_t dev); @@ -141,95 +195,19 @@ DRIVER_MODULE(ichsmb, pci, ichsmb_pci_driver, ichsmb_pci_devclass, 0, 0); static int ichsmb_pci_probe(device_t dev) { - /* Check PCI identifier */ - switch (pci_get_devid(dev)) { - case ID_82801AA: - device_set_desc(dev, "Intel 82801AA (ICH) SMBus controller"); - break; - case ID_82801AB: - device_set_desc(dev, "Intel 82801AB (ICH0) SMBus controller"); - break; - case ID_82801BA: - device_set_desc(dev, "Intel 82801BA (ICH2) SMBus controller"); - break; - case ID_82801CA: - device_set_desc(dev, "Intel 82801CA (ICH3) SMBus controller"); - break; - case ID_82801DC: - device_set_desc(dev, "Intel 82801DC (ICH4) SMBus controller"); - break; - case ID_82801EB: - device_set_desc(dev, "Intel 82801EB (ICH5) SMBus controller"); - break; - case ID_82801FB: - device_set_desc(dev, "Intel 82801FB (ICH6) SMBus controller"); - break; - case ID_82801GB: - device_set_desc(dev, "Intel 82801GB (ICH7) SMBus controller"); - break; - case ID_82801H: - device_set_desc(dev, "Intel 82801H (ICH8) SMBus controller"); - break; - case ID_82801I: - device_set_desc(dev, "Intel 82801I (ICH9) SMBus controller"); - break; - case ID_82801JI: - device_set_desc(dev, "Intel 82801JI (ICH10) SMBus controller"); - break; - case ID_PCH: - device_set_desc(dev, "Intel PCH SMBus controller"); - break; - case ID_6300ESB: - device_set_desc(dev, "Intel 6300ESB (ICH) SMBus controller"); - break; - case ID_631xESB: 
- device_set_desc(dev, "Intel 631xESB/6321ESB (ESB2) SMBus controller"); - break; - case ID_DH89XXCC: - device_set_desc(dev, "Intel DH89xxCC SMBus controller"); - break; - case ID_PATSBURG: - device_set_desc(dev, "Intel Patsburg SMBus controller"); - break; - case ID_CPT: - device_set_desc(dev, "Intel Cougar Point SMBus controller"); - break; - case ID_PPT: - device_set_desc(dev, "Intel Panther Point SMBus controller"); - break; - case ID_AVOTON: - device_set_desc(dev, "Intel Avoton SMBus controller"); - break; - case ID_LPT: - device_set_desc(dev, "Intel Lynx Point SMBus controller"); - break; - case ID_LPTLP: - device_set_desc(dev, "Intel Lynx Point-LP SMBus controller"); - break; - case ID_WCPT: - device_set_desc(dev, "Intel Wildcat Point SMBus controller"); - break; - case ID_WCPTLP: - device_set_desc(dev, "Intel Wildcat Point-LP SMBus controller"); - break; - case ID_COLETOCRK: - device_set_desc(dev, "Intel Coleto Creek SMBus controller"); - break; - case ID_WELLSBURG: - device_set_desc(dev, "Intel Wellsburg SMBus controller"); - break; - case ID_SRPT: - device_set_desc(dev, "Intel Sunrise Point-H SMBus controller"); - break; - case ID_SRPTLP: - device_set_desc(dev, "Intel Sunrise Point-LP SMBus controller"); - break; - default: + const struct ichsmb_device *device; + + if (pci_get_vendor(dev) != PCI_VENDOR_INTEL) return (ENXIO); + + for (device = ichsmb_devices; device->name != NULL; device++) { + if (pci_get_device(dev) == device->id) { + device_set_desc(dev, device->name); + return (ichsmb_probe(dev)); + } } - /* Done */ - return (ichsmb_probe(dev)); + return (ENXIO); } static int diff --git a/sys/dev/iicbus/iiconf.c b/sys/dev/iicbus/iiconf.c index 155cba7..1edc773 100644 --- a/sys/dev/iicbus/iiconf.c +++ b/sys/dev/iicbus/iiconf.c @@ -468,3 +468,55 @@ iicbus_transfer_gen(device_t dev, struct iic_msg *msgs, uint32_t nmsgs) iicbus_stop(bus); return (error); } + +int +iicdev_readfrom(device_t slavedev, uint8_t regaddr, void *buffer, + uint16_t buflen, int 
waithow) +{ + struct iic_msg msgs[2]; + uint8_t slaveaddr; + + /* + * Two transfers back to back with a repeat-start between them; first we + * write the address-within-device, then we read from the device. + */ + slaveaddr = iicbus_get_addr(slavedev); + + msgs[0].slave = slaveaddr; + msgs[0].flags = IIC_M_WR | IIC_M_NOSTOP; + msgs[0].len = 1; + msgs[0].buf = ®addr; + + msgs[1].slave = slaveaddr; + msgs[1].flags = IIC_M_RD; + msgs[1].len = buflen; + msgs[1].buf = buffer; + + return (iicbus_transfer_excl(slavedev, msgs, nitems(msgs), waithow)); +} + +int iicdev_writeto(device_t slavedev, uint8_t regaddr, void *buffer, + uint16_t buflen, int waithow) +{ + struct iic_msg msgs[2]; + uint8_t slaveaddr; + + /* + * Two transfers back to back with no stop or start between them; first + * we write the address then we write the data to that address, all in a + * single transfer from two scattered buffers. + */ + slaveaddr = iicbus_get_addr(slavedev); + + msgs[0].slave = slaveaddr; + msgs[0].flags = IIC_M_WR | IIC_M_NOSTOP; + msgs[0].len = 1; + msgs[0].buf = ®addr; + + msgs[1].slave = slaveaddr; + msgs[1].flags = IIC_M_WR | IIC_M_NOSTART; + msgs[1].len = buflen; + msgs[1].buf = buffer; + + return (iicbus_transfer_excl(slavedev, msgs, nitems(msgs), waithow)); +} diff --git a/sys/dev/iicbus/iiconf.h b/sys/dev/iicbus/iiconf.h index 03b9583..856b029 100644 --- a/sys/dev/iicbus/iiconf.h +++ b/sys/dev/iicbus/iiconf.h @@ -133,6 +133,16 @@ int iicbus_transfer_excl(device_t bus, struct iic_msg *msgs, uint32_t nmsgs, int how); int iicbus_transfer_gen(device_t bus, struct iic_msg *msgs, uint32_t nmsgs); +/* + * Simple register read/write routines, but the "register" can be any size. + * The transfers are done with iicbus_transfer_excl(). Reads use a repeat-start + * between sending the address and reading; writes use a single start/stop. 
+ */ +int iicdev_readfrom(device_t _slavedev, uint8_t _regaddr, void *_buffer, + uint16_t _buflen, int _waithow); +int iicdev_writeto(device_t _slavedev, uint8_t _regaddr, void *_buffer, + uint16_t _buflen, int _waithow); + #define IICBUS_MODVER 1 #define IICBUS_MINVER 1 #define IICBUS_MAXVER 1 diff --git a/sys/dev/iicbus/is31fl319x.c b/sys/dev/iicbus/is31fl319x.c new file mode 100644 index 0000000..1ad7f52 --- /dev/null +++ b/sys/dev/iicbus/is31fl319x.c @@ -0,0 +1,676 @@ +/*- + * Copyright (c) 2017 Rubicon Communications, LLC (Netgate) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Driver for the ISSI IS31FL319x - 3/6/9 channel light effect LED driver. + */ + +#include "opt_platform.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/gpio.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/sysctl.h> + +#include <dev/iicbus/iicbus.h> +#include <dev/iicbus/iiconf.h> + +#include <dev/gpio/gpiobusvar.h> +#ifdef FDT +#include <dev/ofw/ofw_bus.h> +#endif + +#include <dev/iicbus/is31fl319xreg.h> + +#include "gpio_if.h" +#include "iicbus_if.h" + +#define IS31FL3193 1 +#define IS31FL3196 2 +#define IS31FL3199 3 + +static struct ofw_compat_data compat_data[] = { + { "issi,is31fl3193", IS31FL3193 }, + { "issi,is31fl3196", IS31FL3196 }, + { "issi,is31fl3199", IS31FL3199 }, + { NULL, 0 } +}; + +struct is31fl319x_reg { + struct is31fl319x_softc *sc; + uint8_t data; + uint8_t id; + uint8_t reg; +}; + +struct is31fl319x_softc { + device_t sc_dev; + device_t sc_gpio_busdev; + int sc_max_pins; + uint8_t sc_pwm[IS31FL319X_MAX_PINS]; + uint8_t sc_conf1; + struct is31fl319x_reg sc_t0[IS31FL319X_MAX_PINS]; + struct is31fl319x_reg sc_t123[IS31FL319X_MAX_PINS / 3]; + struct is31fl319x_reg sc_t4[IS31FL319X_MAX_PINS]; +}; + +static __inline int +is31fl319x_write(device_t dev, uint8_t reg, uint8_t *data, size_t len) +{ + + return (iicdev_writeto(dev, reg, data, len, IIC_INTRWAIT)); +} + +static __inline int +is31fl319x_reg_update(struct is31fl319x_softc *sc, uint8_t reg) +{ + uint8_t data = 0; + + return (iicdev_writeto(sc->sc_dev, reg, &data, 1, IIC_INTRWAIT)); +} + +static int +is31fl319x_pwm_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error, led; + int32_t enable; + struct is31fl319x_softc *sc; + + sc = (struct is31fl319x_softc *)arg1; + led = arg2; + + enable = ((sc->sc_conf1 & IS31FL319X_CONF1_PWM(led)) != 0) ? 
0 : 1; + error = sysctl_handle_int(oidp, &enable, sizeof(enable), req); + if (error != 0 || req->newptr == NULL) + return (error); + + sc->sc_conf1 &= ~IS31FL319X_CONF1_PWM(led); + if (enable == 0) + sc->sc_conf1 |= IS31FL319X_CONF1_PWM(led); + if (is31fl319x_write(sc->sc_dev, IS31FL319X_CONF1, &sc->sc_conf1, + sizeof(sc->sc_conf1)) != 0) + return (ENXIO); + if (is31fl319x_reg_update(sc, IS31FL319X_DATA_UPDATE) != 0) + return (ENXIO); + if (is31fl319x_reg_update(sc, IS31FL319X_TIME_UPDATE) != 0) + return (ENXIO); + + return (0); +} + +static int +is31fl319x_pin_timer_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error; + int32_t a, b, ms; + struct is31fl319x_reg *timer; + struct is31fl319x_softc *sc; + + timer = (struct is31fl319x_reg *)arg1; + sc = timer->sc; + + a = timer->data & IS31FL319X_T0_A_MASK; + b = (timer->data & IS31FL319X_T0_B_MASK) >> 4; + ms = 260 * a * (2 << b); + error = sysctl_handle_int(oidp, &ms, sizeof(ms), req); + if (error != 0 || req->newptr == NULL) + return (error); + + if (ms > IS31FL319X_T0_MAX_TIME) + ms = IS31FL319X_T0_MAX_TIME; + + a = b = 0; + if (ms >= 260) { + ms /= 260; + while (ms / (2 << b) > 15) { + if (ms / (2 << b) > 15) + b++; + else + break; + } + a = ms / (2 << b); + } + timer->data = (b << 4) | a; + + if (is31fl319x_write(sc->sc_dev, timer->reg, &timer->data, + sizeof(timer->data)) != 0) + return (ENXIO); + if (is31fl319x_reg_update(sc, IS31FL319X_TIME_UPDATE) != 0) + return (ENXIO); + + return (0); +} + +static int +is31fl319x_dt_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error; + int32_t enable; + struct is31fl319x_reg *led; + struct is31fl319x_softc *sc; + + led = (struct is31fl319x_reg *)arg1; + sc = led->sc; + + enable = ((led->data & IS31FL319X_DT) != 0) ? 
1 : 0; + error = sysctl_handle_int(oidp, &enable, sizeof(enable), req); + if (error != 0 || req->newptr == NULL) + return (error); + + if (enable) + led->data |= IS31FL319X_DT; + else + led->data &= ~IS31FL319X_DT; + if (is31fl319x_write(sc->sc_dev, IS31FL319X_T123(led->id), &led->data, + sizeof(led->data)) != 0) + return (ENXIO); + if (is31fl319x_reg_update(sc, IS31FL319X_TIME_UPDATE) != 0) + return (ENXIO); + + return (0); +} + +static int +is31fl319x_t1t3_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error; + int32_t a, ms; + struct is31fl319x_reg *led; + struct is31fl319x_softc *sc; + + led = (struct is31fl319x_reg *)arg1; + sc = led->sc; + + a = (led->data & IS31FL319X_T1_A_MASK); + if (a >= 5 && a <= 6) + ms = 0; + else if (a == 7) + ms = 100; + else + ms = 260 * (2 << a); + error = sysctl_handle_int(oidp, &ms, sizeof(ms), req); + if (error != 0 || req->newptr == NULL) + return (error); + + if (ms > IS31FL319X_T1_MAX_TIME) + ms = IS31FL319X_T1_MAX_TIME; + + a = 0; + if (ms == 0) + a = 5; /* Breathing function disabled. 
*/ + if (ms == 100) + a = 7; /* 100 ms */ + else if (ms >= 260) { + ms /= 260; + while (ms / (2 << a) > 1) { + if (ms / (2 << a) > 1) + a++; + else + break; + } + } + led->data &= ~IS31FL319X_T1_A_MASK; + led->data |= (a & IS31FL319X_T1_A_MASK); + + if (is31fl319x_write(sc->sc_dev, IS31FL319X_T123(led->id), &led->data, + sizeof(led->data)) != 0) + return (ENXIO); + if (is31fl319x_reg_update(sc, IS31FL319X_TIME_UPDATE) != 0) + return (ENXIO); + + return (0); +} + +static int +is31fl319x_t2_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error; + int32_t b, ms; + struct is31fl319x_reg *led; + struct is31fl319x_softc *sc; + + led = (struct is31fl319x_reg *)arg1; + sc = led->sc; + + b = (led->data & IS31FL319X_T2_B_MASK) >> 4; + if (b > 0) + ms = 260 * (2 << (b - 1)); + else + ms = 0; + error = sysctl_handle_int(oidp, &ms, sizeof(ms), req); + if (error != 0 || req->newptr == NULL) + return (error); + + if (ms > IS31FL319X_T2_MAX_TIME) + ms = IS31FL319X_T2_MAX_TIME; + + b = 0; + if (ms >= 260) { + ms /= 260; + b = 1; + while (ms / (2 << (b - 1)) > 1) { + if (ms / (2 << (b - 1)) > 1) + b++; + else + break; + } + } + led->data &= ~IS31FL319X_T2_B_MASK; + led->data |= ((b << 4) & IS31FL319X_T2_B_MASK); + + if (is31fl319x_write(sc->sc_dev, IS31FL319X_T123(led->id), &led->data, + sizeof(led->data)) != 0) + return (ENXIO); + if (is31fl319x_reg_update(sc, IS31FL319X_TIME_UPDATE) != 0) + return (ENXIO); + + return (0); +} + +static void +is31fl319x_sysctl_attach(device_t dev) +{ + char strbuf[4]; + struct is31fl319x_softc *sc; + struct sysctl_ctx_list *ctx; + struct sysctl_oid *tree_node, *led_node, *ledN_node, *pin_node; + struct sysctl_oid *pinN_node; + struct sysctl_oid_list *tree, *led_tree, *ledN_tree, *pin_tree; + struct sysctl_oid_list *pinN_tree; + int led, pin; + + ctx = device_get_sysctl_ctx(dev); + tree_node = device_get_sysctl_tree(dev); + tree = SYSCTL_CHILDREN(tree_node); + pin_node = SYSCTL_ADD_NODE(ctx, tree, OID_AUTO, "pin", CTLFLAG_RD, + NULL, "Output Pins"); + pin_tree 
= SYSCTL_CHILDREN(pin_node); + + sc = device_get_softc(dev); + for (pin = 0; pin < sc->sc_max_pins; pin++) { + + snprintf(strbuf, sizeof(strbuf), "%d", pin); + pinN_node = SYSCTL_ADD_NODE(ctx, pin_tree, OID_AUTO, strbuf, + CTLFLAG_RD, NULL, "Output Pin"); + pinN_tree = SYSCTL_CHILDREN(pinN_node); + + sc->sc_t0[pin].sc = sc; + sc->sc_t0[pin].data = 0; + sc->sc_t0[pin].id = pin; + sc->sc_t0[pin].reg = IS31FL319X_T0(pin); + SYSCTL_ADD_PROC(ctx, pinN_tree, OID_AUTO, "T0", + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, + &sc->sc_t0[pin], 0, is31fl319x_pin_timer_sysctl, "IU", + "T0 timer in ms"); + sc->sc_t4[pin].sc = sc; + sc->sc_t4[pin].data = 0; + sc->sc_t4[pin].id = pin; + sc->sc_t4[pin].reg = IS31FL319X_T4(pin); + SYSCTL_ADD_PROC(ctx, pinN_tree, OID_AUTO, "T4", + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, + &sc->sc_t4[pin], 0, is31fl319x_pin_timer_sysctl, "IU", + "T4 timer in ms"); + } + led_node = SYSCTL_ADD_NODE(ctx, tree, OID_AUTO, "led", CTLFLAG_RD, + NULL, "RGB LEDs"); + led_tree = SYSCTL_CHILDREN(led_node); + for (led = 0; led < (sc->sc_max_pins / 3); led++) { + snprintf(strbuf, sizeof(strbuf), "%d", led); + ledN_node = SYSCTL_ADD_NODE(ctx, led_tree, OID_AUTO, strbuf, + CTLFLAG_RD, NULL, "RGB LED"); + ledN_tree = SYSCTL_CHILDREN(ledN_node); + + SYSCTL_ADD_PROC(ctx, ledN_tree, OID_AUTO, "pwm", + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, sc, led, + is31fl319x_pwm_sysctl, "IU", "Enable the PWM control"); + sc->sc_t123[led].sc = sc; + sc->sc_t123[led].data = 0; + sc->sc_t123[led].id = led; + sc->sc_t123[led].reg = IS31FL319X_T123(led); + SYSCTL_ADD_PROC(ctx, ledN_tree, OID_AUTO, "T1-T3", + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, + &sc->sc_t123[led], 0, is31fl319x_t1t3_sysctl, "IU", + "T1 and T3 timer"); + SYSCTL_ADD_PROC(ctx, ledN_tree, OID_AUTO, "DT", + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, + &sc->sc_t123[led], 0, is31fl319x_dt_sysctl, "IU", + "T3 Double Time (T3 = 2T1)"); + SYSCTL_ADD_PROC(ctx, ledN_tree, OID_AUTO, "T2", + CTLFLAG_RW | CTLTYPE_UINT | 
CTLFLAG_MPSAFE, + &sc->sc_t123[led], 0, is31fl319x_t2_sysctl, "IU", + "T2 timer"); + } +} + +static int +is31fl319x_probe(device_t dev) +{ + const char *desc; + struct is31fl319x_softc *sc; +#ifdef FDT + phandle_t node; + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + sc = device_get_softc(dev); + switch (ofw_bus_search_compatible(dev, compat_data)->ocd_data) { + case IS31FL3193: + desc = "ISSI IS31FL3193 3 channel light effect LED driver"; + sc->sc_max_pins = 3; + break; + case IS31FL3196: + desc = "ISSI IS31FL3196 6 channel light effect LED driver"; + sc->sc_max_pins = 6; + break; + case IS31FL3199: + desc = "ISSI IS31FL3199 9 channel light effect LED driver"; + sc->sc_max_pins = 9; + break; + default: + return (ENXIO); + } + node = ofw_bus_get_node(dev); + if (!OF_hasprop(node, "gpio-controller")) + /* Node is not a GPIO controller. */ + return (ENXIO); +#else + sc = device_get_softc(dev); + sc->sc_max_pins = IS31FL319X_MAX_PINS; + desc = "ISSI IS31FL319x light effect LED driver"; +#endif + device_set_desc(dev, desc); + + return (BUS_PROBE_DEFAULT); +} + +static int +is31fl319x_attach(device_t dev) +{ + struct is31fl319x_softc *sc; + uint8_t data[3]; + + sc = device_get_softc(dev); + sc->sc_dev = dev; + + /* Reset the LED driver. */ + data[0] = 0; + if (is31fl319x_write(dev, IS31FL319X_RESET, data, 1) != 0) + return (ENXIO); + + /* Disable the shutdown mode. */ + data[0] = 1; + if (is31fl319x_write(dev, IS31FL319X_SHUTDOWN, data, 1) != 0) + return (ENXIO); + + /* Attach gpiobus. */ + sc->sc_gpio_busdev = gpiobus_attach_bus(dev); + if (sc->sc_gpio_busdev == NULL) + return (ENXIO); + + is31fl319x_sysctl_attach(dev); + + /* Update the booting status, kernel is loading. */ + data[0] = 0; + data[1] = 0; + data[2] = 35; + if (is31fl319x_write(dev, IS31FL319X_PWM(6), data, sizeof(data)) != 0) + return (ENXIO); + data[2] = 100; + if (is31fl319x_write(dev, IS31FL319X_PWM(3), data, sizeof(data)) != 0) + return (ENXIO); + + /* Enable breath on LED 2 and 3. 
*/ + sc->sc_conf1 |= (6 << 4); + if (is31fl319x_write(sc->sc_dev, IS31FL319X_CONF1, &sc->sc_conf1, + sizeof(sc->sc_conf1)) != 0) + return (ENXIO); + + /* Update register data. */ + if (is31fl319x_reg_update(sc, IS31FL319X_DATA_UPDATE) != 0) + return (ENXIO); + if (is31fl319x_reg_update(sc, IS31FL319X_TIME_UPDATE) != 0) + return (ENXIO); + + return (0); +} + +static device_t +is31fl319x_gpio_get_bus(device_t dev) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + + return (sc->sc_gpio_busdev); +} + +static int +is31fl319x_gpio_pin_max(device_t dev, int *maxpin) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + *maxpin = sc->sc_max_pins - 1; + + return (0); +} + +static int +is31fl319x_gpio_pin_getname(device_t dev, uint32_t pin, char *name) +{ + const char *buf[] = { "R", "G", "B" }; + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + memset(name, 0, GPIOMAXNAME); + snprintf(name, GPIOMAXNAME, "%s %d", buf[pin % 3], pin / 3); + + return (0); +} + +static int +is31fl319x_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + *caps = GPIO_PIN_PWM; + + return (0); +} + +static int +is31fl319x_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + *flags = GPIO_PIN_PWM; + + return (0); +} + +static int +is31fl319x_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + if ((flags & GPIO_PIN_PWM) == 0) + return (EINVAL); + + return (0); +} + +static int +is31fl319x_gpio_pin_set(device_t dev, uint32_t pin, uint32_t value) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= 
sc->sc_max_pins) + return (EINVAL); + + if (value != 0) + sc->sc_pwm[pin] = IS31FL319X_PWM_MAX; + else + sc->sc_pwm[pin] = 0; + if (is31fl319x_write(dev, IS31FL319X_PWM(pin), + &sc->sc_pwm[pin], 1) != 0) + return (ENXIO); + + return (is31fl319x_reg_update(sc, IS31FL319X_DATA_UPDATE)); +} + +static int +is31fl319x_gpio_pin_get(device_t dev, uint32_t pin, uint32_t *val) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + *val = (sc->sc_pwm[pin] != 0) ? 1 : 0; + + return (0); +} + +static int +is31fl319x_gpio_pin_toggle(device_t dev, uint32_t pin) +{ + struct is31fl319x_softc *sc; + uint32_t val; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + val = (sc->sc_pwm[pin] != 0) ? 1 : 0; + + return (is31fl319x_gpio_pin_set(dev, pin, val ^ 1)); +} + +static int +is31fl319x_gpio_pwm_get(device_t dev, int32_t pwm, uint32_t pin, uint32_t reg, + uint32_t *val) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + if (pwm != -1 || reg != GPIO_PWM_DUTY) + return (EINVAL); + + *val = (uint32_t)sc->sc_pwm[pin]; + + return (0); +} + +static int +is31fl319x_gpio_pwm_set(device_t dev, int32_t pwm, uint32_t pin, uint32_t reg, + uint32_t val) +{ + struct is31fl319x_softc *sc; + + sc = device_get_softc(dev); + if (pin >= sc->sc_max_pins) + return (EINVAL); + + if (pwm != -1 || reg != GPIO_PWM_DUTY) + return (EINVAL); + + sc->sc_pwm[pin] = (uint8_t)val; + if (is31fl319x_write(dev, IS31FL319X_PWM(pin), + &sc->sc_pwm[pin], 1) != 0) + return (ENXIO); + + return (is31fl319x_reg_update(sc, IS31FL319X_DATA_UPDATE)); +} + +static phandle_t +is31fl319x_gpio_get_node(device_t bus, device_t dev) +{ + + /* Used by ofw_gpiobus. 
*/ + return (ofw_bus_get_node(bus)); +} + +static device_method_t is31fl319x_methods[] = { + DEVMETHOD(device_probe, is31fl319x_probe), + DEVMETHOD(device_attach, is31fl319x_attach), + + /* GPIO protocol */ + DEVMETHOD(gpio_get_bus, is31fl319x_gpio_get_bus), + DEVMETHOD(gpio_pin_max, is31fl319x_gpio_pin_max), + DEVMETHOD(gpio_pin_getname, is31fl319x_gpio_pin_getname), + DEVMETHOD(gpio_pin_getcaps, is31fl319x_gpio_pin_getcaps), + DEVMETHOD(gpio_pin_getflags, is31fl319x_gpio_pin_getflags), + DEVMETHOD(gpio_pin_setflags, is31fl319x_gpio_pin_setflags), + DEVMETHOD(gpio_pin_get, is31fl319x_gpio_pin_get), + DEVMETHOD(gpio_pin_set, is31fl319x_gpio_pin_set), + DEVMETHOD(gpio_pin_toggle, is31fl319x_gpio_pin_toggle), + DEVMETHOD(gpio_pwm_get, is31fl319x_gpio_pwm_get), + DEVMETHOD(gpio_pwm_set, is31fl319x_gpio_pwm_set), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_node, is31fl319x_gpio_get_node), + + DEVMETHOD_END +}; + +static driver_t is31fl319x_driver = { + "gpio", + is31fl319x_methods, + sizeof(struct is31fl319x_softc), +}; + +static devclass_t is31fl319x_devclass; + +DRIVER_MODULE(is31fl319x, iicbus, is31fl319x_driver, is31fl319x_devclass, + NULL, NULL); +MODULE_VERSION(is31fl319x, 1); +MODULE_DEPEND(is31fl319x, iicbus, 1, 1, 1); diff --git a/sys/dev/iicbus/is31fl319xreg.h b/sys/dev/iicbus/is31fl319xreg.h new file mode 100644 index 0000000..d780eb9 --- /dev/null +++ b/sys/dev/iicbus/is31fl319xreg.h @@ -0,0 +1,63 @@ +/*- + * Copyright (c) 2017 Rubicon Communications, LLC (Netgate) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * ISSI IS31FL319X [3|6|9]-Channel Light Effect LED Driver. 
+ */ + +#ifndef _IS31FL319XREG_H_ +#define _IS31FL319XREG_H_ + +#define IS31FL319X_SHUTDOWN 0x00 +#define IS31FL319X_LEDCTRL1 0x01 +#define IS31FL319X_LEDCTRL2 0x02 +#define IS31FL319X_CONF1 0x03 +#define IS31FL319X_CONF1_PWM(led) (1 << (4 + led)) +#define IS31FL319X_CONF2 0x04 +#define IS31FL319X_RAMPMODE 0x05 +#define IS31FL319X_BREATHMARK 0x06 +#define IS31FL319X_PWM(out) (0x07 + (out)) +#define IS31FL319X_PWM_MAX 0xff +#define IS31FL319X_DATA_UPDATE 0x10 +#define IS31FL319X_T0(out) (0x11 + (out)) +#define IS31FL319X_T0_A_MASK 0x0f +#define IS31FL319X_T0_B_MASK 0x30 +#define IS31FL319X_T0_MAX_TIME 31200 +#define IS31FL319X_T123(led) (0x1a + (led)) +#define IS31FL319X_T1_A_MASK 0x07 +#define IS31FL319X_T1_MAX_TIME 4160 +#define IS31FL319X_T2_B_MASK 0x70 +#define IS31FL319X_T2_MAX_TIME 16640 +#define IS31FL319X_DT (1 << 7) +#define IS31FL319X_T4(out) (0x1d + (out)) +#define IS31FL319X_TIME_UPDATE 0x26 +#define IS31FL319X_RESET 0xff + +#define IS31FL319X_MAX_PINS 9 + +#endif /* _IS31FL319XREG_H_ */ diff --git a/sys/dev/iicbus/ofw_iicbus.c b/sys/dev/iicbus/ofw_iicbus.c index 0820bbf..bff23ec 100644 --- a/sys/dev/iicbus/ofw_iicbus.c +++ b/sys/dev/iicbus/ofw_iicbus.c @@ -84,6 +84,8 @@ EARLY_DRIVER_MODULE(ofw_iicbus, iicbb, ofw_iicbus_driver, ofwiicbus_devclass, 0, 0, BUS_PASS_BUS); EARLY_DRIVER_MODULE(ofw_iicbus, iichb, ofw_iicbus_driver, ofwiicbus_devclass, 0, 0, BUS_PASS_BUS); +EARLY_DRIVER_MODULE(ofw_iicbus, twsi, ofw_iicbus_driver, ofwiicbus_devclass, + 0, 0, BUS_PASS_BUS); MODULE_VERSION(ofw_iicbus, 1); MODULE_DEPEND(ofw_iicbus, iicbus, 1, 1, 1); diff --git a/sys/dev/iicbus/pca9552.c b/sys/dev/iicbus/pca9552.c new file mode 100644 index 0000000..43fe540 --- /dev/null +++ b/sys/dev/iicbus/pca9552.c @@ -0,0 +1,414 @@ +/*- + * Copyright (c) 2017 Rubicon Communications, LLC (Netgate) + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Driver for the NXP PCA9552 - I2C LED driver with programmable blink rates. 
+ */ + +#include "opt_platform.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/gpio.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/sysctl.h> + +#include <dev/iicbus/iicbus.h> +#include <dev/iicbus/iiconf.h> + +#include <dev/gpio/gpiobusvar.h> +#ifdef FDT +#include <dev/ofw/ofw_bus.h> +#endif + +#include <dev/iicbus/pca9552reg.h> + +#include "gpio_if.h" +#include "iicbus_if.h" + +#define PCA9552_GPIO_PINS 16 +#define PCA9552_GPIO_CAPS GPIO_PIN_INPUT | GPIO_PIN_OUTPUT | \ + GPIO_PIN_OPENDRAIN + +struct pca9552_softc { + device_t sc_dev; + device_t sc_gpio_busdev; + uint16_t sc_addr; +}; + +static int +pca9552_read(device_t dev, uint16_t addr, uint8_t ctrl, uint8_t *data, + size_t len) +{ + struct iic_msg msg[2] = { + { addr, IIC_M_WR | IIC_M_NOSTOP, 1, &ctrl }, + { addr, IIC_M_RD, len, data }, + }; + + return (iicbus_transfer(dev, msg, nitems(msg))); +} + +static int +pca9552_write(device_t dev, uint16_t addr, uint8_t *data, size_t len) +{ + struct iic_msg msg[1] = { + { addr, IIC_M_WR, len, data }, + }; + + return (iicbus_transfer(dev, msg, nitems(msg))); +} + +static int +pca9552_probe(device_t dev) +{ +#ifdef FDT + phandle_t node; + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + if (!ofw_bus_is_compatible(dev, "nxp,pca9552")) + return (ENXIO); + node = ofw_bus_get_node(dev); + if (!OF_hasprop(node, "gpio-controller")) + /* Node is not a GPIO controller. 
*/ + return (ENXIO); +#endif + device_set_desc(dev, "NXP PCA9552 LED driver"); + + return (BUS_PROBE_DEFAULT); +} + +static int +pca9552_period_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error, new; + struct pca9552_softc *sc; + uint8_t data[2], psc; + + sc = (struct pca9552_softc *)arg1; + error = pca9552_read(sc->sc_dev, sc->sc_addr, PCA9552_PSC(arg2), &psc, + sizeof(psc)); + if (error != 0) + return (error); + + new = ((((int)psc) + 1) * 1000) / 44; + error = sysctl_handle_int(oidp, &new, sizeof(new), req); + if (error != 0 || req->newptr == NULL) + return (error); + new = ((new * 44) / 1000) - 1; + if (new != psc && new >= 0 && new <= 255) { + data[0] = PCA9552_PSC(arg2); + data[1] = new; + error = pca9552_write(sc->sc_dev, sc->sc_addr, data, + sizeof(data)); + if (error != 0) + return (error); + } + + return (error); +} + +static int +pca9552_duty_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error, new; + struct pca9552_softc *sc; + uint8_t data[2], duty; + + sc = (struct pca9552_softc *)arg1; + error = pca9552_read(sc->sc_dev, sc->sc_addr, PCA9552_PWM(arg2), &duty, + sizeof(duty)); + if (error != 0) + return (error); + + new = duty; + error = sysctl_handle_int(oidp, &new, sizeof(new), req); + if (error != 0 || req->newptr == NULL) + return (error); + if (new != duty && new >= 0 && new <= 255) { + data[0] = PCA9552_PWM(arg2); + data[1] = new; + error = pca9552_write(sc->sc_dev, sc->sc_addr, data, + sizeof(data)); + if (error != 0) + return (error); + } + + return (error); +} + +static int +pca9552_attach(device_t dev) +{ + char pwmbuf[4]; + int i; + struct pca9552_softc *sc; + struct sysctl_ctx_list *ctx; + struct sysctl_oid *pwm_node, *pwmN_node, *tree_node; + struct sysctl_oid_list *pwm_tree, *pwmN_tree, *tree; + uint8_t data[2]; + + sc = device_get_softc(dev); + sc->sc_dev = dev; + sc->sc_addr = iicbus_get_addr(dev); + + ctx = device_get_sysctl_ctx(dev); + tree_node = device_get_sysctl_tree(dev); + tree = SYSCTL_CHILDREN(tree_node); + + /* Reset output. 
*/ + for (i = 0; i < 4; i++) { + data[0] = PCA9552_LS(i * 4); + data[1] = 0x55; + if (pca9552_write(dev, sc->sc_addr, data, sizeof(data)) != 0) + return (ENXIO); + } + + /* Attach gpiobus. */ + sc->sc_gpio_busdev = gpiobus_attach_bus(dev); + if (sc->sc_gpio_busdev == NULL) + return (ENXIO); + + pwm_node = SYSCTL_ADD_NODE(ctx, tree, OID_AUTO, "pwm", + CTLFLAG_RD, NULL, "PWM settings"); + pwm_tree = SYSCTL_CHILDREN(pwm_node); + + for (i = 0; i < 2; i++) { + snprintf(pwmbuf, sizeof(pwmbuf), "%d", i); + pwmN_node = SYSCTL_ADD_NODE(ctx, pwm_tree, OID_AUTO, pwmbuf, + CTLFLAG_RD, NULL, "PWM settings"); + pwmN_tree = SYSCTL_CHILDREN(pwmN_node); + + SYSCTL_ADD_PROC(ctx, pwmN_tree, OID_AUTO, "period", + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, sc, i, + pca9552_period_sysctl, "IU", "PCA9552 PWM period (in ms)"); + SYSCTL_ADD_PROC(ctx, pwmN_tree, OID_AUTO, "duty", + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, sc, i, + pca9552_duty_sysctl, "IU", "PCA9552 PWM duty cycle"); + } + + return (0); +} + +static device_t +pca9552_gpio_get_bus(device_t dev) +{ + struct pca9552_softc *sc; + + sc = device_get_softc(dev); + + return (sc->sc_gpio_busdev); +} + +static int +pca9552_gpio_pin_max(device_t dev, int *maxpin) +{ + + *maxpin = PCA9552_GPIO_PINS - 1; + + return (0); +} + +static int +pca9552_gpio_pin_getname(device_t dev, uint32_t pin, char *name) +{ + + if (pin >= PCA9552_GPIO_PINS) + return (EINVAL); + + memset(name, 0, GPIOMAXNAME); + snprintf(name, GPIOMAXNAME, "LED %d", pin); + + return (0); +} + +static int +pca9552_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) +{ + + if (pin >= PCA9552_GPIO_PINS) + return (EINVAL); + + *caps = PCA9552_GPIO_CAPS; + + return (0); +} + +static int +pca9552_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags) +{ + struct pca9552_softc *sc; + uint8_t curr; + + if (pin >= PCA9552_GPIO_PINS) + return (EINVAL); + + sc = device_get_softc(dev); + if (pca9552_read(dev, sc->sc_addr, PCA9552_LS(pin), &curr, + sizeof(curr)) != 
0) + return (ENXIO); + + switch ((curr >> PCA9552_LS_SHIFT(pin)) & 0x3) { + case 0: + *flags = GPIO_PIN_OUTPUT | GPIO_PIN_OPENDRAIN; + break; + case 1: + *flags = GPIO_PIN_INPUT; + break; + default: + *flags = 0; + } + + return (0); +} + +static int +pca9552_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags) +{ + struct pca9552_softc *sc; + uint8_t curr, new[2]; + + if (pin >= PCA9552_GPIO_PINS) + return (EINVAL); + if ((flags & (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT)) == 0) + return (0); + + sc = device_get_softc(dev); + if (pca9552_read(dev, sc->sc_addr, PCA9552_LS(pin), &curr, + sizeof(curr)) != 0) + return (ENXIO); + + curr &= ~(0x3 << PCA9552_LS_SHIFT(pin)); + if ((flags & GPIO_PIN_INPUT) != 0) + curr |= (0x1 << PCA9552_LS_SHIFT(pin)); + new[0] = PCA9552_LS(pin); + new[1] = curr; + if (pca9552_write(dev, sc->sc_addr, new, sizeof(new)) != 0) + return (ENXIO); + + return (0); +} + +static int +pca9552_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value) +{ + struct pca9552_softc *sc; + uint8_t curr, new[2]; + + if (pin >= PCA9552_GPIO_PINS) + return (EINVAL); + + sc = device_get_softc(dev); + if (pca9552_read(dev, sc->sc_addr, PCA9552_LS(pin), &curr, + sizeof(curr)) != 0) + return (ENXIO); + + curr &= ~(0x3 << PCA9552_LS_SHIFT(pin)); + if (value != 0) + curr |= (0x1 << PCA9552_LS_SHIFT(pin)); + new[0] = PCA9552_LS(pin); + new[1] = curr; + if (pca9552_write(dev, sc->sc_addr, new, sizeof(new)) != 0) + return (ENXIO); + + return (0); +} + +static int +pca9552_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val) +{ + struct pca9552_softc *sc; + uint8_t data; + + if (pin >= PCA9552_GPIO_PINS) + return (EINVAL); + + sc = device_get_softc(dev); + if (pca9552_read(dev, sc->sc_addr, PCA9552_INPUT(pin), &data, + sizeof(data)) != 0) + return (ENXIO); + + *val = ((data & (1 << (pin % 8))) != 0) ? 
1 : 0; + + return (0); +} + +static int +pca9552_gpio_pin_toggle(device_t dev, uint32_t pin) +{ + unsigned int val; + + if (pca9552_gpio_pin_get(dev, pin, &val) != 0) + return (ENXIO); + + return (pca9552_gpio_pin_set(dev, pin, val ^ 1)); +} + +static phandle_t +pca9552_gpio_get_node(device_t bus, device_t dev) +{ + + /* Used by ofw_gpiobus. */ + return (ofw_bus_get_node(bus)); +} + +static device_method_t pca9552_methods[] = { + DEVMETHOD(device_probe, pca9552_probe), + DEVMETHOD(device_attach, pca9552_attach), + + /* GPIO protocol */ + DEVMETHOD(gpio_get_bus, pca9552_gpio_get_bus), + DEVMETHOD(gpio_pin_max, pca9552_gpio_pin_max), + DEVMETHOD(gpio_pin_getname, pca9552_gpio_pin_getname), + DEVMETHOD(gpio_pin_getcaps, pca9552_gpio_pin_getcaps), + DEVMETHOD(gpio_pin_getflags, pca9552_gpio_pin_getflags), + DEVMETHOD(gpio_pin_setflags, pca9552_gpio_pin_setflags), + DEVMETHOD(gpio_pin_get, pca9552_gpio_pin_get), + DEVMETHOD(gpio_pin_set, pca9552_gpio_pin_set), + DEVMETHOD(gpio_pin_toggle, pca9552_gpio_pin_toggle), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_node, pca9552_gpio_get_node), + + DEVMETHOD_END +}; + +static driver_t pca9552_driver = { + "gpio", + pca9552_methods, + sizeof(struct pca9552_softc), +}; + +static devclass_t pca9552_devclass; + +DRIVER_MODULE(pca9552, iicbus, pca9552_driver, pca9552_devclass, NULL, NULL); +MODULE_VERSION(pca9552, 1); +MODULE_DEPEND(pca9552, iicbus, 1, 1, 1); diff --git a/sys/dev/iicbus/pca9552reg.h b/sys/dev/iicbus/pca9552reg.h new file mode 100644 index 0000000..d5a5a43 --- /dev/null +++ b/sys/dev/iicbus/pca9552reg.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2017 Rubicon Communications, LLC (Netgate) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * NXP PCA9552 LED Driver Registers. + */ + +#ifndef _PCA9552REG_H_ +#define _PCA9552REG_H_ + +#define PCA9552_INPUT(a) (0x0 + ((a) / 8)) +#define PCA9552_PSC(a) (0x2 + 2 * (a)) +#define PCA9552_PWM(a) (0x3 + 2 * (a)) +#define PCA9552_LS(a) (0x6 + ((a) / 4)) +#define PCA9552_LS_SHIFT(a) (((a) % 4) * 2) + +#endif /* _PCA9552REG_H_ */ diff --git a/sys/dev/iicbus/twsi/mv_twsi.c b/sys/dev/iicbus/twsi/mv_twsi.c index 998a197..997263e 100644 --- a/sys/dev/iicbus/twsi/mv_twsi.c +++ b/sys/dev/iicbus/twsi/mv_twsi.c @@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$"); #define debugf(fmt, args...) 
#endif +static phandle_t mv_twsi_get_node(device_t, device_t); static int mv_twsi_probe(device_t); static int mv_twsi_attach(device_t); @@ -105,7 +106,10 @@ static device_method_t mv_twsi_methods[] = { DEVMETHOD(device_probe, mv_twsi_probe), DEVMETHOD(device_attach, mv_twsi_attach), - { 0, 0 } + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_node, mv_twsi_get_node), + + DEVMETHOD_END }; DEFINE_CLASS_1(twsi, mv_twsi_driver, mv_twsi_methods, @@ -117,6 +121,14 @@ DRIVER_MODULE(twsi, simplebus, mv_twsi_driver, mv_twsi_devclass, 0, 0); DRIVER_MODULE(iicbus, twsi, iicbus_driver, iicbus_devclass, 0, 0); MODULE_DEPEND(twsi, iicbus, 1, 1, 1); +static phandle_t +mv_twsi_get_node(device_t bus, device_t dev) +{ + + /* Used by ofw_iicbus. */ + return (ofw_bus_get_node(bus)); +} + static int mv_twsi_probe(device_t dev) { diff --git a/sys/dev/iicbus/twsi/twsi.c b/sys/dev/iicbus/twsi/twsi.c index dee0b7a..aa5f943 100644 --- a/sys/dev/iicbus/twsi/twsi.c +++ b/sys/dev/iicbus/twsi/twsi.c @@ -114,6 +114,7 @@ twsi_control_clear(struct twsi_softc *sc, uint32_t mask) uint32_t val; val = TWSI_READ(sc, sc->reg_control); + val &= ~(TWSI_CONTROL_STOP | TWSI_CONTROL_START); val &= ~mask; TWSI_WRITE(sc, sc->reg_control, val); } @@ -124,6 +125,7 @@ twsi_control_set(struct twsi_softc *sc, uint32_t mask) uint32_t val; val = TWSI_READ(sc, sc->reg_control); + val &= ~(TWSI_CONTROL_STOP | TWSI_CONTROL_START); val |= mask; TWSI_WRITE(sc, sc->reg_control, val); } @@ -204,8 +206,8 @@ twsi_locked_start(device_t dev, struct twsi_softc *sc, int32_t mask, } TWSI_WRITE(sc, sc->reg_data, slave); - DELAY(1000); twsi_clear_iflg(sc); + DELAY(1000); if (twsi_poll_ctrl(sc, timeout, TWSI_CONTROL_IFLG)) { debugf("timeout sending slave address\n"); @@ -251,7 +253,7 @@ twsi_reset(device_t dev, u_char speed, u_char addr, u_char *oldaddr) TWSI_WRITE(sc, sc->reg_soft_reset, 0x0); DELAY(2000); TWSI_WRITE(sc, sc->reg_baud_rate, param); - TWSI_WRITE(sc, sc->reg_control, TWSI_CONTROL_TWSIEN | TWSI_CONTROL_ACK); + 
TWSI_WRITE(sc, sc->reg_control, TWSI_CONTROL_TWSIEN); DELAY(1000); mtx_unlock(&sc->mutex); @@ -266,9 +268,10 @@ twsi_stop(device_t dev) sc = device_get_softc(dev); mtx_lock(&sc->mutex); + twsi_control_clear(sc, TWSI_CONTROL_ACK); twsi_control_set(sc, TWSI_CONTROL_STOP); - DELAY(1000); twsi_clear_iflg(sc); + DELAY(1000); mtx_unlock(&sc->mutex); return (IIC_NOERR); @@ -341,8 +344,8 @@ twsi_read(device_t dev, char *buf, int len, int *read, int last, int delay) else twsi_control_set(sc, TWSI_CONTROL_ACK); - DELAY (1000); twsi_clear_iflg(sc); + DELAY(1000); if (twsi_poll_ctrl(sc, delay, TWSI_CONTROL_IFLG)) { debugf("timeout reading data\n"); @@ -382,6 +385,7 @@ twsi_write(device_t dev, const char *buf, int len, int *sent, int timeout) TWSI_WRITE(sc, sc->reg_data, *buf++); twsi_clear_iflg(sc); + DELAY(1000); if (twsi_poll_ctrl(sc, timeout, TWSI_CONTROL_IFLG)) { debugf("timeout writing data\n"); rv = IIC_ETIMEOUT; diff --git a/sys/dev/ismt/ismt.c b/sys/dev/ismt/ismt.c index 3fbcfed..7d0622c 100644 --- a/sys/dev/ismt/ismt.c +++ b/sys/dev/ismt/ismt.c @@ -72,7 +72,7 @@ __FBSDID("$FreeBSD$"); #define ISMT_DESC_LPR 0x80 /* Large Packet Received */ /* Macros */ -#define ISMT_DESC_ADDR_RW(addr, is_read) ((addr << 1) | (is_read)) +#define ISMT_DESC_ADDR_RW(addr, is_read) ((addr) | (is_read)) /* iSMT General Register address offsets (SMBBAR + <addr>) */ #define ISMT_GR_GCTRL 0x000 /* General Control */ @@ -717,6 +717,7 @@ fail: #define ID_INTEL_S1200_SMT0 0x0c598086 #define ID_INTEL_S1200_SMT1 0x0c5a8086 #define ID_INTEL_C2000_SMT 0x1f158086 +#define ID_INTEL_DNV_SMT 0x19ac8086 static int ismt_probe(device_t dev) @@ -733,6 +734,9 @@ ismt_probe(device_t dev) case ID_INTEL_C2000_SMT: desc = "Atom Processor C2000 SMBus 2.0"; break; + case ID_INTEL_DNV_SMT: + desc = "Denverton Host SMBus Controller"; + break; default: return (ENXIO); } diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index f54dbf7..7fed368 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ 
-1253,7 +1253,7 @@ ixgbe_init_locked(struct adapter *adapter) * need to be kick-started */ if (hw->phy.type == ixgbe_phy_none) { - err = hw->phy.ops.identify(hw); + err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); @@ -3830,8 +3830,7 @@ ixgbe_handle_mod(void *context, int pending) "Setup failure - unsupported SFP+ module type.\n"); goto out; } - if (hw->phy.multispeed_fiber) - taskqueue_enqueue(adapter->tq, &adapter->msf_task); + taskqueue_enqueue(adapter->tq, &adapter->msf_task); out: /* Update media type */ switch (hw->mac.ops.get_media_type(hw)) { diff --git a/sys/dev/mii/e1000phy.c b/sys/dev/mii/e1000phy.c index d97c7d5..075368c 100644 --- a/sys/dev/mii/e1000phy.c +++ b/sys/dev/mii/e1000phy.c @@ -209,6 +209,9 @@ e1000phy_reset(struct mii_softc *sc) } } else { switch (sc->mii_mpd_model) { + case MII_MODEL_xxMARVELL_E1000: + reg |= E1000_SCR_AUTO_X_MODE; + break; case MII_MODEL_xxMARVELL_E1111: case MII_MODEL_xxMARVELL_E1112: case MII_MODEL_xxMARVELL_E1116: diff --git a/sys/dev/mii/micphy.c b/sys/dev/mii/micphy.c index ddd0a19..a33c497 100644 --- a/sys/dev/mii/micphy.c +++ b/sys/dev/mii/micphy.c @@ -220,6 +220,13 @@ ksz9031_load_values(struct mii_softc *sc, phandle_t node) ksz90x1_load_values(sc, node, 2, MII_KSZ9031_CLOCK_PAD_SKEW, "rxc-skew-ps", 0x1f, 0, "txc-skew-ps", 0x1f, 5, NULL, 0, 0, NULL, 0, 0); + + /* + * Fix for errata 5. + * Set the device's Auto-Negotiation FLP (Fast Link Pulse) to 16ms. + */ + ksz9031_write(sc, 0, 0x4, 0x6); + ksz9031_write(sc, 0, 0x3, 0x1a80); } static void diff --git a/sys/dev/neta/if_mvneta.c b/sys/dev/neta/if_mvneta.c new file mode 100644 index 0000000..f453642 --- /dev/null +++ b/sys/dev/neta/if_mvneta.c @@ -0,0 +1,3608 @@ +/* + * Copyright (c) 2017 Stormshield. + * Copyright (c) 2017 Semihalf. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "opt_platform.h" +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/endian.h> +#include <sys/mbuf.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/smp.h> +#include <sys/taskqueue.h> +#ifdef MVNETA_KTR +#include <sys/ktr.h> +#endif + +#include <net/ethernet.h> +#include <net/bpf.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/if_types.h> +#include <net/if_vlan_var.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp_lro.h> + +#include <sys/sockio.h> +#include <sys/bus.h> +#include <machine/bus.h> +#include <sys/rman.h> +#include <machine/resource.h> + +#include <dev/mii/mii.h> +#include <dev/mii/miivar.h> + +#include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <dev/mdio/mdio.h> + +#include <arm/mv/mvreg.h> +#include <arm/mv/mvvar.h> +#include <arm/mv/mvwin.h> + +#include "if_mvnetareg.h" +#include "if_mvnetavar.h" + +#include "miibus_if.h" +#include "mdio_if.h" + +#ifdef MVNETA_DEBUG +#define STATIC /* nothing */ +#else +#define STATIC static +#endif + +#define DASSERT(x) KASSERT((x), (#x)) + +/* Device Register Initialization */ +STATIC int mvneta_initreg(struct ifnet *); + +/* Descriptor Ring Control for each of queues */ +STATIC int mvneta_ring_alloc_rx_queue(struct mvneta_softc *, int); +STATIC int mvneta_ring_alloc_tx_queue(struct mvneta_softc *, int); +STATIC void mvneta_ring_dealloc_rx_queue(struct mvneta_softc *, int); +STATIC void mvneta_ring_dealloc_tx_queue(struct mvneta_softc *, int); +STATIC int mvneta_ring_init_rx_queue(struct mvneta_softc *, int); +STATIC int mvneta_ring_init_tx_queue(struct mvneta_softc *, int); +STATIC void mvneta_ring_flush_rx_queue(struct mvneta_softc *, int); 
+STATIC void mvneta_ring_flush_tx_queue(struct mvneta_softc *, int); +STATIC void mvneta_dmamap_cb(void *, bus_dma_segment_t *, int, int); +STATIC int mvneta_dma_create(struct mvneta_softc *); + +/* Rx/Tx Queue Control */ +STATIC int mvneta_rx_queue_init(struct ifnet *, int); +STATIC int mvneta_tx_queue_init(struct ifnet *, int); +STATIC int mvneta_rx_queue_enable(struct ifnet *, int); +STATIC int mvneta_tx_queue_enable(struct ifnet *, int); +STATIC void mvneta_rx_lockq(struct mvneta_softc *, int); +STATIC void mvneta_rx_unlockq(struct mvneta_softc *, int); +STATIC void mvneta_tx_lockq(struct mvneta_softc *, int); +STATIC void mvneta_tx_unlockq(struct mvneta_softc *, int); + +/* Interrupt Handlers */ +STATIC void mvneta_disable_intr(struct mvneta_softc *); +STATIC void mvneta_enable_intr(struct mvneta_softc *); +STATIC void mvneta_rxtxth_intr(void *); +STATIC int mvneta_misc_intr(struct mvneta_softc *); +STATIC void mvneta_tick(void *); +/* struct ifnet and mii callbacks*/ +STATIC int mvneta_xmitfast_locked(struct mvneta_softc *, int, struct mbuf **); +STATIC int mvneta_xmit_locked(struct mvneta_softc *, int); +#ifdef MVNETA_MULTIQUEUE +STATIC int mvneta_transmit(struct ifnet *, struct mbuf *); +#else /* !MVNETA_MULTIQUEUE */ +STATIC void mvneta_start(struct ifnet *); +#endif +STATIC void mvneta_qflush(struct ifnet *); +STATIC void mvneta_tx_task(void *, int); +STATIC int mvneta_ioctl(struct ifnet *, u_long, caddr_t); +STATIC void mvneta_init(void *); +STATIC void mvneta_init_locked(void *); +STATIC void mvneta_stop(struct mvneta_softc *); +STATIC void mvneta_stop_locked(struct mvneta_softc *); +STATIC int mvneta_mediachange(struct ifnet *); +STATIC void mvneta_mediastatus(struct ifnet *, struct ifmediareq *); +STATIC void mvneta_portup(struct mvneta_softc *); +STATIC void mvneta_portdown(struct mvneta_softc *); + +/* Link State Notify */ +STATIC void mvneta_update_autoneg(struct mvneta_softc *, int); +STATIC int mvneta_update_media(struct mvneta_softc *, int); 
+STATIC void mvneta_adjust_link(struct mvneta_softc *); +STATIC void mvneta_update_eee(struct mvneta_softc *); +STATIC void mvneta_update_fc(struct mvneta_softc *); +STATIC void mvneta_link_isr(struct mvneta_softc *); +STATIC void mvneta_linkupdate(struct mvneta_softc *, boolean_t); +STATIC void mvneta_linkup(struct mvneta_softc *); +STATIC void mvneta_linkdown(struct mvneta_softc *); +STATIC void mvneta_linkreset(struct mvneta_softc *); + +/* Tx Subroutines */ +STATIC int mvneta_tx_queue(struct mvneta_softc *, struct mbuf **, int); +STATIC void mvneta_tx_set_csumflag(struct ifnet *, + struct mvneta_tx_desc *, struct mbuf *); +STATIC void mvneta_tx_queue_complete(struct mvneta_softc *, int); +STATIC void mvneta_tx_drain(struct mvneta_softc *); + +/* Rx Subroutines */ +STATIC int mvneta_rx(struct mvneta_softc *, int, int); +STATIC void mvneta_rx_queue(struct mvneta_softc *, int, int); +STATIC void mvneta_rx_queue_refill(struct mvneta_softc *, int); +STATIC void mvneta_rx_set_csumflag(struct ifnet *, + struct mvneta_rx_desc *, struct mbuf *); +STATIC void mvneta_rx_buf_free(struct mvneta_softc *, struct mvneta_buf *); + +/* MAC address filter */ +STATIC void mvneta_filter_setup(struct mvneta_softc *); + +/* sysctl(9) */ +STATIC int sysctl_read_mib(SYSCTL_HANDLER_ARGS); +STATIC int sysctl_clear_mib(SYSCTL_HANDLER_ARGS); +STATIC int sysctl_set_queue_rxthtime(SYSCTL_HANDLER_ARGS); +STATIC void sysctl_mvneta_init(struct mvneta_softc *); + +/* MIB */ +STATIC void mvneta_clear_mib(struct mvneta_softc *); +STATIC void mvneta_update_mib(struct mvneta_softc *); + +/* Switch */ +STATIC boolean_t mvneta_has_switch(device_t); + +#define mvneta_sc_lock(sc) mtx_lock(&sc->mtx) +#define mvneta_sc_unlock(sc) mtx_unlock(&sc->mtx) + +STATIC struct mtx mii_mutex; +STATIC int mii_init = 0; + +/* Device */ +STATIC int mvneta_detach(device_t); +/* MII */ +STATIC int mvneta_miibus_readreg(device_t, int, int); +STATIC int mvneta_miibus_writereg(device_t, int, int, int); + +static 
device_method_t mvneta_methods[] = { + /* Device interface */ + DEVMETHOD(device_detach, mvneta_detach), + /* MII interface */ + DEVMETHOD(miibus_readreg, mvneta_miibus_readreg), + DEVMETHOD(miibus_writereg, mvneta_miibus_writereg), + /* MDIO interface */ + DEVMETHOD(mdio_readreg, mvneta_miibus_readreg), + DEVMETHOD(mdio_writereg, mvneta_miibus_writereg), + + /* End */ + DEVMETHOD_END +}; + +DEFINE_CLASS_0(mvneta, mvneta_driver, mvneta_methods, sizeof(struct mvneta_softc)); + +DRIVER_MODULE(miibus, mvneta, miibus_driver, miibus_devclass, 0, 0); +DRIVER_MODULE(mdio, mvneta, mdio_driver, mdio_devclass, 0, 0); +MODULE_DEPEND(mvneta, mdio, 1, 1, 1); +MODULE_DEPEND(mvneta, ether, 1, 1, 1); +MODULE_DEPEND(mvneta, miibus, 1, 1, 1); +MODULE_DEPEND(mvneta, mvxpbm, 1, 1, 1); + +/* + * List of MIB register and names + */ +enum mvneta_mib_idx +{ + MVNETA_MIB_RX_GOOD_OCT_IDX, + MVNETA_MIB_RX_BAD_OCT_IDX, + MVNETA_MIB_TX_MAC_TRNS_ERR_IDX, + MVNETA_MIB_RX_GOOD_FRAME_IDX, + MVNETA_MIB_RX_BAD_FRAME_IDX, + MVNETA_MIB_RX_BCAST_FRAME_IDX, + MVNETA_MIB_RX_MCAST_FRAME_IDX, + MVNETA_MIB_RX_FRAME64_OCT_IDX, + MVNETA_MIB_RX_FRAME127_OCT_IDX, + MVNETA_MIB_RX_FRAME255_OCT_IDX, + MVNETA_MIB_RX_FRAME511_OCT_IDX, + MVNETA_MIB_RX_FRAME1023_OCT_IDX, + MVNETA_MIB_RX_FRAMEMAX_OCT_IDX, + MVNETA_MIB_TX_GOOD_OCT_IDX, + MVNETA_MIB_TX_GOOD_FRAME_IDX, + MVNETA_MIB_TX_EXCES_COL_IDX, + MVNETA_MIB_TX_MCAST_FRAME_IDX, + MVNETA_MIB_TX_BCAST_FRAME_IDX, + MVNETA_MIB_TX_MAC_CTL_ERR_IDX, + MVNETA_MIB_FC_SENT_IDX, + MVNETA_MIB_FC_GOOD_IDX, + MVNETA_MIB_FC_BAD_IDX, + MVNETA_MIB_PKT_UNDERSIZE_IDX, + MVNETA_MIB_PKT_FRAGMENT_IDX, + MVNETA_MIB_PKT_OVERSIZE_IDX, + MVNETA_MIB_PKT_JABBER_IDX, + MVNETA_MIB_MAC_RX_ERR_IDX, + MVNETA_MIB_MAC_CRC_ERR_IDX, + MVNETA_MIB_MAC_COL_IDX, + MVNETA_MIB_MAC_LATE_COL_IDX, +}; + +STATIC struct mvneta_mib_def { + uint32_t regnum; + int reg64; + const char *sysctl_name; + const char *desc; +} mvneta_mib_list[] = { + [MVNETA_MIB_RX_GOOD_OCT_IDX] = {MVNETA_MIB_RX_GOOD_OCT, 1, + "rx_good_oct", 
"Good Octets Rx"}, + [MVNETA_MIB_RX_BAD_OCT_IDX] = {MVNETA_MIB_RX_BAD_OCT, 0, + "rx_bad_oct", "Bad Octets Rx"}, + [MVNETA_MIB_TX_MAC_TRNS_ERR_IDX] = {MVNETA_MIB_TX_MAC_TRNS_ERR, 0, + "tx_mac_err", "MAC Transmit Error"}, + [MVNETA_MIB_RX_GOOD_FRAME_IDX] = {MVNETA_MIB_RX_GOOD_FRAME, 0, + "rx_good_frame", "Good Frames Rx"}, + [MVNETA_MIB_RX_BAD_FRAME_IDX] = {MVNETA_MIB_RX_BAD_FRAME, 0, + "rx_bad_frame", "Bad Frames Rx"}, + [MVNETA_MIB_RX_BCAST_FRAME_IDX] = {MVNETA_MIB_RX_BCAST_FRAME, 0, + "rx_bcast_frame", "Broadcast Frames Rx"}, + [MVNETA_MIB_RX_MCAST_FRAME_IDX] = {MVNETA_MIB_RX_MCAST_FRAME, 0, + "rx_mcast_frame", "Multicast Frames Rx"}, + [MVNETA_MIB_RX_FRAME64_OCT_IDX] = {MVNETA_MIB_RX_FRAME64_OCT, 0, + "rx_frame_1_64", "Frame Size 1 - 64"}, + [MVNETA_MIB_RX_FRAME127_OCT_IDX] = {MVNETA_MIB_RX_FRAME127_OCT, 0, + "rx_frame_65_127", "Frame Size 65 - 127"}, + [MVNETA_MIB_RX_FRAME255_OCT_IDX] = {MVNETA_MIB_RX_FRAME255_OCT, 0, + "rx_frame_128_255", "Frame Size 128 - 255"}, + [MVNETA_MIB_RX_FRAME511_OCT_IDX] = {MVNETA_MIB_RX_FRAME511_OCT, 0, + "rx_frame_256_511", "Frame Size 256 - 511"}, + [MVNETA_MIB_RX_FRAME1023_OCT_IDX] = {MVNETA_MIB_RX_FRAME1023_OCT, 0, + "rx_frame_512_1023", "Frame Size 512 - 1023"}, + [MVNETA_MIB_RX_FRAMEMAX_OCT_IDX] = {MVNETA_MIB_RX_FRAMEMAX_OCT, 0, + "rx_fame_1024_max", "Frame Size 1024 - Max"}, + [MVNETA_MIB_TX_GOOD_OCT_IDX] = {MVNETA_MIB_TX_GOOD_OCT, 1, + "tx_good_oct", "Good Octets Tx"}, + [MVNETA_MIB_TX_GOOD_FRAME_IDX] = {MVNETA_MIB_TX_GOOD_FRAME, 0, + "tx_good_frame", "Good Frames Tx"}, + [MVNETA_MIB_TX_EXCES_COL_IDX] = {MVNETA_MIB_TX_EXCES_COL, 0, + "tx_exces_collision", "Excessive Collision"}, + [MVNETA_MIB_TX_MCAST_FRAME_IDX] = {MVNETA_MIB_TX_MCAST_FRAME, 0, + "tx_mcast_frame", "Multicast Frames Tx"}, + [MVNETA_MIB_TX_BCAST_FRAME_IDX] = {MVNETA_MIB_TX_BCAST_FRAME, 0, + "tx_bcast_frame", "Broadcast Frames Tx"}, + [MVNETA_MIB_TX_MAC_CTL_ERR_IDX] = {MVNETA_MIB_TX_MAC_CTL_ERR, 0, + "tx_mac_ctl_err", "Unknown MAC Control"}, + 
[MVNETA_MIB_FC_SENT_IDX] = {MVNETA_MIB_FC_SENT, 0, + "fc_tx", "Flow Control Tx"}, + [MVNETA_MIB_FC_GOOD_IDX] = {MVNETA_MIB_FC_GOOD, 0, + "fc_rx_good", "Good Flow Control Rx"}, + [MVNETA_MIB_FC_BAD_IDX] = {MVNETA_MIB_FC_BAD, 0, + "fc_rx_bad", "Bad Flow Control Rx"}, + [MVNETA_MIB_PKT_UNDERSIZE_IDX] = {MVNETA_MIB_PKT_UNDERSIZE, 0, + "pkt_undersize", "Undersized Packets Rx"}, + [MVNETA_MIB_PKT_FRAGMENT_IDX] = {MVNETA_MIB_PKT_FRAGMENT, 0, + "pkt_fragment", "Fragmented Packets Rx"}, + [MVNETA_MIB_PKT_OVERSIZE_IDX] = {MVNETA_MIB_PKT_OVERSIZE, 0, + "pkt_oversize", "Oversized Packets Rx"}, + [MVNETA_MIB_PKT_JABBER_IDX] = {MVNETA_MIB_PKT_JABBER, 0, + "pkt_jabber", "Jabber Packets Rx"}, + [MVNETA_MIB_MAC_RX_ERR_IDX] = {MVNETA_MIB_MAC_RX_ERR, 0, + "mac_rx_err", "MAC Rx Errors"}, + [MVNETA_MIB_MAC_CRC_ERR_IDX] = {MVNETA_MIB_MAC_CRC_ERR, 0, + "mac_crc_err", "MAC CRC Errors"}, + [MVNETA_MIB_MAC_COL_IDX] = {MVNETA_MIB_MAC_COL, 0, + "mac_collision", "MAC Collision"}, + [MVNETA_MIB_MAC_LATE_COL_IDX] = {MVNETA_MIB_MAC_LATE_COL, 0, + "mac_late_collision", "MAC Late Collision"}, +}; + +static struct resource_spec res_spec[] = { + { SYS_RES_MEMORY, 0, RF_ACTIVE }, + { SYS_RES_IRQ, 0, RF_ACTIVE }, + { -1, 0} +}; + +static struct { + driver_intr_t *handler; + char * description; +} mvneta_intrs[] = { + { mvneta_rxtxth_intr, "MVNETA aggregated interrupt" }, +}; + +static int +mvneta_set_mac_address(struct mvneta_softc *sc, uint8_t *addr) +{ + unsigned int mac_h; + unsigned int mac_l; + + mac_l = (addr[4] << 8) | (addr[5]); + mac_h = (addr[0] << 24) | (addr[1] << 16) | + (addr[2] << 8) | (addr[3] << 0); + + MVNETA_WRITE(sc, MVNETA_MACAL, mac_l); + MVNETA_WRITE(sc, MVNETA_MACAH, mac_h); + return (0); +} + +static int +mvneta_get_mac_address(struct mvneta_softc *sc, uint8_t *addr) +{ + char env[128], *macstr; + int count, i; + uint32_t mac_l, mac_h; + uint32_t tmpmac[ETHER_ADDR_LEN]; + + if (device_get_unit(sc->dev) == 0) + strlcpy(env, "uboot.ethaddr", sizeof(env)); + else { + 
env[sizeof(env) - 1] = 0; + snprintf(env, sizeof(env) - 1, "uboot.eth%daddr", + device_get_unit(sc->dev)); + } + macstr = kern_getenv(env); + if (macstr != NULL) { + count = sscanf(macstr, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", + &tmpmac[0], &tmpmac[1], + &tmpmac[2], &tmpmac[3], + &tmpmac[4], &tmpmac[5]); + if (count == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; i++) + addr[i] = tmpmac[i]; + } + freeenv(macstr); + return (0); + } + +#ifdef FDT + if (mvneta_fdt_mac_address(sc, addr) == 0) + return (0); +#endif + /* + * Fall back -- use the currently programmed address. + */ + mac_l = MVNETA_READ(sc, MVNETA_MACAL); + mac_h = MVNETA_READ(sc, MVNETA_MACAH); + if (mac_l == 0 && mac_h == 0) { + /* + * Generate pseudo-random MAC. + * Set lower part to random number | unit number. + */ + mac_l = arc4random() & ~0xff; + mac_l |= device_get_unit(sc->dev) & 0xff; + mac_h = arc4random(); + mac_h &= ~(3 << 24); /* Clear multicast and LAA bits */ + if (bootverbose) { + device_printf(sc->dev, + "Could not acquire MAC address. 
" + "Using randomized one.\n"); + } + } + + addr[0] = (mac_h & 0xff000000) >> 24; + addr[1] = (mac_h & 0x00ff0000) >> 16; + addr[2] = (mac_h & 0x0000ff00) >> 8; + addr[3] = (mac_h & 0x000000ff); + addr[4] = (mac_l & 0x0000ff00) >> 8; + addr[5] = (mac_l & 0x000000ff); + return (0); +} + +STATIC boolean_t +mvneta_has_switch(device_t self) +{ + phandle_t node, switch_node, switch_eth, switch_eth_handle; + + node = ofw_bus_get_node(self); + switch_node = + ofw_bus_find_compatible(OF_finddevice("/"), "marvell,dsa"); + switch_eth = 0; + + OF_getencprop(switch_node, "dsa,ethernet", + (void*)&switch_eth_handle, sizeof(switch_eth_handle)); + + if (switch_eth_handle > 0) + switch_eth = OF_node_from_xref(switch_eth_handle); + + /* Return true if dsa,ethernet cell points to us */ + return (node == switch_eth); +} + +STATIC int +mvneta_dma_create(struct mvneta_softc *sc) +{ + size_t maxsize, maxsegsz; + size_t q; + int error; + + /* + * Create Tx DMA + */ + maxsize = maxsegsz = sizeof(struct mvneta_tx_desc) * MVNETA_TX_RING_CNT; + + error = bus_dma_tag_create( + bus_get_dma_tag(sc->dev), /* parent */ + 16, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filtfunc, filtfuncarg */ + maxsize, /* maxsize */ + 1, /* nsegments */ + maxsegsz, /* maxsegsz */ + BUS_DMA_COHERENT, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &sc->tx_dtag); /* dmat */ + if (error != 0) { + device_printf(sc->dev, + "Failed to create DMA tag for Tx descriptors.\n"); + goto fail; + } + error = bus_dma_tag_create( + bus_get_dma_tag(sc->dev), /* parent */ + 1, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filtfunc, filtfuncarg */ + MVNETA_PACKET_SIZE, /* maxsize */ + MVNETA_TX_SEGLIMIT, /* nsegments */ + MVNETA_PACKET_SIZE, /* maxsegsz */ + BUS_DMA_COHERENT | BUS_DMA_ALLOCNOW, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &sc->txmbuf_dtag); + 
if (error != 0) { + device_printf(sc->dev, + "Failed to create DMA tag for Tx mbufs.\n"); + goto fail; + } + + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + error = mvneta_ring_alloc_tx_queue(sc, q); + if (error != 0) { + device_printf(sc->dev, + "Failed to allocate DMA safe memory for TxQ: %d\n", q); + goto fail; + } + } + + /* + * Create Rx DMA. + */ + /* Create tag for Rx descripors */ + error = bus_dma_tag_create( + bus_get_dma_tag(sc->dev), /* parent */ + 32, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filtfunc, filtfuncarg */ + sizeof(struct mvneta_rx_desc) * MVNETA_RX_RING_CNT, /* maxsize */ + 1, /* nsegments */ + sizeof(struct mvneta_rx_desc) * MVNETA_RX_RING_CNT, /* maxsegsz */ + BUS_DMA_COHERENT, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &sc->rx_dtag); /* dmat */ + if (error != 0) { + device_printf(sc->dev, + "Failed to create DMA tag for Rx descriptors.\n"); + goto fail; + } + + /* Create tag for Rx buffers */ + error = bus_dma_tag_create( + bus_get_dma_tag(sc->dev), /* parent */ + 32, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filtfunc, filtfuncarg */ + MVNETA_PACKET_SIZE, 1, /* maxsize, nsegments */ + MVNETA_PACKET_SIZE, /* maxsegsz */ + BUS_DMA_COHERENT, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &sc->rxbuf_dtag); /* dmat */ + if (error != 0) { + device_printf(sc->dev, + "Failed to create DMA tag for Rx buffers.\n"); + goto fail; + } + + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + if (mvneta_ring_alloc_rx_queue(sc, q) != 0) { + device_printf(sc->dev, + "Failed to allocate DMA safe memory for RxQ: %d\n", q); + goto fail; + } + } + + return (0); +fail: + mvneta_detach(sc->dev); + + return (error); +} + +/* ARGSUSED */ +int +mvneta_attach(device_t self) +{ + struct mvneta_softc *sc; + struct ifnet *ifp; + device_t child; + int ifm_target; + int q, error; + uint32_t reg; + 
+ sc = device_get_softc(self); + sc->dev = self; + + mtx_init(&sc->mtx, "mvneta_sc", NULL, MTX_DEF); + + error = bus_alloc_resources(self, res_spec, sc->res); + if (error) { + device_printf(self, "could not allocate resources\n"); + return (ENXIO); + } + + sc->version = MVNETA_READ(sc, MVNETA_PV); + device_printf(self, "version is %x\n", sc->version); + callout_init(&sc->tick_ch, 0); + + /* + * make sure DMA engines are in reset state + */ + MVNETA_WRITE(sc, MVNETA_PRXINIT, 0x00000001); + MVNETA_WRITE(sc, MVNETA_PTXINIT, 0x00000001); + + /* + * Disable port snoop for buffers and descriptors + * to avoid L2 caching of both without DRAM copy. + * Obtain coherency settings from the first MBUS + * window attribute. + */ + if ((MVNETA_READ(sc, MV_WIN_NETA_BASE(0)) & IO_WIN_COH_ATTR_MASK) == 0) { + reg = MVNETA_READ(sc, MVNETA_PSNPCFG); + reg &= ~MVNETA_PSNPCFG_DESCSNP_MASK; + reg &= ~MVNETA_PSNPCFG_BUFSNP_MASK; + MVNETA_WRITE(sc, MVNETA_PSNPCFG, reg); + } + + /* + * MAC address + */ + if (mvneta_get_mac_address(sc, sc->enaddr)) { + device_printf(self, "no mac address.\n"); + return (ENXIO); + } + mvneta_set_mac_address(sc, sc->enaddr); + + mvneta_disable_intr(sc); + + /* Allocate network interface */ + ifp = sc->ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) { + device_printf(self, "if_alloc() failed\n"); + mvneta_detach(self); + return (ENOMEM); + } + if_initname(ifp, device_get_name(self), device_get_unit(self)); + + /* + * We can support 802.1Q VLAN-sized frames and jumbo + * Ethernet frames. 
+ */ + ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU; + + ifp->if_softc = sc; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; +#ifdef MVNETA_MULTIQUEUE + ifp->if_transmit = mvneta_transmit; + ifp->if_qflush = mvneta_qflush; +#else /* !MVNETA_MULTIQUEUE */ + ifp->if_start = mvneta_start; + ifp->if_snd.ifq_drv_maxlen = 0; + IFQ_SET_MAXLEN(&ifp->if_snd, MVNETA_TX_RING_CNT - 1); + IFQ_SET_READY(&ifp->if_snd); +#endif + ifp->if_init = mvneta_init; + ifp->if_ioctl = mvneta_ioctl; + + /* + * We can do IPv4/TCPv4/UDPv4/TCPv6/UDPv6 checksums in hardware. + */ + ifp->if_capabilities |= IFCAP_HWCSUM; + + /* + * As VLAN hardware tagging is not supported + * but is necessary to perform VLAN hardware checksums, + * it is done in the driver + */ + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; + + /* + * Currently IPv6 HW checksum is broken, so make sure it is disabled. + */ + ifp->if_capabilities &= ~IFCAP_HWCSUM_IPV6; + ifp->if_capenable = ifp->if_capabilities; + + /* + * Disabled option(s): + * - Support for Large Receive Offload + */ + ifp->if_capabilities |= IFCAP_LRO; + + ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; + + /* + * Device DMA Buffer allocation. + * Handles resource deallocation in case of failure. + */ + error = mvneta_dma_create(sc); + if (error != 0) { + mvneta_detach(self); + return (error); + } + + /* Initialize queues */ + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + error = mvneta_ring_init_tx_queue(sc, q); + if (error != 0) { + mvneta_detach(self); + return (error); + } + } + + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + error = mvneta_ring_init_rx_queue(sc, q); + if (error != 0) { + mvneta_detach(self); + return (error); + } + } + + ether_ifattach(ifp, sc->enaddr); + + /* + * Enable DMA engines and Initialize Device Registers. 
+ */ + MVNETA_WRITE(sc, MVNETA_PRXINIT, 0x00000000); + MVNETA_WRITE(sc, MVNETA_PTXINIT, 0x00000000); + MVNETA_WRITE(sc, MVNETA_PACC, MVNETA_PACC_ACCELERATIONMODE_EDM); + mvneta_sc_lock(sc); + mvneta_filter_setup(sc); + mvneta_sc_unlock(sc); + mvneta_initreg(ifp); + + /* + * Now MAC is working, setup MII. + */ + if (mii_init == 0) { + /* + * MII bus is shared by all MACs and all PHYs in SoC. + * serializing the bus access should be safe. + */ + mtx_init(&mii_mutex, "mvneta_mii", NULL, MTX_DEF); + mii_init = 1; + } + + /* Attach PHY(s) */ + if ((sc->phy_addr != MII_PHY_ANY) && (!sc->use_inband_status)) { + error = mii_attach(self, &sc->miibus, ifp, mvneta_mediachange, + mvneta_mediastatus, BMSR_DEFCAPMASK, sc->phy_addr, + MII_OFFSET_ANY, 0); + if (error != 0) { + if (bootverbose) { + device_printf(self, + "MII attach failed, error: %d\n", error); + } + ether_ifdetach(sc->ifp); + mvneta_detach(self); + return (error); + } + sc->mii = device_get_softc(sc->miibus); + sc->phy_attached = 1; + + /* Disable auto-negotiation in MAC - rely on PHY layer */ + mvneta_update_autoneg(sc, FALSE); + } else if (sc->use_inband_status == TRUE) { + /* In-band link status */ + ifmedia_init(&sc->mvneta_ifmedia, 0, mvneta_mediachange, + mvneta_mediastatus); + + /* Configure media */ + ifmedia_add(&sc->mvneta_ifmedia, IFM_ETHER | IFM_1000_T | IFM_FDX, + 0, NULL); + ifmedia_add(&sc->mvneta_ifmedia, IFM_ETHER | IFM_100_TX, 0, NULL); + ifmedia_add(&sc->mvneta_ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX, + 0, NULL); + ifmedia_add(&sc->mvneta_ifmedia, IFM_ETHER | IFM_10_T, 0, NULL); + ifmedia_add(&sc->mvneta_ifmedia, IFM_ETHER | IFM_10_T | IFM_FDX, + 0, NULL); + ifmedia_add(&sc->mvneta_ifmedia, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->mvneta_ifmedia, IFM_ETHER | IFM_AUTO); + + /* Enable auto-negotiation */ + mvneta_update_autoneg(sc, TRUE); + + mvneta_sc_lock(sc); + if (MVNETA_IS_LINKUP(sc)) + mvneta_linkup(sc); + else + mvneta_linkdown(sc); + mvneta_sc_unlock(sc); + + } else { + /* 
Fixed-link, use predefined values */ + mvneta_update_autoneg(sc, FALSE); + ifmedia_init(&sc->mvneta_ifmedia, 0, mvneta_mediachange, + mvneta_mediastatus); + + ifm_target = IFM_ETHER; + switch (sc->phy_speed) { + case 2500: + if (sc->phy_mode != MVNETA_PHY_SGMII && + sc->phy_mode != MVNETA_PHY_QSGMII) { + device_printf(self, + "2.5G speed can work only in (Q)SGMII mode\n"); + ether_ifdetach(sc->ifp); + mvneta_detach(self); + return (ENXIO); + } + ifm_target |= IFM_2500_KX; /* IFM_2500_T */ + break; + case 1000: + ifm_target |= IFM_1000_T; + break; + case 100: + ifm_target |= IFM_100_TX; + break; + case 10: + ifm_target |= IFM_10_T; + break; + default: + ether_ifdetach(sc->ifp); + mvneta_detach(self); + return (ENXIO); + } + + if (sc->phy_fdx) + ifm_target |= IFM_FDX; + else + ifm_target |= IFM_HDX; + + ifmedia_add(&sc->mvneta_ifmedia, ifm_target, 0, NULL); + ifmedia_set(&sc->mvneta_ifmedia, ifm_target); + if_link_state_change(sc->ifp, LINK_STATE_UP); + + if (mvneta_has_switch(self)) { + child = device_add_child(sc->dev, "mdio", -1); + if (child == NULL) { + ether_ifdetach(sc->ifp); + mvneta_detach(self); + return (ENXIO); + } + bus_generic_attach(sc->dev); + bus_generic_attach(child); + } + + /* Configure MAC media */ + mvneta_update_media(sc, ifm_target); + } + + sysctl_mvneta_init(sc); + + callout_reset(&sc->tick_ch, 0, mvneta_tick, sc); + + error = bus_setup_intr(self, sc->res[1], + INTR_TYPE_NET | INTR_MPSAFE, NULL, mvneta_intrs[0].handler, sc, + &sc->ih_cookie[0]); + if (error) { + device_printf(self, "could not setup %s\n", + mvneta_intrs[0].description); + ether_ifdetach(sc->ifp); + mvneta_detach(self); + return (error); + } + + return (0); +} + +STATIC int +mvneta_detach(device_t dev) +{ + struct mvneta_softc *sc; + struct ifnet *ifp; + int q; + + sc = device_get_softc(dev); + ifp = sc->ifp; + + mvneta_stop(sc); + /* Detach network interface */ + if (sc->ifp) + if_free(sc->ifp); + + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) + mvneta_ring_dealloc_rx_queue(sc, 
q); + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) + mvneta_ring_dealloc_tx_queue(sc, q); + + if (sc->tx_dtag != NULL) + bus_dma_tag_destroy(sc->tx_dtag); + if (sc->rx_dtag != NULL) + bus_dma_tag_destroy(sc->rx_dtag); + if (sc->txmbuf_dtag != NULL) + bus_dma_tag_destroy(sc->txmbuf_dtag); + + bus_release_resources(dev, res_spec, sc->res); + return (0); +} + +/* + * MII + */ +STATIC int +mvneta_miibus_readreg(device_t dev, int phy, int reg) +{ + struct mvneta_softc *sc; + struct ifnet *ifp; + uint32_t smi, val; + int i; + + sc = device_get_softc(dev); + ifp = sc->ifp; + + mtx_lock(&mii_mutex); + + for (i = 0; i < MVNETA_PHY_TIMEOUT; i++) { + if ((MVNETA_READ(sc, MVNETA_SMI) & MVNETA_SMI_BUSY) == 0) + break; + DELAY(1); + } + if (i == MVNETA_PHY_TIMEOUT) { + if_printf(ifp, "SMI busy timeout\n"); + mtx_unlock(&mii_mutex); + return (-1); + } + + smi = MVNETA_SMI_PHYAD(phy) | + MVNETA_SMI_REGAD(reg) | MVNETA_SMI_OPCODE_READ; + MVNETA_WRITE(sc, MVNETA_SMI, smi); + + for (i = 0; i < MVNETA_PHY_TIMEOUT; i++) { + if ((MVNETA_READ(sc, MVNETA_SMI) & MVNETA_SMI_BUSY) == 0) + break; + DELAY(1); + } + + if (i == MVNETA_PHY_TIMEOUT) { + if_printf(ifp, "SMI busy timeout\n"); + mtx_unlock(&mii_mutex); + return (-1); + } + for (i = 0; i < MVNETA_PHY_TIMEOUT; i++) { + smi = MVNETA_READ(sc, MVNETA_SMI); + if (smi & MVNETA_SMI_READVALID) + break; + DELAY(1); + } + + if (i == MVNETA_PHY_TIMEOUT) { + if_printf(ifp, "SMI busy timeout\n"); + mtx_unlock(&mii_mutex); + return (-1); + } + + mtx_unlock(&mii_mutex); + +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s i=%d, timeout=%d\n", ifp->if_xname, i, + MVNETA_PHY_TIMEOUT); +#endif + + val = smi & MVNETA_SMI_DATA_MASK; + +#ifdef MVNETA_KTR + CTR4(KTR_SPARE2, "%s phy=%d, reg=%#x, val=%#x\n", ifp->if_xname, phy, + reg, val); +#endif + return (val); +} + +STATIC int +mvneta_miibus_writereg(device_t dev, int phy, int reg, int val) +{ + struct mvneta_softc *sc; + struct ifnet *ifp; + uint32_t smi; + int i; + + sc = device_get_softc(dev); + ifp = sc->ifp; 
+#ifdef MVNETA_KTR + CTR4(KTR_SPARE2, "%s phy=%d, reg=%#x, val=%#x\n", ifp->if_xname, + phy, reg, val); +#endif + + mtx_lock(&mii_mutex); + + for (i = 0; i < MVNETA_PHY_TIMEOUT; i++) { + if ((MVNETA_READ(sc, MVNETA_SMI) & MVNETA_SMI_BUSY) == 0) + break; + DELAY(1); + } + if (i == MVNETA_PHY_TIMEOUT) { + if_printf(ifp, "SMI busy timeout\n"); + mtx_unlock(&mii_mutex); + return (0); + } + + smi = MVNETA_SMI_PHYAD(phy) | MVNETA_SMI_REGAD(reg) | + MVNETA_SMI_OPCODE_WRITE | (val & MVNETA_SMI_DATA_MASK); + MVNETA_WRITE(sc, MVNETA_SMI, smi); + + for (i = 0; i < MVNETA_PHY_TIMEOUT; i++) { + if ((MVNETA_READ(sc, MVNETA_SMI) & MVNETA_SMI_BUSY) == 0) + break; + DELAY(1); + } + + mtx_unlock(&mii_mutex); + + if (i == MVNETA_PHY_TIMEOUT) + if_printf(ifp, "phy write timed out\n"); + + return (0); +} + +STATIC void +mvneta_portup(struct mvneta_softc *sc) +{ + int q; + + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + mvneta_rx_lockq(sc, q); + mvneta_rx_queue_enable(sc->ifp, q); + mvneta_rx_unlockq(sc, q); + } + + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + mvneta_tx_lockq(sc, q); + mvneta_tx_queue_enable(sc->ifp, q); + mvneta_tx_unlockq(sc, q); + } + +} + +STATIC void +mvneta_portdown(struct mvneta_softc *sc) +{ + struct mvneta_rx_ring *rx; + struct mvneta_tx_ring *tx; + int q, cnt; + uint32_t reg; + + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + rx = MVNETA_RX_RING(sc, q); + mvneta_rx_lockq(sc, q); + rx->queue_status = MVNETA_QUEUE_DISABLED; + mvneta_rx_unlockq(sc, q); + } + + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + tx = MVNETA_TX_RING(sc, q); + mvneta_tx_lockq(sc, q); + tx->queue_status = MVNETA_QUEUE_DISABLED; + mvneta_tx_unlockq(sc, q); + } + + /* Wait for all Rx activity to terminate. */ + reg = MVNETA_READ(sc, MVNETA_RQC) & MVNETA_RQC_EN_MASK; + reg = MVNETA_RQC_DIS(reg); + MVNETA_WRITE(sc, MVNETA_RQC, reg); + cnt = 0; + do { + if (cnt >= RX_DISABLE_TIMEOUT) { + if_printf(sc->ifp, + "timeout for RX stopped. 
rqc 0x%x\n", reg); + break; + } + cnt++; + reg = MVNETA_READ(sc, MVNETA_RQC); + } while ((reg & MVNETA_RQC_EN_MASK) != 0); + + /* Wait for all Tx activity to terminate. */ + reg = MVNETA_READ(sc, MVNETA_PIE); + reg &= ~MVNETA_PIE_TXPKTINTRPTENB_MASK; + MVNETA_WRITE(sc, MVNETA_PIE, reg); + + reg = MVNETA_READ(sc, MVNETA_PRXTXTIM); + reg &= ~MVNETA_PRXTXTI_TBTCQ_MASK; + MVNETA_WRITE(sc, MVNETA_PRXTXTIM, reg); + + reg = MVNETA_READ(sc, MVNETA_TQC) & MVNETA_TQC_EN_MASK; + reg = MVNETA_TQC_DIS(reg); + MVNETA_WRITE(sc, MVNETA_TQC, reg); + cnt = 0; + do { + if (cnt >= TX_DISABLE_TIMEOUT) { + if_printf(sc->ifp, + "timeout for TX stopped. tqc 0x%x\n", reg); + break; + } + cnt++; + reg = MVNETA_READ(sc, MVNETA_TQC); + } while ((reg & MVNETA_TQC_EN_MASK) != 0); + + /* Wait for all Tx FIFO is empty */ + cnt = 0; + do { + if (cnt >= TX_FIFO_EMPTY_TIMEOUT) { + if_printf(sc->ifp, + "timeout for TX FIFO drained. ps0 0x%x\n", reg); + break; + } + cnt++; + reg = MVNETA_READ(sc, MVNETA_PS0); + } while (((reg & MVNETA_PS0_TXFIFOEMP) == 0) && + ((reg & MVNETA_PS0_TXINPROG) != 0)); +} + +/* + * Device Register Initialization + * reset device registers to device driver default value. + * the device is not enabled here. + */ +STATIC int +mvneta_initreg(struct ifnet *ifp) +{ + struct mvneta_softc *sc; + int q, i; + uint32_t reg; + + sc = ifp->if_softc; +#ifdef MVNETA_KTR + CTR1(KTR_SPARE2, "%s initializing device register", ifp->if_xname); +#endif + + /* Disable Legacy WRR, Disable EJP, Release from reset. */ + MVNETA_WRITE(sc, MVNETA_TQC_1, 0); + /* Enable mbus retry. 
*/ + MVNETA_WRITE(sc, MVNETA_MBUS_CONF, MVNETA_MBUS_RETRY_EN); + + /* Init TX/RX Queue Registers */ + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + mvneta_rx_lockq(sc, q); + if (mvneta_rx_queue_init(ifp, q) != 0) { + device_printf(sc->dev, + "initialization failed: cannot initialize queue\n"); + mvneta_rx_unlockq(sc, q); + return (ENOBUFS); + } + mvneta_rx_unlockq(sc, q); + } + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + mvneta_tx_lockq(sc, q); + if (mvneta_tx_queue_init(ifp, q) != 0) { + device_printf(sc->dev, + "initialization failed: cannot initialize queue\n"); + mvneta_tx_unlockq(sc, q); + return (ENOBUFS); + } + mvneta_tx_unlockq(sc, q); + } + + /* + * Ethernet Unit Control - disable automatic PHY management by HW. + * In case the port uses SMI-controlled PHY, poll its status with + * mii_tick() and update MAC settings accordingly. + */ + reg = MVNETA_READ(sc, MVNETA_EUC); + reg &= ~MVNETA_EUC_POLLING; + MVNETA_WRITE(sc, MVNETA_EUC, reg); + + /* EEE: Low Power Idle */ + reg = MVNETA_LPIC0_LILIMIT(MVNETA_LPI_LI); + reg |= MVNETA_LPIC0_TSLIMIT(MVNETA_LPI_TS); + MVNETA_WRITE(sc, MVNETA_LPIC0, reg); + + reg = MVNETA_LPIC1_TWLIMIT(MVNETA_LPI_TW); + MVNETA_WRITE(sc, MVNETA_LPIC1, reg); + + reg = MVNETA_LPIC2_MUSTSET; + MVNETA_WRITE(sc, MVNETA_LPIC2, reg); + + /* Port MAC Control set 0 */ + reg = MVNETA_PMACC0_MUSTSET; /* must write 0x1 */ + reg &= ~MVNETA_PMACC0_PORTEN; /* port is still disabled */ + reg |= MVNETA_PMACC0_FRAMESIZELIMIT(MVNETA_MAX_FRAME); + MVNETA_WRITE(sc, MVNETA_PMACC0, reg); + + /* Port MAC Control set 2 */ + reg = MVNETA_READ(sc, MVNETA_PMACC2); + switch (sc->phy_mode) { + case MVNETA_PHY_QSGMII: + reg |= (MVNETA_PMACC2_PCSEN | MVNETA_PMACC2_RGMIIEN); + MVNETA_WRITE(sc, MVNETA_PSERDESCFG, MVNETA_PSERDESCFG_QSGMII); + break; + case MVNETA_PHY_SGMII: + reg |= (MVNETA_PMACC2_PCSEN | MVNETA_PMACC2_RGMIIEN); + MVNETA_WRITE(sc, MVNETA_PSERDESCFG, MVNETA_PSERDESCFG_SGMII); + break; + case MVNETA_PHY_RGMII: + case MVNETA_PHY_RGMII_ID: + reg |= 
MVNETA_PMACC2_RGMIIEN; + break; + } + reg |= MVNETA_PMACC2_MUSTSET; + reg &= ~MVNETA_PMACC2_PORTMACRESET; + MVNETA_WRITE(sc, MVNETA_PMACC2, reg); + + /* Port Configuration Extended: enable Tx CRC generation */ + reg = MVNETA_READ(sc, MVNETA_PXCX); + reg &= ~MVNETA_PXCX_TXCRCDIS; + MVNETA_WRITE(sc, MVNETA_PXCX, reg); + + /* clear MIB counter registers(clear by read) */ + for (i = 0; i < nitems(mvneta_mib_list); i++) { + if (mvneta_mib_list[i].reg64) + MVNETA_READ_MIB_8(sc, mvneta_mib_list[i].regnum); + else + MVNETA_READ_MIB_4(sc, mvneta_mib_list[i].regnum); + } + MVNETA_READ(sc, MVNETA_PDFC); + MVNETA_READ(sc, MVNETA_POFC); + + /* Set SDC register except IPGINT bits */ + reg = MVNETA_SDC_RXBSZ_16_64BITWORDS; + reg |= MVNETA_SDC_TXBSZ_16_64BITWORDS; + reg |= MVNETA_SDC_BLMR; + reg |= MVNETA_SDC_BLMT; + MVNETA_WRITE(sc, MVNETA_SDC, reg); + + return (0); +} + +STATIC void +mvneta_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) +{ + + if (error != 0) + return; + *(bus_addr_t *)arg = segs->ds_addr; +} + +STATIC int +mvneta_ring_alloc_rx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_rx_ring *rx; + struct mvneta_buf *rxbuf; + bus_dmamap_t dmap; + int i, error; + + if (q >= MVNETA_RX_QNUM_MAX) + return (EINVAL); + + rx = MVNETA_RX_RING(sc, q); + mtx_init(&rx->ring_mtx, "mvneta_rx", NULL, MTX_DEF); + /* Allocate DMA memory for Rx descriptors */ + error = bus_dmamem_alloc(sc->rx_dtag, + (void**)&(rx->desc), + BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, + &rx->desc_map); + if (error != 0 || rx->desc == NULL) + goto fail; + error = bus_dmamap_load(sc->rx_dtag, rx->desc_map, + rx->desc, + sizeof(struct mvneta_rx_desc) * MVNETA_RX_RING_CNT, + mvneta_dmamap_cb, &rx->desc_pa, BUS_DMA_NOWAIT); + if (error != 0) + goto fail; + + for (i = 0; i < MVNETA_RX_RING_CNT; i++) { + error = bus_dmamap_create(sc->rxbuf_dtag, BUS_DMA_COHERENT, + &dmap); + if (error != 0) { + device_printf(sc->dev, + "Failed to create DMA map for Rx buffer num: %d\n", i); + 
goto fail; + } + rxbuf = &rx->rxbuf[i]; + rxbuf->dmap = dmap; + rxbuf->m = NULL; + } + + return (0); +fail: + mvneta_ring_dealloc_rx_queue(sc, q); + device_printf(sc->dev, "DMA Ring buffer allocation failure.\n"); + return (error); +} + +STATIC int +mvneta_ring_alloc_tx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_tx_ring *tx; + int error; + + if (q >= MVNETA_TX_QNUM_MAX) + return (EINVAL); + tx = MVNETA_TX_RING(sc, q); + mtx_init(&tx->ring_mtx, "mvneta_tx", NULL, MTX_DEF); + error = bus_dmamem_alloc(sc->tx_dtag, + (void**)&(tx->desc), + BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, + &tx->desc_map); + if (error != 0 || tx->desc == NULL) + goto fail; + error = bus_dmamap_load(sc->tx_dtag, tx->desc_map, + tx->desc, + sizeof(struct mvneta_tx_desc) * MVNETA_TX_RING_CNT, + mvneta_dmamap_cb, &tx->desc_pa, BUS_DMA_NOWAIT); + if (error != 0) + goto fail; + +#ifdef MVNETA_MULTIQUEUE + tx->br = buf_ring_alloc(MVNETA_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, + &tx->ring_mtx); + if (tx->br == NULL) { + device_printf(sc->dev, + "Could not setup buffer ring for TxQ(%d)\n", q); + error = ENOMEM; + goto fail; + } +#endif + + return (0); +fail: + mvneta_ring_dealloc_tx_queue(sc, q); + device_printf(sc->dev, "DMA Ring buffer allocation failure.\n"); + return (error); +} + +STATIC void +mvneta_ring_dealloc_tx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_tx_ring *tx; + struct mvneta_buf *txbuf; + void *kva; + int error; + int i; + + if (q >= MVNETA_TX_QNUM_MAX) + return; + tx = MVNETA_TX_RING(sc, q); + + if (tx->taskq != NULL) { + /* Remove task */ + while (taskqueue_cancel(tx->taskq, &tx->task, NULL) != 0) + taskqueue_drain(tx->taskq, &tx->task); + } +#ifdef MVNETA_MULTIQUEUE + if (tx->br != NULL) + drbr_free(tx->br, M_DEVBUF); +#endif + + if (sc->txmbuf_dtag != NULL) { + if (mtx_name(&tx->ring_mtx) != NULL) { + /* + * It is assumed that maps are being loaded after mutex + * is initialized. Therefore we can skip unloading maps + * when mutex is empty. 
+ */ + mvneta_tx_lockq(sc, q); + mvneta_ring_flush_tx_queue(sc, q); + mvneta_tx_unlockq(sc, q); + } + for (i = 0; i < MVNETA_TX_RING_CNT; i++) { + txbuf = &tx->txbuf[i]; + if (txbuf->dmap != NULL) { + error = bus_dmamap_destroy(sc->txmbuf_dtag, + txbuf->dmap); + if (error != 0) { + panic("%s: map busy for Tx descriptor (Q%d, %d)", + __func__, q, i); + } + } + } + } + + if (tx->desc_pa != 0) + bus_dmamap_unload(sc->tx_dtag, tx->desc_map); + + kva = (void *)tx->desc; + if (kva != NULL) + bus_dmamem_free(sc->tx_dtag, tx->desc, tx->desc_map); + + if (mtx_name(&tx->ring_mtx) != NULL) + mtx_destroy(&tx->ring_mtx); + + memset(tx, 0, sizeof(*tx)); +} + +STATIC void +mvneta_ring_dealloc_rx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_rx_ring *rx; + struct lro_ctrl *lro; + void *kva; + + if (q >= MVNETA_RX_QNUM_MAX) + return; + + rx = MVNETA_RX_RING(sc, q); + + mvneta_ring_flush_rx_queue(sc, q); + + if (rx->desc_pa != 0) + bus_dmamap_unload(sc->rx_dtag, rx->desc_map); + + kva = (void *)rx->desc; + if (kva != NULL) + bus_dmamem_free(sc->rx_dtag, rx->desc, rx->desc_map); + + lro = &rx->lro; + tcp_lro_free(lro); + + if (mtx_name(&rx->ring_mtx) != NULL) + mtx_destroy(&rx->ring_mtx); + + memset(rx, 0, sizeof(*rx)); +} + +STATIC int +mvneta_ring_init_rx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_rx_ring *rx; + struct lro_ctrl *lro; + int error; + + if (q >= MVNETA_RX_QNUM_MAX) + return (0); + + rx = MVNETA_RX_RING(sc, q); + rx->dma = rx->cpu = 0; + rx->queue_th_received = MVNETA_RXTH_COUNT; + rx->queue_th_time = (get_tclk() / 1000) / 10; /* 0.1 [ms] */ + + /* Initialize LRO */ + rx->lro_enabled = FALSE; + if ((sc->ifp->if_capenable & IFCAP_LRO) != 0) { + lro = &rx->lro; + error = tcp_lro_init(lro); + if (error != 0) + device_printf(sc->dev, "LRO Initialization failed!\n"); + else { + rx->lro_enabled = TRUE; + lro->ifp = sc->ifp; + } + } + + return (0); +} + +STATIC int +mvneta_ring_init_tx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_tx_ring 
*tx; + struct mvneta_buf *txbuf; + int i, error; + + if (q >= MVNETA_TX_QNUM_MAX) + return (0); + + tx = MVNETA_TX_RING(sc, q); + + /* Tx handle */ + for (i = 0; i < MVNETA_TX_RING_CNT; i++) { + txbuf = &tx->txbuf[i]; + txbuf->m = NULL; + /* Tx handle needs DMA map for busdma_load_mbuf() */ + error = bus_dmamap_create(sc->txmbuf_dtag, BUS_DMA_COHERENT, + &txbuf->dmap); + if (error != 0) { + device_printf(sc->dev, + "can't create dma map (tx ring %d)\n", i); + return (error); + } + } + tx->dma = tx->cpu = 0; + tx->used = 0; + tx->drv_error = 0; + tx->queue_status = MVNETA_QUEUE_DISABLED; + tx->queue_hung = FALSE; + + tx->ifp = sc->ifp; + tx->qidx = q; + TASK_INIT(&tx->task, 0, mvneta_tx_task, tx); + tx->taskq = taskqueue_create_fast("mvneta_tx_taskq", M_WAITOK, + taskqueue_thread_enqueue, &tx->taskq); + taskqueue_start_threads(&tx->taskq, 1, PI_NET, "%s: tx_taskq(%d)", + device_get_nameunit(sc->dev), q); + + return (0); +} + +STATIC void +mvneta_ring_flush_tx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_tx_ring *tx; + struct mvneta_buf *txbuf; + int i; + + tx = MVNETA_TX_RING(sc, q); + KASSERT_TX_MTX(sc, q); + + /* Tx handle */ + for (i = 0; i < MVNETA_TX_RING_CNT; i++) { + txbuf = &tx->txbuf[i]; + bus_dmamap_unload(sc->txmbuf_dtag, txbuf->dmap); + if (txbuf->m != NULL) { + m_freem(txbuf->m); + txbuf->m = NULL; + } + } + tx->dma = tx->cpu = 0; + tx->used = 0; +} + +STATIC void +mvneta_ring_flush_rx_queue(struct mvneta_softc *sc, int q) +{ + struct mvneta_rx_ring *rx; + struct mvneta_buf *rxbuf; + int i; + + rx = MVNETA_RX_RING(sc, q); + KASSERT_RX_MTX(sc, q); + + /* Rx handle */ + for (i = 0; i < MVNETA_RX_RING_CNT; i++) { + rxbuf = &rx->rxbuf[i]; + mvneta_rx_buf_free(sc, rxbuf); + } + rx->dma = rx->cpu = 0; +} + +/* + * Rx/Tx Queue Control + */ +STATIC int +mvneta_rx_queue_init(struct ifnet *ifp, int q) +{ + struct mvneta_softc *sc; + struct mvneta_rx_ring *rx; + uint32_t reg; + + sc = ifp->if_softc; + KASSERT_RX_MTX(sc, q); + rx = MVNETA_RX_RING(sc, 
q); + DASSERT(rx->desc_pa != 0); + + /* descriptor address */ + MVNETA_WRITE(sc, MVNETA_PRXDQA(q), rx->desc_pa); + + /* Rx buffer size and descriptor ring size */ + reg = MVNETA_PRXDQS_BUFFERSIZE(MVNETA_PACKET_SIZE >> 3); + reg |= MVNETA_PRXDQS_DESCRIPTORSQUEUESIZE(MVNETA_RX_RING_CNT); + MVNETA_WRITE(sc, MVNETA_PRXDQS(q), reg); +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s PRXDQS(%d): %#x", ifp->if_xname, q, + MVNETA_READ(sc, MVNETA_PRXDQS(q))); +#endif + /* Rx packet offset address */ + reg = MVNETA_PRXC_PACKETOFFSET(MVNETA_PACKET_OFFSET >> 3); + MVNETA_WRITE(sc, MVNETA_PRXC(q), reg); +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s PRXC(%d): %#x", ifp->if_xname, q, + MVNETA_READ(sc, MVNETA_PRXC(q))); +#endif + + /* if DMA is not working, register is not updated */ + DASSERT(MVNETA_READ(sc, MVNETA_PRXDQA(q)) == rx->desc_pa); + return (0); +} + +STATIC int +mvneta_tx_queue_init(struct ifnet *ifp, int q) +{ + struct mvneta_softc *sc; + struct mvneta_tx_ring *tx; + uint32_t reg; + + sc = ifp->if_softc; + KASSERT_TX_MTX(sc, q); + tx = MVNETA_TX_RING(sc, q); + DASSERT(tx->desc_pa != 0); + + /* descriptor address */ + MVNETA_WRITE(sc, MVNETA_PTXDQA(q), tx->desc_pa); + + /* descriptor ring size */ + reg = MVNETA_PTXDQS_DQS(MVNETA_TX_RING_CNT); + MVNETA_WRITE(sc, MVNETA_PTXDQS(q), reg); + + /* if DMA is not working, register is not updated */ + DASSERT(MVNETA_READ(sc, MVNETA_PTXDQA(q)) == tx->desc_pa); + return (0); +} + +STATIC int +mvneta_rx_queue_enable(struct ifnet *ifp, int q) +{ + struct mvneta_softc *sc; + struct mvneta_rx_ring *rx; + uint32_t reg; + + sc = ifp->if_softc; + rx = MVNETA_RX_RING(sc, q); + KASSERT_RX_MTX(sc, q); + + /* Set Rx interrupt threshold */ + reg = MVNETA_PRXDQTH_ODT(rx->queue_th_received); + MVNETA_WRITE(sc, MVNETA_PRXDQTH(q), reg); + + reg = MVNETA_PRXITTH_RITT(rx->queue_th_time); + MVNETA_WRITE(sc, MVNETA_PRXITTH(q), reg); + + /* Unmask RXTX_TH Intr. 
*/ + reg = MVNETA_READ(sc, MVNETA_PRXTXTIM); + reg |= MVNETA_PRXTXTI_RBICTAPQ(q); /* Rx Buffer Interrupt Coalese */ + MVNETA_WRITE(sc, MVNETA_PRXTXTIM, reg); + + /* Enable Rx queue */ + reg = MVNETA_READ(sc, MVNETA_RQC) & MVNETA_RQC_EN_MASK; + reg |= MVNETA_RQC_ENQ(q); + MVNETA_WRITE(sc, MVNETA_RQC, reg); + + rx->queue_status = MVNETA_QUEUE_WORKING; + return (0); +} + +STATIC int +mvneta_tx_queue_enable(struct ifnet *ifp, int q) +{ + struct mvneta_softc *sc; + struct mvneta_tx_ring *tx; + + sc = ifp->if_softc; + tx = MVNETA_TX_RING(sc, q); + KASSERT_TX_MTX(sc, q); + + /* Enable Tx queue */ + MVNETA_WRITE(sc, MVNETA_TQC, MVNETA_TQC_ENQ(q)); + + tx->queue_status = MVNETA_QUEUE_IDLE; + tx->queue_hung = FALSE; + return (0); +} + +STATIC __inline void +mvneta_rx_lockq(struct mvneta_softc *sc, int q) +{ + + DASSERT(q >= 0); + DASSERT(q < MVNETA_RX_QNUM_MAX); + mtx_lock(&sc->rx_ring[q].ring_mtx); +} + +STATIC __inline void +mvneta_rx_unlockq(struct mvneta_softc *sc, int q) +{ + + DASSERT(q >= 0); + DASSERT(q < MVNETA_RX_QNUM_MAX); + mtx_unlock(&sc->rx_ring[q].ring_mtx); +} + +STATIC __inline int __unused +mvneta_tx_trylockq(struct mvneta_softc *sc, int q) +{ + + DASSERT(q >= 0); + DASSERT(q < MVNETA_TX_QNUM_MAX); + return (mtx_trylock(&sc->tx_ring[q].ring_mtx)); +} + +STATIC __inline void +mvneta_tx_lockq(struct mvneta_softc *sc, int q) +{ + + DASSERT(q >= 0); + DASSERT(q < MVNETA_TX_QNUM_MAX); + mtx_lock(&sc->tx_ring[q].ring_mtx); +} + +STATIC __inline void +mvneta_tx_unlockq(struct mvneta_softc *sc, int q) +{ + + DASSERT(q >= 0); + DASSERT(q < MVNETA_TX_QNUM_MAX); + mtx_unlock(&sc->tx_ring[q].ring_mtx); +} + +/* + * Interrupt Handlers + */ +STATIC void +mvneta_disable_intr(struct mvneta_softc *sc) +{ + + MVNETA_WRITE(sc, MVNETA_EUIM, 0); + MVNETA_WRITE(sc, MVNETA_EUIC, 0); + MVNETA_WRITE(sc, MVNETA_PRXTXTIM, 0); + MVNETA_WRITE(sc, MVNETA_PRXTXTIC, 0); + MVNETA_WRITE(sc, MVNETA_PRXTXIM, 0); + MVNETA_WRITE(sc, MVNETA_PRXTXIC, 0); + MVNETA_WRITE(sc, MVNETA_PMIM, 0); + 
MVNETA_WRITE(sc, MVNETA_PMIC, 0); + MVNETA_WRITE(sc, MVNETA_PIE, 0); +} + +STATIC void +mvneta_enable_intr(struct mvneta_softc *sc) +{ + uint32_t reg; + + /* Enable Summary Bit to check all interrupt cause. */ + reg = MVNETA_READ(sc, MVNETA_PRXTXTIM); + reg |= MVNETA_PRXTXTI_PMISCICSUMMARY; + MVNETA_WRITE(sc, MVNETA_PRXTXTIM, reg); + + if (sc->use_inband_status) { + /* Enable Port MISC Intr. (via RXTX_TH_Summary bit) */ + MVNETA_WRITE(sc, MVNETA_PMIM, MVNETA_PMI_PHYSTATUSCHNG | + MVNETA_PMI_LINKCHANGE | MVNETA_PMI_PSCSYNCCHANGE); + } + + /* Enable All Queue Interrupt */ + reg = MVNETA_READ(sc, MVNETA_PIE); + reg |= MVNETA_PIE_RXPKTINTRPTENB_MASK; + reg |= MVNETA_PIE_TXPKTINTRPTENB_MASK; + MVNETA_WRITE(sc, MVNETA_PIE, reg); +} + +STATIC void +mvneta_rxtxth_intr(void *arg) +{ + struct mvneta_softc *sc; + struct ifnet *ifp; + uint32_t ic, queues; + + sc = arg; + ifp = sc->ifp; +#ifdef MVNETA_KTR + CTR1(KTR_SPARE2, "%s got RXTX_TH_Intr", ifp->if_xname); +#endif + ic = MVNETA_READ(sc, MVNETA_PRXTXTIC); + if (ic == 0) + return; + MVNETA_WRITE(sc, MVNETA_PRXTXTIC, ~ic); + + /* Ack maintance interrupt first */ + if (__predict_false((ic & MVNETA_PRXTXTI_PMISCICSUMMARY) && + sc->use_inband_status)) { + mvneta_sc_lock(sc); + mvneta_misc_intr(sc); + mvneta_sc_unlock(sc); + } + if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) + return; + /* RxTxTH interrupt */ + queues = MVNETA_PRXTXTI_GET_RBICTAPQ(ic); + if (__predict_true(queues)) { +#ifdef MVNETA_KTR + CTR1(KTR_SPARE2, "%s got PRXTXTIC: +RXEOF", ifp->if_xname); +#endif + /* At the moment the driver support only one RX queue. 
*/ + DASSERT(MVNETA_IS_QUEUE_SET(queues, 0)); + mvneta_rx(sc, 0, 0); + } +} + +STATIC int +mvneta_misc_intr(struct mvneta_softc *sc) +{ + uint32_t ic; + int claimed = 0; + +#ifdef MVNETA_KTR + CTR1(KTR_SPARE2, "%s got MISC_INTR", sc->ifp->if_xname); +#endif + KASSERT_SC_MTX(sc); + + for (;;) { + ic = MVNETA_READ(sc, MVNETA_PMIC); + ic &= MVNETA_READ(sc, MVNETA_PMIM); + if (ic == 0) + break; + MVNETA_WRITE(sc, MVNETA_PMIC, ~ic); + claimed = 1; + + if (ic & (MVNETA_PMI_PHYSTATUSCHNG | + MVNETA_PMI_LINKCHANGE | MVNETA_PMI_PSCSYNCCHANGE)) + mvneta_link_isr(sc); + } + return (claimed); +} + +STATIC void +mvneta_tick(void *arg) +{ + struct mvneta_softc *sc; + struct mvneta_tx_ring *tx; + struct mvneta_rx_ring *rx; + int q; + uint32_t fc_prev, fc_curr; + + sc = arg; + + /* + * This is done before mib update to get the right stats + * for this tick. + */ + mvneta_tx_drain(sc); + + /* Extract previous flow-control frame received counter. */ + fc_prev = sc->sysctl_mib[MVNETA_MIB_FC_GOOD_IDX].counter; + /* Read mib registers (clear by read). */ + mvneta_update_mib(sc); + /* Extract current flow-control frame received counter. */ + fc_curr = sc->sysctl_mib[MVNETA_MIB_FC_GOOD_IDX].counter; + + + if (sc->phy_attached && sc->ifp->if_flags & IFF_UP) { + mvneta_sc_lock(sc); + mii_tick(sc->mii); + + /* Adjust MAC settings */ + mvneta_adjust_link(sc); + mvneta_sc_unlock(sc); + } + + /* + * We were unable to refill the rx queue and left the rx func, leaving + * the ring without mbuf and no way to call the refill func. + */ + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + rx = MVNETA_RX_RING(sc, q); + if (rx->needs_refill == TRUE) { + mvneta_rx_lockq(sc, q); + mvneta_rx_queue_refill(sc, q); + mvneta_rx_unlockq(sc, q); + } + } + + /* + * Watchdog: + * - check if queue is mark as hung. + * - ignore hung status if we received some pause frame + * as hardware may have paused packet transmit. 
+ */ + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + /* + * We should take queue lock, but as we only read + * queue status we can do it without lock, we may + * only missdetect queue status for one tick. + */ + tx = MVNETA_TX_RING(sc, q); + + if (tx->queue_hung && (fc_curr - fc_prev) == 0) + goto timeout; + } + + callout_schedule(&sc->tick_ch, hz); + return; + +timeout: + if_printf(sc->ifp, "watchdog timeout\n"); + + mvneta_sc_lock(sc); + sc->counter_watchdog++; + sc->counter_watchdog_mib++; + /* Trigger reinitialize sequence. */ + mvneta_stop_locked(sc); + mvneta_init_locked(sc); + mvneta_sc_unlock(sc); +} + +STATIC void +mvneta_qflush(struct ifnet *ifp) +{ +#ifdef MVNETA_MULTIQUEUE + struct mvneta_softc *sc; + struct mvneta_tx_ring *tx; + struct mbuf *m; + size_t q; + + sc = ifp->if_softc; + + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + tx = MVNETA_TX_RING(sc, q); + mvneta_tx_lockq(sc, q); + while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) + m_freem(m); + mvneta_tx_unlockq(sc, q); + } +#endif + if_qflush(ifp); +} + +STATIC void +mvneta_tx_task(void *arg, int pending) +{ + struct mvneta_softc *sc; + struct mvneta_tx_ring *tx; + struct ifnet *ifp; + int error; + + tx = arg; + ifp = tx->ifp; + sc = ifp->if_softc; + + mvneta_tx_lockq(sc, tx->qidx); + error = mvneta_xmit_locked(sc, tx->qidx); + mvneta_tx_unlockq(sc, tx->qidx); + + /* Try again */ + if (__predict_false(error != 0 && error != ENETDOWN)) { + pause("mvneta_tx_task_sleep", 1); + taskqueue_enqueue(tx->taskq, &tx->task); + } +} + +STATIC int +mvneta_xmitfast_locked(struct mvneta_softc *sc, int q, struct mbuf **m) +{ + struct mvneta_tx_ring *tx; + struct ifnet *ifp; + int error; + + KASSERT_TX_MTX(sc, q); + tx = MVNETA_TX_RING(sc, q); + error = 0; + + ifp = sc->ifp; + + /* Dont enqueue packet if the queue is disabled. */ + if (__predict_false(tx->queue_status == MVNETA_QUEUE_DISABLED)) { + m_freem(*m); + *m = NULL; + return (ENETDOWN); + } + + /* Reclaim mbuf if above threshold. 
*/ + if (__predict_true(tx->used > MVNETA_TX_RECLAIM_COUNT)) + mvneta_tx_queue_complete(sc, q); + + /* Do not call transmit path if queue is already too full. */ + if (__predict_false(tx->used > + MVNETA_TX_RING_CNT - MVNETA_TX_SEGLIMIT)) + return (ENOBUFS); + + error = mvneta_tx_queue(sc, m, q); + if (__predict_false(error != 0)) + return (error); + + /* Send a copy of the frame to the BPF listener */ + ETHER_BPF_MTAP(ifp, *m); + + /* Set watchdog on */ + tx->watchdog_time = ticks; + tx->queue_status = MVNETA_QUEUE_WORKING; + + return (error); +} + +#ifdef MVNETA_MULTIQUEUE +STATIC int +mvneta_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct mvneta_softc *sc; + struct mvneta_tx_ring *tx; + int error; + int q; + + sc = ifp->if_softc; + + /* Use default queue if there is no flow id as thread can migrate. */ + if (__predict_true(M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)) + q = m->m_pkthdr.flowid % MVNETA_TX_QNUM_MAX; + else + q = 0; + + tx = MVNETA_TX_RING(sc, q); + + /* If buf_ring is full start transmit immediatly. */ + if (buf_ring_full(tx->br)) { + mvneta_tx_lockq(sc, q); + mvneta_xmit_locked(sc, q); + mvneta_tx_unlockq(sc, q); + } + + /* + * If the buf_ring is empty we will not reorder packets. + * If the lock is available transmit without using buf_ring. + */ + if (buf_ring_empty(tx->br) && mvneta_tx_trylockq(sc, q) != 0) { + error = mvneta_xmitfast_locked(sc, q, &m); + mvneta_tx_unlockq(sc, q); + if (__predict_true(error == 0)) + return (0); + + /* Transmit can fail in fastpath. */ + if (__predict_false(m == NULL)) + return (error); + } + + /* Enqueue then schedule taskqueue. 
*/ + error = drbr_enqueue(ifp, tx->br, m); + if (__predict_false(error != 0)) + return (error); + + taskqueue_enqueue(tx->taskq, &tx->task); + return (0); +} + +STATIC int +mvneta_xmit_locked(struct mvneta_softc *sc, int q) +{ + struct ifnet *ifp; + struct mvneta_tx_ring *tx; + struct mbuf *m; + int error; + + KASSERT_TX_MTX(sc, q); + ifp = sc->ifp; + tx = MVNETA_TX_RING(sc, q); + error = 0; + + while ((m = drbr_peek(ifp, tx->br)) != NULL) { + error = mvneta_xmitfast_locked(sc, q, &m); + if (__predict_false(error != 0)) { + if (m != NULL) + drbr_putback(ifp, tx->br, m); + else + drbr_advance(ifp, tx->br); + break; + } + drbr_advance(ifp, tx->br); + } + + return (error); +} +#else /* !MVNETA_MULTIQUEUE */ +STATIC void +mvneta_start(struct ifnet *ifp) +{ + struct mvneta_softc *sc; + struct mvneta_tx_ring *tx; + int error; + + sc = ifp->if_softc; + tx = MVNETA_TX_RING(sc, 0); + + mvneta_tx_lockq(sc, 0); + error = mvneta_xmit_locked(sc, 0); + mvneta_tx_unlockq(sc, 0); + /* Handle retransmit in the background taskq. 
*/ + if (__predict_false(error != 0 && error != ENETDOWN)) + taskqueue_enqueue(tx->taskq, &tx->task); +} + +STATIC int +mvneta_xmit_locked(struct mvneta_softc *sc, int q) +{ + struct ifnet *ifp; + struct mvneta_tx_ring *tx; + struct mbuf *m; + int error; + + KASSERT_TX_MTX(sc, q); + ifp = sc->ifp; + tx = MVNETA_TX_RING(sc, 0); + error = 0; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + IFQ_DRV_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; + + error = mvneta_xmitfast_locked(sc, q, &m); + if (__predict_false(error != 0)) { + if (m != NULL) + IFQ_DRV_PREPEND(&ifp->if_snd, m); + break; + } + } + + return (error); +} +#endif + +STATIC int +mvneta_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct mvneta_softc *sc; + struct mvneta_rx_ring *rx; + struct ifreq *ifr; + int error, mask; + uint32_t flags; + char *mediatype; + int q; + + error = 0; + sc = ifp->if_softc; + ifr = (struct ifreq *)data; + switch (cmd) { + case SIOCSIFFLAGS: + mvneta_sc_lock(sc); + if (ifp->if_flags & IFF_UP) { + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + flags = ifp->if_flags ^ sc->mvneta_if_flags; + + if (flags != 0) + sc->mvneta_if_flags = ifp->if_flags; + + if ((flags & IFF_PROMISC) != 0) + mvneta_filter_setup(sc); + } else { + mvneta_init_locked(sc); + sc->mvneta_if_flags = ifp->if_flags; + if (sc->phy_attached) + mii_mediachg(sc->mii); + mvneta_sc_unlock(sc); + break; + } + } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) + mvneta_stop_locked(sc); + + sc->mvneta_if_flags = ifp->if_flags; + mvneta_sc_unlock(sc); + break; + case SIOCSIFCAP: + if (ifp->if_mtu > MVNETA_MAX_CSUM_MTU && + ifr->ifr_reqcap & IFCAP_TXCSUM) + ifr->ifr_reqcap &= ~IFCAP_TXCSUM; + mask = ifp->if_capenable ^ ifr->ifr_reqcap; + if (mask & IFCAP_HWCSUM) { + ifp->if_capenable &= ~IFCAP_HWCSUM; + ifp->if_capenable |= IFCAP_HWCSUM & ifr->ifr_reqcap; + if (ifp->if_capenable & IFCAP_TXCSUM) + ifp->if_hwassist = CSUM_IP | CSUM_TCP | + CSUM_UDP; + else + ifp->if_hwassist = 0; + } + if (mask & IFCAP_LRO) { + 
mvneta_sc_lock(sc); + ifp->if_capenable ^= IFCAP_LRO; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + rx = MVNETA_RX_RING(sc, q); + rx->lro_enabled = !rx->lro_enabled; + } + } + mvneta_sc_unlock(sc); + } + VLAN_CAPABILITIES(ifp); + break; + case SIOCSIFMEDIA: + if ((IFM_SUBTYPE(ifr->ifr_media) == IFM_1000_T || + IFM_SUBTYPE(ifr->ifr_media) == IFM_2500_T || + IFM_SUBTYPE(ifr->ifr_media) == IFM_2500_KX) && + (ifr->ifr_media & IFM_FDX) == 0) { + if (IFM_SUBTYPE(ifr->ifr_media) == IFM_2500_T) + mediatype = "2500Base-T"; + else if (IFM_SUBTYPE(ifr->ifr_media) == IFM_2500_KX) + mediatype = "2500Base-KX"; + else + mediatype = "1000Base-T"; + device_printf(sc->dev, + "%s half-duplex unsupported\n", mediatype); + error = EINVAL; + break; + } + case SIOCGIFMEDIA: /* FALLTHROUGH */ + case SIOCGIFXMEDIA: + if (!sc->phy_attached) + error = ifmedia_ioctl(ifp, ifr, &sc->mvneta_ifmedia, + cmd); + else + error = ifmedia_ioctl(ifp, ifr, &sc->mii->mii_media, + cmd); + break; + case SIOCSIFMTU: + if (ifr->ifr_mtu < 68 || ifr->ifr_mtu > MVNETA_MAX_FRAME - + MVNETA_ETHER_SIZE) { + error = EINVAL; + } else { + ifp->if_mtu = ifr->ifr_mtu; + mvneta_sc_lock(sc); + if (ifp->if_mtu > MVNETA_MAX_CSUM_MTU) { + ifp->if_capenable &= ~IFCAP_TXCSUM; + ifp->if_hwassist = 0; + } else { + ifp->if_capenable |= IFCAP_TXCSUM; + ifp->if_hwassist = CSUM_IP | CSUM_TCP | + CSUM_UDP; + } + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + /* Trigger reinitialize sequence */ + mvneta_stop_locked(sc); + mvneta_init_locked(sc); + } + mvneta_sc_unlock(sc); + } + break; + + default: + error = ether_ioctl(ifp, cmd, data); + break; + } + + return (error); +} + +STATIC void +mvneta_init_locked(void *arg) +{ + struct mvneta_softc *sc; + struct ifnet *ifp; + uint32_t reg; + int q, cpu; + + sc = arg; + ifp = sc->ifp; + + if (!device_is_attached(sc->dev) || + (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) + return; + + mvneta_disable_intr(sc); + callout_stop(&sc->tick_ch); + + 
/* Get the latest mac address */ + bcopy(IF_LLADDR(ifp), sc->enaddr, ETHER_ADDR_LEN); + mvneta_set_mac_address(sc, sc->enaddr); + mvneta_filter_setup(sc); + + /* Start DMA Engine */ + MVNETA_WRITE(sc, MVNETA_PRXINIT, 0x00000000); + MVNETA_WRITE(sc, MVNETA_PTXINIT, 0x00000000); + MVNETA_WRITE(sc, MVNETA_PACC, MVNETA_PACC_ACCELERATIONMODE_EDM); + + /* Enable port */ + reg = MVNETA_READ(sc, MVNETA_PMACC0); + reg |= MVNETA_PMACC0_PORTEN; + MVNETA_WRITE(sc, MVNETA_PMACC0, reg); + + /* Allow access to each TXQ/RXQ from both CPU's */ + for (cpu = 0; cpu < mp_ncpus; ++cpu) + MVNETA_WRITE(sc, MVNETA_PCP2Q(cpu), + MVNETA_PCP2Q_TXQEN_MASK | MVNETA_PCP2Q_RXQEN_MASK); + + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + mvneta_rx_lockq(sc, q); + mvneta_rx_queue_refill(sc, q); + mvneta_rx_unlockq(sc, q); + } + + if (!sc->phy_attached) + mvneta_linkup(sc); + + /* Enable interrupt */ + mvneta_enable_intr(sc); + + /* Set Counter */ + callout_schedule(&sc->tick_ch, hz); + + ifp->if_drv_flags |= IFF_DRV_RUNNING; +} + +STATIC void +mvneta_init(void *arg) +{ + struct mvneta_softc *sc; + + sc = arg; + mvneta_sc_lock(sc); + mvneta_init_locked(sc); + if (sc->phy_attached) + mii_mediachg(sc->mii); + mvneta_sc_unlock(sc); +} + +/* ARGSUSED */ +STATIC void +mvneta_stop_locked(struct mvneta_softc *sc) +{ + struct ifnet *ifp; + struct mvneta_rx_ring *rx; + struct mvneta_tx_ring *tx; + uint32_t reg; + int q; + + ifp = sc->ifp; + if (ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + return; + + mvneta_disable_intr(sc); + + callout_stop(&sc->tick_ch); + + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + + /* Link down */ + if (sc->linkup == TRUE) + mvneta_linkdown(sc); + + /* Reset the MAC Port Enable bit */ + reg = MVNETA_READ(sc, MVNETA_PMACC0); + reg &= ~MVNETA_PMACC0_PORTEN; + MVNETA_WRITE(sc, MVNETA_PMACC0, reg); + + /* Disable each of queue */ + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + rx = MVNETA_RX_RING(sc, q); + + mvneta_rx_lockq(sc, q); + mvneta_ring_flush_rx_queue(sc, q); + 
mvneta_rx_unlockq(sc, q); + } + + /* + * Hold Reset state of DMA Engine + * (must write 0x0 to restart it) + */ + MVNETA_WRITE(sc, MVNETA_PRXINIT, 0x00000001); + MVNETA_WRITE(sc, MVNETA_PTXINIT, 0x00000001); + + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + tx = MVNETA_TX_RING(sc, q); + + mvneta_tx_lockq(sc, q); + mvneta_ring_flush_tx_queue(sc, q); + mvneta_tx_unlockq(sc, q); + } +} + +STATIC void +mvneta_stop(struct mvneta_softc *sc) +{ + + mvneta_sc_lock(sc); + mvneta_stop_locked(sc); + mvneta_sc_unlock(sc); +} + +STATIC int +mvneta_mediachange(struct ifnet *ifp) +{ + struct mvneta_softc *sc; + + sc = ifp->if_softc; + + if (!sc->phy_attached && !sc->use_inband_status) { + /* We shouldn't be here */ + if_printf(ifp, "Cannot change media in fixed-link mode!\n"); + return (0); + } + + if (sc->use_inband_status) { + mvneta_update_media(sc, sc->mvneta_ifmedia.ifm_media); + return (0); + } + + mvneta_sc_lock(sc); + + /* Update PHY */ + mii_mediachg(sc->mii); + + mvneta_sc_unlock(sc); + + return (0); +} + +STATIC void +mvneta_get_media(struct mvneta_softc *sc, struct ifmediareq *ifmr) +{ + uint32_t psr; + + psr = MVNETA_READ(sc, MVNETA_PSR); + + /* Speed */ + if (psr & MVNETA_PSR_GMIISPEED) + ifmr->ifm_active = IFM_ETHER_SUBTYPE_SET(IFM_1000_T); + else if (psr & MVNETA_PSR_MIISPEED) + ifmr->ifm_active = IFM_ETHER_SUBTYPE_SET(IFM_100_TX); + else if (psr & MVNETA_PSR_LINKUP) + ifmr->ifm_active = IFM_ETHER_SUBTYPE_SET(IFM_10_T); + + /* Duplex */ + if (psr & MVNETA_PSR_FULLDX) + ifmr->ifm_active |= IFM_FDX; + + /* Link */ + ifmr->ifm_status = IFM_AVALID; + if (psr & MVNETA_PSR_LINKUP) + ifmr->ifm_status |= IFM_ACTIVE; +} + +STATIC void +mvneta_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) +{ + struct mvneta_softc *sc; + struct mii_data *mii; + + sc = ifp->if_softc; + + if (!sc->phy_attached && !sc->use_inband_status) { + ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; + return; + } + + mvneta_sc_lock(sc); + + if (sc->use_inband_status) { + mvneta_get_media(sc, ifmr); + 
mvneta_sc_unlock(sc); + return; + } + + mii = sc->mii; + mii_pollstat(mii); + + ifmr->ifm_active = mii->mii_media_active; + ifmr->ifm_status = mii->mii_media_status; + + mvneta_sc_unlock(sc); +} + +/* + * Link State Notify + */ +STATIC void +mvneta_update_autoneg(struct mvneta_softc *sc, int enable) +{ + int reg; + + if (enable) { + reg = MVNETA_READ(sc, MVNETA_PANC); + reg &= ~(MVNETA_PANC_FORCELINKFAIL | MVNETA_PANC_FORCELINKPASS | + MVNETA_PANC_ANFCEN); + reg |= MVNETA_PANC_ANDUPLEXEN | MVNETA_PANC_ANSPEEDEN | + MVNETA_PANC_INBANDANEN; + MVNETA_WRITE(sc, MVNETA_PANC, reg); + + reg = MVNETA_READ(sc, MVNETA_PMACC2); + reg |= MVNETA_PMACC2_INBANDANMODE; + MVNETA_WRITE(sc, MVNETA_PMACC2, reg); + + reg = MVNETA_READ(sc, MVNETA_PSOMSCD); + reg |= MVNETA_PSOMSCD_ENABLE; + MVNETA_WRITE(sc, MVNETA_PSOMSCD, reg); + } else { + reg = MVNETA_READ(sc, MVNETA_PANC); + reg &= ~(MVNETA_PANC_FORCELINKFAIL | MVNETA_PANC_FORCELINKPASS | + MVNETA_PANC_ANDUPLEXEN | MVNETA_PANC_ANSPEEDEN | + MVNETA_PANC_INBANDANEN); + MVNETA_WRITE(sc, MVNETA_PANC, reg); + + reg = MVNETA_READ(sc, MVNETA_PMACC2); + reg &= ~MVNETA_PMACC2_INBANDANMODE; + MVNETA_WRITE(sc, MVNETA_PMACC2, reg); + + reg = MVNETA_READ(sc, MVNETA_PSOMSCD); + reg &= ~MVNETA_PSOMSCD_ENABLE; + MVNETA_WRITE(sc, MVNETA_PSOMSCD, reg); + } +} + +STATIC int +mvneta_update_media(struct mvneta_softc *sc, int media) +{ + boolean_t autoneg, running; + char *type; + int reg, err; + + err = 0; + + mvneta_sc_lock(sc); + + mvneta_linkreset(sc); + + running = (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != 0; + if (running) + mvneta_stop_locked(sc); + + autoneg = (IFM_SUBTYPE(media) == IFM_AUTO); + + if (sc->use_inband_status) + mvneta_update_autoneg(sc, autoneg); + + mvneta_update_eee(sc); + mvneta_update_fc(sc); + + if (!autoneg) { + reg = MVNETA_READ(sc, MVNETA_PANC); + reg &= ~(MVNETA_PANC_SETGMIISPEED | + MVNETA_PANC_SETMIISPEED | + MVNETA_PANC_SETFULLDX); + if (IFM_SUBTYPE(media) == IFM_1000_T || + IFM_SUBTYPE(media) == IFM_2500_T || + 
IFM_SUBTYPE(media) == IFM_2500_KX) { + if ((media & IFM_FDX) == 0) { + if (IFM_SUBTYPE(media) == IFM_2500_T) + type = "2500Base-T"; + else if (IFM_SUBTYPE(media) == IFM_2500_KX) + type = "2500Base-KX"; + else + type = "1000Base-T"; + device_printf(sc->dev, + "%s half-duplex unsupported\n", type); + err = EINVAL; + goto out; + } + reg |= MVNETA_PANC_SETGMIISPEED; + } else if (IFM_SUBTYPE(media) == IFM_100_TX) + reg |= MVNETA_PANC_SETMIISPEED; + + if (media & IFM_FDX) + reg |= MVNETA_PANC_SETFULLDX; + + MVNETA_WRITE(sc, MVNETA_PANC, reg); + } +out: + if (running) + mvneta_init_locked(sc); + mvneta_sc_unlock(sc); + return (err); +} + +STATIC void +mvneta_adjust_link(struct mvneta_softc *sc) +{ + boolean_t phy_linkup; + int reg; + + /* Update eee/fc */ + mvneta_update_eee(sc); + mvneta_update_fc(sc); + + /* Check for link change */ + phy_linkup = (sc->mii->mii_media_status & + (IFM_AVALID | IFM_ACTIVE)) == (IFM_AVALID | IFM_ACTIVE); + + if (sc->linkup != phy_linkup) + mvneta_linkupdate(sc, phy_linkup); + + /* Don't update media on disabled link */ + if (!phy_linkup) + return; + + /* Check for media type change */ + if (sc->mvneta_media != sc->mii->mii_media_active) { + sc->mvneta_media = sc->mii->mii_media_active; + + reg = MVNETA_READ(sc, MVNETA_PANC); + reg &= ~(MVNETA_PANC_SETGMIISPEED | + MVNETA_PANC_SETMIISPEED | + MVNETA_PANC_SETFULLDX); + if (IFM_SUBTYPE(sc->mvneta_media) == IFM_1000_T || + IFM_SUBTYPE(sc->mvneta_media) == IFM_2500_T || + IFM_SUBTYPE(sc->mvneta_media) == IFM_2500_KX) { + reg |= MVNETA_PANC_SETGMIISPEED; + } else if (IFM_SUBTYPE(sc->mvneta_media) == IFM_100_TX) + reg |= MVNETA_PANC_SETMIISPEED; + + if (sc->mvneta_media & IFM_FDX) + reg |= MVNETA_PANC_SETFULLDX; + + MVNETA_WRITE(sc, MVNETA_PANC, reg); + } +} + +STATIC void +mvneta_link_isr(struct mvneta_softc *sc) +{ + int linkup; + + KASSERT_SC_MTX(sc); + + linkup = MVNETA_IS_LINKUP(sc) ? 
TRUE : FALSE; + if (sc->linkup == linkup) + return; + + if (linkup == TRUE) + mvneta_linkup(sc); + else + mvneta_linkdown(sc); + +#ifdef DEBUG + log(LOG_DEBUG, + "%s: link %s\n", device_xname(sc->dev), linkup ? "up" : "down"); +#endif +} + +STATIC void +mvneta_linkupdate(struct mvneta_softc *sc, boolean_t linkup) +{ + + KASSERT_SC_MTX(sc); + + if (linkup == TRUE) + mvneta_linkup(sc); + else + mvneta_linkdown(sc); + +#ifdef DEBUG + log(LOG_DEBUG, + "%s: link %s\n", device_xname(sc->dev), linkup ? "up" : "down"); +#endif +} + +STATIC void +mvneta_update_eee(struct mvneta_softc *sc) +{ + uint32_t reg; + + KASSERT_SC_MTX(sc); + + /* set EEE parameters */ + reg = MVNETA_READ(sc, MVNETA_LPIC1); + if (sc->cf_lpi) + reg |= MVNETA_LPIC1_LPIRE; + else + reg &= ~MVNETA_LPIC1_LPIRE; + MVNETA_WRITE(sc, MVNETA_LPIC1, reg); +} + +STATIC void +mvneta_update_fc(struct mvneta_softc *sc) +{ + uint32_t reg; + + KASSERT_SC_MTX(sc); + + reg = MVNETA_READ(sc, MVNETA_PANC); + if (sc->cf_fc) { + /* Flow control negotiation */ + reg |= MVNETA_PANC_PAUSEADV; + reg |= MVNETA_PANC_ANFCEN; + } else { + /* Disable flow control negotiation */ + reg &= ~MVNETA_PANC_PAUSEADV; + reg &= ~MVNETA_PANC_ANFCEN; + } + + MVNETA_WRITE(sc, MVNETA_PANC, reg); +} + +STATIC void +mvneta_linkup(struct mvneta_softc *sc) +{ + uint32_t reg; + + KASSERT_SC_MTX(sc); + + if (!sc->use_inband_status) { + reg = MVNETA_READ(sc, MVNETA_PANC); + reg |= MVNETA_PANC_FORCELINKPASS; + reg &= ~MVNETA_PANC_FORCELINKFAIL; + MVNETA_WRITE(sc, MVNETA_PANC, reg); + } + + mvneta_qflush(sc->ifp); + mvneta_portup(sc); + sc->linkup = TRUE; + if_link_state_change(sc->ifp, LINK_STATE_UP); +} + +STATIC void +mvneta_linkdown(struct mvneta_softc *sc) +{ + uint32_t reg; + + KASSERT_SC_MTX(sc); + + if (!sc->use_inband_status) { + reg = MVNETA_READ(sc, MVNETA_PANC); + reg &= ~MVNETA_PANC_FORCELINKPASS; + reg |= MVNETA_PANC_FORCELINKFAIL; + MVNETA_WRITE(sc, MVNETA_PANC, reg); + } + + mvneta_portdown(sc); + mvneta_qflush(sc->ifp); + sc->linkup = 
FALSE; + if_link_state_change(sc->ifp, LINK_STATE_DOWN); +} + +STATIC void +mvneta_linkreset(struct mvneta_softc *sc) +{ + struct mii_softc *mii; + + if (sc->phy_attached) { + /* Force reset PHY */ + mii = LIST_FIRST(&sc->mii->mii_phys); + if (mii) + mii_phy_reset(mii); + } +} + +/* + * Tx Subroutines + */ +STATIC int +mvneta_tx_queue(struct mvneta_softc *sc, struct mbuf **mbufp, int q) +{ + struct ifnet *ifp; + bus_dma_segment_t txsegs[MVNETA_TX_SEGLIMIT]; + struct mbuf *mtmp, *mbuf; + struct mvneta_tx_ring *tx; + struct mvneta_buf *txbuf; + struct mvneta_tx_desc *t; + uint32_t ptxsu; + int start, used, error, i, txnsegs; + + mbuf = *mbufp; + tx = MVNETA_TX_RING(sc, q); + DASSERT(tx->used >= 0); + DASSERT(tx->used <= MVNETA_TX_RING_CNT); + t = NULL; + ifp = sc->ifp; + + if (__predict_false(mbuf->m_flags & M_VLANTAG)) { + mbuf = ether_vlanencap(mbuf, mbuf->m_pkthdr.ether_vtag); + if (mbuf == NULL) { + tx->drv_error++; + *mbufp = NULL; + return (ENOBUFS); + } + mbuf->m_flags &= ~M_VLANTAG; + *mbufp = mbuf; + } + + if (__predict_false(mbuf->m_next != NULL && + (mbuf->m_pkthdr.csum_flags & + (CSUM_IP | CSUM_TCP | CSUM_UDP)) != 0)) { + if (M_WRITABLE(mbuf) == 0) { + mtmp = m_dup(mbuf, M_NOWAIT); + m_freem(mbuf); + if (mtmp == NULL) { + tx->drv_error++; + *mbufp = NULL; + return (ENOBUFS); + } + *mbufp = mbuf = mtmp; + } + } + + /* load mbuf using dmamap of 1st descriptor */ + txbuf = &tx->txbuf[tx->cpu]; + error = bus_dmamap_load_mbuf_sg(sc->txmbuf_dtag, + txbuf->dmap, mbuf, txsegs, &txnsegs, + BUS_DMA_NOWAIT); + if (__predict_false(error != 0)) { +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s:%u bus_dmamap_load_mbuf_sg error=%d", ifp->if_xname, q, error); +#endif + /* This is the only recoverable error (except EFBIG). 
*/ + if (error != ENOMEM) { + tx->drv_error++; + m_freem(mbuf); + *mbufp = NULL; + return (ENOBUFS); + } + return (error); + } + + if (__predict_false(txnsegs <= 0 + || (txnsegs + tx->used) > MVNETA_TX_RING_CNT)) { + /* we have no enough descriptors or mbuf is broken */ +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s:%u not enough descriptors txnsegs=%d", + ifp->if_xname, q, txnsegs); +#endif + bus_dmamap_unload(sc->txmbuf_dtag, txbuf->dmap); + return (ENOBUFS); + } + DASSERT(txbuf->m == NULL); + + /* remember mbuf using 1st descriptor */ + txbuf->m = mbuf; + bus_dmamap_sync(sc->txmbuf_dtag, txbuf->dmap, + BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); + + /* load to tx descriptors */ + start = tx->cpu; + used = 0; + for (i = 0; i < txnsegs; i++) { + t = &tx->desc[tx->cpu]; + t->command = 0; + t->l4ichk = 0; + t->flags = 0; + if (__predict_true(i == 0)) { + /* 1st descriptor */ + t->command |= MVNETA_TX_CMD_W_PACKET_OFFSET(0); + t->command |= MVNETA_TX_CMD_F; + mvneta_tx_set_csumflag(ifp, t, mbuf); + } + t->bufptr_pa = txsegs[i].ds_addr; + t->bytecnt = txsegs[i].ds_len; + tx->cpu = tx_counter_adv(tx->cpu, 1); + + tx->used++; + used++; + } + /* t is last descriptor here */ + DASSERT(t != NULL); + t->command |= MVNETA_TX_CMD_L|MVNETA_TX_CMD_PADDING; + + bus_dmamap_sync(sc->tx_dtag, tx->desc_map, + BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); + + while (__predict_false(used > 255)) { + ptxsu = MVNETA_PTXSU_NOWD(255); + MVNETA_WRITE(sc, MVNETA_PTXSU(q), ptxsu); + used -= 255; + } + if (__predict_true(used > 0)) { + ptxsu = MVNETA_PTXSU_NOWD(used); + MVNETA_WRITE(sc, MVNETA_PTXSU(q), ptxsu); + } + return (0); +} + +STATIC void +mvneta_tx_set_csumflag(struct ifnet *ifp, + struct mvneta_tx_desc *t, struct mbuf *m) +{ + struct ether_header *eh; + int csum_flags; + uint32_t iphl, ipoff; + struct ip *ip; + + iphl = ipoff = 0; + csum_flags = ifp->if_hwassist & m->m_pkthdr.csum_flags; + eh = mtod(m, struct ether_header *); + switch (ntohs(eh->ether_type)) { + case ETHERTYPE_IP: + ipoff = 
ETHER_HDR_LEN; + break; + case ETHERTYPE_IPV6: + return; + case ETHERTYPE_VLAN: + ipoff = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + break; + } + + if (__predict_true(csum_flags & (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP))) { + ip = (struct ip *)(m->m_data + ipoff); + iphl = ip->ip_hl<<2; + t->command |= MVNETA_TX_CMD_L3_IP4; + } else { + t->command |= MVNETA_TX_CMD_L4_CHECKSUM_NONE; + return; + } + + + /* L3 */ + if (csum_flags & CSUM_IP) { + t->command |= MVNETA_TX_CMD_IP4_CHECKSUM; + } + + /* L4 */ + if (csum_flags & CSUM_IP_TCP) { + t->command |= MVNETA_TX_CMD_L4_CHECKSUM_NOFRAG; + t->command |= MVNETA_TX_CMD_L4_TCP; + } else if (csum_flags & CSUM_IP_UDP) { + t->command |= MVNETA_TX_CMD_L4_CHECKSUM_NOFRAG; + t->command |= MVNETA_TX_CMD_L4_UDP; + } else + t->command |= MVNETA_TX_CMD_L4_CHECKSUM_NONE; + + t->l4ichk = 0; + t->command |= MVNETA_TX_CMD_IP_HEADER_LEN(iphl >> 2); + t->command |= MVNETA_TX_CMD_L3_OFFSET(ipoff); +} + +STATIC void +mvneta_tx_queue_complete(struct mvneta_softc *sc, int q) +{ + struct mvneta_tx_ring *tx; + struct mvneta_buf *txbuf; + struct mvneta_tx_desc *t; + uint32_t ptxs, ptxsu, ndesc; + int i; + + KASSERT_TX_MTX(sc, q); + + tx = MVNETA_TX_RING(sc, q); + if (__predict_false(tx->queue_status == MVNETA_QUEUE_DISABLED)) + return; + + ptxs = MVNETA_READ(sc, MVNETA_PTXS(q)); + ndesc = MVNETA_PTXS_GET_TBC(ptxs); + + if (__predict_false(ndesc == 0)) { + if (tx->used == 0) + tx->queue_status = MVNETA_QUEUE_IDLE; + else if (tx->queue_status == MVNETA_QUEUE_WORKING && + ((ticks - tx->watchdog_time) > MVNETA_WATCHDOG)) + tx->queue_hung = TRUE; + return; + } + +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s:%u tx_complete begin ndesc=%u", + sc->ifp->if_xname, q, ndesc); +#endif + + bus_dmamap_sync(sc->tx_dtag, tx->desc_map, + BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); + + for (i = 0; i < ndesc; i++) { + t = &tx->desc[tx->dma]; +#ifdef MVNETA_KTR + if (t->flags & MVNETA_TX_F_ES) + CTR3(KTR_SPARE2, "%s tx error queue %d desc %d", + sc->ifp->if_xname, q, tx->dma); 
+#endif + txbuf = &tx->txbuf[tx->dma]; + if (__predict_true(txbuf->m != NULL)) { + DASSERT((t->command & MVNETA_TX_CMD_F) != 0); + bus_dmamap_unload(sc->txmbuf_dtag, txbuf->dmap); + m_freem(txbuf->m); + txbuf->m = NULL; + } + else + DASSERT((t->flags & MVNETA_TX_CMD_F) == 0); + tx->dma = tx_counter_adv(tx->dma, 1); + tx->used--; + } + DASSERT(tx->used >= 0); + DASSERT(tx->used <= MVNETA_TX_RING_CNT); + while (__predict_false(ndesc > 255)) { + ptxsu = MVNETA_PTXSU_NORB(255); + MVNETA_WRITE(sc, MVNETA_PTXSU(q), ptxsu); + ndesc -= 255; + } + if (__predict_true(ndesc > 0)) { + ptxsu = MVNETA_PTXSU_NORB(ndesc); + MVNETA_WRITE(sc, MVNETA_PTXSU(q), ptxsu); + } +#ifdef MVNETA_KTR + CTR5(KTR_SPARE2, "%s:%u tx_complete tx_cpu=%d tx_dma=%d tx_used=%d", + sc->ifp->if_xname, q, tx->cpu, tx->dma, tx->used); +#endif + + tx->watchdog_time = ticks; + + if (tx->used == 0) + tx->queue_status = MVNETA_QUEUE_IDLE; +} + +/* + * Do a final TX complete when TX is idle. + */ +STATIC void +mvneta_tx_drain(struct mvneta_softc *sc) +{ + struct mvneta_tx_ring *tx; + int q; + + /* + * Handle trailing mbuf on TX queue. + * Check is done lockess to avoid TX path contention. + */ + for (q = 0; q < MVNETA_TX_QNUM_MAX; q++) { + tx = MVNETA_TX_RING(sc, q); + if ((ticks - tx->watchdog_time) > MVNETA_WATCHDOG_TXCOMP && + tx->used > 0) { + mvneta_tx_lockq(sc, q); + mvneta_tx_queue_complete(sc, q); + mvneta_tx_unlockq(sc, q); + } + } +} + +/* + * Rx Subroutines + */ +STATIC int +mvneta_rx(struct mvneta_softc *sc, int q, int count) +{ + uint32_t prxs, npkt; + int more; + + more = 0; + mvneta_rx_lockq(sc, q); + prxs = MVNETA_READ(sc, MVNETA_PRXS(q)); + npkt = MVNETA_PRXS_GET_ODC(prxs); + if (__predict_false(npkt == 0)) + goto out; + + if (count > 0 && npkt > count) { + more = 1; + npkt = count; + } + mvneta_rx_queue(sc, q, npkt); +out: + mvneta_rx_unlockq(sc, q); + return more; +} + +/* + * Helper routine for updating PRXSU register of a given queue. 
+ * Handles number of processed descriptors bigger than maximum acceptable value. + */ +STATIC __inline void +mvneta_prxsu_update(struct mvneta_softc *sc, int q, int processed) +{ + uint32_t prxsu; + + while (__predict_false(processed > 255)) { + prxsu = MVNETA_PRXSU_NOOFPROCESSEDDESCRIPTORS(255); + MVNETA_WRITE(sc, MVNETA_PRXSU(q), prxsu); + processed -= 255; + } + prxsu = MVNETA_PRXSU_NOOFPROCESSEDDESCRIPTORS(processed); + MVNETA_WRITE(sc, MVNETA_PRXSU(q), prxsu); +} + +static __inline void +mvneta_prefetch(void *p) +{ + + __builtin_prefetch(p); +} + +STATIC void +mvneta_rx_queue(struct mvneta_softc *sc, int q, int npkt) +{ + struct ifnet *ifp; + struct mvneta_rx_ring *rx; + struct mvneta_rx_desc *r; + struct mvneta_buf *rxbuf; + struct mbuf *m; + struct lro_ctrl *lro; + struct lro_entry *queued; + void *pktbuf; + int i, pktlen, processed, ndma; + + KASSERT_RX_MTX(sc, q); + + ifp = sc->ifp; + rx = MVNETA_RX_RING(sc, q); + processed = 0; + + if (__predict_false(rx->queue_status == MVNETA_QUEUE_DISABLED)) + return; + + bus_dmamap_sync(sc->rx_dtag, rx->desc_map, + BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); + + for (i = 0; i < npkt; i++) { + /* Prefetch next desc, rxbuf. */ + ndma = rx_counter_adv(rx->dma, 1); + mvneta_prefetch(&rx->desc[ndma]); + mvneta_prefetch(&rx->rxbuf[ndma]); + + /* get descriptor and packet */ + r = &rx->desc[rx->dma]; + rxbuf = &rx->rxbuf[rx->dma]; + m = rxbuf->m; + rxbuf->m = NULL; + DASSERT(m != NULL); + bus_dmamap_sync(sc->rxbuf_dtag, rxbuf->dmap, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(sc->rxbuf_dtag, rxbuf->dmap); + /* Prefetch mbuf header. */ + mvneta_prefetch(m); + + processed++; + /* Drop desc with error status or not in a single buffer. 
*/ + DASSERT((r->status & (MVNETA_RX_F|MVNETA_RX_L)) == + (MVNETA_RX_F|MVNETA_RX_L)); + if (__predict_false((r->status & MVNETA_RX_ES) || + (r->status & (MVNETA_RX_F|MVNETA_RX_L)) != + (MVNETA_RX_F|MVNETA_RX_L))) + goto rx_error; + + /* + * [ OFF | MH | PKT | CRC ] + * bytecnt cover MH, PKT, CRC + */ + pktlen = r->bytecnt - ETHER_CRC_LEN - MVNETA_HWHEADER_SIZE; + pktbuf = (uint8_t *)r->bufptr_va + MVNETA_PACKET_OFFSET + + MVNETA_HWHEADER_SIZE; + + /* Prefetch mbuf data. */ + mvneta_prefetch(pktbuf); + + /* Write value to mbuf (avoid read). */ + m->m_data = pktbuf; + m->m_len = m->m_pkthdr.len = pktlen; + m->m_pkthdr.rcvif = ifp; + mvneta_rx_set_csumflag(ifp, r, m); + + /* Increase rx_dma before releasing the lock. */ + rx->dma = ndma; + + if (__predict_false(rx->lro_enabled && + ((r->status & MVNETA_RX_L3_IP) != 0) && + ((r->status & MVNETA_RX_L4_MASK) == MVNETA_RX_L4_TCP) && + (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) { + if (rx->lro.lro_cnt != 0) { + if (tcp_lro_rx(&rx->lro, m, 0) == 0) + goto rx_done; + } + } + + mvneta_rx_unlockq(sc, q); + (*ifp->if_input)(ifp, m); + mvneta_rx_lockq(sc, q); + /* + * Check whether this queue has been disabled in the + * meantime. If yes, then clear LRO and exit. + */ + if(__predict_false(rx->queue_status == MVNETA_QUEUE_DISABLED)) + goto rx_lro; +rx_done: + /* Refresh receive ring to avoid stall and minimize jitter. */ + if (processed >= MVNETA_RX_REFILL_COUNT) { + mvneta_prxsu_update(sc, q, processed); + mvneta_rx_queue_refill(sc, q); + processed = 0; + } + continue; +rx_error: + m_freem(m); + rx->dma = ndma; + /* Refresh receive ring to avoid stall and minimize jitter. 
*/ + if (processed >= MVNETA_RX_REFILL_COUNT) { + mvneta_prxsu_update(sc, q, processed); + mvneta_rx_queue_refill(sc, q); + processed = 0; + } + } +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s:%u %u packets received", ifp->if_xname, q, npkt); +#endif + /* DMA status update */ + mvneta_prxsu_update(sc, q, processed); + /* Refill the rest of buffers if there are any to refill */ + mvneta_rx_queue_refill(sc, q); + +rx_lro: + /* + * Flush any outstanding LRO work + */ + lro = &rx->lro; + while (__predict_false((queued = LIST_FIRST(&lro->lro_active)) != NULL)) { + LIST_REMOVE(LIST_FIRST((&lro->lro_active)), next); + tcp_lro_flush(lro, queued); + } +} + +STATIC void +mvneta_rx_buf_free(struct mvneta_softc *sc, struct mvneta_buf *rxbuf) +{ + + bus_dmamap_unload(sc->rxbuf_dtag, rxbuf->dmap); + /* This will remove all data at once */ + m_freem(rxbuf->m); +} + +STATIC void +mvneta_rx_queue_refill(struct mvneta_softc *sc, int q) +{ + struct mvneta_rx_ring *rx; + struct mvneta_rx_desc *r; + struct mvneta_buf *rxbuf; + bus_dma_segment_t segs; + struct mbuf *m; + uint32_t prxs, prxsu, ndesc; + int npkt, refill, nsegs, error; + + KASSERT_RX_MTX(sc, q); + + rx = MVNETA_RX_RING(sc, q); + prxs = MVNETA_READ(sc, MVNETA_PRXS(q)); + ndesc = MVNETA_PRXS_GET_NODC(prxs) + MVNETA_PRXS_GET_ODC(prxs); + refill = MVNETA_RX_RING_CNT - ndesc; +#ifdef MVNETA_KTR + CTR3(KTR_SPARE2, "%s:%u refill %u packets", sc->ifp->if_xname, q, + refill); +#endif + if (__predict_false(refill <= 0)) + return; + + for (npkt = 0; npkt < refill; npkt++) { + rxbuf = &rx->rxbuf[rx->cpu]; + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + if (__predict_false(m == NULL)) { + error = ENOBUFS; + break; + } + m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; + + error = bus_dmamap_load_mbuf_sg(sc->rxbuf_dtag, rxbuf->dmap, + m, &segs, &nsegs, BUS_DMA_NOWAIT); + if (__predict_false(error != 0 || nsegs != 1)) { + KASSERT(1, ("Failed to load Rx mbuf DMA map")); + m_freem(m); + break; + } + + /* Add the packet to the ring */ + rxbuf->m = 
m; + r = &rx->desc[rx->cpu]; + r->bufptr_pa = segs.ds_addr; + r->bufptr_va = (uint32_t)m->m_data; + + rx->cpu = rx_counter_adv(rx->cpu, 1); + } + if (npkt == 0) { + if (refill == MVNETA_RX_RING_CNT) + rx->needs_refill = TRUE; + return; + } + + rx->needs_refill = FALSE; + bus_dmamap_sync(sc->rx_dtag, rx->desc_map, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); + + while (__predict_false(npkt > 255)) { + prxsu = MVNETA_PRXSU_NOOFNEWDESCRIPTORS(255); + MVNETA_WRITE(sc, MVNETA_PRXSU(q), prxsu); + npkt -= 255; + } + if (__predict_true(npkt > 0)) { + prxsu = MVNETA_PRXSU_NOOFNEWDESCRIPTORS(npkt); + MVNETA_WRITE(sc, MVNETA_PRXSU(q), prxsu); + } +} + +STATIC __inline void +mvneta_rx_set_csumflag(struct ifnet *ifp, + struct mvneta_rx_desc *r, struct mbuf *m) +{ + uint32_t csum_flags; + + csum_flags = 0; + if (__predict_false((r->status & + (MVNETA_RX_IP_HEADER_OK|MVNETA_RX_L3_IP)) == 0)) + return; /* not a IP packet */ + + /* L3 */ + if (__predict_true((r->status & MVNETA_RX_IP_HEADER_OK) == + MVNETA_RX_IP_HEADER_OK)) + csum_flags |= CSUM_L3_CALC|CSUM_L3_VALID; + + if (__predict_true((r->status & (MVNETA_RX_IP_HEADER_OK|MVNETA_RX_L3_IP)) == + (MVNETA_RX_IP_HEADER_OK|MVNETA_RX_L3_IP))) { + /* L4 */ + switch (r->status & MVNETA_RX_L4_MASK) { + case MVNETA_RX_L4_TCP: + case MVNETA_RX_L4_UDP: + csum_flags |= CSUM_L4_CALC; + if (__predict_true((r->status & + MVNETA_RX_L4_CHECKSUM_OK) == MVNETA_RX_L4_CHECKSUM_OK)) { + csum_flags |= CSUM_L4_VALID; + m->m_pkthdr.csum_data = htons(0xffff); + } + break; + case MVNETA_RX_L4_OTH: + default: + break; + } + } + m->m_pkthdr.csum_flags = csum_flags; +} + +/* + * MAC address filter + */ +STATIC void +mvneta_filter_setup(struct mvneta_softc *sc) +{ + struct ifnet *ifp; + uint32_t dfut[MVNETA_NDFUT], dfsmt[MVNETA_NDFSMT], dfomt[MVNETA_NDFOMT]; + uint32_t pxc; + int i; + + KASSERT_SC_MTX(sc); + + memset(dfut, 0, sizeof(dfut)); + memset(dfsmt, 0, sizeof(dfsmt)); + memset(dfomt, 0, sizeof(dfomt)); + + ifp = sc->ifp; + ifp->if_flags |= IFF_ALLMULTI; 
+ if (ifp->if_flags & (IFF_ALLMULTI|IFF_PROMISC)) { + for (i = 0; i < MVNETA_NDFSMT; i++) { + dfsmt[i] = dfomt[i] = + MVNETA_DF(0, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS) | + MVNETA_DF(1, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS) | + MVNETA_DF(2, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS) | + MVNETA_DF(3, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS); + } + } + + pxc = MVNETA_READ(sc, MVNETA_PXC); + pxc &= ~(MVNETA_PXC_UPM | MVNETA_PXC_RXQ_MASK | MVNETA_PXC_RXQARP_MASK | + MVNETA_PXC_TCPQ_MASK | MVNETA_PXC_UDPQ_MASK | MVNETA_PXC_BPDUQ_MASK); + pxc |= MVNETA_PXC_RXQ(MVNETA_RX_QNUM_MAX-1); + pxc |= MVNETA_PXC_RXQARP(MVNETA_RX_QNUM_MAX-1); + pxc |= MVNETA_PXC_TCPQ(MVNETA_RX_QNUM_MAX-1); + pxc |= MVNETA_PXC_UDPQ(MVNETA_RX_QNUM_MAX-1); + pxc |= MVNETA_PXC_BPDUQ(MVNETA_RX_QNUM_MAX-1); + pxc |= MVNETA_PXC_RB | MVNETA_PXC_RBIP | MVNETA_PXC_RBARP; + if (ifp->if_flags & IFF_BROADCAST) { + pxc &= ~(MVNETA_PXC_RB | MVNETA_PXC_RBIP | MVNETA_PXC_RBARP); + } + if (ifp->if_flags & IFF_PROMISC) { + pxc |= MVNETA_PXC_UPM; + } + MVNETA_WRITE(sc, MVNETA_PXC, pxc); + + /* Set Destination Address Filter Unicast Table */ + if (ifp->if_flags & IFF_PROMISC) { + /* pass all unicast addresses */ + for (i = 0; i < MVNETA_NDFUT; i++) { + dfut[i] = + MVNETA_DF(0, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS) | + MVNETA_DF(1, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS) | + MVNETA_DF(2, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS) | + MVNETA_DF(3, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS); + } + } else { + i = sc->enaddr[5] & 0xf; /* last nibble */ + dfut[i>>2] = MVNETA_DF(i&3, MVNETA_DF_QUEUE(0) | MVNETA_DF_PASS); + } + MVNETA_WRITE_REGION(sc, MVNETA_DFUT(0), dfut, MVNETA_NDFUT); + + /* Set Destination Address Filter Multicast Tables */ + MVNETA_WRITE_REGION(sc, MVNETA_DFSMT(0), dfsmt, MVNETA_NDFSMT); + MVNETA_WRITE_REGION(sc, MVNETA_DFOMT(0), dfomt, MVNETA_NDFOMT); +} + +/* + * sysctl(9) + */ +STATIC int +sysctl_read_mib(SYSCTL_HANDLER_ARGS) +{ + struct mvneta_sysctl_mib *arg; + struct mvneta_softc *sc; + uint64_t val; + + arg = (struct 
mvneta_sysctl_mib *)arg1; + if (arg == NULL) + return (EINVAL); + + sc = arg->sc; + if (sc == NULL) + return (EINVAL); + if (arg->index < 0 || arg->index > MVNETA_PORTMIB_NOCOUNTER) + return (EINVAL); + + mvneta_sc_lock(sc); + val = arg->counter; + mvneta_sc_unlock(sc); + return sysctl_handle_64(oidp, &val, 0, req); +} + + +STATIC int +sysctl_clear_mib(SYSCTL_HANDLER_ARGS) +{ + struct mvneta_softc *sc; + int err, val; + + val = 0; + sc = (struct mvneta_softc *)arg1; + if (sc == NULL) + return (EINVAL); + + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0) + return (err); + + if (val < 0 || val > 1) + return (EINVAL); + + if (val == 1) { + mvneta_sc_lock(sc); + mvneta_clear_mib(sc); + mvneta_sc_unlock(sc); + } + + return (0); +} + +STATIC int +sysctl_set_queue_rxthtime(SYSCTL_HANDLER_ARGS) +{ + struct mvneta_sysctl_queue *arg; + struct mvneta_rx_ring *rx; + struct mvneta_softc *sc; + uint32_t reg, time_mvtclk; + int err, time_us; + + rx = NULL; + arg = (struct mvneta_sysctl_queue *)arg1; + if (arg == NULL) + return (EINVAL); + if (arg->queue < 0 || arg->queue > MVNETA_RX_RING_CNT) + return (EINVAL); + if (arg->rxtx != MVNETA_SYSCTL_RX) + return (EINVAL); + + sc = arg->sc; + if (sc == NULL) + return (EINVAL); + + /* read queue length */ + mvneta_sc_lock(sc); + mvneta_rx_lockq(sc, arg->queue); + rx = MVNETA_RX_RING(sc, arg->queue); + time_mvtclk = rx->queue_th_time; + time_us = ((uint64_t)time_mvtclk * 1000ULL * 1000ULL) / get_tclk(); + mvneta_rx_unlockq(sc, arg->queue); + mvneta_sc_unlock(sc); + + err = sysctl_handle_int(oidp, &time_us, 0, req); + if (err != 0) + return (err); + + mvneta_sc_lock(sc); + mvneta_rx_lockq(sc, arg->queue); + + /* update queue length (0[sec] - 1[sec]) */ + if (time_us < 0 || time_us > (1000 * 1000)) { + mvneta_rx_unlockq(sc, arg->queue); + mvneta_sc_unlock(sc); + return (EINVAL); + } + time_mvtclk = + (uint64_t)get_tclk() * (uint64_t)time_us / (1000ULL * 1000ULL); + rx->queue_th_time = time_mvtclk; + reg = 
MVNETA_PRXITTH_RITT(rx->queue_th_time); + MVNETA_WRITE(sc, MVNETA_PRXITTH(arg->queue), reg); + mvneta_rx_unlockq(sc, arg->queue); + mvneta_sc_unlock(sc); + + return (0); +} + +STATIC void +sysctl_mvneta_init(struct mvneta_softc *sc) +{ + struct sysctl_ctx_list *ctx; + struct sysctl_oid_list *children; + struct sysctl_oid_list *rxchildren; + struct sysctl_oid_list *qchildren, *mchildren; + struct sysctl_oid *tree; + int i, q; + struct mvneta_sysctl_queue *rxarg; +#define MVNETA_SYSCTL_NAME(num) "queue" # num + static const char *sysctl_queue_names[] = { + MVNETA_SYSCTL_NAME(0), MVNETA_SYSCTL_NAME(1), + MVNETA_SYSCTL_NAME(2), MVNETA_SYSCTL_NAME(3), + MVNETA_SYSCTL_NAME(4), MVNETA_SYSCTL_NAME(5), + MVNETA_SYSCTL_NAME(6), MVNETA_SYSCTL_NAME(7), + }; +#undef MVNETA_SYSCTL_NAME + +#define MVNETA_SYSCTL_DESCR(num) "configuration parameters for queue " # num + static const char *sysctl_queue_descrs[] = { + MVNETA_SYSCTL_DESCR(0), MVNETA_SYSCTL_DESCR(1), + MVNETA_SYSCTL_DESCR(2), MVNETA_SYSCTL_DESCR(3), + MVNETA_SYSCTL_DESCR(4), MVNETA_SYSCTL_DESCR(5), + MVNETA_SYSCTL_DESCR(6), MVNETA_SYSCTL_DESCR(7), + }; +#undef MVNETA_SYSCTL_DESCR + + + ctx = device_get_sysctl_ctx(sc->dev); + children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); + + tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rx", + CTLFLAG_RD, 0, "NETA RX"); + rxchildren = SYSCTL_CHILDREN(tree); + tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "mib", + CTLFLAG_RD, 0, "NETA MIB"); + mchildren = SYSCTL_CHILDREN(tree); + + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "flow_control", + CTLFLAG_RW, &sc->cf_fc, 0, "flow control"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lpi", + CTLFLAG_RW, &sc->cf_lpi, 0, "Low Power Idle"); + + /* + * MIB access + */ + /* dev.mvneta.[unit].mib.<mibs> */ + for (i = 0; i < MVNETA_PORTMIB_NOCOUNTER; i++) { + const char *name = mvneta_mib_list[i].sysctl_name; + const char *desc = mvneta_mib_list[i].desc; + struct mvneta_sysctl_mib *mib_arg = &sc->sysctl_mib[i]; + + mib_arg->sc = sc; 
+ mib_arg->index = i; + SYSCTL_ADD_PROC(ctx, mchildren, OID_AUTO, name, + CTLTYPE_U64|CTLFLAG_RD, (void *)mib_arg, 0, + sysctl_read_mib, "I", desc); + } + SYSCTL_ADD_UQUAD(ctx, mchildren, OID_AUTO, "rx_discard", + CTLFLAG_RD, &sc->counter_pdfc, "Port Rx Discard Frame Counter"); + SYSCTL_ADD_UQUAD(ctx, mchildren, OID_AUTO, "overrun", + CTLFLAG_RD, &sc->counter_pofc, "Port Overrun Frame Counter"); + SYSCTL_ADD_UINT(ctx, mchildren, OID_AUTO, "watchdog", + CTLFLAG_RD, &sc->counter_watchdog, 0, "TX Watchdog Counter"); + + SYSCTL_ADD_PROC(ctx, mchildren, OID_AUTO, "reset", + CTLTYPE_INT|CTLFLAG_RW, (void *)sc, 0, + sysctl_clear_mib, "I", "Reset MIB counters"); + + for (q = 0; q < MVNETA_RX_QNUM_MAX; q++) { + rxarg = &sc->sysctl_rx_queue[q]; + + rxarg->sc = sc; + rxarg->queue = q; + rxarg->rxtx = MVNETA_SYSCTL_RX; + + /* hw.mvneta.mvneta[unit].rx.[queue] */ + tree = SYSCTL_ADD_NODE(ctx, rxchildren, OID_AUTO, + sysctl_queue_names[q], CTLFLAG_RD, 0, + sysctl_queue_descrs[q]); + qchildren = SYSCTL_CHILDREN(tree); + + /* hw.mvneta.mvneta[unit].rx.[queue].threshold_timer_us */ + SYSCTL_ADD_PROC(ctx, qchildren, OID_AUTO, "threshold_timer_us", + CTLTYPE_UINT | CTLFLAG_RW, rxarg, 0, + sysctl_set_queue_rxthtime, "I", + "interrupt coalescing threshold timer [us]"); + } +} + +/* + * MIB + */ +STATIC void +mvneta_clear_mib(struct mvneta_softc *sc) +{ + int i; + + KASSERT_SC_MTX(sc); + + for (i = 0; i < nitems(mvneta_mib_list); i++) { + if (mvneta_mib_list[i].reg64) + MVNETA_READ_MIB_8(sc, mvneta_mib_list[i].regnum); + else + MVNETA_READ_MIB_4(sc, mvneta_mib_list[i].regnum); + sc->sysctl_mib[i].counter = 0; + } + MVNETA_READ(sc, MVNETA_PDFC); + sc->counter_pdfc = 0; + MVNETA_READ(sc, MVNETA_POFC); + sc->counter_pofc = 0; + sc->counter_watchdog = 0; +} + +STATIC void +mvneta_update_mib(struct mvneta_softc *sc) +{ + struct mvneta_tx_ring *tx; + int i; + uint64_t val; + uint32_t reg; + + for (i = 0; i < nitems(mvneta_mib_list); i++) { + + if (mvneta_mib_list[i].reg64) + val = 
MVNETA_READ_MIB_8(sc, mvneta_mib_list[i].regnum); + else + val = MVNETA_READ_MIB_4(sc, mvneta_mib_list[i].regnum); + + if (val == 0) + continue; + + sc->sysctl_mib[i].counter += val; + switch (mvneta_mib_list[i].regnum) { + case MVNETA_MIB_RX_GOOD_OCT: + //if_inc_counter(sc->ifp, IFCOUNTER_IBYTES, val); + break; + case MVNETA_MIB_RX_BAD_FRAME: + if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, val); + break; + case MVNETA_MIB_RX_GOOD_FRAME: + if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, val); + break; + case MVNETA_MIB_RX_MCAST_FRAME: + if_inc_counter(sc->ifp, IFCOUNTER_IMCASTS, val); + break; + case MVNETA_MIB_TX_GOOD_OCT: + //if_inc_counter(sc->ifp, IFCOUNTER_OBYTES, val); + break; + case MVNETA_MIB_TX_GOOD_FRAME: + if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, val); + break; + case MVNETA_MIB_TX_MCAST_FRAME: + if_inc_counter(sc->ifp, IFCOUNTER_OMCASTS, val); + break; + case MVNETA_MIB_MAC_COL: + if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, val); + break; + case MVNETA_MIB_TX_MAC_TRNS_ERR: + case MVNETA_MIB_TX_EXCES_COL: + case MVNETA_MIB_MAC_LATE_COL: + if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, val); + break; + } + } + + reg = MVNETA_READ(sc, MVNETA_PDFC); + sc->counter_pdfc += reg; + if_inc_counter(sc->ifp, IFCOUNTER_IQDROPS, reg); + reg = MVNETA_READ(sc, MVNETA_POFC); + sc->counter_pofc += reg; + if_inc_counter(sc->ifp, IFCOUNTER_IQDROPS, reg); + + /* TX watchdog. */ + if (sc->counter_watchdog_mib > 0) { + if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, sc->counter_watchdog_mib); + sc->counter_watchdog_mib = 0; + } + /* + * TX driver errors: + * We do not take queue locks to not disrupt TX path. + * We may only miss one drv error which will be fixed at + * next mib update. We may also clear counter when TX path + * is incrementing it but we only do it if counter was not zero + * thus we may only loose one error. 
+ */ + for (i = 0; i < MVNETA_TX_QNUM_MAX; i++) { + tx = MVNETA_TX_RING(sc, i); + + if (tx->drv_error > 0) { + if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, tx->drv_error); + tx->drv_error = 0; + } + } +} diff --git a/sys/dev/neta/if_mvneta_fdt.c b/sys/dev/neta/if_mvneta_fdt.c new file mode 100644 index 0000000..67b31bd --- /dev/null +++ b/sys/dev/neta/if_mvneta_fdt.c @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2017 Stormshield. + * Copyright (c) 2017 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "opt_platform.h" +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/rman.h> +#include <sys/socket.h> +#include <sys/taskqueue.h> + +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_media.h> + +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp_lro.h> + +#include <machine/bus.h> +#include <machine/resource.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <dev/mii/mii.h> +#include <dev/mii/miivar.h> + +#include "if_mvnetareg.h" +#include "if_mvnetavar.h" + +#define PHY_MODE_MAXLEN 10 +#define INBAND_STATUS_MAXLEN 16 + +static int mvneta_fdt_probe(device_t); +static int mvneta_fdt_attach(device_t); + +static device_method_t mvneta_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, mvneta_fdt_probe), + DEVMETHOD(device_attach, mvneta_fdt_attach), + + /* End */ + DEVMETHOD_END +}; + +DEFINE_CLASS_1(mvneta, mvneta_fdt_driver, mvneta_fdt_methods, + sizeof(struct mvneta_softc), mvneta_driver); + +static devclass_t mvneta_fdt_devclass; + +DRIVER_MODULE(mvneta, ofwbus, mvneta_fdt_driver, mvneta_fdt_devclass, 0, 0); +DRIVER_MODULE(mvneta, simplebus, mvneta_fdt_driver, mvneta_fdt_devclass, 0, 0); + +static int mvneta_fdt_phy_acquire(device_t); + +static int +mvneta_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_is_compatible(dev, "marvell,armada-370-neta")) + return (ENXIO); + + device_set_desc(dev, "NETA controller"); + return (BUS_PROBE_DEFAULT); +} + +static int +mvneta_fdt_attach(device_t dev) +{ + int err; + + /* Try to fetch PHY information from FDT */ + err = mvneta_fdt_phy_acquire(dev); + if (err != 0) + return (err); + + return (mvneta_attach(dev)); +} + +static int +mvneta_fdt_phy_acquire(device_t dev) +{ + struct mvneta_softc *sc; + phandle_t node, child, phy_handle; + char 
phymode[PHY_MODE_MAXLEN]; + char managed[INBAND_STATUS_MAXLEN]; + char *name; + + sc = device_get_softc(dev); + node = ofw_bus_get_node(dev); + + /* PHY mode is crucial */ + if (OF_getprop(node, "phy-mode", phymode, sizeof(phymode)) <= 0) { + device_printf(dev, "Failed to acquire PHY mode from FDT.\n"); + return (ENXIO); + } + + if (strncmp(phymode, "rgmii-id", 8) == 0) + sc->phy_mode = MVNETA_PHY_RGMII_ID; + else if (strncmp(phymode, "rgmii", 5) == 0) + sc->phy_mode = MVNETA_PHY_RGMII; + else if (strncmp(phymode, "sgmii", 5) == 0) + sc->phy_mode = MVNETA_PHY_SGMII; + else if (strncmp(phymode, "qsgmii", 6) == 0) + sc->phy_mode = MVNETA_PHY_QSGMII; + else + sc->phy_mode = MVNETA_PHY_SGMII; + + /* Check if in-band link status will be used */ + if (OF_getprop(node, "managed", managed, sizeof(managed)) > 0) { + if (strncmp(managed, "in-band-status", 14) == 0) { + sc->use_inband_status = TRUE; + device_printf(dev, "Use in-band link status.\n"); + return (0); + } + } + + if (OF_getencprop(node, "phy", (void *)&phy_handle, + sizeof(phy_handle)) <= 0) { + /* Test for fixed-link (present i.e. 
in 388-gp) */ + for (child = OF_child(node); child != 0; child = OF_peer(child)) { + if (OF_getprop_alloc(child, + "name", 1, (void **)&name) <= 0) { + continue; + } + if (strncmp(name, "fixed-link", 10) == 0) { + free(name, M_OFWPROP); + if (OF_getencprop(child, "speed", + &sc->phy_speed, sizeof(sc->phy_speed)) <= 0) { + if (bootverbose) { + device_printf(dev, + "No PHY information.\n"); + } + return (ENXIO); + } + if (OF_hasprop(child, "full-duplex")) + sc->phy_fdx = TRUE; + else + sc->phy_fdx = FALSE; + + /* Keep this flag just for the record */ + sc->phy_addr = MII_PHY_ANY; + + return (0); + } + free(name, M_OFWPROP); + } + if (bootverbose) { + device_printf(dev, + "Could not find PHY information in FDT.\n"); + } + return (ENXIO); + } else { + phy_handle = OF_instance_to_package(phy_handle); + if (OF_getencprop(phy_handle, "reg", &sc->phy_addr, + sizeof(sc->phy_addr)) <= 0) { + device_printf(dev, + "Could not find PHY address in FDT.\n"); + return (ENXIO); + } + } + + return (0); +} + +int +mvneta_fdt_mac_address(struct mvneta_softc *sc, uint8_t *addr) +{ + phandle_t node; + uint8_t lmac[ETHER_ADDR_LEN]; + uint8_t zeromac[] = {[0 ... (ETHER_ADDR_LEN - 1)] = 0}; + int len; + + /* + * Retrieve hw address from the device tree. + */ + node = ofw_bus_get_node(sc->dev); + if (node == 0) + return (ENXIO); + + len = OF_getprop(node, "local-mac-address", (void *)lmac, sizeof(lmac)); + if (len != ETHER_ADDR_LEN) + return (ENOENT); + + if (memcmp(lmac, zeromac, ETHER_ADDR_LEN) == 0) { + /* Invalid MAC address (all zeros) */ + return (EINVAL); + } + memcpy(addr, lmac, ETHER_ADDR_LEN); + + return (0); +} diff --git a/sys/dev/neta/if_mvnetareg.h b/sys/dev/neta/if_mvnetareg.h new file mode 100644 index 0000000..ccc4644 --- /dev/null +++ b/sys/dev/neta/if_mvnetareg.h @@ -0,0 +1,926 @@ +/* + * Copyright (c) 2017 Stormshield. + * Copyright (c) 2017 Semihalf. + * Copyright (c) 2015 Internet Initiative Japan Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + * + */ + +#ifndef _IF_MVNETAREG_H_ +#define _IF_MVNETAREG_H_ + +#if BYTE_ORDER == BIG_ENDIAN +#error "BIG ENDIAN not supported" +#endif + +#define MVNETA_SIZE 0x4000 + +#define MVNETA_NWINDOW 6 +#define MVNETA_NREMAP 4 + +#define MVNETA_MAX_QUEUE_SIZE 8 +#define MVNETA_RX_QNUM_MAX 1 +/* XXX: Currently multi-queue can be used on the Tx side only */ +#ifdef MVNETA_MULTIQUEUE +#define MVNETA_TX_QNUM_MAX 2 +#else +#define MVNETA_TX_QNUM_MAX 1 +#endif + +#if MVNETA_TX_QNUM_MAX & (MVNETA_TX_QNUM_MAX - 1) != 0 +#error "MVNETA_TX_QNUM_MAX Should be a power of 2" +#endif +#if MVNETA_RX_QNUM_MAX & (MVNETA_RX_QNUM_MAX - 1) != 0 +#error "MVNETA_RX_QNUM_MAX Should be a power of 2" +#endif + +#define MVNETA_QUEUE(n) (1 << (n)) +#define MVNETA_QUEUE_ALL 0xff +#define MVNETA_TX_QUEUE_ALL ((1<<MVNETA_TX_QNUM_MAX)-1) +#define MVNETA_RX_QUEUE_ALL ((1<<MVNETA_RX_QNUM_MAX)-1) + +/* + * Ethernet Unit Registers + * GbE0 BASE 0x00007.0000 SIZE 0x4000 + * GbE1 BASE 0x00007.4000 SIZE 0x4000 + * + * TBD: reasonable bus space submapping.... 
+ */ +/* Address Decoder Registers */ +#define MVNETA_BASEADDR(n) (0x2200 + ((n) << 3)) /* Base Address */ +#define MVNETA_S(n) (0x2204 + ((n) << 3)) /* Size */ +#define MVNETA_HA(n) (0x2280 + ((n) << 2)) /* High Address Remap */ +#define MVNETA_BARE 0x2290 /* Base Address Enable */ +#define MVNETA_EPAP 0x2294 /* Ethernet Port Access Protect */ + +/* Global Miscellaneous Registers */ +#define MVNETA_PHYADDR 0x2000 +#define MVNETA_SMI 0x2004 +#define MVNETA_EUDA 0x2008 /* Ethernet Unit Default Address */ +#define MVNETA_EUDID 0x200c /* Ethernet Unit Default ID */ +#define MVNETA_MBUS_CONF 0x2010 /* MBUS configuration */ +#define MVNETA_MBUS_RETRY_EN 0x20 /* MBUS transactions retry enable */ +#define MVNETA_EUIC 0x2080 /* Ethernet Unit Interrupt Cause */ +#define MVNETA_EUIM 0x2084 /* Ethernet Unit Interrupt Mask */ +#define MVNETA_EUEA 0x2094 /* Ethernet Unit Error Address */ +#define MVNETA_EUIAE 0x2098 /* Ethernet Unit Internal Addr Error */ +#define MVNETA_EUC 0x20b0 /* Ethernet Unit Control */ + +/* Miscellaneous Registers */ +#define MVNETA_SDC 0x241c /* SDMA Configuration */ + +/* Networking Controller Miscellaneous Registers */ +#define MVNETA_PACC 0x2500 /* Port Acceleration Mode */ +#define MVNETA_PV 0x25bc /* Port Version */ + +/* Rx DMA Hardware Parser Registers */ +#define MVNETA_EVLANE 0x2410 /* VLAN EtherType */ +#define MVNETA_MACAL 0x2414 /* MAC Address Low */ +#define MVNETA_MACAH 0x2418 /* MAC Address High */ +#define MVNETA_NDSCP 7 +#define MVNETA_DSCP(n) (0x2420 + ((n) << 2)) +#define MVNETA_VPT2P 0x2440 /* VLAN Priority Tag to Priority */ +#define MVNETA_ETP 0x24bc /* Ethernet Type Priority */ +#define MVNETA_NDFSMT 64 +#define MVNETA_DFSMT(n) (0x3400 + ((n) << 2)) + /* Destination Address Filter Special Multicast Table */ +#define MVNETA_NDFOMT 64 +#define MVNETA_DFOMT(n) (0x3500 + ((n) << 2)) + /* Destination Address Filter Other Multicast Table */ +#define MVNETA_NDFUT 4 +#define MVNETA_DFUT(n) (0x3600 + ((n) << 2)) + /* Destination Address 
Filter Unicast Table */ + +/* Rx DMA Miscellaneous Registers */ +#define MVNETA_PMFS 0x247c /* Port Rx Minimal Frame Size */ +#define MVNETA_PDFC 0x2484 /* Port Rx Discard Frame Counter */ +#define MVNETA_POFC 0x2488 /* Port Overrun Frame Counter */ +#define MVNETA_RQC 0x2680 /* Receive Queue Command */ + +/* Rx DMA Networking Controller Miscellaneous Registers */ +#define MVNETA_PRXC(q) (0x1400 + ((q) << 2)) /*Port RX queues Config*/ +#define MVNETA_PRXSNP(q) (0x1420 + ((q) << 2)) /* Port RX queues Snoop */ +#define MVNETA_PRXDQA(q) (0x1480 + ((q) << 2)) /*P RXqueues desc Q Addr*/ +#define MVNETA_PRXDQS(q) (0x14a0 + ((q) << 2)) /*P RXqueues desc Q Size*/ +#define MVNETA_PRXDQTH(q) (0x14c0 + ((q) << 2)) /*P RXqueues desc Q Thrs*/ +#define MVNETA_PRXS(q) (0x14e0 + ((q) << 2)) /*Port RX queues Status */ +#define MVNETA_PRXSU(q) (0x1500 + ((q) << 2)) /*P RXqueues Stat Update*/ +#define MVNETA_PRXDI(q) (0x1520 + ((q) << 2)) /*P RXqueues Stat Update*/ +#define MVNETA_PRXINIT 0x1cc0 /* Port RX Initialization */ + +/* Rx DMA Wake on LAN Registers 0x3690 - 0x36b8 */ + +/* Tx DMA Miscellaneous Registers */ +#define MVNETA_TQC 0x2448 /* Transmit Queue Command */ +#define MVNETA_TQC_1 0x24e4 +#define MVNETA_PXTFTT 0x2478 /* Port Tx FIFO Threshold */ +#define MVNETA_TXBADFCS 0x3cc0 /*Tx Bad FCS Transmitted Pckts Counter*/ +#define MVNETA_TXDROPPED 0x3cc4 /* Tx Dropped Packets Counter */ + +/* Tx DMA Networking Controller Miscellaneous Registers */ +#define MVNETA_PTXDQA(q) (0x3c00 + ((q) << 2)) /*P TXqueues desc Q Addr*/ +#define MVNETA_PTXDQS(q) (0x3c20 + ((q) << 2)) /*P TXqueues desc Q Size*/ +#define MVNETA_PTXS(q) (0x3c40 + ((q) << 2)) /* Port TX queues Status*/ +#define MVNETA_PTXSU(q) (0x3c60 + ((q) << 2)) /*P TXqueues Stat Update*/ +#define MVNETA_PTXDI(q) (0x3c80 + ((q) << 2)) /* P TXqueues Desc Index*/ +#define MVNETA_TXTBC(q) (0x3ca0 + ((q) << 2)) /* TX Trans-ed Buf Count*/ +#define MVNETA_PTXINIT 0x3cf0 /* Port TX Initialization */ + +/* Tx DMA Packet Modification 
Registers */ +#define MVNETA_NMH 15 +#define MVNETA_TXMH(n) (0x3d44 + ((n) << 2)) +#define MVNETA_TXMTU 0x3d88 + +/* Tx DMA Queue Arbiter Registers (Version 1) */ +#define MVNETA_TQFPC_V1 0x24dc /* Transmit Queue Fixed Priority Cfg */ +#define MVNETA_TQTBC_V1 0x24e0 /* Transmit Queue Token-Bucket Cfg */ +#define MVNETA_MTU_V1 0x24e8 /* MTU */ +#define MVNETA_PMTBS_V1 0x24ec /* Port Max Token-Bucket Size */ +#define MVNETA_TQTBCOUNT_V1(q) (0x2700 + ((q) << 4)) + /* Transmit Queue Token-Bucket Counter */ +#define MVNETA_TQTBCONFIG_V1(q) (0x2704 + ((q) << 4)) + /* Transmit Queue Token-Bucket Configuration */ +#define MVNETA_PTTBC_V1 0x2740 /* Port Transmit Backet Counter */ + +/* Tx DMA Queue Arbiter Registers (Version 3) */ +#define MVNETA_TQC1_V3 0x3e00 /* Transmit Queue Command1 */ +#define MVNETA_TQFPC_V3 0x3e04 /* Transmit Queue Fixed Priority Cfg */ +#define MVNETA_BRC_V3 0x3e08 /* Basic Refill No of Clocks */ +#define MVNETA_MTU_V3 0x3e0c /* MTU */ +#define MVNETA_PREFILL_V3 0x3e10 /* Port Backet Refill */ +#define MVNETA_PMTBS_V3 0x3e14 /* Port Max Token-Bucket Size */ +#define MVNETA_QREFILL_V3(q) (0x3e20 + ((q) << 2)) + /* Transmit Queue Refill */ +#define MVNETA_QMTBS_V3(q) (0x3e40 + ((q) << 2)) + /* Transmit Queue Max Token-Bucket Size */ +#define MVNETA_QTTBC_V3(q) (0x3e60 + ((q) << 2)) + /* Transmit Queue Token-Bucket Counter */ +#define MVNETA_TQAC_V3(q) (0x3e80 + ((q) << 2)) + /* Transmit Queue Arbiter Cfg */ +#define MVNETA_TQIPG_V3(q) (0x3ea0 + ((q) << 2)) + /* Transmit Queue IPG(valid q=2..3) */ +#define MVNETA_HITKNINLOPKT_V3 0x3eb0 /* High Token in Low Packet */ +#define MVNETA_HITKNINASYNCPKT_V3 0x3eb4 /* High Token in Async Packet */ +#define MVNETA_LOTKNINASYNCPKT_V3 0x3eb8 /* Low Token in Async Packet */ +#define MVNETA_TS_V3 0x3ebc /* Token Speed */ + +/* RX_TX DMA Registers */ +#define MVNETA_PXC 0x2400 /* Port Configuration */ +#define MVNETA_PXCX 0x2404 /* Port Configuration Extend */ +#define MVNETA_MH 0x2454 /* Marvell Header */ + +/* 
Serial(SMI/MII) Registers */ +#define MVNETA_PSC0 0x243c /* Port Serial Control0 */ +#define MVNETA_PS0 0x2444 /* Ethernet Port Status */ +#define MVNETA_PSERDESCFG 0x24a0 /* Serdes Configuration */ +#define MVNETA_PSERDESSTS 0x24a4 /* Serdes Status */ +#define MVNETA_PSOMSCD 0x24f4 /* One mS Clock Divider */ +#define MVNETA_PSPFCCD 0x24f8 /* Periodic Flow Control Clock Divider*/ + +/* Gigabit Ethernet MAC Serial Parameters Configuration Registers */ +#define MVNETA_PSPC 0x2c14 /* Port Serial Parameters Config */ +#define MVNETA_PSP1C 0x2c94 /* Port Serial Parameters 1 Config */ + +/* Gigabit Ethernet Auto-Negotiation Configuration Registers */ +#define MVNETA_PANC 0x2c0c /* Port Auto-Negotiation Configuration*/ + +/* Gigabit Ethernet MAC Control Registers */ +#define MVNETA_PMACC0 0x2c00 /* Port MAC Control 0 */ +#define MVNETA_PMACC1 0x2c04 /* Port MAC Control 1 */ +#define MVNETA_PMACC2 0x2c08 /* Port MAC Control 2 */ +#define MVNETA_PMACC3 0x2c48 /* Port MAC Control 3 */ +#define MVNETA_CCFCPST(p) (0x2c58 + ((p) << 2)) /*CCFC Port Speed Timerp*/ +#define MVNETA_PMACC4 0x2c90 /* Port MAC Control 4 */ + +/* Gigabit Ethernet MAC Interrupt Registers */ +#define MVNETA_PIC 0x2c20 +#define MVNETA_PIM 0x2c24 + +/* Gigabit Ethernet Low Power Idle Registers */ +#define MVNETA_LPIC0 0x2cc0 /* LowPowerIdle control 0 */ +#define MVNETA_LPIC1 0x2cc4 /* LPI control 1 */ +#define MVNETA_LPIC2 0x2cc8 /* LPI control 2 */ +#define MVNETA_LPIS 0x2ccc /* LPI status */ +#define MVNETA_LPIC 0x2cd0 /* LPI counter */ + +/* Gigabit Ethernet MAC PRBS Check Status Registers */ +#define MVNETA_PPRBSS 0x2c38 /* Port PRBS Status */ +#define MVNETA_PPRBSEC 0x2c3c /* Port PRBS Error Counter */ + +/* Gigabit Ethernet MAC Status Registers */ +#define MVNETA_PSR 0x2c10 /* Port Status Register0 */ + +/* Networking Controller Interrupt Registers */ +#define MVNETA_PCP2Q(cpu) (0x2540 + ((cpu) << 2)) +#define MVNETA_PRXITTH(q) (0x2580 + ((q) << 2)) + /* Port Rx Interrupt Threshold */ +#define 
MVNETA_PRXTXTIC 0x25a0 /*Port RX_TX Threshold Interrupt Cause*/ +#define MVNETA_PRXTXTIM 0x25a4 /*Port RX_TX Threshold Interrupt Mask */ +#define MVNETA_PRXTXIC 0x25a8 /* Port RX_TX Interrupt Cause */ +#define MVNETA_PRXTXIM 0x25ac /* Port RX_TX Interrupt Mask */ +#define MVNETA_PMIC 0x25b0 /* Port Misc Interrupt Cause */ +#define MVNETA_PMIM 0x25b4 /* Port Misc Interrupt Mask */ +#define MVNETA_PIE 0x25b8 /* Port Interrupt Enable */ +#define MVNETA_PSNPCFG 0x25e4 /* Port Snoop Config */ +#define MVNETA_PSNPCFG_DESCSNP_MASK (0x3 << 4) +#define MVNETA_PSNPCFG_BUFSNP_MASK (0x3 << 8) + +/* Miscellaneous Interrupt Registers */ +#define MVNETA_PEUIAE 0x2494 /* Port Internal Address Error */ + +/* SGMII PHY Registers */ +#define MVNETA_PPLLC 0x2e04 /* Power and PLL Control */ +#define MVNETA_TESTC0 0x2e54 /* PHY Test Control 0 */ +#define MVNETA_TESTPRBSEC0 0x2e7c /* PHY Test PRBS Error Counter 0 */ +#define MVNETA_TESTPRBSEC1 0x2e80 /* PHY Test PRBS Error Counter 1 */ +#define MVNETA_TESTOOB0 0x2e84 /* PHY Test OOB 0 */ +#define MVNETA_DLE 0x2e8c /* Digital Loopback Enable */ +#define MVNETA_RCS 0x2f18 /* Reference Clock Select */ +#define MVNETA_COMPHYC 0x2f18 /* COMPHY Control */ + +/* + * Ethernet MAC MIB Registers + * GbE0 BASE 0x00007.3000 + * GbE1 BASE 0x00007.7000 + */ +/* MAC MIB Counters 0x3000 - 0x307c */ +#define MVNETA_PORTMIB_BASE 0x3000 +#define MVNETA_PORTMIB_SIZE 0x0080 +#define MVNETA_PORTMIB_NOCOUNTER 30 + +/* Rx */ +#define MVNETA_MIB_RX_GOOD_OCT 0x00 /* 64bit */ +#define MVNETA_MIB_RX_BAD_OCT 0x08 +#define MVNETA_MIB_RX_GOOD_FRAME 0x10 +#define MVNETA_MIB_RX_BAD_FRAME 0x14 +#define MVNETA_MIB_RX_BCAST_FRAME 0x18 +#define MVNETA_MIB_RX_MCAST_FRAME 0x1c +#define MVNETA_MIB_RX_FRAME64_OCT 0x20 +#define MVNETA_MIB_RX_FRAME127_OCT 0x24 +#define MVNETA_MIB_RX_FRAME255_OCT 0x28 +#define MVNETA_MIB_RX_FRAME511_OCT 0x2c +#define MVNETA_MIB_RX_FRAME1023_OCT 0x30 +#define MVNETA_MIB_RX_FRAMEMAX_OCT 0x34 + +/* Tx */ +#define MVNETA_MIB_TX_MAC_TRNS_ERR 0x0c 
+#define MVNETA_MIB_TX_GOOD_OCT 0x38 /* 64bit */ +#define MVNETA_MIB_TX_GOOD_FRAME 0x40 +#define MVNETA_MIB_TX_EXCES_COL 0x44 +#define MVNETA_MIB_TX_MCAST_FRAME 0x48 +#define MVNETA_MIB_TX_BCAST_FRAME 0x4c +#define MVNETA_MIB_TX_MAC_CTL_ERR 0x50 + +/* Flow Control */ +#define MVNETA_MIB_FC_SENT 0x54 +#define MVNETA_MIB_FC_GOOD 0x58 +#define MVNETA_MIB_FC_BAD 0x5c + +/* Packet Processing */ +#define MVNETA_MIB_PKT_UNDERSIZE 0x60 +#define MVNETA_MIB_PKT_FRAGMENT 0x64 +#define MVNETA_MIB_PKT_OVERSIZE 0x68 +#define MVNETA_MIB_PKT_JABBER 0x6c + +/* MAC Layer Errors */ +#define MVNETA_MIB_MAC_RX_ERR 0x70 +#define MVNETA_MIB_MAC_CRC_ERR 0x74 +#define MVNETA_MIB_MAC_COL 0x78 +#define MVNETA_MIB_MAC_LATE_COL 0x7c + +/* END OF REGISTER NUMBERS */ + +/* + * + * Register Formats + * + */ +/* + * Address Decoder Registers + */ +/* Base Address (MVNETA_BASEADDR) */ +#define MVNETA_BASEADDR_TARGET(target) ((target) & 0xf) +#define MVNETA_BASEADDR_ATTR(attr) (((attr) & 0xff) << 8) +#define MVNETA_BASEADDR_BASE(base) ((base) & 0xffff0000) + +/* Size (MVNETA_S) */ +#define MVNETA_S_SIZE(size) (((size) - 1) & 0xffff0000) + +/* Base Address Enable (MVNETA_BARE) */ +#define MVNETA_BARE_EN_MASK ((1 << MVNETA_NWINDOW) - 1) +#define MVNETA_BARE_EN(win) ((1 << (win)) & MVNETA_BARE_EN_MASK) + +/* Ethernet Port Access Protect (MVNETA_EPAP) */ +#define MVNETA_EPAP_AC_NAC 0x0 /* No access allowed */ +#define MVNETA_EPAP_AC_RO 0x1 /* Read Only */ +#define MVNETA_EPAP_AC_FA 0x3 /* Full access (r/w) */ +#define MVNETA_EPAP_EPAR(win, ac) ((ac) << ((win) * 2)) + +/* + * Global Miscellaneous Registers + */ +/* PHY Address (MVNETA_PHYADDR) */ +#define MVNETA_PHYADDR_PHYAD(phy) ((phy) & 0x1f) +#define MVNETA_PHYADDR_GET_PHYAD(reg) ((reg) & 0x1f) + +/* SMI register fields (MVNETA_SMI) */ +#define MVNETA_SMI_DATA_MASK 0x0000ffff +#define MVNETA_SMI_PHYAD(phy) (((phy) & 0x1f) << 16) +#define MVNETA_SMI_REGAD(reg) (((reg) & 0x1f) << 21) +#define MVNETA_SMI_OPCODE_WRITE (0 << 26) +#define 
MVNETA_SMI_OPCODE_READ (1 << 26) +#define MVNETA_SMI_READVALID (1 << 27) +#define MVNETA_SMI_BUSY (1 << 28) + +/* Ethernet Unit Default ID (MVNETA_EUDID) */ +#define MVNETA_EUDID_DIDR_MASK 0x0000000f +#define MVNETA_EUDID_DIDR(id) ((id) & 0x0f) +#define MVNETA_EUDID_DATTR_MASK 0x00000ff0 +#define MVNETA_EUDID_DATTR(attr) (((attr) & 0xff) << 4) + +/* Ethernet Unit Interrupt Cause (MVNETA_EUIC) */ +#define MVNETA_EUIC_ETHERINTSUM (1 << 0) +#define MVNETA_EUIC_PARITY (1 << 1) +#define MVNETA_EUIC_ADDRVIOL (1 << 2) +#define MVNETA_EUIC_ADDRVNOMATCH (1 << 3) +#define MVNETA_EUIC_SMIDONE (1 << 4) +#define MVNETA_EUIC_COUNTWA (1 << 5) +#define MVNETA_EUIC_INTADDRERR (1 << 7) +#define MVNETA_EUIC_PORT0DPERR (1 << 9) +#define MVNETA_EUIC_TOPDPERR (1 << 12) + +/* Ethernet Unit Internal Addr Error (MVNETA_EUIAE) */ +#define MVNETA_EUIAE_INTADDR_MASK 0x000001ff +#define MVNETA_EUIAE_INTADDR(addr) ((addr) & 0x1ff) +#define MVNETA_EUIAE_GET_INTADDR(addr) ((addr) & 0x1ff) + +/* Ethernet Unit Control (MVNETA_EUC) */ +#define MVNETA_EUC_POLLING (1 << 1) +#define MVNETA_EUC_PORTRESET (1 << 24) +#define MVNETA_EUC_RAMSINITIALIZATIONCOMPLETED (1 << 25) + +/* + * Miscellaneous Registers + */ +/* SDMA Configuration (MVNETA_SDC) */ +#define MVNETA_SDC_RXBSZ(x) ((x) << 1) +#define MVNETA_SDC_RXBSZ_MASK MVNETA_SDC_RXBSZ(7) +#define MVNETA_SDC_RXBSZ_1_64BITWORDS MVNETA_SDC_RXBSZ(0) +#define MVNETA_SDC_RXBSZ_2_64BITWORDS MVNETA_SDC_RXBSZ(1) +#define MVNETA_SDC_RXBSZ_4_64BITWORDS MVNETA_SDC_RXBSZ(2) +#define MVNETA_SDC_RXBSZ_8_64BITWORDS MVNETA_SDC_RXBSZ(3) +#define MVNETA_SDC_RXBSZ_16_64BITWORDS MVNETA_SDC_RXBSZ(4) +#define MVNETA_SDC_BLMR (1 << 4) +#define MVNETA_SDC_BLMT (1 << 5) +#define MVNETA_SDC_SWAPMODE (1 << 6) +#define MVNETA_SDC_TXBSZ(x) ((x) << 22) +#define MVNETA_SDC_TXBSZ_MASK MVNETA_SDC_TXBSZ(7) +#define MVNETA_SDC_TXBSZ_1_64BITWORDS MVNETA_SDC_TXBSZ(0) +#define MVNETA_SDC_TXBSZ_2_64BITWORDS MVNETA_SDC_TXBSZ(1) +#define MVNETA_SDC_TXBSZ_4_64BITWORDS MVNETA_SDC_TXBSZ(2) +#define 
MVNETA_SDC_TXBSZ_8_64BITWORDS MVNETA_SDC_TXBSZ(3) +#define MVNETA_SDC_TXBSZ_16_64BITWORDS MVNETA_SDC_TXBSZ(4) + +/* + * Networking Controller Miscellaneous Registers + */ +/* Port Acceleration Mode (MVNETA_PACC) */ +#define MVNETA_PACC_ACCELERATIONMODE_MASK 0x7 +#define MVNETA_PACC_ACCELERATIONMODE_EDM 0x1 /* Enhanced Desc Mode */ + +/* Port Version (MVNETA_PV) */ +#define MVNETA_PV_VERSION_MASK 0xff +#define MVNETA_PV_VERSION(v) ((v) & 0xff) +#define MVNETA_PV_GET_VERSION(reg) ((reg) & 0xff) + +/* + * Rx DMA Hardware Parser Registers + */ +/* Ether Type Priority (MVNETA_ETP) */ +#define MVNETA_ETP_ETHERTYPEPRIEN (1 << 0) /* EtherType Prio Ena */ +#define MVNETA_ETP_ETHERTYPEPRIFRSTEN (1 << 1) +#define MVNETA_ETP_ETHERTYPEPRIQ (0x7 << 2) /*EtherType Prio Queue*/ +#define MVNETA_ETP_ETHERTYPEPRIVAL (0xffff << 5) /*EtherType Prio Value*/ +#define MVNETA_ETP_FORCEUNICSTHIT (1 << 21) /* Force Unicast hit */ + +/* Destination Address Filter Registers (MVNETA_DF{SM,OM,U}T) */ +#define MVNETA_DF(n, x) ((x) << (8 * (n))) +#define MVNETA_DF_PASS (1 << 0) +#define MVNETA_DF_QUEUE(q) ((q) << 1) +#define MVNETA_DF_QUEUE_ALL ((MVNETA_RX_QNUM_MAX-1) << 1) +#define MVNETA_DF_QUEUE_MASK ((MVNETA_RX_QNUM_MAX-1) << 1) + +/* + * Rx DMA Miscellaneous Registers + */ +/* Port Rx Minimal Frame Size (MVNETA_PMFS) */ +#define MVNETA_PMFS_RXMFS(rxmfs) (((rxmfs) - 40) & 0x7c) + +/* Receive Queue Command (MVNETA_RQC) */ +#define MVNETA_RQC_EN_MASK (0xff << 0) /* Enable Q */ +#define MVNETA_RQC_ENQ(q) (1 << (0 + (q))) +#define MVNETA_RQC_EN(n) ((n) << 0) +#define MVNETA_RQC_DIS_MASK (0xff << 8) /* Disable Q */ +#define MVNETA_RQC_DISQ(q) (1 << (8 + (q))) +#define MVNETA_RQC_DIS(n) ((n) << 8) + +/* + * Rx DMA Networking Controller Miscellaneous Registers + */ +/* Port RX queues Configuration (MVNETA_PRXC) */ +#define MVNETA_PRXC_PACKETOFFSET(o) (((o) & 0xf) << 8) + +/* Port RX queues Snoop (MVNETA_PRXSNP) */ +#define MVNETA_PRXSNP_SNOOPNOOFBYTES(b) (((b) & 0x3fff) << 0) +#define 
MVNETA_PRXSNP_L2DEPOSITNOOFBYTES(b) (((b) & 0x3fff) << 16) + +/* Port RX queues Descriptors Queue Size (MVNETA_PRXDQS) */ +#define MVNETA_PRXDQS_DESCRIPTORSQUEUESIZE(s) (((s) & 0x3fff) << 0) +#define MVNETA_PRXDQS_BUFFERSIZE(s) (((s) & 0x1fff) << 19) + +/* Port RX queues Descriptors Queue Threshold (MVNETA_PRXDQTH) */ + /* Occupied Descriptors Threshold */ +#define MVNETA_PRXDQTH_ODT(x) (((x) & 0x3fff) << 0) + /* Non Occupied Descriptors Threshold */ +#define MVNETA_PRXDQTH_NODT(x) (((x) & 0x3fff) << 16) + +/* Port RX queues Status (MVNETA_PRXS) */ + /* Occupied Descriptors Counter */ +#define MVNETA_PRXS_ODC(x) (((x) & 0x3fff) << 0) + /* Non Occupied Descriptors Counter */ +#define MVNETA_PRXS_NODC(x) (((x) & 0x3fff) << 16) +#define MVNETA_PRXS_GET_ODC(reg) (((reg) >> 0) & 0x3fff) +#define MVNETA_PRXS_GET_NODC(reg) (((reg) >> 16) & 0x3fff) + +/* Port RX queues Status Update (MVNETA_PRXSU) */ +#define MVNETA_PRXSU_NOOFPROCESSEDDESCRIPTORS(x) (((x) & 0xff) << 0) +#define MVNETA_PRXSU_NOOFNEWDESCRIPTORS(x) (((x) & 0xff) << 16) + +/* Port RX Initialization (MVNETA_PRXINIT) */ +#define MVNETA_PRXINIT_RXDMAINIT (1 << 0) + +/* + * Tx DMA Miscellaneous Registers + */ +/* Transmit Queue Command (MVNETA_TQC) */ +#define MVNETA_TQC_EN_MASK (0xff << 0) +#define MVNETA_TQC_ENQ(q) (1 << ((q) + 0))/* Enable Q */ +#define MVNETA_TQC_EN(n) ((n) << 0) +#define MVNETA_TQC_DIS_MASK (0xff << 8) +#define MVNETA_TQC_DISQ(q) (1 << ((q) + 8))/* Disable Q */ +#define MVNETA_TQC_DIS(n) ((n) << 8) + +/* + * Tx DMA Networking Controller Miscellaneous Registers + */ +/* Port TX queues Descriptors Queue Size (MVNETA_PTXDQS) */ + /* Descriptors Queue Size */ +#define MVNETA_PTXDQS_DQS_MASK (0x3fff << 0) +#define MVNETA_PTXDQS_DQS(x) (((x) & 0x3fff) << 0) + /* Transmitted Buffer Threshold */ +#define MVNETA_PTXDQS_TBT_MASK (0x3fff << 16) +#define MVNETA_PTXDQS_TBT(x) (((x) & 0x3fff) << 16) + +/* Port TX queues Status (MVNETA_PTXS) */ + /* Transmitted Buffer Counter */ +#define MVNETA_PTXS_TBC(x) 
(((x) & 0x3fff) << 16) + +#define MVNETA_PTXS_GET_TBC(reg) (((reg) >> 16) & 0x3fff) + /* Pending Descriptors Counter */ +#define MVNETA_PTXS_PDC(x) ((x) & 0x3fff) +#define MVNETA_PTXS_GET_PDC(x) ((x) & 0x3fff) + +/* Port TX queues Status Update (MVNETA_PTXSU) */ + /* Number Of Written Descriptors */ +#define MVNETA_PTXSU_NOWD(x) (((x) & 0xff) << 0) + /* Number Of Released Buffers */ +#define MVNETA_PTXSU_NORB(x) (((x) & 0xff) << 16) + +/* TX Transmitted Buffers Counter (MVNETA_TXTBC) */ + /* Transmitted Buffers Counter */ +#define MVNETA_TXTBC_TBC(x) (((x) & 0x3fff) << 16) + +/* Port TX Initialization (MVNETA_PTXINIT) */ +#define MVNETA_PTXINIT_TXDMAINIT (1 << 0) + +/* + * Tx DMA Queue Arbiter Registers (Version 1 ) + */ +/* Transmit Queue Fixed Priority Configuration */ +#define MVNETA_TQFPC_EN(q) (1 << (q)) + +/* + * RX_TX DMA Registers + */ +/* Port Configuration (MVNETA_PXC) */ +#define MVNETA_PXC_UPM (1 << 0) /* Uni Promisc mode */ +#define MVNETA_PXC_RXQ(q) ((q) << 1) +#define MVNETA_PXC_RXQ_MASK MVNETA_PXC_RXQ(7) +#define MVNETA_PXC_RXQARP(q) ((q) << 4) +#define MVNETA_PXC_RXQARP_MASK MVNETA_PXC_RXQARP(7) +#define MVNETA_PXC_RB (1 << 7) /* Rej mode of MAC */ +#define MVNETA_PXC_RBIP (1 << 8) +#define MVNETA_PXC_RBARP (1 << 9) +#define MVNETA_PXC_AMNOTXES (1 << 12) +#define MVNETA_PXC_RBARPF (1 << 13) +#define MVNETA_PXC_TCPCAPEN (1 << 14) +#define MVNETA_PXC_UDPCAPEN (1 << 15) +#define MVNETA_PXC_TCPQ(q) ((q) << 16) +#define MVNETA_PXC_TCPQ_MASK MVNETA_PXC_TCPQ(7) +#define MVNETA_PXC_UDPQ(q) ((q) << 19) +#define MVNETA_PXC_UDPQ_MASK MVNETA_PXC_UDPQ(7) +#define MVNETA_PXC_BPDUQ(q) ((q) << 22) +#define MVNETA_PXC_BPDUQ_MASK MVNETA_PXC_BPDUQ(7) +#define MVNETA_PXC_RXCS (1 << 25) + +/* Port Configuration Extend (MVNETA_PXCX) */ +#define MVNETA_PXCX_SPAN (1 << 1) +#define MVNETA_PXCX_TXCRCDIS (1 << 3) + +/* Marvell Header (MVNETA_MH) */ +#define MVNETA_MH_MHEN (1 << 0) +#define MVNETA_MH_DAPREFIX (0x3 << 1) +#define MVNETA_MH_SPID (0xf << 4) +#define 
MVNETA_MH_MHMASK (0x3 << 8) +#define MVNETA_MH_MHMASK_8QUEUES (0x0 << 8) +#define MVNETA_MH_MHMASK_4QUEUES (0x1 << 8) +#define MVNETA_MH_MHMASK_2QUEUES (0x3 << 8) +#define MVNETA_MH_DSAEN_MASK (0x3 << 10) +#define MVNETA_MH_DSAEN_DISABLE (0x0 << 10) +#define MVNETA_MH_DSAEN_NONEXTENDED (0x1 << 10) +#define MVNETA_MH_DSAEN_EXTENDED (0x2 << 10) + +/* + * Serial(SMI/MII) Registers + */ +#define MVNETA_PSOMSCD_ENABLE (1UL<<31) +#define MVNETA_PSERDESCFG_QSGMII (0x0667) +#define MVNETA_PSERDESCFG_SGMII (0x0cc7) +/* Port Seiral Control0 (MVNETA_PSC0) */ +#define MVNETA_PSC0_FORCE_FC_MASK (0x3 << 5) +#define MVNETA_PSC0_FORCE_FC(fc) (((fc) & 0x3) << 5) +#define MVNETA_PSC0_FORCE_FC_PAUSE MVNETA_PSC0_FORCE_FC(0x1) +#define MVNETA_PSC0_FORCE_FC_NO_PAUSE MVNETA_PSC0_FORCE_FC(0x0) +#define MVNETA_PSC0_FORCE_BP_MASK (0x3 << 7) +#define MVNETA_PSC0_FORCE_BP(fc) (((fc) & 0x3) << 5) +#define MVNETA_PSC0_FORCE_BP_JAM MVNETA_PSC0_FORCE_BP(0x1) +#define MVNETA_PSC0_FORCE_BP_NO_JAM MVNETA_PSC0_FORCE_BP(0x0) +#define MVNETA_PSC0_DTE_ADV (1 << 14) +#define MVNETA_PSC0_IGN_RXERR (1 << 28) +#define MVNETA_PSC0_IGN_COLLISION (1 << 29) +#define MVNETA_PSC0_IGN_CARRIER (1 << 30) + +/* Ethernet Port Status0 (MVNETA_PS0) */ +#define MVNETA_PS0_TXINPROG (1 << 0) +#define MVNETA_PS0_TXFIFOEMP (1 << 8) +#define MVNETA_PS0_RXFIFOEMPTY (1 << 16) + +/* + * Gigabit Ethernet MAC Serial Parameters Configuration Registers + */ +#define MVNETA_PSPC_MUST_SET (1 << 3 | 1 << 4 | 1 << 5 | 0x23 << 6) +#define MVNETA_PSP1C_MUST_SET (1 << 0 | 1 << 1 | 1 << 2) + +/* + * Gigabit Ethernet Auto-Negotiation Configuration Registers + */ +/* Port Auto-Negotiation Configuration (MVNETA_PANC) */ +#define MVNETA_PANC_FORCELINKFAIL (1 << 0) +#define MVNETA_PANC_FORCELINKPASS (1 << 1) +#define MVNETA_PANC_INBANDANEN (1 << 2) +#define MVNETA_PANC_INBANDANBYPASSEN (1 << 3) +#define MVNETA_PANC_INBANDRESTARTAN (1 << 4) +#define MVNETA_PANC_SETMIISPEED (1 << 5) +#define MVNETA_PANC_SETGMIISPEED (1 << 6) +#define 
MVNETA_PANC_ANSPEEDEN (1 << 7) +#define MVNETA_PANC_SETFCEN (1 << 8) +#define MVNETA_PANC_PAUSEADV (1 << 9) +#define MVNETA_PANC_ANFCEN (1 << 11) +#define MVNETA_PANC_SETFULLDX (1 << 12) +#define MVNETA_PANC_ANDUPLEXEN (1 << 13) +#define MVNETA_PANC_MUSTSET (1 << 15) + +/* + * Gigabit Ethernet MAC Control Registers + */ +/* Port MAC Control 0 (MVNETA_PMACC0) */ +#define MVNETA_PMACC0_PORTEN (1 << 0) +#define MVNETA_PMACC0_PORTTYPE (1 << 1) +#define MVNETA_PMACC0_FRAMESIZELIMIT(x) ((((x) >> 1) << 2) & 0x7ffc) +#define MVNETA_PMACC0_FRAMESIZELIMIT_MASK (0x7ffc) +#define MVNETA_PMACC0_MUSTSET (1 << 15) + +/* Port MAC Control 1 (MVNETA_PMACC1) */ +#define MVNETA_PMACC1_PCSLB (1 << 6) + +/* Port MAC Control 2 (MVNETA_PMACC2) */ +#define MVNETA_PMACC2_INBANDANMODE (1 << 0) +#define MVNETA_PMACC2_PCSEN (1 << 3) +#define MVNETA_PMACC2_PCSEN (1 << 3) +#define MVNETA_PMACC2_RGMIIEN (1 << 4) +#define MVNETA_PMACC2_PADDINGDIS (1 << 5) +#define MVNETA_PMACC2_PORTMACRESET (1 << 6) +#define MVNETA_PMACC2_PRBSCHECKEN (1 << 10) +#define MVNETA_PMACC2_PRBSGENEN (1 << 11) +#define MVNETA_PMACC2_SDTT_MASK (3 << 12) /* Select Data To Transmit */ +#define MVNETA_PMACC2_SDTT_RM (0 << 12) /* Regular Mode */ +#define MVNETA_PMACC2_SDTT_PRBS (1 << 12) /* PRBS Mode */ +#define MVNETA_PMACC2_SDTT_ZC (2 << 12) /* Zero Constant */ +#define MVNETA_PMACC2_SDTT_OC (3 << 12) /* One Constant */ +#define MVNETA_PMACC2_MUSTSET (3 << 14) + +/* Port MAC Control 3 (MVNETA_PMACC3) */ +#define MVNETA_PMACC3_IPG_MASK 0x7f80 + +/* + * Gigabit Ethernet MAC Interrupt Registers + */ +/* Port Interrupt Cause/Mask (MVNETA_PIC/MVNETA_PIM) */ +#define MVNETA_PI_INTSUM (1 << 0) +#define MVNETA_PI_LSC (1 << 1) /* LinkStatus Change */ +#define MVNETA_PI_ACOP (1 << 2) /* AnCompleted OnPort */ +#define MVNETA_PI_AOOR (1 << 5) /* AddressOut Of Range */ +#define MVNETA_PI_SSC (1 << 6) /* SyncStatus Change */ +#define MVNETA_PI_PRBSEOP (1 << 7) /* QSGMII PRBS error */ +#define MVNETA_PI_MIBCWA (1 << 15) /* MIB counter wrap 
around */ +#define MVNETA_PI_QSGMIIPRBSE (1 << 10) /* QSGMII PRBS error */ +#define MVNETA_PI_PCSRXPRLPI (1 << 11) /* PCS Rx path received LPI*/ +#define MVNETA_PI_PCSTXPRLPI (1 << 12) /* PCS Tx path received LPI*/ +#define MVNETA_PI_MACRXPRLPI (1 << 13) /* MAC Rx path received LPI*/ +#define MVNETA_PI_MIBCCD (1 << 14) /* MIB counters copy done */ + +/* + * Gigabit Ethernet MAC Low Power Idle Registers + */ +/* LPI Control 0 (MVNETA_LPIC0) */ +#define MVNETA_LPIC0_LILIMIT(x) (((x) & 0xff) << 0) +#define MVNETA_LPIC0_TSLIMIT(x) (((x) & 0xff) << 8) + +/* LPI Control 1 (MVNETA_LPIC1) */ +#define MVNETA_LPIC1_LPIRE (1 << 0) /* LPI request enable */ +#define MVNETA_LPIC1_LPIRF (1 << 1) /* LPI request force */ +#define MVNETA_LPIC1_LPIMM (1 << 2) /* LPI manual mode */ +#define MVNETA_LPIC1_TWLIMIT(x) (((x) & 0xfff) << 4) + +/* LPI Control 2 (MVNETA_LPIC2) */ +#define MVNETA_LPIC2_MUSTSET 0x17d + +/* LPI Status (MVNETA_LPIS) */ +#define MVNETA_LPIS_PCSRXPLPIS (1 << 0) /* PCS Rx path LPI status */ +#define MVNETA_LPIS_PCSTXPLPIS (1 << 1) /* PCS Tx path LPI status */ +#define MVNETA_LPIS_MACRXPLPIS (1 << 2)/* MAC Rx path LP idle status */ +#define MVNETA_LPIS_MACTXPLPWS (1 << 3)/* MAC Tx path LP wait status */ +#define MVNETA_LPIS_MACTXPLPIS (1 << 4)/* MAC Tx path LP idle status */ + +/* + * Gigabit Ethernet MAC PRBS Check Status Registers + */ +/* Port PRBS Status (MVNETA_PPRBSS) */ +#define MVNETA_PPRBSS_PRBSCHECKLOCKED (1 << 0) +#define MVNETA_PPRBSS_PRBSCHECKRDY (1 << 1) + +/* + * Gigabit Ethernet MAC Status Registers + */ +/* Port Status Register (MVNETA_PSR) */ +#define MVNETA_PSR_LINKUP (1 << 0) +#define MVNETA_PSR_GMIISPEED (1 << 1) +#define MVNETA_PSR_MIISPEED (1 << 2) +#define MVNETA_PSR_FULLDX (1 << 3) +#define MVNETA_PSR_RXFCEN (1 << 4) +#define MVNETA_PSR_TXFCEN (1 << 5) +#define MVNETA_PSR_PRP (1 << 6) /* Port Rx Pause */ +#define MVNETA_PSR_PTP (1 << 7) /* Port Tx Pause */ +#define MVNETA_PSR_PDP (1 << 8) /*Port is Doing Back-Pressure*/ +#define 
MVNETA_PSR_SYNCFAIL10MS (1 << 10) +#define MVNETA_PSR_ANDONE (1 << 11) +#define MVNETA_PSR_IBANBA (1 << 12) /* InBand AutoNeg BypassAct */ +#define MVNETA_PSR_SYNCOK (1 << 14) + +/* + * Networking Controller Interrupt Registers + */ +/* Port CPU to Queue */ +#define MVNETA_MAXCPU 2 +#define MVNETA_PCP2Q_TXQEN(q) (1 << ((q) + 8)) +#define MVNETA_PCP2Q_TXQEN_MASK (0xff << 8) +#define MVNETA_PCP2Q_RXQEN(q) (1 << ((q) + 0)) +#define MVNETA_PCP2Q_RXQEN_MASK (0xff << 0) + +/* Port RX_TX Interrupt Threshold */ +#define MVNETA_PRXITTH_RITT(t) ((t) & 0xffffff) + +/* Port RX_TX Threshold Interrupt Cause/Mask (MVNETA_PRXTXTIC/MVNETA_PRXTXTIM) */ +#define MVNETA_PRXTXTI_TBTCQ(q) (1 << ((q) + 0)) +#define MVNETA_PRXTXTI_TBTCQ_MASK (0xff << 0) +#define MVNETA_PRXTXTI_GET_TBTCQ(reg) (((reg) >> 0) & 0xff) + /* Tx Buffer Threshold Cross Queue*/ +#define MVNETA_PRXTXTI_RBICTAPQ(q) (1 << ((q) + 8)) +#define MVNETA_PRXTXTI_RBICTAPQ_MASK (0xff << 8) +#define MVNETA_PRXTXTI_GET_RBICTAPQ(reg) (((reg) >> 8) & 0xff) + /* Rx Buffer Int. Coaleasing Th. Pri. 
Alrt Q */ +#define MVNETA_PRXTXTI_RDTAQ(q) (1 << ((q) + 16)) +#define MVNETA_PRXTXTI_RDTAQ_MASK (0xff << 16) +#define MVNETA_PRXTXTI_GET_RDTAQ(reg) (((reg) >> 16) & 0xff) + /* Rx Descriptor Threshold Alert Queue*/ +#define MVNETA_PRXTXTI_PRXTXICSUMMARY (1 << 29) /* PRXTXI summary */ +#define MVNETA_PRXTXTI_PTXERRORSUMMARY (1 << 30) /* PTEXERROR summary */ +#define MVNETA_PRXTXTI_PMISCICSUMMARY (1UL << 31) /* PMISCIC summary */ + +/* Port RX_TX Interrupt Cause/Mask (MVNETA_PRXTXIC/MVNETA_PRXTXIM) */ +#define MVNETA_PRXTXI_TBRQ(q) (1 << ((q) + 0)) +#define MVNETA_PRXTXI_TBRQ_MASK (0xff << 0) +#define MVNETA_PRXTXI_GET_TBRQ(reg) (((reg) >> 0) & 0xff) +#define MVNETA_PRXTXI_RPQ(q) (1 << ((q) + 8)) +#define MVNETA_PRXTXI_RPQ_MASK (0xff << 8) +#define MVNETA_PRXTXI_GET_RPQ(reg) (((reg) >> 8) & 0xff) +#define MVNETA_PRXTXI_RREQ(q) (1 << ((q) + 16)) +#define MVNETA_PRXTXI_RREQ_MASK (0xff << 16) +#define MVNETA_PRXTXI_GET_RREQ(reg) (((reg) >> 16) & 0xff) +#define MVNETA_PRXTXI_PRXTXTHICSUMMARY (1 << 29) +#define MVNETA_PRXTXI_PTXERRORSUMMARY (1 << 30) +#define MVNETA_PRXTXI_PMISCICSUMMARY (1UL << 31) + +/* Port Misc Interrupt Cause/Mask (MVNETA_PMIC/MVNETA_PMIM) */ +#define MVNETA_PMI_PHYSTATUSCHNG (1 << 0) +#define MVNETA_PMI_LINKCHANGE (1 << 1) +#define MVNETA_PMI_IAE (1 << 7) /* Internal Address Error */ +#define MVNETA_PMI_RXOVERRUN (1 << 8) +#define MVNETA_PMI_RXCRCERROR (1 << 9) +#define MVNETA_PMI_RXLARGEPACKET (1 << 10) +#define MVNETA_PMI_TXUNDRN (1 << 11) +#define MVNETA_PMI_PRBSERROR (1 << 12) +#define MVNETA_PMI_PSCSYNCCHANGE (1 << 13) +#define MVNETA_PMI_SRSE (1 << 14) /* SerdesRealignSyncError */ +#define MVNETA_PMI_TREQ(q) (1 << ((q) + 24)) /* TxResourceErrorQ */ +#define MVNETA_PMI_TREQ_MASK (0xff << 24) /* TxResourceErrorQ */ + +/* Port Interrupt Enable (MVNETA_PIE) */ +#define MVNETA_PIE_RXPKTINTRPTENB(q) (1 << ((q) + 0)) +#define MVNETA_PIE_TXPKTINTRPTENB(q) (1 << ((q) + 8)) +#define MVNETA_PIE_RXPKTINTRPTENB_MASK (0xff << 0) +#define 
MVNETA_PIE_TXPKTINTRPTENB_MASK (0xff << 8) + +/* + * Miscellaneous Interrupt Registers + */ +#define MVNETA_PEUIAE_ADDR_MASK (0x3fff) +#define MVNETA_PEUIAE_ADDR(addr) ((addr) & 0x3fff) +#define MVNETA_PEUIAE_GET_ADDR(reg) ((reg) & 0x3fff) + +/* + * SGMII PHY Registers + */ +/* Power and PLL Control (MVNETA_PPLLC) */ +#define MVNETA_PPLLC_REF_FREF_SEL_MASK (0xf << 0) +#define MVNETA_PPLLC_PHY_MODE_MASK (7 << 5) +#define MVNETA_PPLLC_PHY_MODE_SATA (0 << 5) +#define MVNETA_PPLLC_PHY_MODE_SAS (1 << 5) +#define MVNETA_PPLLC_PLL_LOCK (1 << 8) +#define MVNETA_PPLLC_PU_DFE (1 << 10) +#define MVNETA_PPLLC_PU_TX_INTP (1 << 11) +#define MVNETA_PPLLC_PU_TX (1 << 12) +#define MVNETA_PPLLC_PU_RX (1 << 13) +#define MVNETA_PPLLC_PU_PLL (1 << 14) + +/* Digital Loopback Enable (MVNETA_DLE) */ +#define MVNETA_DLE_LOCAL_SEL_BITS_MASK (3 << 10) +#define MVNETA_DLE_LOCAL_SEL_BITS_10BITS (0 << 10) +#define MVNETA_DLE_LOCAL_SEL_BITS_20BITS (1 << 10) +#define MVNETA_DLE_LOCAL_SEL_BITS_40BITS (2 << 10) +#define MVNETA_DLE_LOCAL_RXPHER_TO_TX_EN (1 << 12) +#define MVNETA_DLE_LOCAL_ANA_TX2RX_LPBK_EN (1 << 13) +#define MVNETA_DLE_LOCAL_DIG_TX2RX_LPBK_EN (1 << 14) +#define MVNETA_DLE_LOCAL_DIG_RX2TX_LPBK_EN (1 << 15) + +/* Reference Clock Select (MVNETA_RCS) */ +#define MVNETA_RCS_REFCLK_SEL (1 << 10) + +/* + * DMA descriptors + */ +struct mvneta_tx_desc { + /* LITTLE_ENDIAN */ + uint32_t command; /* off 0x00: commands */ + uint16_t l4ichk; /* initial checksum */ + uint16_t bytecnt; /* 0ff 0x04: buffer byte count */ + uint32_t bufptr_pa; /* off 0x08: buffer ptr(PA) */ + uint32_t flags; /* off 0x0c: flags */ + uint32_t reserved0; /* off 0x10 */ + uint32_t reserved1; /* off 0x14 */ + uint32_t reserved2; /* off 0x18 */ + uint32_t reserved3; /* off 0x1c */ +}; + +struct mvneta_rx_desc { + /* LITTLE_ENDIAN */ + uint32_t status; /* status and flags */ + uint16_t reserved0; + uint16_t bytecnt; /* buffer byte count */ + uint32_t bufptr_pa; /* packet buffer pointer */ + uint32_t reserved1; + uint32_t 
bufptr_va; + uint16_t reserved2; + uint16_t l4chk; /* L4 checksum */ + uint32_t reserved3; + uint32_t reserved4; +}; + +/* + * Received packet command header: + * network controller => software + * the controller parse the packet and set some flags. + */ +#define MVNETA_RX_IPV4_FRAGMENT (1UL << 31) /* Fragment Indicator */ +#define MVNETA_RX_L4_CHECKSUM_OK (1 << 30) /* L4 Checksum */ +/* bit 29 reserved */ +#define MVNETA_RX_U (1 << 28) /* Unknown Destination */ +#define MVNETA_RX_F (1 << 27) /* First buffer */ +#define MVNETA_RX_L (1 << 26) /* Last buffer */ +#define MVNETA_RX_IP_HEADER_OK (1 << 25) /* IP Header is OK */ +#define MVNETA_RX_L3_IP (1 << 24) /* IP Type 0:IP6 1:IP4 */ +#define MVNETA_RX_L2_EV2 (1 << 23) /* Ethernet v2 frame */ +#define MVNETA_RX_L4_MASK (3 << 21) /* L4 Type */ +#define MVNETA_RX_L4_TCP (0x00 << 21) +#define MVNETA_RX_L4_UDP (0x01 << 21) +#define MVNETA_RX_L4_OTH (0x10 << 21) +#define MVNETA_RX_BPDU (1 << 20) /* BPDU frame */ +#define MVNETA_RX_VLAN (1 << 19) /* VLAN tag found */ +#define MVNETA_RX_EC_MASK (3 << 17) /* Error code */ +#define MVNETA_RX_EC_CE (0x00 << 17) /* CRC error */ +#define MVNETA_RX_EC_OR (0x01 << 17) /* FIFO overrun */ +#define MVNETA_RX_EC_MF (0x10 << 17) /* Max. 
frame len */ +#define MVNETA_RX_EC_RE (0x11 << 17) /* Resource error */ +#define MVNETA_RX_ES (1 << 16) /* Error summary */ +/* bit 15:0 reserved */ + +/* + * Transmit packet command header: + * software => network controller + */ +#define MVNETA_TX_CMD_L4_CHECKSUM_MASK (0x3 << 30) /* Do L4 Checksum */ +#define MVNETA_TX_CMD_L4_CHECKSUM_FRAG (0x0 << 30) +#define MVNETA_TX_CMD_L4_CHECKSUM_NOFRAG (0x1 << 30) +#define MVNETA_TX_CMD_L4_CHECKSUM_NONE (0x2 << 30) +#define MVNETA_TX_CMD_PACKET_OFFSET_MASK (0x7f << 23) /* Payload offset */ +#define MVNETA_TX_CMD_W_PACKET_OFFSET(v) (((v) & 0x7f) << 23) +/* bit 22 reserved */ +#define MVNETA_TX_CMD_F (1 << 21) /* First buffer */ +#define MVNETA_TX_CMD_L (1 << 20) /* Last buffer */ +#define MVNETA_TX_CMD_PADDING (1 << 19) /* Pad short frame */ +#define MVNETA_TX_CMD_IP4_CHECKSUM (1 << 18) /* Do IPv4 Checksum */ +#define MVNETA_TX_CMD_L3_IP4 (0 << 17) +#define MVNETA_TX_CMD_L3_IP6 (1 << 17) +#define MVNETA_TX_CMD_L4_TCP (0 << 16) +#define MVNETA_TX_CMD_L4_UDP (1 << 16) +/* bit 15:13 reserved */ +#define MVNETA_TX_CMD_IP_HEADER_LEN_MASK (0x1f << 8) /* IP header len >> 2 */ +#define MVNETA_TX_CMD_IP_HEADER_LEN(v) (((v) & 0x1f) << 8) +/* bit 7 reserved */ +#define MVNETA_TX_CMD_L3_OFFSET_MASK (0x7f << 0) /* offset of L3 hdr. */ +#define MVNETA_TX_CMD_L3_OFFSET(v) (((v) & 0x7f) << 0) + +/* + * Transmit packet extra attributes + * and error status returned from network controller. + */ +#define MVNETA_TX_F_DSA_TAG (3 << 30) /* DSA Tag */ +/* bit 29:8 reserved */ +#define MVNETA_TX_F_MH_SEL (0xf << 4) /* Marvell Header */ +/* bit 3 reserved */ +#define MVNETA_TX_F_EC_MASK (3 << 1) /* Error code */ +#define MVNETA_TX_F_EC_LC (0x00 << 1) /* Late Collision */ +#define MVNETA_TX_F_EC_UR (0x01 << 1) /* Underrun */ +#define MVNETA_TX_F_EC_RL (0x10 << 1) /* Excess. 
Collision */ +#define MVNETA_TX_F_EC_RESERVED (0x11 << 1) +#define MVNETA_TX_F_ES (1 << 0) /* Error summary */ + +#define MVNETA_ERROR_SUMMARY (1 << 0) +#define MVNETA_BUFFER_OWNED_MASK (1UL << 31) +#define MVNETA_BUFFER_OWNED_BY_HOST (0UL << 31) +#define MVNETA_BUFFER_OWNED_BY_DMA (1UL << 31) + +#endif /* _IF_MVNETAREG_H_ */ diff --git a/sys/dev/neta/if_mvnetavar.h b/sys/dev/neta/if_mvnetavar.h new file mode 100644 index 0000000..912e9d7 --- /dev/null +++ b/sys/dev/neta/if_mvnetavar.h @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2017 Stormshield. + * Copyright (c) 2017 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + * + */ + +#ifndef _IF_MVNETAVAR_H_ +#define _IF_MVNETAVAR_H_ +#include <net/if.h> + +#define MVNETA_HWHEADER_SIZE 2 /* Marvell Header */ +#define MVNETA_ETHER_SIZE 22 /* Maximum ether size */ +#define MVNETA_MAX_CSUM_MTU 1600 /* Port1,2 hw limit */ + +/* + * Limit support for frame up to hw csum limit + * until jumbo frame support is added. + */ +#define MVNETA_MAX_FRAME (MVNETA_MAX_CSUM_MTU + MVNETA_ETHER_SIZE) + +/* + * Default limit of queue length + * + * queue 0 is lowest priority and queue 7 is highest priority. + * IP packet is received on queue 7 by default. + */ +#define MVNETA_TX_RING_CNT 512 +#define MVNETA_RX_RING_CNT 256 + +#define MVNETA_BUFRING_SIZE 1024 + +#define MVNETA_PACKET_OFFSET 64 +#define MVNETA_PACKET_SIZE MCLBYTES + +#define MVNETA_RXTH_COUNT 128 +#define MVNETA_RX_REFILL_COUNT 8 +#define MVNETA_TX_RECLAIM_COUNT 32 + +/* + * Device Register access + */ +#define MVNETA_READ(sc, reg) \ + bus_read_4((sc)->res[0], (reg)) +#define MVNETA_WRITE(sc, reg, val) \ + bus_write_4((sc)->res[0], (reg), (val)) + +#define MVNETA_READ_REGION(sc, reg, val, c) \ + bus_read_region_4((sc)->res[0], (reg), (val), (c)) +#define MVNETA_WRITE_REGION(sc, reg, val, c) \ + bus_write_region_4((sc)->res[0], (reg), (val), (c)) + +#define MVNETA_READ_MIB_4(sc, reg) \ + bus_read_4((sc)->res[0], MVNETA_PORTMIB_BASE + (reg)) +#define MVNETA_READ_MIB_8(sc, reg) \ + bus_read_8((sc)->res[0], MVNETA_PORTMIB_BASE + (reg)) + +#define MVNETA_IS_LINKUP(sc) \ + (MVNETA_READ((sc), MVNETA_PSR) & MVNETA_PSR_LINKUP) + +#define MVNETA_IS_QUEUE_SET(queues, q) \ + ((((queues) >> (q)) & 0x1)) + +/* + * EEE: Lower Power Idle config + * Default timer is duration of MTU sized frame transmission. + * The timer can be negotiated by LLDP protocol, but we have no + * support. 
+ */ +#define MVNETA_LPI_TS (ETHERMTU * 8 / 1000) /* [us] */ +#define MVNETA_LPI_TW (ETHERMTU * 8 / 1000) /* [us] */ +#define MVNETA_LPI_LI (ETHERMTU * 8 / 1000) /* [us] */ + +/* + * DMA Descriptor + * + * the ethernet device has 8 rx/tx DMA queues. each of queue has its own + * decriptor list. descriptors are simply index by counter inside the device. + */ +#define MVNETA_TX_SEGLIMIT 32 + +#define MVNETA_QUEUE_IDLE 1 +#define MVNETA_QUEUE_WORKING 2 +#define MVNETA_QUEUE_DISABLED 3 + +struct mvneta_buf { + struct mbuf * m; /* pointer to related mbuf */ + bus_dmamap_t dmap; +}; + +struct mvneta_rx_ring { + int queue_status; + /* Real descriptors array. shared by RxDMA */ + struct mvneta_rx_desc *desc; + bus_dmamap_t desc_map; + bus_addr_t desc_pa; + + /* Managment entries for each of descritors */ + struct mvneta_buf rxbuf[MVNETA_RX_RING_CNT]; + + /* locks */ + struct mtx ring_mtx; + + /* Index */ + int dma; + int cpu; + + /* Limit */ + int queue_th_received; + int queue_th_time; /* [Tclk] */ + + /* LRO */ + struct lro_ctrl lro; + boolean_t lro_enabled; + /* Is this queue out of mbuf */ + boolean_t needs_refill; +} __aligned(CACHE_LINE_SIZE); + +struct mvneta_tx_ring { + /* Index of this queue */ + int qidx; + /* IFNET pointer */ + struct ifnet *ifp; + /* Ring buffer for IFNET */ + struct buf_ring *br; + /* Real descriptors array. 
shared by TxDMA */ + struct mvneta_tx_desc *desc; + bus_dmamap_t desc_map; + bus_addr_t desc_pa; + + /* Managment entries for each of descritors */ + struct mvneta_buf txbuf[MVNETA_TX_RING_CNT]; + + /* locks */ + struct mtx ring_mtx; + + /* Index */ + int used; + int dma; + int cpu; + + /* watchdog */ +#define MVNETA_WATCHDOG_TXCOMP (hz / 10) /* 100ms */ +#define MVNETA_WATCHDOG (10 * hz) /* 10s */ + int watchdog_time; + int queue_status; + boolean_t queue_hung; + + /* Task */ + struct task task; + struct taskqueue *taskq; + + /* Stats */ + uint32_t drv_error; +} __aligned(CACHE_LINE_SIZE); + +static __inline int +tx_counter_adv(int ctr, int n) +{ + + ctr += n; + while (__predict_false(ctr >= MVNETA_TX_RING_CNT)) + ctr -= MVNETA_TX_RING_CNT; + + return (ctr); +} + +static __inline int +rx_counter_adv(int ctr, int n) +{ + + ctr += n; + while (__predict_false(ctr >= MVNETA_RX_RING_CNT)) + ctr -= MVNETA_RX_RING_CNT; + + return (ctr); +} + +/* + * Timeout control + */ +#define MVNETA_PHY_TIMEOUT 10000 /* msec */ +#define RX_DISABLE_TIMEOUT 0x1000000 /* times */ +#define TX_DISABLE_TIMEOUT 0x1000000 /* times */ +#define TX_FIFO_EMPTY_TIMEOUT 0x1000000 /* times */ + +/* + * Debug + */ +#define KASSERT_SC_MTX(sc) \ + KASSERT(mtx_owned(&(sc)->mtx), ("SC mutex not owned")) +#define KASSERT_BM_MTX(sc) \ + KASSERT(mtx_owned(&(sc)->bm.bm_mtx), ("BM mutex not owned")) +#define KASSERT_RX_MTX(sc, q) \ + KASSERT(mtx_owned(&(sc)->rx_ring[(q)].ring_mtx),\ + ("RX mutex not owned")) +#define KASSERT_TX_MTX(sc, q) \ + KASSERT(mtx_owned(&(sc)->tx_ring[(q)].ring_mtx),\ + ("TX mutex not owned")) + +/* + * sysctl(9) parameters + */ +struct mvneta_sysctl_queue { + struct mvneta_softc *sc; + int rxtx; + int queue; +}; +#define MVNETA_SYSCTL_RX 0 +#define MVNETA_SYSCTL_TX 1 + +struct mvneta_sysctl_mib { + struct mvneta_softc *sc; + int index; + uint64_t counter; +}; + +enum mvneta_phy_mode { + MVNETA_PHY_QSGMII, + MVNETA_PHY_SGMII, + MVNETA_PHY_RGMII, + MVNETA_PHY_RGMII_ID +}; + +/* + * 
Ethernet Device main context + */ +DECLARE_CLASS(mvneta_driver); + +struct mvneta_softc { + device_t dev; + uint32_t version; + /* + * mtx must be held by interface functions to/from + * other frameworks. interrupt hander, sysctl hander, + * ioctl hander, and so on. + */ + struct mtx mtx; + struct resource *res[2]; + void *ih_cookie[1]; + + struct ifnet *ifp; + uint32_t mvneta_if_flags; + uint32_t mvneta_media; + + int phy_attached; + enum mvneta_phy_mode phy_mode; + int phy_addr; + int phy_speed; /* PHY speed */ + boolean_t phy_fdx; /* Full duplex mode */ + boolean_t use_inband_status; /* In-band link status */ + + /* + * Link State control + */ + boolean_t linkup; + device_t miibus; + struct mii_data *mii; + uint8_t enaddr[ETHER_ADDR_LEN]; + struct ifmedia mvneta_ifmedia; + + bus_dma_tag_t rx_dtag; + bus_dma_tag_t rxbuf_dtag; + bus_dma_tag_t tx_dtag; + bus_dma_tag_t txmbuf_dtag; + struct mvneta_rx_ring rx_ring[MVNETA_RX_QNUM_MAX]; + struct mvneta_tx_ring tx_ring[MVNETA_TX_QNUM_MAX]; + + /* + * Maintance clock + */ + struct callout tick_ch; + + int cf_lpi; + int cf_fc; + int debug; + + /* + * Sysctl interfaces + */ + struct mvneta_sysctl_queue sysctl_rx_queue[MVNETA_RX_QNUM_MAX]; + struct mvneta_sysctl_queue sysctl_tx_queue[MVNETA_TX_QNUM_MAX]; + + /* + * MIB counter + */ + struct mvneta_sysctl_mib sysctl_mib[MVNETA_PORTMIB_NOCOUNTER]; + uint64_t counter_pdfc; + uint64_t counter_pofc; + uint32_t counter_watchdog; /* manual reset when clearing mib */ + uint32_t counter_watchdog_mib; /* reset after each mib update */ +}; +#define MVNETA_RX_RING(sc, q) \ + (&(sc)->rx_ring[(q)]) +#define MVNETA_TX_RING(sc, q) \ + (&(sc)->tx_ring[(q)]) + +int mvneta_attach(device_t); + +#ifdef FDT +int mvneta_fdt_mac_address(struct mvneta_softc *, uint8_t *); +#endif + +#endif /* _IF_MVNETAVAR_H_ */ diff --git a/sys/dev/oce/oce_mbox.c b/sys/dev/oce/oce_mbox.c index cb2ae81..c9a997d 100644 --- a/sys/dev/oce/oce_mbox.c +++ b/sys/dev/oce/oce_mbox.c @@ -864,7 +864,7 @@ 
oce_rxf_set_promiscuous(POCE_SOFTC sc, uint8_t enable) req->iface_flags = MBX_RX_IFACE_FLAGS_PROMISCUOUS; if (enable & 0x02) - req->iface_flags = MBX_RX_IFACE_FLAGS_VLAN_PROMISCUOUS; + req->iface_flags |= MBX_RX_IFACE_FLAGS_VLAN_PROMISCUOUS; req->if_id = sc->if_id; diff --git a/sys/dev/ofw/ofw_bus_subr.c b/sys/dev/ofw/ofw_bus_subr.c index 671ac12..5a97edd 100644 --- a/sys/dev/ofw/ofw_bus_subr.c +++ b/sys/dev/ofw/ofw_bus_subr.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include "ofw_bus_if.h" #define OFW_COMPAT_LEN 255 +#define OFW_STATUS_LEN 16 int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *obd, phandle_t node) @@ -179,6 +180,24 @@ ofw_bus_status_okay(device_t dev) return (0); } +int +ofw_bus_node_status_okay(phandle_t node) +{ + char status[OFW_STATUS_LEN]; + int len; + + len = OF_getproplen(node, "status"); + if (len <= 0) + return (1); + + OF_getprop(node, "status", status, OFW_STATUS_LEN); + if ((len == 5 && (bcmp(status, "okay", len) == 0)) || + (len == 3 && (bcmp(status, "ok", len)))) + return (1); + + return (0); +} + static int ofw_bus_node_is_compatible_int(const char *compat, int len, const char *onecompat) @@ -701,22 +720,14 @@ phandle_t ofw_bus_find_compatible(phandle_t node, const char *onecompat) { phandle_t child, ret; - void *compat; - int len; /* * Traverse all children of 'start' node, and find first with * matching 'compatible' property. 
*/ for (child = OF_child(node); child != 0; child = OF_peer(child)) { - len = OF_getprop_alloc(child, "compatible", 1, &compat); - if (len >= 0) { - ret = ofw_bus_node_is_compatible_int(compat, len, - onecompat); - free(compat, M_OFWPROP); - if (ret != 0) - return (child); - } + if (ofw_bus_node_is_compatible(child, onecompat) != 0) + return (child); ret = ofw_bus_find_compatible(child, onecompat); if (ret != 0) diff --git a/sys/dev/ofw/ofw_bus_subr.h b/sys/dev/ofw/ofw_bus_subr.h index 6532a16..6058696 100644 --- a/sys/dev/ofw/ofw_bus_subr.h +++ b/sys/dev/ofw/ofw_bus_subr.h @@ -100,6 +100,7 @@ int ofw_bus_intr_by_rid(device_t, phandle_t, int, phandle_t *, int *, /* Helper to get device status property */ const char *ofw_bus_get_status(device_t dev); int ofw_bus_status_okay(device_t dev); +int ofw_bus_node_status_okay(phandle_t node); /* Helper to get node's interrupt parent */ phandle_t ofw_bus_find_iparent(phandle_t); diff --git a/sys/dev/ofw/ofw_cpu.c b/sys/dev/ofw/ofw_cpu.c index 4b89b53..782f56a 100644 --- a/sys/dev/ofw/ofw_cpu.c +++ b/sys/dev/ofw/ofw_cpu.c @@ -315,13 +315,16 @@ ofw_cpu_early_foreach(ofw_cpu_foreach_cb callback, boolean_t only_runnable) /* * If we are filtering by runnable then limit to only - * those that have been enabled. + * those that have been enabled, or do provide a method + * to enable them. 
*/ if (only_runnable) { status[0] = '\0'; OF_getprop(child, "status", status, sizeof(status)); - if (status[0] != '\0' && strcmp(status, "okay") != 0) - continue; + if (status[0] != '\0' && strcmp(status, "okay") != 0 && + strcmp(status, "ok") != 0 && + !OF_hasprop(child, "enable-method")) + continue; } /* diff --git a/sys/dev/ofw/ofw_fdt.c b/sys/dev/ofw/ofw_fdt.c index aa545a3..4c87f55 100644 --- a/sys/dev/ofw/ofw_fdt.c +++ b/sys/dev/ofw/ofw_fdt.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <dev/fdt/fdt_common.h> #include <dev/ofw/ofwvar.h> #include <dev/ofw/openfirm.h> +#include <dev/ofw/ofw_bus_subr.h> #include "ofw_if.h" @@ -359,7 +360,11 @@ ofw_fdt_setprop(ofw_t ofw, phandle_t package, const char *propname, if (offset < 0) return (-1); - return (fdt_setprop_inplace(fdtp, offset, propname, buf, len)); + if (fdt_setprop_inplace(fdtp, offset, propname, buf, len) != 0) + /* Try to add property, when setting value inplace failed */ + return (fdt_setprop(fdtp, offset, propname, buf, len)); + + return (0); } /* Convert a device specifier to a fully qualified pathname. */ @@ -429,7 +434,15 @@ ofw_fdt_fixup(ofw_t ofw) for (i = 0; fdt_fixup_table[i].model != NULL; i++) { if (strncmp(model, fdt_fixup_table[i].model, FDT_MODEL_LEN) != 0) - continue; + /* + * Sometimes it's convenient to provide one + * fixup entry that refers to many boards. 
+ * To handle this case, simply check if model + * is compatible parameter + */ + if(!ofw_bus_node_is_compatible(root, + fdt_fixup_table[i].model)) + continue; if (fdt_fixup_table[i].handler != NULL) (*fdt_fixup_table[i].handler)(root); diff --git a/sys/dev/ofw/openfirmio.c b/sys/dev/ofw/openfirmio.c index 5803ec1..0079310 100644 --- a/sys/dev/ofw/openfirmio.c +++ b/sys/dev/ofw/openfirmio.c @@ -100,8 +100,6 @@ openfirm_getstr(int len, const char *user, char **cpp) return (ENAMETOOLONG); *cpp = cp = malloc(len + 1, M_TEMP, M_WAITOK); - if (cp == NULL) - return (ENOMEM); error = copyin(user, cp, len); cp[len] = '\0'; return (error); @@ -173,10 +171,6 @@ openfirm_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, if (len <= 0) break; value = malloc(len, M_TEMP, M_WAITOK); - if (value == NULL) { - error = ENOMEM; - break; - } len = OF_getprop(node, name, (void *)value, len); error = copyout(value, of->of_buf, len); break; @@ -199,10 +193,6 @@ openfirm_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, if (error) break; value = malloc(of->of_buflen, M_TEMP, M_WAITOK); - if (value == NULL) { - error = ENOMEM; - break; - } error = copyin(of->of_buf, value, of->of_buflen); if (error) break; diff --git a/sys/dev/ofw/openpromio.c b/sys/dev/ofw/openpromio.c index 8ba6d3a..e2a345b 100644 --- a/sys/dev/ofw/openpromio.c +++ b/sys/dev/ofw/openpromio.c @@ -151,18 +151,10 @@ openprom_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, break; } prop = malloc(len, M_TEMP, M_WAITOK | M_ZERO); - if (prop == NULL) { - error = ENOMEM; - break; - } error = copyinstr(&oprom->oprom_array, prop, len, &done); if (error != 0) break; buf = malloc(OPROMMAXPARAM, M_TEMP, M_WAITOK | M_ZERO); - if (buf == NULL) { - error = ENOMEM; - break; - } node = openprom_node; switch (cmd) { case OPROMGETPROP: diff --git a/sys/dev/sdhci/sdhci.c b/sys/dev/sdhci/sdhci.c index 52415e4..f3da5c2 100644 --- a/sys/dev/sdhci/sdhci.c +++ b/sys/dev/sdhci/sdhci.c @@ -1072,7 +1072,7 @@ 
sdhci_set_transfer_mode(struct sdhci_slot *slot, struct mmc_data *data) mode |= SDHCI_TRNS_MULTI; if (data->flags & MMC_DATA_READ) mode |= SDHCI_TRNS_READ; - if (slot->req->stop) + if (slot->req->stop && !(slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP)) mode |= SDHCI_TRNS_ACMD12; if (slot->flags & SDHCI_USE_DMA) mode |= SDHCI_TRNS_DMA; @@ -1317,7 +1317,8 @@ sdhci_finish_data(struct sdhci_slot *slot) slot->intmask |= SDHCI_INT_RESPONSE); } /* Unload rest of data from DMA buffer. */ - if (!slot->data_done && (slot->flags & SDHCI_USE_DMA)) { + if (!slot->data_done && (slot->flags & SDHCI_USE_DMA) && + slot->curcmd->data != NULL) { if (data->flags & MMC_DATA_READ) { left = data->len - slot->offset; bus_dmamap_sync(slot->dmatag, slot->dmamap, @@ -1355,17 +1356,18 @@ sdhci_start(struct sdhci_slot *slot) sdhci_start_command(slot, req->cmd); return; } -/* We don't need this until using Auto-CMD12 feature - if (!(slot->flags & STOP_STARTED) && req->stop) { + if ((slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP) && + !(slot->flags & STOP_STARTED) && req->stop) { slot->flags |= STOP_STARTED; sdhci_start_command(slot, req->stop); return; } -*/ if (sdhci_debug > 1) slot_printf(slot, "result: %d\n", req->cmd->error); if (!req->cmd->error && - (slot->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) { + ((slot->curcmd == req->stop && + (slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP)) || + (slot->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))) { sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); } diff --git a/sys/dev/sdhci/sdhci.h b/sys/dev/sdhci/sdhci.h index 0b29915..814f81e 100644 --- a/sys/dev/sdhci/sdhci.h +++ b/sys/dev/sdhci/sdhci.h @@ -87,6 +87,8 @@ #define SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 (1 << 26) /* Controller support for SDHCI_CTRL2_PRESET_VALUE is broken. */ #define SDHCI_QUIRK_PRESET_VALUE_BROKEN (1 << 27) +/* Controller does not support or the support for ACMD12 is broken. 
*/ +#define SDHCI_QUIRK_BROKEN_AUTO_STOP (1 << 28) /* * Controller registers diff --git a/sys/dev/sdhci/sdhci_fdt.c b/sys/dev/sdhci/sdhci_fdt.c index 2e1b30e..20ba0b7 100644 --- a/sys/dev/sdhci/sdhci_fdt.c +++ b/sys/dev/sdhci/sdhci_fdt.c @@ -57,10 +57,25 @@ __FBSDID("$FreeBSD$"); #include "mmcbr_if.h" #include "sdhci_if.h" -#define MAX_SLOTS 6 +#define MAX_SLOTS 6 +#define SDHCI_FDT_ARMADA38X 1 +#define SDHCI_FDT_GENERIC 2 +#define SDHCI_FDT_XLNX_ZY7 3 + +#define MV_SDIO3_CONF 0 +#define MV_SDIO3_CONF_FB_CLK (1 << 2) +#define MV_SDIO3_CONF_CLK_INV (1 << 0) + +static struct ofw_compat_data compat_data[] = { + { "marvell,armada-380-sdhci", SDHCI_FDT_ARMADA38X }, + { "sdhci_generic", SDHCI_FDT_GENERIC }, + { "xlnx,zy7_sdhci", SDHCI_FDT_XLNX_ZY7 }, + { NULL, 0 } +}; struct sdhci_fdt_softc { device_t dev; /* Controller device */ + u_int devid; /* Device ID */ u_int quirks; /* Chip specific quirks */ u_int caps; /* If we override SDHCI_CAPABILITIES */ uint32_t max_clk; /* Max possible freq */ @@ -70,6 +85,9 @@ struct sdhci_fdt_softc { int num_slots; /* Number of slots on this controller*/ struct sdhci_slot slots[MAX_SLOTS]; struct resource *mem_res[MAX_SLOTS]; /* Memory resource */ + + bool wp_inverted; /* WP pin is inverted */ + bool no_18v; /* No 1.8V support */ }; static uint8_t @@ -110,8 +128,15 @@ static uint32_t sdhci_fdt_read_4(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct sdhci_fdt_softc *sc = device_get_softc(dev); + uint32_t val32; - return (bus_read_4(sc->mem_res[slot->num], off)); + val32 = bus_read_4(sc->mem_res[slot->num], off); + if (off == SDHCI_CAPABILITIES && sc->no_18v) + val32 &= ~SDHCI_CAN_VDD_180; + if (sc->devid == SDHCI_FDT_ARMADA38X && off == SDHCI_CAPABILITIES2) + val32 &= ~(SDHCI_CAN_SDR104 | SDHCI_TUNE_SDR50); + + return (val32); } static void @@ -152,6 +177,33 @@ sdhci_fdt_intr(void *arg) } static int +sdhci_fdt_get_ro(device_t bus, device_t dev) +{ + struct sdhci_fdt_softc *sc = device_get_softc(bus); + + return 
(sdhci_generic_get_ro(bus, dev) ^ sc->wp_inverted); +} + +static void +sdhci_fdt_mv_init(device_t dev) +{ + int rid; + struct resource *res; + uint32_t reg; + + rid = 2; + res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + if (res == NULL) { + device_printf(dev, "SDIO3-conf register not present.\n"); + return; + } + reg = bus_read_4(res, MV_SDIO3_CONF); + reg &= ~MV_SDIO3_CONF_CLK_INV; + bus_write_4(res, MV_SDIO3_CONF, reg); + bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(res), res); +} + +static int sdhci_fdt_probe(device_t dev) { struct sdhci_fdt_softc *sc = device_get_softc(dev); @@ -165,13 +217,22 @@ sdhci_fdt_probe(device_t dev) if (!ofw_bus_status_okay(dev)) return (ENXIO); - if (ofw_bus_is_compatible(dev, "sdhci_generic")) { + sc->devid = ofw_bus_search_compatible(dev, compat_data)->ocd_data; + switch (sc->devid) { + case SDHCI_FDT_ARMADA38X: + sc->quirks = SDHCI_QUIRK_BROKEN_AUTO_STOP; + device_set_desc(dev, "ARMADA38X SDHCI controller"); + break; + case SDHCI_FDT_GENERIC: device_set_desc(dev, "generic fdt SDHCI controller"); - } else if (ofw_bus_is_compatible(dev, "xlnx,zy7_sdhci")) { + break; + case SDHCI_FDT_XLNX_ZY7: sc->quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK; device_set_desc(dev, "Zynq-7000 generic fdt SDHCI controller"); - } else + break; + default: return (ENXIO); + } node = ofw_bus_get_node(dev); @@ -182,6 +243,10 @@ sdhci_fdt_probe(device_t dev) sc->num_slots = cid; if ((OF_getencprop(node, "max-frequency", &cid, sizeof(cid))) > 0) sc->max_clk = cid; + if (OF_hasprop(node, "no-1-8-v")) + sc->no_18v = true; + if (OF_hasprop(node, "wp-inverted")) + sc->wp_inverted = true; return (0); } @@ -231,6 +296,10 @@ sdhci_fdt_attach(device_t dev) } device_printf(dev, "%d slot(s) allocated\n", sc->num_slots); + /* Platform init. 
*/ + if (sc->devid == SDHCI_FDT_ARMADA38X && sc->num_slots == 1) + sdhci_fdt_mv_init(dev); + /* Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, sdhci_fdt_intr, sc, &sc->intrhand); @@ -279,7 +348,7 @@ static device_method_t sdhci_fdt_methods[] = { /* mmcbr_if */ DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), DEVMETHOD(mmcbr_request, sdhci_generic_request), - DEVMETHOD(mmcbr_get_ro, sdhci_generic_get_ro), + DEVMETHOD(mmcbr_get_ro, sdhci_fdt_get_ro), DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), diff --git a/sys/dev/spibus/ofw_spibus.c b/sys/dev/spibus/ofw_spibus.c index c89db2c..a2922fd 100644 --- a/sys/dev/spibus/ofw_spibus.c +++ b/sys/dev/spibus/ofw_spibus.c @@ -80,6 +80,7 @@ ofw_spibus_attach(device_t dev) phandle_t child; pcell_t clock, paddr; device_t childdev; + uint32_t mode = SPIBUS_MODE_NONE; sc->dev = dev; @@ -103,6 +104,24 @@ ofw_spibus_attach(device_t dev) } /* + * Try to get the cpol/cpha mode + */ + if (OF_hasprop(child, "spi-cpol")) + mode = SPIBUS_MODE_CPOL; + if (OF_hasprop(child, "spi-cpha")) { + if (mode == SPIBUS_MODE_CPOL) + mode = SPIBUS_MODE_CPOL_CPHA; + else + mode = SPIBUS_MODE_CPHA; + } + + /* + * Try to get the CS polarity + */ + if (OF_hasprop(child, "spi-cs-high")) + paddr |= SPIBUS_CS_HIGH; + + /* * Get the maximum clock frequency for device, zero means * use the default bus speed. 
*/ @@ -120,6 +139,7 @@ ofw_spibus_attach(device_t dev) continue; dinfo->opd_dinfo.cs = paddr; dinfo->opd_dinfo.clock = clock; + dinfo->opd_dinfo.mode = mode; if (ofw_bus_gen_setup_devinfo(&dinfo->opd_obdinfo, child) != 0) { free(dinfo, M_DEVBUF); diff --git a/sys/dev/spibus/spibusvar.h b/sys/dev/spibus/spibusvar.h index 33605f2..a3cdc92 100644 --- a/sys/dev/spibus/spibusvar.h +++ b/sys/dev/spibus/spibusvar.h @@ -46,6 +46,8 @@ struct spibus_ivar uint32_t clock; }; +#define SPIBUS_CS_HIGH (1U << 31) + enum { SPIBUS_IVAR_CS, /* chip select that we're on */ SPIBUS_IVAR_MODE, /* SPI mode (0-3) */ diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h index 15436d9..de3525c 100644 --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -29,6 +29,10 @@ #ifndef _IF_VTNETVAR_H #define _IF_VTNETVAR_H +#ifdef ALTQ +#define VTNET_LEGACY_TX +#endif + struct vtnet_softc; struct vtnet_statistics { diff --git a/sys/dev/vmware/vmxnet3/if_vmxvar.h b/sys/dev/vmware/vmxnet3/if_vmxvar.h index 8613516..711e694 100644 --- a/sys/dev/vmware/vmxnet3/if_vmxvar.h +++ b/sys/dev/vmware/vmxnet3/if_vmxvar.h @@ -20,6 +20,10 @@ #ifndef _IF_VMXVAR_H #define _IF_VMXVAR_H +#ifdef ALTQ +#define VMXNET3_LEGACY_TX +#endif + struct vmxnet3_softc; struct vmxnet3_dma_alloc { @@ -131,7 +135,7 @@ struct vmxnet3_txq_stats { struct vmxnet3_txqueue { struct mtx vxtxq_mtx; struct vmxnet3_softc *vxtxq_sc; -#ifndef VMXNET3_TX_LEGACY +#ifndef VMXNET3_LEGACY_TX struct buf_ring *vxtxq_br; #endif int vxtxq_id; @@ -142,7 +146,7 @@ struct vmxnet3_txqueue { struct vmxnet3_txq_stats vxtxq_stats; struct vmxnet3_txq_shared *vxtxq_ts; struct sysctl_oid_list *vxtxq_sysctl; -#ifndef VMXNET3_TX_LEGACY +#ifndef VMXNET3_LEGACY_TX struct task vxtxq_defrtask; #endif char vxtxq_name[16]; diff --git a/sys/dev/xilinx/axi_quad_spi.c b/sys/dev/xilinx/axi_quad_spi.c index a9967c3..e18be94 100644 --- a/sys/dev/xilinx/axi_quad_spi.c +++ b/sys/dev/xilinx/axi_quad_spi.c @@ 
-193,6 +193,8 @@ spi_transfer(device_t dev, device_t child, struct spi_command *cmd) /* get the proper chip select */ spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + /* Assert CS */ reg = READ4(sc, SPI_SSR); reg &= ~(1 << cs); diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index d247fbf..1227d48 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1394,13 +1394,10 @@ retry: goto retry; MPASS(m->valid == VM_PAGE_BITS_ALL); } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { - m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL); - if (m == NULL) { - VM_OBJECT_WUNLOCK(uobj); - VM_WAIT; - VM_OBJECT_WLOCK(uobj); + m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL | + VM_ALLOC_WAITFAIL); + if (m == NULL) goto retry; - } rv = vm_pager_get_pages(uobj, &m, 1, NULL, NULL); vm_page_lock(m); diff --git a/sys/i386/cloudabi32/cloudabi32_sysvec.c b/sys/i386/cloudabi32/cloudabi32_sysvec.c index c2a21fa..e50a19c 100644 --- a/sys/i386/cloudabi32/cloudabi32_sysvec.c +++ b/sys/i386/cloudabi32/cloudabi32_sysvec.c @@ -85,11 +85,15 @@ cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, } static int -cloudabi32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cloudabi32_fetch_syscall_args(struct thread *td) { - struct trapframe *frame = td->td_frame; + struct trapframe *frame; + struct syscall_args *sa; int error; + frame = td->td_frame; + sa = &td->td_sa; + /* Obtain system call number. 
*/ sa->code = frame->tf_eax; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index deccf48..3680e43 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -31,6 +31,7 @@ options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 24c1fe8..f082748 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -2472,6 +2472,8 @@ init386(int first) else init_static_kenv(NULL, 0); + identify_hypervisor(); + /* Init basic tunables, hz etc */ init_param1(); @@ -3136,7 +3138,6 @@ static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { - union savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) @@ -3150,10 +3151,8 @@ set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - fpstate = (union savefpu *)&mcp->mc_fpstate; - if (cpu_fxsr) - fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; - error = npxsetregs(td, fpstate, xfpustate, xfpustate_len); + error = npxsetregs(td, (union savefpu *)&mcp->mc_fpstate, + xfpustate, xfpustate_len); } else return (EINVAL); return (error); diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s index feffc15..334e5bd 100644 --- a/sys/i386/i386/support.s +++ b/sys/i386/i386/support.s @@ -50,7 +50,6 @@ ENTRY(bzero) movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx - cld rep stosl movl 12(%esp),%ecx @@ -83,7 +82,6 @@ ENTRY(i686_pagezero) movl 12(%esp),%edi movl $1024,%ecx - cld ALIGN_TEXT 1: @@ -134,7 +132,6 @@ ENTRY(fillw) movl 8(%esp),%eax movl 
12(%esp),%edi movl 16(%esp),%ecx - cld rep stosw popl %edi @@ -151,7 +148,6 @@ ENTRY(bcopyb) subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f - cld /* nope, copy forwards */ rep movsb popl %edi @@ -192,7 +188,6 @@ ENTRY(bcopy) jb 1f shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ rep movsl movl 16(%ebp),%ecx @@ -238,7 +233,6 @@ ENTRY(memcpy) movl 20(%esp),%ecx movl %edi,%eax shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ rep movsl movl 20(%esp),%ecx @@ -284,12 +278,11 @@ ENTRY(copyout) jz done_copyout /* - * Check explicitly for non-user addresses. If 486 write protection - * is being used, this check is essential because we are in kernel - * mode so the h/w does not provide any protection against writing - * kernel addresses. + * Check explicitly for non-user addresses. This check is essential + * because it prevents usermode from writing into the kernel. We do + * not verify anywhere else that the user did not specify a rogue + * address. */ - /* * First, prevent address wrapping. 
*/ @@ -309,7 +302,6 @@ ENTRY(copyout) movl %ebx,%ecx shrl $2,%ecx - cld rep movsl movb %bl,%cl @@ -360,7 +352,6 @@ ENTRY(copyin) movb %cl,%al shrl $2,%ecx /* copy longword-wise */ - cld rep movsl movb %al,%cl @@ -577,7 +568,6 @@ ENTRY(copyinstr) movl %eax,20(%esp) 1: incl %edx - cld 2: decl %edx @@ -630,7 +620,6 @@ ENTRY(copystr) movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx - cld 1: decl %edx jz 4f @@ -670,7 +659,6 @@ ENTRY(bcmp) movl %edx,%ecx shrl $2,%ecx - cld /* compare forwards */ repe cmpsl jne 1f diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 3b8e8ed..7fa94f4 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -457,7 +457,7 @@ user_trctrap_out: goto userout; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); - break; + goto out; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ @@ -501,7 +501,7 @@ user_trctrap_out: if (dtrace_return_probe_ptr != NULL && dtrace_return_probe_ptr(®s) == 0) goto out; - break; + goto userout; #endif } } else { @@ -1042,16 +1042,18 @@ dblfault_handler() } int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; caddr_t params; long tmp; int error; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; params = (caddr_t)frame->tf_esp + sizeof(int); sa->code = frame->tf_eax; @@ -1112,7 +1114,6 @@ void syscall(struct trapframe *frame) { struct thread *td; - struct syscall_args sa; register_t orig_tf_eflags; int error; ksiginfo_t ksi; @@ -1129,7 +1130,7 @@ syscall(struct trapframe *frame) td = curthread; td->td_frame = frame; - error = syscallenter(td, &sa); + error = syscallenter(td); /* * Traced syscall. 
@@ -1145,10 +1146,10 @@ syscall(struct trapframe *frame) KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", - syscallname(td->td_proc, sa.code))); + syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", - syscallname(td->td_proc, sa.code))); + syscallname(td->td_proc, td->td_sa.code))); - syscallret(td, error, &sa); + syscallret(td, error); } diff --git a/sys/i386/include/md_var.h b/sys/i386/include/md_var.h index 337f541..a0d0e53 100644 --- a/sys/i386/include/md_var.h +++ b/sys/i386/include/md_var.h @@ -59,7 +59,6 @@ void doreti_popl_es(void) __asm(__STRING(doreti_popl_es)); void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault)); void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs)); void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault)); -void finishidentcpu(void); void fill_based_sd(struct segment_descriptor *sdp, uint32_t base); void i686_pagezero(void *addr); void sse2_pagezero(void *addr); diff --git a/sys/i386/include/proc.h b/sys/i386/include/proc.h index 14f03b7..8fe7a6c 100644 --- a/sys/i386/include/proc.h +++ b/sys/i386/include/proc.h @@ -60,6 +60,13 @@ struct mdproc { #define KINFO_PROC_SIZE 768 +struct syscall_args { + u_int code; + struct sysent *callp; + register_t args[8]; + int narg; +}; + #ifdef _KERNEL /* Get the current kernel thread stack usage. 
*/ @@ -77,13 +84,6 @@ void user_ldt_free(struct thread *); void user_ldt_deref(struct proc_ldt *pldt); extern struct mtx dt_lock; - -struct syscall_args { - u_int code; - struct sysent *callp; - register_t args[8]; - int narg; -}; #endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 5ef616b2..2f15cb8 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -1053,6 +1053,8 @@ npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, if (!hw_float) return (ENXIO); + if (cpu_fxsr) + addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c index 49fafdc..a4919a0 100644 --- a/sys/i386/linux/linux_sysvec.c +++ b/sys/i386/linux/linux_sysvec.c @@ -850,13 +850,15 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) } static int -linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; sa->code = frame->tf_eax; sa->args[0] = frame->tf_ebx; diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index a1d9846..2599ff4 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -369,8 +369,7 @@ SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2, #endif static int -null_fetch_syscall_args(struct thread *td __unused, - struct syscall_args *sa __unused) +null_fetch_syscall_args(struct thread *td __unused) { panic("null_fetch_syscall_args"); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index c7680aa..b63bc65 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -478,6 +478,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread * bcopy(&td->td_startcopy, &td2->td_startcopy, __rangeof(struct 
thread, td_startcopy, td_endcopy)); + td2->td_sa = td->td_sa; bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); td2->td_sigstk = td->td_sigstk; @@ -1103,7 +1104,7 @@ fork_return(struct thread *td, struct trapframe *frame) */ PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; - _STOPEVENT(p, S_SCX, td->td_dbg_sc_code); + _STOPEVENT(p, S_SCX, td->td_sa.code); if ((p->p_ptevents & PTRACE_SCX) != 0 || (td->td_dbgflags & TDB_BORN) != 0) ptracestop(td, SIGTRAP, NULL); diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index 520bf97..69c1140 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -276,6 +276,7 @@ kthread_add(void (*func)(void *), void *arg, struct proc *p, newtd->td_sleeptimo = 0; bcopy(&oldtd->td_startcopy, &newtd->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); + newtd->td_sa = oldtd->td_sa; /* set up arg0 for 'ps', et al */ va_start(ap, fmt); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index eb841c8..24c20a0 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -131,9 +131,9 @@ struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct proclist zombproc; -struct sx allproc_lock; -struct sx proctree_lock; -struct mtx ppeers_lock; +struct sx __exclusive_cache_line allproc_lock; +struct sx __exclusive_cache_line proctree_lock; +struct mtx __exclusive_cache_line ppeers_lock; uma_zone_t proc_zone; /* diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 973e476..aafa697 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -308,7 +308,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len, int npages, int rhpages, int flags) { vm_page_t *pa = sfio->pa; - int nios; + int grabbed, nios; nios = 0; flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0; @@ -318,14 +318,14 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len, * only required pages. Readahead pages are dealt with later. 
*/ VM_OBJECT_WLOCK(obj); - for (int i = 0; i < npages; i++) { - pa[i] = vm_page_grab(obj, OFF_TO_IDX(vmoff(i, off)), - VM_ALLOC_WIRED | VM_ALLOC_NORMAL | flags); - if (pa[i] == NULL) { - npages = i; - rhpages = 0; - break; - } + + grabbed = vm_page_grab_pages(obj, OFF_TO_IDX(off), + VM_ALLOC_NORMAL | VM_ALLOC_WIRED | flags, pa, npages); + if (grabbed < npages) { + for (int i = grabbed; i < npages; i++) + pa[i] = NULL; + npages = grabbed; + rhpages = 0; } for (int i = 0; i < npages;) { diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 829f955..6182615 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -88,7 +88,7 @@ static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); * sysctl requests larger than a single page via an exclusive lock. */ static struct rmlock sysctllock; -static struct sx sysctlmemlock; +static struct sx __exclusive_cache_line sysctlmemlock; #define SYSCTL_WLOCK() rm_wlock(&sysctllock) #define SYSCTL_WUNLOCK() rm_wunlock(&sysctllock) diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index c39986d..6bebac9 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -235,6 +235,7 @@ thread_create(struct thread *td, struct rtprio *rtp, newtd->td_sleeptimo = 0; bcopy(&td->td_startcopy, &newtd->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); + newtd->td_sa = td->td_sa; newtd->td_proc = td->td_proc; newtd->td_rb_list = newtd->td_rbp_list = newtd->td_rb_inact = 0; thread_cow_get(newtd, td); diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index 6384056..53fad81 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/linker.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/mutex.h> #include <sys/systm.h> @@ -63,6 +64,27 @@ MTX_SYSINIT(intr_config_hook, &intr_config_hook_lock, "intr config", MTX_DEF); static void run_interrupt_driven_config_hooks(void); /* + 
* Private data and a shim function for implementing config_interhook_oneshot(). + */ +struct oneshot_config_hook { + struct intr_config_hook + och_hook; /* Must be first */ + ich_func_t och_func; + void *och_arg; +}; + +static void +config_intrhook_oneshot_func(void *arg) +{ + struct oneshot_config_hook *ohook; + + ohook = arg; + ohook->och_func(ohook->och_arg); + config_intrhook_disestablish(&ohook->och_hook); + free(ohook, M_DEVBUF); +} + +/* * If we wait too long for an interrupt-driven config hook to return, print * a diagnostic. */ @@ -183,6 +205,22 @@ config_intrhook_establish(struct intr_config_hook *hook) return (0); } +/* + * Register a hook function that is automatically unregistered after it runs. + */ +void +config_intrhook_oneshot(ich_func_t func, void *arg) +{ + struct oneshot_config_hook *ohook; + + ohook = malloc(sizeof(*ohook), M_DEVBUF, M_WAITOK); + ohook->och_func = func; + ohook->och_arg = arg; + ohook->och_hook.ich_func = config_intrhook_oneshot_func; + ohook->och_hook.ich_arg = ohook; + config_intrhook_establish(&ohook->och_hook); +} + void config_intrhook_disestablish(struct intr_config_hook *hook) { diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c index b538f5d..e92d26c 100644 --- a/sys/kern/subr_pctrie.c +++ b/sys/kern/subr_pctrie.c @@ -58,18 +58,6 @@ __FBSDID("$FreeBSD$"); #include <ddb/ddb.h> #endif -/* - * These widths should allow the pointers to a node's children to fit within - * a single cache line. The extra levels from a narrow width should not be - * a problem thanks to path compression. 
- */ -#ifdef __LP64__ -#define PCTRIE_WIDTH 4 -#else -#define PCTRIE_WIDTH 3 -#endif - -#define PCTRIE_COUNT (1 << PCTRIE_WIDTH) #define PCTRIE_MASK (PCTRIE_COUNT - 1) #define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1) diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c index 2bcad34..0182884 100644 --- a/sys/kern/subr_syscall.c +++ b/sys/kern/subr_syscall.c @@ -52,13 +52,15 @@ __FBSDID("$FreeBSD$"); #include <security/audit/audit.h> static inline int -syscallenter(struct thread *td, struct syscall_args *sa) +syscallenter(struct thread *td) { struct proc *p; + struct syscall_args *sa; int error, traced; PCPU_INC(cnt.v_syscall); p = td->td_proc; + sa = &td->td_sa; td->td_pticks = 0; if (td->td_cowgen != p->p_cowgen) @@ -71,7 +73,7 @@ syscallenter(struct thread *td, struct syscall_args *sa) td->td_dbgflags |= TDB_SCE; PROC_UNLOCK(p); } - error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); + error = (p->p_sysent->sv_fetch_syscall_args)(td); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(sa->code, sa->narg, sa->args); @@ -85,8 +87,6 @@ syscallenter(struct thread *td, struct syscall_args *sa) STOPEVENT(p, S_SCE, sa->narg); if (p->p_flag & P_TRACED) { PROC_LOCK(p); - td->td_dbg_sc_code = sa->code; - td->td_dbg_sc_narg = sa->narg; if (p->p_ptevents & PTRACE_SCE) ptracestop((td), SIGTRAP, NULL); PROC_UNLOCK(p); @@ -96,11 +96,7 @@ syscallenter(struct thread *td, struct syscall_args *sa) * Reread syscall number and arguments if * debugger modified registers or memory. 
*/ - error = (p->p_sysent->sv_fetch_syscall_args)(td, sa); - PROC_LOCK(p); - td->td_dbg_sc_code = sa->code; - td->td_dbg_sc_narg = sa->narg; - PROC_UNLOCK(p); + error = (p->p_sysent->sv_fetch_syscall_args)(td); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(sa->code, sa->narg, sa->args); @@ -162,9 +158,10 @@ syscallenter(struct thread *td, struct syscall_args *sa) } static inline void -syscallret(struct thread *td, int error, struct syscall_args *sa) +syscallret(struct thread *td, int error) { struct proc *p, *p2; + struct syscall_args *sa; ksiginfo_t ksi; int traced, error1; @@ -172,6 +169,7 @@ syscallret(struct thread *td, int error, struct syscall_args *sa) ("fork() did not clear TDP_FORKING upon completion")); p = td->td_proc; + sa = &td->td_sa; if ((trap_enotcap || (p->p_flag2 & P2_TRAPCAP) != 0) && IN_CAPABILITY_MODE(td)) { error1 = (td->td_pflags & TDP_NERRNO) == 0 ? error : diff --git a/sys/kern/subr_terminal.c b/sys/kern/subr_terminal.c index 76c6cfb..5ac6a03 100644 --- a/sys/kern/subr_terminal.c +++ b/sys/kern/subr_terminal.c @@ -375,7 +375,10 @@ termtty_outwakeup(struct tty *tp) TERMINAL_UNLOCK_TTY(tm); } - tm->tm_class->tc_done(tm); + TERMINAL_LOCK_TTY(tm); + if (!(tm->tm_flags & TF_MUTE)) + tm->tm_class->tc_done(tm); + TERMINAL_UNLOCK_TTY(tm); if (flags & TF_BELL) tm->tm_class->tc_bell(tm); } @@ -545,10 +548,9 @@ termcn_cnputc(struct consdev *cp, int c) teken_set_curattr(&tm->tm_emulator, &kernel_message); teken_input(&tm->tm_emulator, &cv, 1); teken_set_curattr(&tm->tm_emulator, &backup); + tm->tm_class->tc_done(tm); } TERMINAL_UNLOCK_CONS(tm); - - tm->tm_class->tc_done(tm); } /* diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c index 1de6378..4e3f04e 100644 --- a/sys/kern/subr_vmem.c +++ b/sys/kern/subr_vmem.c @@ -181,7 +181,7 @@ static struct callout vmem_periodic_ch; static int vmem_periodic_interval; static struct task vmem_periodic_wk; -static struct mtx_padalign vmem_list_lock; +static struct mtx_padalign __exclusive_cache_line 
vmem_list_lock; static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); /* ---- misc */ @@ -580,7 +580,7 @@ qc_drain(vmem_t *vm) #ifndef UMA_MD_SMALL_ALLOC -static struct mtx_padalign vmem_bt_lock; +static struct mtx_padalign __exclusive_cache_line vmem_bt_lock; /* * vmem_bt_alloc: Allocate a new page of boundary tags. diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index a8513c9..e0c4806 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -1347,8 +1347,8 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) pl->pl_siglist = td2->td_siglist; strcpy(pl->pl_tdname, td2->td_name); if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) != 0) { - pl->pl_syscall_code = td2->td_dbg_sc_code; - pl->pl_syscall_narg = td2->td_dbg_sc_narg; + pl->pl_syscall_code = td2->td_sa.code; + pl->pl_syscall_narg = td2->td_sa.narg; } else { pl->pl_syscall_code = 0; pl->pl_syscall_narg = 0; diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 5228253..d4155c6 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -449,13 +449,10 @@ retry: if (vm_page_sleep_if_busy(m, "shmtrc")) goto retry; } else if (vm_pager_has_page(object, idx, NULL, NULL)) { - m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL); - if (m == NULL) { - VM_OBJECT_WUNLOCK(object); - VM_WAIT; - VM_OBJECT_WLOCK(object); + m = vm_page_alloc(object, idx, + VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); + if (m == NULL) goto retry; - } rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); vm_page_lock(m); diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 58481a8..7909e6b 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -249,23 +249,23 @@ SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, /* * This lock synchronizes access to bd_request. */ -static struct mtx_padalign bdlock; +static struct mtx_padalign __exclusive_cache_line bdlock; /* * This lock protects the runningbufreq and synchronizes runningbufwakeup and * waitrunningbufspace(). 
*/ -static struct mtx_padalign rbreqlock; +static struct mtx_padalign __exclusive_cache_line rbreqlock; /* * Lock that protects needsbuffer and the sleeps/wakeups surrounding it. */ -static struct rwlock_padalign nblock; +static struct rwlock_padalign __exclusive_cache_line nblock; /* * Lock that protects bdirtywait. */ -static struct mtx_padalign bdirtylock; +static struct mtx_padalign __exclusive_cache_line bdirtylock; /* * Wakeup point for bufdaemon, as well as indicator of whether it is already @@ -344,7 +344,7 @@ static int bq_len[BUFFER_QUEUES]; /* * Lock for each bufqueue */ -static struct mtx_padalign bqlocks[BUFFER_QUEUES]; +static struct mtx_padalign __exclusive_cache_line bqlocks[BUFFER_QUEUES]; /* * per-cpu empty buffer cache. @@ -2716,7 +2716,7 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int size) */ obj = bp->b_bufobj->bo_object; VM_OBJECT_WLOCK(obj); - while (bp->b_npages < desiredpages) { + if (bp->b_npages < desiredpages) { /* * We must allocate system pages since blocking * here could interfere with paging I/O, no @@ -2727,14 +2727,12 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int size) * deadlocks once allocbuf() is called after * pages are vfs_busy_pages(). 
*/ - m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) + bp->b_npages, - VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | - VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | - VM_ALLOC_COUNT(desiredpages - bp->b_npages)); - if (m->valid == 0) - bp->b_flags &= ~B_CACHE; - bp->b_pages[bp->b_npages] = m; - ++bp->b_npages; + (void)vm_page_grab_pages(obj, + OFF_TO_IDX(bp->b_offset) + bp->b_npages, + VM_ALLOC_SYSTEM | VM_ALLOC_IGN_SBUSY | + VM_ALLOC_NOBUSY | VM_ALLOC_WIRED, + &bp->b_pages[bp->b_npages], desiredpages - bp->b_npages); + bp->b_npages = desiredpages; } /* @@ -4442,18 +4440,14 @@ vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { -tryagain: /* * note: must allocate system pages since blocking here * could interfere with paging I/O, no matter which * process we are. */ p = vm_page_alloc(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | - VM_ALLOC_WIRED | VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT)); - if (p == NULL) { - VM_WAIT; - goto tryagain; - } + VM_ALLOC_WIRED | VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT) | + VM_ALLOC_WAITOK); pmap_qenter(pg, &p, 1); bp->b_pages[index] = p; } diff --git a/sys/mips/atheros/ar71xx_spi.c b/sys/mips/atheros/ar71xx_spi.c index aa2cf21..1b722d4 100644 --- a/sys/mips/atheros/ar71xx_spi.c +++ b/sys/mips/atheros/ar71xx_spi.c @@ -212,6 +212,8 @@ ar71xx_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + ar71xx_spi_chip_activate(sc, cs); KASSERT(cmd->tx_cmd_sz == cmd->rx_cmd_sz, diff --git a/sys/mips/include/proc.h b/sys/mips/include/proc.h index efd5e79..da88d2a 100644 --- a/sys/mips/include/proc.h +++ b/sys/mips/include/proc.h @@ -79,7 +79,6 @@ struct mdproc { /* empty */ }; -#ifdef _KERNEL struct syscall_args { u_int code; struct sysent *callp; @@ -87,7 +86,6 @@ struct syscall_args { int narg; struct trapframe *trapframe; }; -#endif #ifdef __mips_n64 #define 
KINFO_PROC_SIZE 1088 diff --git a/sys/mips/mediatek/mtk_spi_v1.c b/sys/mips/mediatek/mtk_spi_v1.c index e50b025..4e37890 100644 --- a/sys/mips/mediatek/mtk_spi_v1.c +++ b/sys/mips/mediatek/mtk_spi_v1.c @@ -231,6 +231,8 @@ mtk_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + if (cs != 0) /* Only 1 CS */ return (ENXIO); diff --git a/sys/mips/mediatek/mtk_spi_v2.c b/sys/mips/mediatek/mtk_spi_v2.c index ad55cb3..02fbd77 100644 --- a/sys/mips/mediatek/mtk_spi_v2.c +++ b/sys/mips/mediatek/mtk_spi_v2.c @@ -236,6 +236,8 @@ mtk_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + if (cs != 0) /* Only 1 CS */ return (ENXIO); diff --git a/sys/mips/mips/trap.c b/sys/mips/mips/trap.c index f98fc97..396f69b 100644 --- a/sys/mips/mips/trap.c +++ b/sys/mips/mips/trap.c @@ -339,12 +339,16 @@ static int emulate_unaligned_access(struct trapframe *frame, int mode); extern void fswintrberr(void); /* XXX */ int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { - struct trapframe *locr0 = td->td_frame; + struct trapframe *locr0; struct sysentvec *se; + struct syscall_args *sa; int error, nsaved; + locr0 = td->td_frame; + sa = &td->td_sa; + bzero(sa->args, sizeof(sa->args)); /* compute next PC after syscall instruction */ @@ -786,19 +790,18 @@ dofault: case T_SYSCALL + T_USER: { - struct syscall_args sa; int error; - sa.trapframe = trapframe; - error = syscallenter(td, &sa); + td->td_sa.trapframe = trapframe; + error = syscallenter(td); #if !defined(SMP) && (defined(DDB) || defined(DEBUG)) if (trp == trapdebug) - trapdebug[TRAPSIZE - 1].code = sa.code; + trapdebug[TRAPSIZE - 1].code = td->td_sa.code; else - trp[-1].code = sa.code; + trp[-1].code = td->td_sa.code; #endif - trapdebug_enter(td->td_frame, -sa.code); + trapdebug_enter(td->td_frame, -td->td_sa.code); /* * The sync'ing of I & D 
caches for SYS_ptrace() is @@ -806,7 +809,7 @@ dofault: * instead of being done here under a special check * for SYS_ptrace(). */ - syscallret(td, error, &sa); + syscallret(td, error); return (trapframe->pc); } diff --git a/sys/mips/mips/uma_machdep.c b/sys/mips/mips/uma_machdep.c index b4006e1..bc32075 100644 --- a/sys/mips/mips/uma_machdep.c +++ b/sys/mips/mips/uma_machdep.c @@ -50,6 +50,10 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) *flags = UMA_SLAB_PRIV; pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; +#ifndef __mips_n64 + pflags &= ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); + pflags |= VM_ALLOC_NOWAIT; +#endif for (;;) { m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags); diff --git a/sys/mips/rt305x/rt305x_spi.c b/sys/mips/rt305x/rt305x_spi.c index 17126d9..2fd99fa 100644 --- a/sys/mips/rt305x/rt305x_spi.c +++ b/sys/mips/rt305x/rt305x_spi.c @@ -226,6 +226,8 @@ rt305x_spi_transfer(device_t dev, device_t child, struct spi_command *cmd) spibus_get_cs(child, &cs); + cs &= ~SPIBUS_CS_HIGH; + if (cs != 0) /* Only 1 CS */ return (ENXIO); diff --git a/sys/modules/cpsw/Makefile b/sys/modules/cpsw/Makefile index 130a659..d3e34bf 100644 --- a/sys/modules/cpsw/Makefile +++ b/sys/modules/cpsw/Makefile @@ -3,6 +3,14 @@ .PATH: ${SRCTOP}/sys/arm/ti/cpsw KMOD= if_cpsw -SRCS= if_cpsw.c device_if.h bus_if.h ofw_bus_if.h miibus_if.h +SRCS= if_cpsw.c device_if.h bus_if.h ofw_bus_if.h miibus_if.h opt_cpsw.h + +.if !defined(OPT_CPSW_ETHERSWITCH) && defined(KERNBUILDDIR) +OPT_CPSW_ETHERSWITCH!= sed -n '/CPSW_ETHERSWITCH/p' ${KERNBUILDDIR}/opt_cpsw.h +.endif + +.if !empty(OPT_CPSW_ETHERSWITCH) +SRCS+= etherswitch_if.h +.endif .include <bsd.kmod.mk> diff --git a/sys/modules/dtb/am335x/Makefile b/sys/modules/dtb/am335x/Makefile index b60f4a9..f855b51 100644 --- a/sys/modules/dtb/am335x/Makefile +++ b/sys/modules/dtb/am335x/Makefile @@ -3,6 +3,7 @@ DTS= \ beaglebone.dts \ beaglebone-black.dts \ + ubmc.dts \ ufw.dts .include <bsd.dtb.mk> diff --git 
a/sys/modules/dtb/mv/Makefile b/sys/modules/dtb/mv/Makefile new file mode 100644 index 0000000..d028cd8 --- /dev/null +++ b/sys/modules/dtb/mv/Makefile @@ -0,0 +1,8 @@ +# $FreeBSD$ +# All the dts files for Marvell systems we support. +DTS= \ + armada-385-80300-0148-G00-X100.dts \ + armada-388-clearfog.dts \ + armada-388-gp.dts + +.include <bsd.dtb.mk> diff --git a/sys/net/altq/altq_cbq.h b/sys/net/altq/altq_cbq.h index 51e7cf9..68559e2 100644 --- a/sys/net/altq/altq_cbq.h +++ b/sys/net/altq/altq_cbq.h @@ -190,7 +190,7 @@ struct cbq_getstats { #define CBQ_TIMEOUT 10 #define CBQ_LS_TIMEOUT (20 * hz / 1000) -#define CBQ_MAX_CLASSES 256 +#define CBQ_MAX_CLASSES 2048 #ifdef ALTQ3_COMPAT #define CBQ_MAX_FILTERS 256 diff --git a/sys/net/altq/altq_hfsc.h b/sys/net/altq/altq_hfsc.h index de5e89b..78521f8 100644 --- a/sys/net/altq/altq_hfsc.h +++ b/sys/net/altq/altq_hfsc.h @@ -51,7 +51,7 @@ struct service_curve { /* special class handles */ #define HFSC_NULLCLASS_HANDLE 0 -#define HFSC_MAX_CLASSES 64 +#define HFSC_MAX_CLASSES 2048 /* hfsc class flags */ #define HFCF_RED 0x0001 /* use RED */ diff --git a/sys/net/if.c b/sys/net/if.c index 85fe24e..6e897d0 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -144,7 +144,7 @@ int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); -void (*carp_detach_p)(struct ifaddr *); +void (*carp_detach_p)(struct ifaddr *, bool); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); diff --git a/sys/net/if.h b/sys/net/if.h index 98ae0a8..5da596a 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -249,7 +249,7 @@ struct if_data { #define IFCAP_CANTCHANGE (IFCAP_NETMAP) -#define IFQ_MAXLEN 50 +#define IFQ_MAXLEN 128 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 3fbb842..2a614cb 100644 --- a/sys/net/if_bridge.c +++ 
b/sys/net/if_bridge.c @@ -246,8 +246,7 @@ static void bridge_ifdetach(void *arg __unused, struct ifnet *); static void bridge_init(void *); static void bridge_dummynet(struct mbuf *, struct ifnet *); static void bridge_stop(struct ifnet *, int); -static int bridge_transmit(struct ifnet *, struct mbuf *); -static void bridge_qflush(struct ifnet *); +static void bridge_start(struct ifnet *); static struct mbuf *bridge_input(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); @@ -408,7 +407,7 @@ SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, static VNET_DEFINE(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, - CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, + CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, "Allow overlap of link-local scope " "zones of a bridge interface and the member interfaces"); @@ -584,6 +583,7 @@ static moduledata_t bridge_mod = { }; DECLARE_MODULE(if_bridge, bridge_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(if_bridge, 1); MODULE_DEPEND(if_bridge, bridgestp, 1, 1, 1); /* @@ -657,10 +657,12 @@ bridge_clone_create(struct if_clone *ifc, int unit, caddr_t params) if_initname(ifp, bridge_name, unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = bridge_ioctl; - ifp->if_transmit = bridge_transmit; - ifp->if_qflush = bridge_qflush; + ifp->if_start = bridge_start; ifp->if_init = bridge_init; ifp->if_type = IFT_BRIDGE; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = 0; + IFQ_SET_READY(&ifp->if_snd); /* * Generate an ethernet address with a locally administered address. 
@@ -745,6 +747,7 @@ bridge_clone_destroy(struct ifnet *ifp) BRIDGE_LIST_UNLOCK(); bstp_detach(&sc->sc_stp); + IFQ_PURGE(&ifp->if_snd); ether_ifdetach(ifp); if_free(ifp); @@ -865,6 +868,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } BRIDGE_LOCK(sc); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (bif->bif_ifp->if_type == IFT_GIF) + continue; if (bif->bif_ifp->if_mtu != ifr->ifr_mtu) { log(LOG_NOTICE, "%s: invalid MTU: %u(%s)" " != %d\n", sc->sc_ifp->if_xname, @@ -940,8 +945,12 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set) error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr); if (error) if_printf(sc->sc_ifp, - "error setting interface capabilities on %s\n", - ifp->if_xname); + "error setting capabilities on %s: %d\n", + ifp->if_xname, error); + if ((ifp->if_capenable & ~set) != 0) + if_printf(sc->sc_ifp, + "can't disable some capabilities on %s: 0x%x\n", + ifp->if_xname, ifp->if_capenable & ~set); } } @@ -1162,12 +1171,14 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) } #endif /* Allow the first Ethernet member to define the MTU */ - if (LIST_EMPTY(&sc->sc_iflist)) - sc->sc_ifp->if_mtu = ifs->if_mtu; - else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { - if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n", - ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); - return (EINVAL); + if (ifs->if_type != IFT_GIF) { + if (LIST_EMPTY(&sc->sc_iflist)) + sc->sc_ifp->if_mtu = ifs->if_mtu; + else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { + if_printf(sc->sc_ifp, "invalid MTU: %u(%s) != %u\n", + ifs->if_mtu, ifs->if_xname, sc->sc_ifp->if_mtu); + return (EINVAL); + } } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); @@ -2094,43 +2105,33 @@ sendunicast: return (0); } -/* - * bridge_transmit: - * - * Do output on a bridge. 
- * - */ -static int -bridge_transmit(struct ifnet *ifp, struct mbuf *m) + +static void +bridge_start(struct ifnet *ifp) { struct bridge_softc *sc; struct ether_header *eh; struct ifnet *dst_if; - int error = 0; + struct mbuf *m; sc = ifp->if_softc; + for (;;) { + IFQ_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; - ETHER_BPF_MTAP(ifp, m); - - eh = mtod(m, struct ether_header *); - - BRIDGE_LOCK(sc); - if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && - (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) { - BRIDGE_UNLOCK(sc); - error = bridge_enqueue(sc, dst_if, m); - } else - bridge_broadcast(sc, ifp, m, 0); + ETHER_BPF_MTAP(ifp, m); - return (error); -} + eh = mtod(m, struct ether_header *); -/* - * The ifp->if_qflush entry point for if_bridge(4) is no-op. - */ -static void -bridge_qflush(struct ifnet *ifp __unused) -{ + BRIDGE_LOCK(sc); + if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && + (dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1)) != NULL) { + BRIDGE_UNLOCK(sc); + (void)bridge_enqueue(sc, dst_if, m); + } else + bridge_broadcast(sc, ifp, m, 0); + } } /* diff --git a/sys/net/if_enc.c b/sys/net/if_enc.c index 1f3ef8f..82a58a8 100644 --- a/sys/net/if_enc.c +++ b/sys/net/if_enc.c @@ -99,9 +99,15 @@ static void enc_remove_hhooks(struct enc_softc *); static const char encname[] = "enc"; +#define IPSEC_ENC_AFTER_PFIL 0x04 /* * Before and after are relative to when we are stripping the * outer IP header. + * + * AFTER_PFIL flag used only for bpf_mask_*. It enables BPF capturing + * after PFIL hook execution. It might be useful when PFIL hook does + * some changes to the packet, e.g. address translation. If PFIL hook + * consumes mbuf, nothing will be captured. 
*/ static VNET_DEFINE(int, filter_mask_in) = IPSEC_ENC_BEFORE; static VNET_DEFINE(int, bpf_mask_in) = IPSEC_ENC_BEFORE; @@ -194,6 +200,30 @@ enc_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (0); } +static void +enc_bpftap(struct ifnet *ifp, struct mbuf *m, const struct secasvar *sav, + int32_t hhook_type, uint8_t enc, uint8_t af) +{ + struct enchdr hdr; + + if (hhook_type == HHOOK_TYPE_IPSEC_IN && + (enc & V_bpf_mask_in) == 0) + return; + else if (hhook_type == HHOOK_TYPE_IPSEC_OUT && + (enc & V_bpf_mask_out) == 0) + return; + if (bpf_peers_present(ifp->if_bpf) == 0) + return; + hdr.af = af; + hdr.spi = sav->spi; + hdr.flags = 0; + if (sav->alg_enc != SADB_EALG_NONE) + hdr.flags |= M_CONF; + if (sav->alg_auth != SADB_AALG_NONE) + hdr.flags |= M_AUTH; + bpf_mtap2(ifp->if_bpf, &hdr, sizeof(hdr), m); +} + /* * One helper hook function is used by any hook points. * + from hhook_type we can determine the packet direction: @@ -206,7 +236,6 @@ static int enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data, void *hdata, struct osd *hosd) { - struct enchdr hdr; struct ipsec_ctx_data *ctx; struct enc_softc *sc; struct ifnet *ifp, *rcvif; @@ -223,21 +252,7 @@ enc_hhook(int32_t hhook_type, int32_t hhook_id, void *udata, void *ctx_data, if (ctx->af != hhook_id) return (EPFNOSUPPORT); - if (((hhook_type == HHOOK_TYPE_IPSEC_IN && - (ctx->enc & V_bpf_mask_in) != 0) || - (hhook_type == HHOOK_TYPE_IPSEC_OUT && - (ctx->enc & V_bpf_mask_out) != 0)) && - bpf_peers_present(ifp->if_bpf) != 0) { - hdr.af = ctx->af; - hdr.spi = ctx->sav->spi; - hdr.flags = 0; - if (ctx->sav->alg_enc != SADB_EALG_NONE) - hdr.flags |= M_CONF; - if (ctx->sav->alg_auth != SADB_AALG_NONE) - hdr.flags |= M_AUTH; - bpf_mtap2(ifp->if_bpf, &hdr, sizeof(hdr), *ctx->mp); - } - + enc_bpftap(ifp, *ctx->mp, ctx->sav, hhook_type, ctx->enc, ctx->af); switch (hhook_type) { case HHOOK_TYPE_IPSEC_IN: if (ctx->enc == IPSEC_ENC_BEFORE) { @@ -284,12 +299,14 @@ enc_hhook(int32_t hhook_type, 
int32_t hhook_id, void *udata, void *ctx_data, /* Make a packet looks like it was received on enc(4) */ rcvif = (*ctx->mp)->m_pkthdr.rcvif; (*ctx->mp)->m_pkthdr.rcvif = ifp; - if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, NULL) != 0 || + if (pfil_run_hooks(ph, ctx->mp, ifp, pdir, ctx->inp) != 0 || *ctx->mp == NULL) { *ctx->mp = NULL; /* consumed by filter */ return (EACCES); } (*ctx->mp)->m_pkthdr.rcvif = rcvif; + enc_bpftap(ifp, *ctx->mp, ctx->sav, hhook_type, + IPSEC_ENC_AFTER_PFIL, ctx->af); return (0); } diff --git a/sys/net/if_enc.h b/sys/net/if_enc.h index 941ed12..616c621 100644 --- a/sys/net/if_enc.h +++ b/sys/net/if_enc.h @@ -33,6 +33,7 @@ struct ipsec_ctx_data { struct mbuf **mp; struct secasvar *sav; + struct inpcb *inp; uint8_t af; #define IPSEC_ENC_BEFORE 0x01 #define IPSEC_ENC_AFTER 0x02 diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 71f2ee8..bb79eec 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -87,6 +87,8 @@ CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */ +SYSCTL_DECL(_net_link); + /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); @@ -702,6 +704,9 @@ vnet_ether_init(__unused void *arg) if ((i = pfil_head_register(&V_link_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil link hook, " "error %d\n", __func__, i); + else + pfil_head_export_sysctl(&V_link_pfil_hook, + SYSCTL_STATIC_CHILDREN(_net_link)); #ifdef VIMAGE netisr_register_vnet(ðer_nh); #endif @@ -770,6 +775,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; int i, isr; + uint32_t ours; u_short ether_type; KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); @@ -780,7 +786,9 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) if (i != 0 || m == NULL) return; - } + ours = m->m_flags & (M_FASTFWD_OURS | 
M_IP_NEXTHOP); + } else + ours = 0; eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); @@ -819,6 +827,8 @@ ether_demux(struct ifnet *ifp, struct mbuf *m) */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); + if (ours) + m->m_flags |= ours; m_adj(m, ETHER_HDR_LEN); /* @@ -975,7 +985,6 @@ ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) } #endif -SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); #if 0 diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 2883de0..dda8d26 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -819,10 +819,14 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport) /* * Update lladdr for each port (new primary needs update - * as well, to switch from old lladdr to its 'real' one) + * as well, to switch from old lladdr to its 'real' one). + * We can skip this if the lagg is being destroyed. */ - SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) - if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN); + if (sc->sc_destroying == 0) { + SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries) + if_setlladdr(lp_ptr->lp_ifp, lladdr, + ETHER_ADDR_LEN); + } } else LAGG_WUNLOCK(sc); diff --git a/sys/net/if_pflog.h b/sys/net/if_pflog.h index 0faeb7d..06253e1 100644 --- a/sys/net/if_pflog.h +++ b/sys/net/if_pflog.h @@ -40,12 +40,18 @@ struct pfloghdr { char ruleset[PFLOG_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; +#ifdef PF_USER_INFO uid_t uid; pid_t pid; uid_t rule_uid; pid_t rule_pid; u_int8_t dir; u_int8_t pad[3]; +#else + u_int32_t ridentifier; + u_int8_t dir; + u_int8_t pad[sizeof(long) - 1]; +#endif }; #define PFLOG_HDRLEN sizeof(struct pfloghdr) diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 5c4ba63..74be9b7 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -235,6 +235,9 @@ struct pfsyncreq { char pfsyncr_syncdev[IFNAMSIZ]; struct in_addr pfsyncr_syncpeer; int pfsyncr_maxupdates; +#define PFSYNCF_OK 
0x00000001 +#define PFSYNCF_DEFER 0x00000002 +#define PFSYNCF_PUSH 0x00000004 int pfsyncr_defer; }; diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 5209bc8..4f6dfd9 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -3,6 +3,9 @@ /*- * Copyright (C) 2000 WIDE Project. + * Copyright (c) 2010 Hiroki Sato <hrs@FreeBSD.org> + * Copyright (c) 2013 Ermal Luçi <eri@FreeBSD.org> + * Copyright (c) 2017 Rubicon Communications, LLC (Netgate) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +34,7 @@ */ /* - * 6to4 interface, based on RFC3056. + * 6to4 interface, based on RFC3056 + 6rd (RFC5569) support. * * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. * There is no address mapping defined from IPv6 multicast address to IPv4 @@ -60,7 +63,7 @@ * ICMPv6: * - Redirects cannot be used due to the lack of link-local address. * - * stf interface does not have, and will not need, a link-local address. + * stf interface does not have, and will not need, a link-local address. * It seems to have no real benefit and does not help the above symptoms much. * Even if we assign link-locals to interface, we cannot really * use link-local unicast/multicast on top of 6to4 cloud (since there's no @@ -72,6 +75,12 @@ * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt * for details. The code tries to filter out some of malicious packets. * Note that there is no way to be 100% secure. + * + * 6rd (RFC5569 & RFC5969) extension is enabled when an IPv6 GUA other than + * 2002::/16 is assigned. The stf(4) recognizes a 32-bit just after + * prefixlen as the IPv4 address of the 6rd customer site. The + * prefixlen must be shorter than 32. 
+ * */ #include <sys/param.h> @@ -83,6 +92,7 @@ #include <sys/kernel.h> #include <sys/lock.h> #include <sys/module.h> +#include <sys/priv.h> #include <sys/protosw.h> #include <sys/proc.h> #include <sys/queue.h> @@ -106,8 +116,10 @@ #include <netinet/ip.h> #include <netinet/ip_var.h> #include <netinet/in_var.h> +#include <net/if_stf.h> #include <netinet/ip6.h> +#include <netinet6/in6_fib.h> #include <netinet6/ip6_var.h> #include <netinet6/in6_var.h> #include <netinet/ip_ecn.h> @@ -120,15 +132,41 @@ #include <security/mac/mac_framework.h> +#define STF_DEBUG 1 +#if STF_DEBUG > 3 +#define ip_sprintf(buf, a) \ + sprintf(buf, "%u.%u.%u.%u", \ + (ntohl((a)->s_addr)>>24)&0xFF, \ + (ntohl((a)->s_addr)>>16)&0xFF, \ + (ntohl((a)->s_addr)>>8)&0xFF, \ + (ntohl((a)->s_addr))&0xFF); +#endif + +#if STF_DEBUG +#define DEBUG_PRINTF(a, ...) \ + do { \ + if (V_stf_debug >= a) \ + printf(__VA_ARGS__); \ + } while (0) +#else +#define DEBUG_PRINTF(a, ...) +#endif + SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW, 0, "6to4 Interface"); +#if STF_DEBUG +static VNET_DEFINE(int, stf_debug) = 0; +#define V_stf_debug VNET(stf_debug) +SYSCTL_INT(_net_link_stf, OID_AUTO, stf_debug, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(stf_debug), 0, + "Enable displaying debug messages of stf interfaces"); +#endif + static int stf_permit_rfc1918 = 0; SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN, &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses"); -#define STFUNIT 0 - #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) /* @@ -139,9 +177,13 @@ SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN, struct stf_softc { struct ifnet *sc_ifp; - struct mtx sc_ro_mtx; + in_addr_t inaddr; + in_addr_t dstv4_addr; + in_addr_t srcv4_addr; + u_int v4prefixlen; u_int sc_fibnum; const struct encaptab *encap_cookie; + LIST_ENTRY(stf_softc) stf_list; }; #define STF2IFP(sc) ((sc)->sc_ifp) @@ -151,7 +193,11 @@ static const char stfname[] 
= "stf"; * Note that mutable fields in the softc are not currently locked. * We do lock sc_ro in stf_output though. */ +static struct mtx stf_mtx; static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface"); +static VNET_DEFINE(LIST_HEAD(, stf_softc), stf_softc_list); +#define V_stf_softc_list VNET(stf_softc_list) + static const int ip_stf_ttl = 40; extern struct domain inetdomain; @@ -167,8 +213,6 @@ static struct protosw in_stf_protosw = { .pr_usrreqs = &rip_usrreqs }; -static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; - static int stfmodevent(module_t, int, void *); static int stf_encapcheck(const struct mbuf *, int, int, void *); static int stf_getsrcifa6(struct ifnet *, struct in6_addr *, struct in6_addr *); @@ -179,68 +223,39 @@ static int stf_checkaddr4(struct stf_softc *, struct in_addr *, struct ifnet *); static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, struct ifnet *); +static struct sockaddr_in *stf_getin4addr_in6(struct stf_softc *, + struct sockaddr_in *, struct in6_addr, struct in6_addr, + struct in6_addr); +static struct sockaddr_in *stf_getin4addr(struct stf_softc *, + struct sockaddr_in *, struct in6_addr, struct in6_addr); static int stf_ioctl(struct ifnet *, u_long, caddr_t); -static int stf_clone_match(struct if_clone *, const char *); -static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t); -static int stf_clone_destroy(struct if_clone *, struct ifnet *); +static int stf_clone_create(struct if_clone *, int, caddr_t); +static void stf_clone_destroy(struct ifnet *); static struct if_clone *stf_cloner; static int -stf_clone_match(struct if_clone *ifc, const char *name) +stf_clone_create(struct if_clone *ifc, int unit, caddr_t params) { - int i; - - for(i = 0; stfnames[i] != NULL; i++) { - if (strcmp(stfnames[i], name) == 0) - return (1); - } - - return (0); -} - -static int -stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) -{ - int err, unit; struct stf_softc *sc; struct ifnet 
*ifp; - /* - * We can only have one unit, but since unit allocation is - * already locked, we use it to keep from allocating extra - * interfaces. - */ - unit = STFUNIT; - err = ifc_alloc_unit(ifc, &unit); - if (err != 0) - return (err); - sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO); ifp = STF2IFP(sc) = if_alloc(IFT_STF); if (ifp == NULL) { free(sc, M_STF); - ifc_free_unit(ifc, unit); return (ENOSPC); } ifp->if_softc = sc; sc->sc_fibnum = curthread->td_proc->p_fibnum; + if_initname(ifp, stfname, unit); - /* - * Set the name manually rather then using if_initname because - * we don't conform to the default naming convention for interfaces. - */ - strlcpy(ifp->if_xname, name, IFNAMSIZ); - ifp->if_dname = stfname; - ifp->if_dunit = IF_DUNIT_NONE; - - mtx_init(&(sc)->sc_ro_mtx, "stf ro", NULL, MTX_DEF); sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, &in_stf_protosw, sc); if (sc->encap_cookie == NULL) { if_printf(ifp, "attach failed\n"); + if_free(ifp); free(sc, M_STF); - ifc_free_unit(ifc, unit); return (ENOMEM); } @@ -250,39 +265,56 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); + + mtx_lock(&stf_mtx); + LIST_INSERT_HEAD(&V_stf_softc_list, sc, stf_list); + mtx_unlock(&stf_mtx); + return (0); } -static int -stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) +static void +stf_clone_destroy(struct ifnet *ifp) { struct stf_softc *sc = ifp->if_softc; int err; + mtx_lock(&stf_mtx); + LIST_REMOVE(sc, stf_list); + mtx_unlock(&stf_mtx); + err = encap_detach(sc->encap_cookie); KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); - mtx_destroy(&(sc)->sc_ro_mtx); bpfdetach(ifp); if_detach(ifp); if_free(ifp); free(sc, M_STF); - ifc_free_unit(ifc, STFUNIT); +} - return (0); +static void +vnet_stf_init(const void *unused __unused) +{ + + LIST_INIT(&V_stf_softc_list); } 
+VNET_SYSINIT(vnet_stf_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, vnet_stf_init, + NULL); + static int stfmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - stf_cloner = if_clone_advanced(stfname, 0, stf_clone_match, - stf_clone_create, stf_clone_destroy); + mtx_init(&stf_mtx, "stf_mtx", NULL, MTX_DEF); + stf_cloner = if_clone_simple(stfname, + stf_clone_create, stf_clone_destroy, 0); break; case MOD_UNLOAD: if_clone_detach(stf_cloner); + mtx_destroy(&stf_mtx); break; default: return (EOPNOTSUPP); @@ -298,60 +330,125 @@ static moduledata_t stf_mod = { }; DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(if_stf, 1); static int stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { struct ip ip; struct stf_softc *sc; - struct in_addr a, b, mask; struct in6_addr addr6, mask6; + struct sockaddr_in sin4addr, sin4mask; + DEBUG_PRINTF(1, "%s: enter\n", __func__); sc = (struct stf_softc *)arg; if (sc == NULL) - return 0; + return (0); if ((STF2IFP(sc)->if_flags & IFF_UP) == 0) - return 0; + return (0); /* IFF_LINK0 means "no decapsulation" */ if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0) - return 0; + return (0); if (proto != IPPROTO_IPV6) - return 0; + return (0); /* LINTED const cast */ m_copydata((struct mbuf *)(uintptr_t)m, 0, sizeof(ip), (caddr_t)&ip); if (ip.ip_v != 4) - return 0; + return (0); if (stf_getsrcifa6(STF2IFP(sc), &addr6, &mask6) != 0) return (0); + if (sc->srcv4_addr != INADDR_ANY) { + sin4addr.sin_addr.s_addr = sc->srcv4_addr; + sin4addr.sin_family = AF_INET; + } else + if (stf_getin4addr(sc, &sin4addr, addr6, mask6) == NULL) + return (0); + +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &addr6); + DEBUG_PRINTF(1, "%s: addr6 = %s\n", __func__, buf); + ip6_sprintf(buf, &mask6); + DEBUG_PRINTF(1, "%s: mask6 = %s\n", __func__, buf); + + ip_sprintf(buf, &sin4addr.sin_addr); + DEBUG_PRINTF(1, "%s: sin4addr.sin_addr = %s\n", 
__func__, buf); + ip_sprintf(buf, &ip.ip_src); + DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", __func__, buf); + ip_sprintf(buf, &ip.ip_dst); + DEBUG_PRINTF(1, "%s: ip.ip_dst = %s\n", __func__, buf); + } +#endif + /* * check if IPv4 dst matches the IPv4 address derived from the * local 6to4 address. * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... */ - if (bcmp(GET_V4(&addr6), &ip.ip_dst, sizeof(ip.ip_dst)) != 0) - return 0; + if (sin4addr.sin_addr.s_addr != ip.ip_dst.s_addr) { + DEBUG_PRINTF(1, + "%s: IPv4 dst address do not match the encoded address. " + "Ignore this packet.\n", __func__); + return (0); + } - /* - * check if IPv4 src matches the IPv4 address derived from the - * local 6to4 address masked by prefixmask. - * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 - * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 - */ - bzero(&a, sizeof(a)); - bcopy(GET_V4(&addr6), &a, sizeof(a)); - bcopy(GET_V4(&mask6), &mask, sizeof(mask)); - a.s_addr &= mask.s_addr; - b = ip.ip_src; - b.s_addr &= mask.s_addr; - if (a.s_addr != b.s_addr) - return 0; + if (IN6_IS_ADDR_6TO4(&addr6)) { + /* + * 6to4 (RFC 3056). + * Check if IPv4 src matches the IPv4 address derived + * from the local 6to4 address masked by prefixmask. 
+ * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 + * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 + */ + + memcpy(&sin4mask.sin_addr, GET_V4(&mask6), sizeof(sin4mask)); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &sin4addr.sin_addr); + DEBUG_PRINTF(1, "%s: sin4addr = %s\n", + __func__, buf); + ip_sprintf(buf, &ip.ip_src); + DEBUG_PRINTF(1, "%s: ip.ip_src = %s\n", + __func__, buf); + ip_sprintf(buf, &sin4mask.sin_addr); + DEBUG_PRINTF(1, "%s: sin4mask = %s\n", + __func__, buf); + } +#endif + + if ((sin4addr.sin_addr.s_addr & sin4mask.sin_addr.s_addr) != + (ip.ip_src.s_addr & sin4mask.sin_addr.s_addr)) { + DEBUG_PRINTF(1, + "%s: v4 address do not match expected address. " + "Ignore this packet.\n", __func__); + return (0); + } + } else { + /* 6rd (RFC 5569) */ + /* + * No restriction on the src address in the case of + * 6rd because the stf(4) interface always has a + * prefix which covers whole of IPv4 src address + * range. So, stf_output() will catch all of + * 6rd-capsuled IPv4 traffic with suspicious inner dst + * IPv4 address (i.e. the IPv6 destination address is + * one the admin does not like to route to outside), + * and then it discard them silently. 
+ */ + } /* stf interface makes single side match only */ return 32; @@ -362,29 +459,50 @@ stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask) { struct ifaddr *ia; struct in_ifaddr *ia4; + struct in6_addr addr6, mask6; struct in6_ifaddr *ia6; - struct sockaddr_in6 *sin6; + struct sockaddr_in sin4; + struct stf_softc *sc; struct in_addr in; + sc = ifp->if_softc; if_addr_rlock(ifp); TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { if (ia->ifa_addr->sa_family != AF_INET6) continue; - sin6 = (struct sockaddr_in6 *)ia->ifa_addr; - if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) - continue; + ia6 = (struct in6_ifaddr *)ia; + *&addr6 = ((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr; + *&mask6 = ia6->ia_prefixmask.sin6_addr; + if (sc->srcv4_addr != INADDR_ANY) + bcopy(&sc->srcv4_addr, &in, sizeof(in)); + else { + if (stf_getin4addr(sc, &sin4, addr6, mask6) == NULL) + continue; + bcopy(&sin4.sin_addr, &in, sizeof(in)); + } - bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) break; if (ia4 == NULL) continue; - ia6 = (struct in6_ifaddr *)ia; +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr); + DEBUG_PRINTF(1, "%s: ia->ifa_addr->sin6_addr = %s\n", + __func__, buf); + ip_sprintf(buf, &ia4->ia_addr.sin_addr); + DEBUG_PRINTF(1, "%s: ia4->ia_addr.sin_addr = %s\n", + __func__, buf); + } +#endif - *addr = sin6->sin6_addr; - *mask = ia6->ia_prefixmask.sin6_addr; + *addr = addr6; + *mask = mask6; if_addr_runlock(ifp); return (0); } @@ -399,8 +517,7 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, { struct stf_softc *sc; const struct sockaddr_in6 *dst6; - struct in_addr in4; - const void *ptr; + struct sockaddr_in dst4, src4; u_int8_t tos; struct ip *ip; struct ip6_hdr *ip6; @@ -450,18 +567,27 @@ stf_output(struct ifnet *ifp, 
struct mbuf *m, const struct sockaddr *dst, * Pickup the right outer dst addr from the list of candidates. * ip6_dst has priority as it may be able to give us shorter IPv4 hops. */ - ptr = NULL; - if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst)) - ptr = GET_V4(&ip6->ip6_dst); - else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) - ptr = GET_V4(&dst6->sin6_addr); - else { - m_freem(m); - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return ENETUNREACH; + DEBUG_PRINTF(1, "%s: dst addr selection\n", __func__); + if (stf_getin4addr_in6(sc, &dst4, addr6, mask6, + ip6->ip6_dst) == NULL) { + if (sc->dstv4_addr != INADDR_ANY) + dst4.sin_addr.s_addr = sc->dstv4_addr; + else if (stf_getin4addr_in6(sc, &dst4, addr6, mask6, + dst6->sin6_addr) == NULL) { + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (ENETUNREACH); + } } - bcopy(ptr, &in4, sizeof(in4)); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + ip_sprintf(buf, &dst4.sin_addr); + DEBUG_PRINTF(1, "%s: ip_dst = %s\n", __func__, buf); + } +#endif if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -483,8 +609,24 @@ stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, bzero(ip, sizeof(*ip)); - bcopy(GET_V4(&addr6), &ip->ip_src, sizeof(ip->ip_src)); - bcopy(&in4, &ip->ip_dst, sizeof(ip->ip_dst)); + if (sc->srcv4_addr != INADDR_ANY) + src4.sin_addr.s_addr = sc->srcv4_addr; + else if (stf_getin4addr(sc, &src4, addr6, mask6) == NULL) { + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (ENETUNREACH); + } + bcopy(&src4.sin_addr, &ip->ip_src, sizeof(ip->ip_src)); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ip->ip_src); + DEBUG_PRINTF(1, "%s: ip_src = %s\n", __func__, buf); + } +#endif + bcopy(&dst4.sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_p = IPPROTO_IPV6; ip->ip_ttl = ip_stf_ttl; ip->ip_len = htons(m->m_pkthdr.len); @@ -534,13 +676,6 @@ 
stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) } /* - * reject packets with private address range. - * (requirement from RFC3056 section 2 1st paragraph) - */ - if (isrfc1918addr(in)) - return -1; - - /* * reject packets with broadcast */ IN_IFADDR_RLOCK(&in_ifa_tracker); @@ -603,6 +738,7 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) struct mbuf *m; u_int8_t otos, itos; struct ifnet *ifp; + struct nhop6_basic nh6; int off; m = *mp; @@ -628,6 +764,17 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) mac_ifnet_create_mbuf(ifp, m); #endif +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip_sprintf(buf, &ip->ip_dst); + DEBUG_PRINTF(1, "%s: ip->ip_dst = %s\n", __func__, buf); + ip_sprintf(buf, &ip->ip_src); + DEBUG_PRINTF(1, "%s: ip->ip_src = %s\n", __func__, buf); + } +#endif /* * perform sanity check against outer src/dst. * for source, perform ingress filter as well. @@ -648,6 +795,17 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) } ip6 = mtod(m, struct ip6_hdr *); +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &ip6->ip6_dst); + DEBUG_PRINTF(1, "%s: ip6->ip6_dst = %s\n", __func__, buf); + ip6_sprintf(buf, &ip6->ip6_src); + DEBUG_PRINTF(1, "%s: ip6->ip6_src = %s\n", __func__, buf); + } +#endif /* * perform sanity check against inner src/dst. * for source, perform ingress filter as well. @@ -658,6 +816,35 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } + /* + * reject packets with private address range. 
+ * (requirement from RFC3056 section 2 1st paragraph) + */ + if ((IN6_IS_ADDR_6TO4(&ip6->ip6_src) && isrfc1918addr(&ip->ip_src)) || + (IN6_IS_ADDR_6TO4(&ip6->ip6_dst) && isrfc1918addr(&ip->ip_dst))) { + m_freem(m); + return (IPPROTO_DONE); + } + + /* + * Ignore if the destination is the same stf interface because + * all of valid IPv6 outgoing traffic should go interfaces + * except for it. + */ + if (fib6_lookup_nh_basic(sc->sc_fibnum, + &ip6->ip6_dst, 0, 0, 0, &nh6) != 0) { + DEBUG_PRINTF(1, "%s: no IPv6 dst. Ignored.\n", __func__); + m_free(m); + return (IPPROTO_DONE); + } + if ((nh6.nh_ifp == ifp) && + (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &nh6.nh_addr))) { + DEBUG_PRINTF(1, "%s: IPv6 dst is the same stf. Ignored.\n", + __func__); + m_free(m); + return (IPPROTO_DONE); + } + itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; if ((ifp->if_flags & IFF_LINK1) != 0) ip_ecn_egress(ECN_ALLOWED, &otos, &itos); @@ -667,7 +854,7 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) ip6->ip6_flow |= htonl((u_int32_t)itos << 20); m->m_pkthdr.rcvif = ifp; - + if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as @@ -680,6 +867,7 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); } + DEBUG_PRINTF(1, "%s: netisr_dispatch(NETISR_IPV6)\n", __func__); /* * Put the packet to the network layer input queue according to the * specified address family. 
@@ -693,35 +881,262 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } +static struct sockaddr_in * +stf_getin4addr_in6(struct stf_softc *sc, struct sockaddr_in *sin, + struct in6_addr addr6, struct in6_addr mask6, struct in6_addr in6) +{ + int i; + +#if STF_DEBUG > 3 + { + char buf[INET6_ADDRSTRLEN + 1]; + memset(&buf, 0, sizeof(buf)); + + ip6_sprintf(buf, &in6); + DEBUG_PRINTF(1, "%s: in6 = %s\n", __func__, buf); + ip6_sprintf(buf, &addr6); + DEBUG_PRINTF(1, "%s: addr6 = %s\n", __func__, buf); + ip6_sprintf(buf, &mask6); + DEBUG_PRINTF(1, "%s: mask6 = %s\n", __func__, buf); + } +#endif + + /* + * When (src addr & src mask) != (in6 & src mask), + * the dst is not in the 6rd domain. The IPv4 address must + * not be used. + */ + for (i = 0; i < sizeof(addr6); i++) { + if ((((u_char *)&addr6)[i] & ((u_char *)&mask6)[i]) != + (((u_char *)&in6)[i] & ((u_char *)&mask6)[i])) + return (NULL); + } + + /* After the mask check, use in6 instead of addr6. */ + return (stf_getin4addr(sc, sin, in6, mask6)); +} + +static struct sockaddr_in * +stf_getin4addr(struct stf_softc *sc, struct sockaddr_in *sin, + struct in6_addr addr6, struct in6_addr mask6) +{ + struct in_addr *in; + + DEBUG_PRINTF(1, "%s: enter.\n", __func__); +#if STF_DEBUG > 3 + { + char tmpbuf[INET6_ADDRSTRLEN + 1]; + memset(&tmpbuf, 0, INET6_ADDRSTRLEN); + + ip6_sprintf(tmpbuf, &addr6); + DEBUG_PRINTF(1, "%s: addr6 = %s\n", __func__, tmpbuf); + } +#endif + memset(sin, 0, sizeof(*sin)); + in = &sin->sin_addr; + if (IN6_IS_ADDR_6TO4(&addr6)) { + /* 6to4 (RFC 3056) */ + bcopy(GET_V4(&addr6), in, sizeof(*in)); + if (isrfc1918addr(in)) + return (NULL); + } else { + /* 6rd (RFC 5569) */ + struct in6_addr buf; + u_char *p = (u_char *)&buf; + u_char *q = (u_char *)&in->s_addr; + u_int residue = 0, v4residue = 0; + u_char mask, v4mask = 0; + int i, j; + u_int plen, loop; + + /* + * 6rd-relays IPv6 prefix is located at a 32-bit just + * after the prefix edge. 
+ */ + plen = in6_mask2len(&mask6, NULL); + if (64 < plen) { + DEBUG_PRINTF(1, "prefixlen is %d\n", plen); + return (NULL); + } + + loop = 4; /* Normal 6rd operation */ + memcpy(&buf, &addr6, sizeof(buf)); + if (sc->v4prefixlen != 0 && sc->v4prefixlen != 32) + v4residue = sc->v4prefixlen % 8; + p += plen / 8; + //plen -= 32; + + residue = plen % 8; + mask = ((u_char)(-1) >> (8 - residue)); + if (v4residue) { + loop++; + v4mask = ((u_char)(-1) << v4residue); + } + /* + * The p points head of the IPv4 address part in + * bytes. The residue is a bit-shift factor when + * prefixlen is not a multiple of 8. + */ + DEBUG_PRINTF(2, "residue = %d 0x%x\n", residue, mask); + for (j = (loop - (sc->v4prefixlen / 8)), + i = (loop - (sc->v4prefixlen / 8)); + i < loop; j++, i++) { + if (residue) { + q[i] = ((p[j] & mask) << (8 - residue)); + q[i] |= ((p[j + 1] >> residue) & mask); + DEBUG_PRINTF(2, + "FINAL i = %d q[%d] - p[%d/%d] %x\n", + i, q[i], p[j], p[j + 1] >> residue, q[i]); + } else { + q[i] = p[j]; + DEBUG_PRINTF(2, + "FINAL i = %d q[%d] - p[%d] %x\n", + i, q[i], p[j], q[i]); + } + } + if (v4residue) + q[loop - (sc->v4prefixlen / 8)] &= v4mask; + + if (sc->v4prefixlen > 0 && sc->v4prefixlen < 32) + in->s_addr |= sc->inaddr & + ((uint32_t)(-1) >> (32 - sc->v4prefixlen)); + } + +#if STF_DEBUG > 3 + { + char tmpbuf[INET6_ADDRSTRLEN + 1]; + memset(&tmpbuf, 0, INET_ADDRSTRLEN); + + ip_sprintf(tmpbuf, in); + DEBUG_PRINTF(1, "%s: in->in_addr = %s\n", __func__, tmpbuf); + } +#endif + + return (sin); +} + static int stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifaddr *ifa; + struct ifdrv *ifd; struct ifreq *ifr; - struct sockaddr_in6 *sin6; - struct in_addr addr; + struct sockaddr_in sin4; + struct stf_softc *sc, *sc_cur; + struct stfv4args args; + struct in6_addr addr6, mask6; int error, mtu; error = 0; + sc_cur = ifp->if_softc; + switch (cmd) { + case SIOCSDRVSPEC: + ifd = (struct ifdrv *)data; + error = priv_check(curthread, PRIV_NET_ADDIFADDR); + if (error) 
+ break; + if (ifd->ifd_cmd == STF_SV4NET) { + if (ifd->ifd_len != sizeof(args)) { + error = EINVAL; + break; + } + mtx_lock(&stf_mtx); + LIST_FOREACH(sc, &V_stf_softc_list, stf_list) { + if (sc == sc_cur) + continue; + if (sc->inaddr == 0 || sc->v4prefixlen == 0) + continue; + + if ((ntohl(sc->inaddr) & + ((uint32_t)(-1) << sc_cur->v4prefixlen)) == + ntohl(sc_cur->inaddr)) { + error = EEXIST; + mtx_unlock(&stf_mtx); + return (error); + } + if ((ntohl(sc_cur->inaddr) & + ((uint32_t)(-1) << sc->v4prefixlen)) == + ntohl(sc->inaddr)) { + error = EEXIST; + mtx_unlock(&stf_mtx); + return (error); + } + } + mtx_unlock(&stf_mtx); + bzero(&args, sizeof args); + error = copyin(ifd->ifd_data, &args, ifd->ifd_len); + if (error) + break; + + sc_cur->srcv4_addr = args.inaddr.s_addr; + sc_cur->inaddr = ntohl(args.inaddr.s_addr); + sc_cur->inaddr &= ((uint32_t)(-1) << args.prefix); + sc_cur->inaddr = htonl(sc_cur->inaddr); + sc_cur->v4prefixlen = args.prefix; + if (sc_cur->v4prefixlen == 0) + sc_cur->v4prefixlen = 32; + } else if (ifd->ifd_cmd == STF_SDSTV4) { + if (ifd->ifd_len != sizeof(args)) { + error = EINVAL; + break; + } + bzero(&args, sizeof args); + error = copyin(ifd->ifd_data, &args, ifd->ifd_len); + if (error) + break; + sc_cur->dstv4_addr = args.dstv4_addr.s_addr; + } else + error = EINVAL; + break; + case SIOCGDRVSPEC: + ifd = (struct ifdrv *)data; + if (ifd->ifd_len != sizeof(args)) { + error = EINVAL; + break; + } + if (ifd->ifd_cmd != STF_GV4NET) { + error = EINVAL; + break; + } + bzero(&args, sizeof args); + args.inaddr.s_addr = sc_cur->srcv4_addr; + args.dstv4_addr.s_addr = sc_cur->dstv4_addr; + args.prefix = sc_cur->v4prefixlen; + error = copyout(&args, ifd->ifd_data, ifd->ifd_len); + break; case SIOCSIFADDR: ifa = (struct ifaddr *)data; if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; break; } - sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { + if (stf_getin4addr(sc_cur, &sin4, + 
satosin6(ifa->ifa_addr)->sin6_addr, + satosin6(ifa->ifa_netmask)->sin6_addr) == NULL) { error = EINVAL; break; } - bcopy(GET_V4(&sin6->sin6_addr), &addr, sizeof(addr)); - if (isrfc1918addr(&addr)) { - error = EINVAL; - break; + /* + * Sanity check: if more than two interfaces have IFF_UP, do + * if_down() for all of them except for the specified one. + */ + mtx_lock(&stf_mtx); + LIST_FOREACH(sc, &V_stf_softc_list, stf_list) { + if (sc == sc_cur) + continue; + if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0) + continue; + if (IN6_ARE_ADDR_EQUAL(&addr6, + &ifatoia6(ifa)->ia_addr.sin6_addr)) { + error = EEXIST; + break; + } } + mtx_unlock(&stf_mtx); ifp->if_flags |= IFF_UP; + ifp->if_drv_flags |= IFF_DRV_RUNNING; break; case SIOCADDMULTI: @@ -733,6 +1148,13 @@ stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EAFNOSUPPORT; break; + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + else + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + break; + case SIOCGIFMTU: break; diff --git a/sys/net/if_stf.h b/sys/net/if_stf.h new file mode 100644 index 0000000..8a5f7f8 --- /dev/null +++ b/sys/net/if_stf.h @@ -0,0 +1,46 @@ +/* $FreeBSD$ */ +/* $KAME: if_stf.h,v 1.5 2001/10/12 10:09:17 keiichi Exp $ */ + +/*- + * Copyright (C) 2000 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _NET_IF_STF_H_ +#define _NET_IF_STF_H_ + +struct stfv4args { + struct in_addr inaddr; + struct in_addr dstv4_addr; + int prefix; +}; + +#define STF_SV4NET 1 +#define STF_GV4NET 2 +#define STF_SDSTV4 3 + +#endif /* _NET_IF_STF_H_ */ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 0af2128..2e1a16e 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -213,12 +213,11 @@ static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); -static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); -static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); +static void vlan_start(struct ifnet *ifp); static void vlan_unconfig(struct ifnet *ifp); static void 
vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); @@ -976,10 +975,12 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) /* NB: mtu is not set here */ ifp->if_init = vlan_init; - ifp->if_transmit = vlan_transmit; - ifp->if_qflush = vlan_qflush; + ifp->if_start = vlan_start; ifp->if_ioctl = vlan_ioctl; ifp->if_flags = VLAN_IFFLAGS; + IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); + ifp->if_snd.ifq_drv_maxlen = 0; + IFQ_SET_READY(&ifp->if_snd); ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ ifp->if_baudrate = 0; @@ -1020,6 +1021,7 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; + IFQ_PURGE(&ifp->if_snd); ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ if_free(ifp); @@ -1038,106 +1040,99 @@ vlan_init(void *foo __unused) } /* - * The if_transmit method for vlan(4) interface. + * The if_start method for vlan(4) interface. */ -static int -vlan_transmit(struct ifnet *ifp, struct mbuf *m) +static void +vlan_start(struct ifnet *ifp) { struct ifvlan *ifv; struct ifnet *p; + struct mbuf *m; struct m_tag *mtag; uint16_t tag; int error, len, mcast; ifv = ifp->if_softc; p = PARENT(ifv); - len = m->m_pkthdr.len; - mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; - - BPF_MTAP(ifp, m); - - /* - * Do not run parent's if_transmit() if the parent is not up, - * or parent's driver will cause a system crash. - */ - if (!UP_AND_RUNNING(p)) { - m_freem(m); - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return (ENETDOWN); - } - - /* - * Pad the frame to the minimum size allowed if told to. - * This option is in accord with IEEE Std 802.1Q, 2003 Ed., - * paragraph C.4.4.3.b. 
It can help to work around buggy - * bridges that violate paragraph C.4.4.3.a from the same - * document, i.e., fail to pad short frames after untagging. - * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but - * untagging it will produce a 62-byte frame, which is a runt - * and requires padding. There are VLAN-enabled network - * devices that just discard such runts instead or mishandle - * them somehow. - */ - if (V_soft_pad && p->if_type == IFT_ETHER) { - static char pad[8]; /* just zeros */ - int n; + for (;;) { + IFQ_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; + len = m->m_pkthdr.len; + mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; - for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len; - n > 0; n -= sizeof(pad)) - if (!m_append(m, min(n, sizeof(pad)), pad)) - break; + BPF_MTAP(ifp, m); - if (n > 0) { - if_printf(ifp, "cannot pad short frame\n"); - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + /* + * Do not run parent's if_transmit() if the parent is not up, + * or parent's driver will cause a system crash. + */ + if (!UP_AND_RUNNING(p)) { m_freem(m); - return (0); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return; } - } - /* - * If underlying interface can do VLAN tag insertion itself, - * just pass the packet along. However, we need some way to - * tell the interface where the packet came from so that it - * knows how to find the VLAN tag to use, so we attach a - * packet tag that holds it. - */ - if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q, - MTAG_8021Q_PCP_OUT, NULL)) != NULL) - tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0); - else - tag = ifv->ifv_tag; - if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { - m->m_pkthdr.ether_vtag = tag; - m->m_flags |= M_VLANTAG; - } else { - m = ether_vlanencap(m, tag); - if (m == NULL) { - if_printf(ifp, "unable to prepend VLAN header\n"); - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return (0); + /* + * Pad the frame to the minimum size allowed if told to. 
+ * This option is in accord with IEEE Std 802.1Q, 2003 Ed., + * paragraph C.4.4.3.b. It can help to work around buggy + * bridges that violate paragraph C.4.4.3.a from the same + * document, i.e., fail to pad short frames after untagging. + * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but + * untagging it will produce a 62-byte frame, which is a runt + * and requires padding. There are VLAN-enabled network + * devices that just discard such runts instead or mishandle + * them somehow. + */ + if (V_soft_pad && p->if_type == IFT_ETHER) { + static char pad[8]; /* just zeros */ + int n; + + for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len; + n > 0; n -= sizeof(pad)) + if (!m_append(m, min(n, sizeof(pad)), pad)) + break; + + if (n > 0) { + if_printf(ifp, "cannot pad short frame\n"); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + m_freem(m); + return; + } } - } - /* - * Send it, precisely as ether_output() would have. - */ - error = (p->if_transmit)(p, m); - if (error == 0) { - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - if_inc_counter(ifp, IFCOUNTER_OBYTES, len); - if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); - } else - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return (error); -} + /* + * If underlying interface can do VLAN tag insertion itself, + * just pass the packet along. However, we need some way to + * tell the interface where the packet came from so that it + * knows how to find the VLAN tag to use, so we attach a + * packet tag that holds it. 
+ */ + if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q, + MTAG_8021Q_PCP_OUT, NULL)) != NULL) + tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0); + else + tag = ifv->ifv_tag; + if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { + m->m_pkthdr.ether_vtag = tag; + m->m_flags |= M_VLANTAG; + } else { + m = ether_vlanencap(m, tag); + if (m == NULL) { + if_printf(ifp, "unable to prepend VLAN header\n"); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return; + } + } -/* - * The ifp->if_qflush entry point for vlan(4) is a no-op. - */ -static void -vlan_qflush(struct ifnet *ifp __unused) -{ + /* + * Send it, precisely as ether_output() would have. + */ + error = (p->if_transmit)(p, m); + if_inc_counter(ifp, + (error == 0) ? IFCOUNTER_OPACKETS : IFCOUNTER_OERRORS, 1); + } } static void @@ -1248,7 +1243,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) * We can handle non-ethernet hardware types as long as * they handle the tagging and headers themselves. */ - if (p->if_type != IFT_ETHER && + if (p->if_type != IFT_ETHER && p->if_type != IFT_BRIDGE && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 21d5095..00f29e0 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -1262,8 +1262,8 @@ iflib_txsd_alloc(iflib_txq_t txq) NULL, /* lockfuncarg */ &txq->ift_desc_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); - device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n", - sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); + device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", + (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } #ifdef IFLIB_DIAGNOSTICS diff --git a/sys/net/pfil.c b/sys/net/pfil.c index 3d0586a..25daa26 100644 --- a/sys/net/pfil.c +++ b/sys/net/pfil.c @@ -34,6 +34,7 @@ #include <sys/errno.h> #include <sys/lock.h> 
#include <sys/malloc.h> +#include <sys/sbuf.h> #include <sys/rmlock.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -104,7 +105,7 @@ pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0")); for (pfh = pfil_chain_get(dir, ph); pfh != NULL; pfh = TAILQ_NEXT(pfh, pfil_chain)) { - if (pfh->pfil_func != NULL) { + if (!(pfh->pfil_flags & PFIL_DISABLED) && pfh->pfil_func != NULL) { rv = (*pfh->pfil_func)(pfh->pfil_arg, &m, ifp, dir, inp); if (rv != 0 || m == NULL) @@ -237,6 +238,140 @@ pfil_head_unregister(struct pfil_head *ph) return (0); } +static int +pfil_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + struct rm_priotracker rmpt; + struct pfil_head *ph; + struct packet_filter_hook *pfh, *pfhtmp; + struct sbuf *sb; + pfil_chain_t npfl, *pfl; + char *new_order, *elm, *parse; + int i = 0, err = 0, hintlen, reqlen; + + hintlen = 0; + + ph = (struct pfil_head *)arg1; + if (ph == NULL || !PFIL_HOOKED(ph)) { + err = SYSCTL_OUT(req, "", 2); + return (err); + } + + if (arg2 == PFIL_IN) + pfl = &ph->ph_in; + else + pfl = &ph->ph_out; + + if (TAILQ_EMPTY(pfl)) { + err = SYSCTL_OUT(req, "", 2); + return (err); + } + + /* + * NOTE: This is needed to avoid witness(4) warnings. + */ + PFIL_RLOCK(ph, &rmpt); + TAILQ_FOREACH(pfh, pfl, pfil_chain) { + if (pfh->pfil_name != NULL) + hintlen = strlen(pfh->pfil_name); + else + hintlen += 2; + } + PFIL_RUNLOCK(ph, &rmpt); + + sb = sbuf_new(NULL, NULL, hintlen + 1, SBUF_AUTOEXTEND); + if (sb == NULL) + return (EINVAL); + + PFIL_RLOCK(ph, &rmpt); + TAILQ_FOREACH(pfh, pfl, pfil_chain) { + if (i > 0) + sbuf_printf(sb, ", "); + if (pfh->pfil_name != NULL) + sbuf_printf(sb, "%s%s", pfh->pfil_name, + pfh->pfil_flags & PFIL_DISABLED ? "*" : ""); + else + sbuf_printf(sb, "%s%s", "NA", + pfh->pfil_flags & PFIL_DISABLED ? 
"*" : ""); + i++; + } + PFIL_RUNLOCK(ph, &rmpt); + + sbuf_finish(sb); + + /* hint for sensible write buffer sizes */ + hintlen = sbuf_len(sb) + i * 2; + err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); + + if (err || !req->newptr) + return (err); + + if ((reqlen = req->newlen - req->newidx) > hintlen) + return (E2BIG); + new_order = malloc(reqlen + 1, M_TEMP, M_WAITOK|M_ZERO); + + err = SYSCTL_IN(req, new_order, reqlen); + if (err) + goto error; + new_order[reqlen] = '\0'; /* Just in case */ + parse = new_order; + + TAILQ_INIT(&npfl); + PFIL_WLOCK(ph); + while ((elm = strsep(&parse, " \t,")) != NULL) { + if (*elm == '\0') + continue; + TAILQ_FOREACH_SAFE(pfh, pfl, pfil_chain, pfhtmp) { + if (pfh->pfil_name != NULL) { + if (!strcmp(pfh->pfil_name, elm)) { + TAILQ_REMOVE(pfl, pfh, pfil_chain); + TAILQ_INSERT_TAIL(&npfl, pfh, pfil_chain); + pfh->pfil_flags &= ~PFIL_DISABLED; + break; + } + } else { + if (!strcmp(elm, "NA")) { + TAILQ_REMOVE(pfl, pfh, pfil_chain); + TAILQ_INSERT_TAIL(&npfl, pfh, pfil_chain); + pfh->pfil_flags &= ~PFIL_DISABLED; + break; + } + } + } + } + + TAILQ_FOREACH_SAFE(pfh, pfl, pfil_chain, pfhtmp) { + pfh->pfil_flags |= PFIL_DISABLED; + TAILQ_REMOVE(pfl, pfh, pfil_chain); + TAILQ_INSERT_TAIL(&npfl, pfh, pfil_chain); + } + + TAILQ_CONCAT(pfl, &npfl, pfil_chain); + +error: + PFIL_WUNLOCK(ph); + free(new_order, M_TEMP); + return (err); +} + +void +pfil_head_export_sysctl(struct pfil_head *ph, struct sysctl_oid_list *parent) +{ + struct sysctl_oid *root; + + root = SYSCTL_ADD_NODE(&ph->ph_clist, parent, OID_AUTO, "pfil", + CTLFLAG_RW, 0, "pfil(9) management"); + SYSCTL_ADD_PROC((void *)&ph->ph_clist, SYSCTL_CHILDREN(root), OID_AUTO, + "inbound", CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_SECURE3, + (void *)ph, PFIL_IN, pfil_sysctl_handler, "A", + "Inbound filter hooks"); + SYSCTL_ADD_PROC((void *)&ph->ph_clist, SYSCTL_CHILDREN(root), OID_AUTO, + "outbound", CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_SECURE3, + (void *)ph, PFIL_OUT, 
pfil_sysctl_handler, "A", + "Outbound filter hooks"); +} + /* * pfil_head_get() returns the pfil_head for a given key/dlt. */ @@ -264,6 +399,12 @@ pfil_head_get(int type, u_long val) int pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) { + return (pfil_add_named_hook(func, arg, NULL, flags, ph)); +} + +int +pfil_add_named_hook(pfil_func_t func, void *arg, char *name, int flags, struct pfil_head *ph) +{ struct packet_filter_hook *pfh1 = NULL; struct packet_filter_hook *pfh2 = NULL; int err; @@ -288,6 +429,8 @@ pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) if (flags & PFIL_IN) { pfh1->pfil_func = func; pfh1->pfil_arg = arg; + pfh1->pfil_name = name; + pfh1->pfil_flags &= ~PFIL_DISABLED; err = pfil_chain_add(&ph->ph_in, pfh1, flags & ~PFIL_OUT); if (err) goto locked_error; @@ -296,6 +439,8 @@ pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) if (flags & PFIL_OUT) { pfh2->pfil_func = func; pfh2->pfil_arg = arg; + pfh2->pfil_name = name; + pfh2->pfil_flags &= ~PFIL_DISABLED; err = pfil_chain_add(&ph->ph_out, pfh2, flags & ~PFIL_IN); if (err) { if (flags & PFIL_IN) diff --git a/sys/net/pfil.h b/sys/net/pfil.h index 64d7cf6..168739b 100644 --- a/sys/net/pfil.h +++ b/sys/net/pfil.h @@ -39,6 +39,7 @@ #include <sys/lock.h> #include <sys/rmlock.h> #include <net/vnet.h> +#include <sys/sysctl.h> struct mbuf; struct ifnet; @@ -56,11 +57,14 @@ struct packet_filter_hook { TAILQ_ENTRY(packet_filter_hook) pfil_chain; pfil_func_t pfil_func; void *pfil_arg; + int pfil_flags; + char *pfil_name; }; #define PFIL_IN 0x00000001 #define PFIL_OUT 0x00000002 #define PFIL_WAITOK 0x00000004 +#define PFIL_DISABLED 0x00000008 #define PFIL_ALL (PFIL_IN|PFIL_OUT) typedef TAILQ_HEAD(pfil_chain, packet_filter_hook) pfil_chain_t; @@ -86,6 +90,7 @@ struct pfil_head { struct rmlock ph_lock; /* Private lock storage */ int flags; #endif + struct sysctl_ctx_list ph_clist; union { u_long phu_val; void *phu_ptr; @@ -100,7 
+105,9 @@ VNET_DECLARE(struct rmlock, pfil_lock); /* Public functions for pfil hook management by packet filters. */ struct pfil_head *pfil_head_get(int, u_long); +void pfil_head_export_sysctl(struct pfil_head *, struct sysctl_oid_list *); int pfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); +int pfil_add_named_hook(pfil_func_t, void *, char *, int, struct pfil_head *); int pfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *); #define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 2b5ca39..e606440 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -154,6 +154,8 @@ extern struct rwlock pf_rules_lock; #define PF_RULES_RASSERT() rw_assert(&pf_rules_lock, RA_RLOCKED) #define PF_RULES_WASSERT() rw_assert(&pf_rules_lock, RA_WLOCKED) +extern struct sx pf_end_lock; + #define PF_MODVER 1 #define PFLOG_MODVER 1 #define PFSYNC_MODVER 1 @@ -468,6 +470,13 @@ struct pf_osfp_ioctl { int fp_getnum; /* DIOCOSFPGET number */ }; +struct pf_rule_actions { + u_int16_t qid; + u_int16_t pqid; + u_int32_t pdnpipe; + u_int32_t dnpipe; + u_int8_t flags; +}; union pf_rule_ptr { struct pf_rule *ptr; @@ -491,6 +500,7 @@ struct pf_rule { union pf_rule_ptr skip[PF_SKIP_COUNT]; #define PF_RULE_LABEL_SIZE 64 char label[PF_RULE_LABEL_SIZE]; + char schedule[PF_RULE_LABEL_SIZE]; char ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; @@ -523,12 +533,19 @@ struct pf_rule { u_int32_t limit; u_int32_t seconds; } max_src_conn_rate; - u_int32_t qid; - u_int32_t pqid; + u_int16_t qid; + u_int16_t pqid; + u_int32_t dnpipe; + u_int32_t pdnpipe; + u_int32_t free_flags; u_int32_t rt_listid; u_int32_t nr; u_int32_t prob; +#ifdef PF_USER_INFO uid_t cuid; +#else + u_int32_t cuid; +#endif pid_t cpid; counter_u64_t states_cur; @@ -569,6 +586,29 @@ struct pf_rule { u_int8_t allow_opts; u_int8_t rt; u_int8_t return_ttl; + +#ifndef DSCP_EF +/* Copied from altq_cdnr.h */ +/* diffserve code points */ +#define DSCP_MASK 0xfc +#define 
DSCP_CUMASK 0x03 +#define DSCP_VA 0xb0 +#define DSCP_EF 0xb8 +#define DSCP_AF11 0x28 +#define DSCP_AF12 0x30 +#define DSCP_AF13 0x38 +#define DSCP_AF21 0x48 +#define DSCP_AF22 0x50 +#define DSCP_AF23 0x58 +#define DSCP_AF31 0x68 +#define DSCP_AF32 0x70 +#define DSCP_AF33 0x78 +#define DSCP_AF41 0x88 +#define DSCP_AF42 0x90 +#define DSCP_AF43 0x98 +#define AF_CLASSMASK 0xe0 +#define AF_DROPPRECMASK 0x18 +#endif u_int8_t tos; u_int8_t set_tos; u_int8_t anchor_relative; @@ -609,6 +649,13 @@ struct pf_rule { #define PFRULE_REASSEMBLE_TCP 0x1000 #define PFRULE_SET_TOS 0x2000 +/* rule flags for TOS or DSCP differentiation */ +#define PFRULE_TOS 0x2000 +#define PFRULE_DSCP 0x4000 + +/* rule flags for handling ALTQ hashing required by certain disciplines */ +#define PFRULE_ALTQ_HASH 0x8000 + /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ @@ -735,6 +782,10 @@ struct pf_state { u_int32_t creation; u_int32_t expire; u_int32_t pfsync_time; + u_int16_t qid; + u_int16_t pqid; + u_int32_t pdnpipe; + u_int32_t dnpipe; u_int16_t tag; u_int8_t log; u_int8_t state_flags; @@ -743,6 +794,8 @@ struct pf_state { /* was PFSTATE_PFLOW 0x04 */ #define PFSTATE_NOSYNC 0x08 #define PFSTATE_ACK 0x10 +#define PFRULE_DN_IS_PIPE 0x40 +#define PFRULE_DN_IS_QUEUE 0x80 #define PFSTATE_SETPRIO 0x0200 #define PFSTATE_SETMASK (PFSTATE_SETPRIO) u_int8_t timeout; @@ -1085,11 +1138,13 @@ struct pfi_kif { #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ struct pf_pdesc { +#ifdef PF_USER_INFO struct { int done; uid_t uid; gid_t gid; } lookup; +#endif u_int64_t tot_len; /* Make Mickey money */ union { struct tcphdr *tcp; @@ -1107,6 +1162,7 @@ struct pf_pdesc { u_int16_t *sport; u_int16_t *dport; struct pf_mtag *pf_mtag; + struct pf_rule_actions act; u_int32_t p_len; /* total length of payload */ @@ -1258,6 +1314,11 @@ struct pfioc_state_kill { u_int psk_killed; }; +struct pfioc_schedule_kill { + int 
numberkilled; + char schedule[PF_RULE_LABEL_SIZE]; +}; + struct pfioc_states { int ps_len; union { @@ -1442,6 +1503,7 @@ struct pf_ifspeed { u_int32_t baudrate; }; #define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) +#define DIOCKILLSCHEDULE _IOWR('D', 96, struct pfioc_schedule_kill) #ifdef _KERNEL LIST_HEAD(pf_src_node_list, pf_src_node); diff --git a/sys/netgraph/ng_base.c b/sys/netgraph/ng_base.c index 8edd6a8..ed373b2 100644 --- a/sys/netgraph/ng_base.c +++ b/sys/netgraph/ng_base.c @@ -65,6 +65,10 @@ #include <machine/cpu.h> #include <vm/uma.h> +#include <sys/socket.h> +#include <net/if.h> +#include <net/if_var.h> + #include <net/netisr.h> #include <net/vnet.h> @@ -246,6 +250,8 @@ int ng_path_parse(char *addr, char **node, char **path, char **hook); void ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3); void ng_unname(node_p node); +extern void (*ng_ether_attach_p)(struct ifnet *ifp); + /* Our own netgraph malloc type */ MALLOC_DEFINE(M_NETGRAPH, "netgraph", "netgraph structures and ctrl messages"); MALLOC_DEFINE(M_NETGRAPH_MSG, "netgraph_msg", "netgraph name storage"); @@ -580,6 +586,13 @@ static const struct ng_cmdlist ng_generic_cmds[] = { &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, + { + NGM_GENERIC_COOKIE, + NGM_ETHER_ATTACH, + "attach", + &ng_parse_string_type, + NULL + }, { 0 } }; @@ -2914,6 +2927,17 @@ ng_generic_msg(node_p here, item_p item, hook_p lasthook) break; } + case NGM_ETHER_ATTACH: + { + struct ifnet *ifp; + ifp = ifunit((char *)msg->data); + if (ifp && ng_ether_attach_p != NULL) { + ng_ether_attach_p(ifp); + } + + break; + } + case NGM_TEXT_CONFIG: case NGM_TEXT_STATUS: /* diff --git a/sys/netgraph/ng_eiface.c b/sys/netgraph/ng_eiface.c index db4e87c..af80133 100644 --- a/sys/netgraph/ng_eiface.c +++ b/sys/netgraph/ng_eiface.c @@ -45,6 +45,7 @@ #include <net/if_var.h> #include <net/if_media.h> #include <net/if_types.h> +#include <net/if_dl.h> #include <net/netisr.h> #include <net/route.h> #include <net/vnet.h> @@ 
-68,6 +69,13 @@ static const struct ng_cmdlist ng_eiface_cmdlist[] = { }, { NGM_EIFACE_COOKIE, + NGM_EIFACE_SET_IFNAME, + "setifname", + &ng_parse_string_type, + NULL + }, + { + NGM_EIFACE_COOKIE, NGM_EIFACE_SET, "set", &ng_parse_enaddr_type, @@ -475,6 +483,11 @@ ng_eiface_rcvmsg(node_p node, item_p item, hook_p lasthook) struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; + char *new_name; + size_t namelen, onamelen; + struct sockaddr_dl *sdl = NULL; + struct ifaddr *ifa = NULL; + node_p ethernode; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { @@ -500,6 +513,46 @@ ng_eiface_rcvmsg(node_p node, item_p item, hook_p lasthook) } strlcpy(resp->data, ifp->if_xname, IFNAMSIZ); break; + case NGM_EIFACE_SET_IFNAME: + new_name = (char *)msg->data; + + /* Deny request if interface is UP */ + if ((ifp->if_flags & IFF_UP) != 0) { + error = EBUSY; + break; + } + + EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); + + ethernode = ng_name2noderef(node, ifp->if_xname); + if (ethernode != NULL) + ng_name_node(ethernode, new_name); + + IF_ADDR_WLOCK(ifp); + strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); + ifa = ifp->if_addr; + sdl = (struct sockaddr_dl *)ifa->ifa_addr; + namelen = strlen(new_name) + 1; + onamelen = sdl->sdl_nlen; + /* + * Move the address if needed. This is safe because we + * allocate space for a name of length IFNAMSIZ when we + * create this in if_attach(). 
+ */ + if (namelen != onamelen) { + bcopy(sdl->sdl_data + onamelen, + sdl->sdl_data + namelen, sdl->sdl_alen); + } + bcopy(new_name, sdl->sdl_data, namelen); + sdl->sdl_nlen = namelen; + sdl = (struct sockaddr_dl *)ifa->ifa_netmask; + bzero(sdl->sdl_data, onamelen); + while (namelen != 0) + sdl->sdl_data[--namelen] = 0xff; + IF_ADDR_WUNLOCK(ifp); + + EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); + break; case NGM_EIFACE_GET_IFADDRS: { diff --git a/sys/netgraph/ng_eiface.h b/sys/netgraph/ng_eiface.h index 6fc1c5b..9f1509b 100644 --- a/sys/netgraph/ng_eiface.h +++ b/sys/netgraph/ng_eiface.h @@ -54,6 +54,7 @@ enum { NGM_EIFACE_GET_IFNAME = 1, /* get the interface name */ NGM_EIFACE_GET_IFADDRS, /* returns list of addresses */ NGM_EIFACE_SET, /* set ethernet address */ + NGM_EIFACE_SET_IFNAME, }; #endif /* _NETGRAPH_NG_EIFACE_H_ */ diff --git a/sys/netgraph/ng_ether.c b/sys/netgraph/ng_ether.c index 42c4f0e..6eed9ca 100644 --- a/sys/netgraph/ng_ether.c +++ b/sys/netgraph/ng_ether.c @@ -314,7 +314,8 @@ ng_ether_attach(struct ifnet *ifp) * eiface nodes, which may be problematic due to naming * clashes. 
*/ - if ((node = ng_name2noderef(NULL, ifp->if_xname)) != NULL) { + ng_ether_sanitize_ifname(ifp->if_xname, name); + if ((node = ng_name2noderef(NULL, name)) != NULL) { NG_NODE_UNREF(node); return; } @@ -341,7 +342,6 @@ ng_ether_attach(struct ifnet *ifp) priv->hwassist = ifp->if_hwassist; /* Try to give the node the same name as the interface */ - ng_ether_sanitize_ifname(ifp->if_xname, name); if (ng_name_node(node, name) != 0) log(LOG_WARNING, "%s: can't name node %s\n", __func__, name); } diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c index b5f5626..2a342b6 100644 --- a/sys/netgraph/ng_iface.c +++ b/sys/netgraph/ng_iface.c @@ -58,6 +58,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/errno.h> +#include <sys/eventhandler.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mbuf.h> @@ -70,6 +71,7 @@ #include <sys/libkern.h> #include <net/if.h> +#include <net/if_dl.h> #include <net/if_var.h> #include <net/if_types.h> #include <net/bpf.h> @@ -154,6 +156,13 @@ static const struct ng_cmdlist ng_iface_cmds[] = { }, { NGM_IFACE_COOKIE, + NGM_IFACE_SET_IFNAME, + "setifname", + &ng_parse_string_type, + NULL + }, + { + NGM_IFACE_COOKIE, NGM_IFACE_POINT2POINT, "point2point", NULL, @@ -586,6 +595,10 @@ ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook) struct ng_mesg *resp = NULL; int error = 0; struct ng_mesg *msg; + char *new_name; + size_t namelen, onamelen; + struct sockaddr_dl *sdl = NULL; + struct ifaddr *ifa = NULL; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { @@ -600,6 +613,49 @@ ng_iface_rcvmsg(node_p node, item_p item, hook_p lasthook) strlcpy(resp->data, ifp->if_xname, IFNAMSIZ); break; + case NGM_IFACE_SET_IFNAME: + + new_name = (char *)msg->data; + /* Announce the departure of the interface. 
*/ + //new_name[strlen(new_name)] = '\0'; + + /* Deny request if interface is UP */ + if ((ifp->if_flags & IFF_UP) != 0) { + error = EBUSY; + break; + } + + //rt_ifannouncemsg(ifp, IFAN_DEPARTURE); + EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); + + IF_ADDR_WLOCK(ifp); + strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); + ifa = ifp->if_addr; + sdl = (struct sockaddr_dl *)ifa->ifa_addr; + namelen = strlen(new_name) + 1; + onamelen = sdl->sdl_nlen; + /* + * Move the address if needed. This is safe because we + * allocate space for a name of length IFNAMSIZ when we + * create this in if_attach(). + */ + if (namelen != onamelen) { + bcopy(sdl->sdl_data + onamelen, + sdl->sdl_data + namelen, sdl->sdl_alen); + } + bcopy(new_name, sdl->sdl_data, namelen); + sdl->sdl_nlen = namelen; + sdl = (struct sockaddr_dl *)ifa->ifa_netmask; + bzero(sdl->sdl_data, onamelen); + while (namelen != 0) + sdl->sdl_data[--namelen] = 0xff; + IF_ADDR_WUNLOCK(ifp); + + EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); + /* Announce the return of the interface. */ + //rt_ifannouncemsg(ifp, IFAN_ARRIVAL); + break; + case NGM_IFACE_POINT2POINT: case NGM_IFACE_BROADCAST: { diff --git a/sys/netgraph/ng_iface.h b/sys/netgraph/ng_iface.h index 3497e9d..4bbae3b 100644 --- a/sys/netgraph/ng_iface.h +++ b/sys/netgraph/ng_iface.h @@ -68,6 +68,7 @@ enum { NGM_IFACE_POINT2POINT, NGM_IFACE_BROADCAST, NGM_IFACE_GET_IFINDEX, + NGM_IFACE_SET_IFNAME, }; #define MTAG_NGIF NGM_IFACE_COOKIE diff --git a/sys/netgraph/ng_message.h b/sys/netgraph/ng_message.h index da531f0..d17ce46 100644 --- a/sys/netgraph/ng_message.h +++ b/sys/netgraph/ng_message.h @@ -138,6 +138,7 @@ enum { NGM_ASCII2BINARY= (13|NGM_READONLY|NGM_HASREPLY), /* (optional) Get/set text config. 
*/ NGM_TEXT_CONFIG = 14, + NGM_ETHER_ATTACH = 15, }; /* diff --git a/sys/netinet/in.c b/sys/netinet/in.c index f42375b..f2e15cc 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -71,7 +71,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/udp_var.h> static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *); -static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *); +static int in_difaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *); static void in_socktrim(struct sockaddr_in *); static void in_purgemaddrs(struct ifnet *); @@ -245,7 +245,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, break; case SIOCDIFADDR: sx_xlock(&in_control_sx); - error = in_difaddr_ioctl(data, ifp, td); + error = in_difaddr_ioctl(cmd, data, ifp, td); sx_xunlock(&in_control_sx); return (error); case OSIOCAIFADDR: /* 9.x compat */ @@ -390,7 +390,7 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) IF_ADDR_RUNLOCK(ifp); if (ia != NULL) - (void )in_difaddr_ioctl(data, ifp, td); + (void )in_difaddr_ioctl(cmd, data, ifp, td); ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK); ia = (struct in_ifaddr *)ifa; @@ -528,7 +528,7 @@ fail2: fail1: if (ia->ia_ifa.ifa_carp) - (*carp_detach_p)(&ia->ia_ifa); + (*carp_detach_p)(&ia->ia_ifa, true); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); @@ -545,7 +545,7 @@ fail1: } static int -in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td) +in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { const struct ifreq *ifr = (struct ifreq *)data; const struct sockaddr_in *addr = (const struct sockaddr_in *) @@ -618,7 +618,8 @@ in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td) in_ifadown(&ia->ia_ifa, 1); if (ia->ia_ifa.ifa_carp) - (*carp_detach_p)(&ia->ia_ifa); + (*carp_detach_p)(&ia->ia_ifa, + (cmd == SIOCDIFADDR) ? 
true : false); /* * If this is the last IPv4 address configured on this @@ -1294,6 +1295,9 @@ in_lltable_delete_entry(struct lltable *llt, struct llentry *lle) #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif + /* cancel timer */ + if (callout_stop(&lle->lle_timer) > 0) + LLE_REMREF(lle); llentry_free(lle); } diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 7855af2..ac948e0 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -115,7 +115,6 @@ struct carp_softc { int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 - int sc_init_counter; uint64_t sc_counter; /* authentication */ @@ -587,7 +586,6 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa; struct carp_softc *sc; - uint64_t tmp_counter; struct timeval sc_tv, ch_tv; /* verify that the VHID is valid on the receiving interface */ @@ -627,14 +625,20 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) goto out; } - tmp_counter = ntohl(ch->carp_counter[0]); - tmp_counter = tmp_counter<<32; - tmp_counter += ntohl(ch->carp_counter[1]); - - /* XXX Replay protection goes here */ - - sc->sc_init_counter = 0; - sc->sc_counter = tmp_counter; + if (!bcmp(&sc->sc_counter, ch->carp_counter, + sizeof(ch->carp_counter))) { + /* Do not log duplicates from non simplex interfaces */ + if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { + CARPSTATS_INC(carps_badauth); + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + CARP_UNLOCK(sc); + CARP_LOG("%s, replay or network loop detected.\n", + ifp->if_xname); + } else + CARP_UNLOCK(sc); + m_freem(m); + return; + } sc_tv.tv_sec = sc->sc_advbase; sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; @@ -698,13 +702,12 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { struct m_tag *mtag; - if (sc->sc_init_counter) { + if (!sc->sc_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = 
arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); - } else - sc->sc_counter++; + } ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); @@ -770,7 +773,8 @@ carp_send_ad_error(struct carp_softc *sc, int error) char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); - carp_demote_adj(V_carp_senderr_adj, msg); + if (V_carp_senderr_adj > 0) + carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else { @@ -780,7 +784,8 @@ carp_send_ad_error(struct carp_softc *sc, int error) char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, sc->sc_carpdev->if_xname); - carp_demote_adj(-V_carp_senderr_adj, msg); + if (V_carp_senderr_adj > 0) + carp_demote_adj(-V_carp_senderr_adj, msg); sc->sc_sendad_errors = 0; } else sc->sc_sendad_errors = 0; @@ -1121,14 +1126,11 @@ carp_forus(struct ifnet *ifp, u_char *dhost) CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { - CARP_LOCK(sc); if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), ETHER_ADDR_LEN)) { - CARP_UNLOCK(sc); CIF_UNLOCK(ifp->if_carp); return (1); } - CARP_UNLOCK(sc); } CIF_UNLOCK(ifp->if_carp); @@ -1473,9 +1475,9 @@ carp_alloc(struct ifnet *ifp) sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + sc->sc_counter = 0; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ - sc->sc_init_counter = 1; sc->sc_state = INIT; sc->sc_ifasiz = sizeof(struct ifaddr *); @@ -1875,7 +1877,7 @@ carp_attach(struct ifaddr *ifa, int vhid) } void -carp_detach(struct ifaddr *ifa) +carp_detach(struct ifaddr *ifa, bool destroy) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; @@ -1921,12 +1923,13 @@ carp_detach(struct ifaddr *ifa) carp_hmac_prepare(sc); carp_sc_state(sc); - if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) + if (destroy && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) carp_destroy(sc); else CARP_UNLOCK(sc); - CIF_FREE(cif); + if 
(destroy) + CIF_FREE(cif); sx_xunlock(&carp_sx); } diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h index 5b7e506..9c6edf6 100644 --- a/sys/netinet/ip_carp.h +++ b/sys/netinet/ip_carp.h @@ -138,7 +138,7 @@ struct carpreq { #ifdef _KERNEL int carp_ioctl(struct ifreq *, u_long, struct thread *); int carp_attach(struct ifaddr *, int); -void carp_detach(struct ifaddr *); +void carp_detach(struct ifaddr *, bool); void carp_carpdev_state(struct ifnet *); int carp_input(struct mbuf **, int *, int); int carp6_input (struct mbuf **, int *, int); @@ -154,7 +154,7 @@ int carp_forus(struct ifnet *, u_char *); /* net/if.c */ extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); extern int (*carp_attach_p)(struct ifaddr *, int); -extern void (*carp_detach_p)(struct ifaddr *); +extern void (*carp_detach_p)(struct ifaddr *, bool); extern void (*carp_linkstate_p)(struct ifnet *); extern void (*carp_demote_adj_p)(int, char *); extern int (*carp_master_p)(struct ifaddr *); diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h index 377b5b0..0d5efa6 100644 --- a/sys/netinet/ip_dummynet.h +++ b/sys/netinet/ip_dummynet.h @@ -129,7 +129,7 @@ struct dn_link { * XXX what about burst ? */ int32_t link_nr; - int bandwidth; /* bit/s or bits/tick. */ + uint32_t bandwidth; /* bit/s or bits/tick. */ int delay; /* ms and ticks */ uint64_t burst; /* scaled. bits*Hz XXX */ }; @@ -214,7 +214,7 @@ struct dn_profile { char name[ED_MAX_NAME_LEN]; int link_nr; int loss_level; - int bandwidth; // XXX use link bandwidth? + uint32_t bandwidth; // XXX use link bandwidth? 
int samples_no; /* actual len of samples[] */ int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ }; diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index ddee5bf..495e659 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -110,6 +110,8 @@ typedef struct _ip_fw3_opheader { #define IP_FW_DUMP_SOPTCODES 116 /* Dump available sopts/versions */ #define IP_FW_DUMP_SRVOBJECTS 117 /* Dump existing named objects */ +#define IP_FW_TABLE_XZEROCNT 118 /* zero table entry counters */ + #define IP_FW_NAT64STL_CREATE 130 /* Create stateless NAT64 instance */ #define IP_FW_NAT64STL_DESTROY 131 /* Destroy stateless NAT64 instance */ #define IP_FW_NAT64STL_CONFIG 132 /* Modify stateless NAT64 instance */ @@ -283,6 +285,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_EXTERNAL_INSTANCE, /* arg1=id of eaction handler instance */ O_EXTERNAL_DATA, /* variable length data */ + O_MACADDR2_LOOKUP, /* arg1=table number, u32=value */ + O_LAST_OPCODE /* not an opcode! */ }; @@ -737,7 +741,8 @@ struct _ipfw_dyn_rule { #define IPFW_TABLE_INTERFACE 2 /* Table for holding interface names */ #define IPFW_TABLE_NUMBER 3 /* Table for holding ports/uid/gid/etc */ #define IPFW_TABLE_FLOW 4 /* Table for holding flow data */ -#define IPFW_TABLE_MAXTYPE 4 /* Maximum valid number */ +#define IPFW_TABLE_MAC2 5 /* Table for holding 2 mac addresses */ +#define IPFW_TABLE_MAXTYPE 5 /* Maximum valid number */ #define IPFW_TABLE_CIDR IPFW_TABLE_ADDR /* compat */ @@ -849,6 +854,11 @@ struct tflow_entry { } a; }; +struct mac_entry { + u_char addr[12]; /* dst[6] + src[6] */ + u_char mask[12]; /* dst[6] + src[6] */ +}; + typedef struct _ipfw_table_value { uint32_t tag; /* O_TAG/O_TAGGED */ uint32_t pipe; /* O_PIPE/O_QUEUE */ @@ -876,12 +886,17 @@ typedef struct _ipfw_obj_tentry { uint8_t spare0; uint16_t idx; /* Table name index */ uint16_t spare1; + uint64_t bcnt; /* Byte counter */ + uint64_t mac; /* MAC address for mixed enties */ + uint64_t pcnt; /* Packet counter */ + time_t 
timestamp; /* Timestamp of last match */ union { /* Longest field needs to be aligned by 8-byte boundary */ struct in_addr addr; /* IPv4 address */ uint32_t key; /* uid/gid/port */ struct in6_addr addr6; /* IPv6 address */ char iface[IF_NAMESIZE]; /* interface name */ + struct mac_entry mac; /* 2 mac addr:mask */ struct tflow_entry flow; } k; union { diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 8c37a5e..c7e586e 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -314,6 +314,9 @@ ip_init(void) if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + else + pfil_head_export_sysctl(&V_inet_pfil_hook, + SYSCTL_STATIC_CHILDREN(_net_inet_ip)); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, &V_ipsec_hhh_in[HHOOK_IPSEC_INET], diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index da56300..d5ee8ed 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -224,9 +224,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; int have_ia_ref; -#if defined(IPSEC) || defined(IPSEC_SUPPORT) - int no_route_but_check_spd = 0; -#endif + int no_route_but_check = 0; + M_ASSERTPKTHDR(m); if (inp != NULL) { @@ -383,10 +382,11 @@ again: * There is no route for this packet, but it is * possible that a matching SPD entry exists. */ - no_route_but_check_spd = 1; mtu = 0; /* Silence GCC warning. */ - goto sendit; #endif + no_route_but_check = 1; + goto sendit; + IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; @@ -556,19 +556,14 @@ sendit: goto done; } } - /* - * Check if there was a route for this packet; return error if not. - */ - if (no_route_but_check_spd) { - IPSTAT_INC(ips_noroute); - error = EHOSTUNREACH; - goto bad; - } /* Update variables that are affected by ipsec4_output(). 
*/ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; #endif /* IPSEC */ + if (ifp == NULL) + ifp = V_loif; + /* Jump over all PFIL processing if hooks are not active. */ if (PFIL_HOOKED(&V_inet_pfil_hook)) { switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) { @@ -593,6 +588,15 @@ sendit: } } + /* + * Check if there was a route for this packet; return error if not. + */ + if (no_route_but_check) { + IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; + goto bad; + } + /* 127/8 must not appear on wire - RFC1122. */ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 4df3564..d473995 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -624,7 +624,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, */ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) { if (carp_attached) - (*carp_detach_p)(&ia->ia_ifa); + (*carp_detach_p)(&ia->ia_ifa, false); goto out; } } @@ -1245,7 +1245,7 @@ in6_purgeaddr(struct ifaddr *ifa) int plen, error; if (ifa->ifa_carp) - (*carp_detach_p)(ifa); + (*carp_detach_p)(ifa, true); /* * Remove the loopback route to the interface address. 
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 436efc7..efff20c 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -189,6 +189,7 @@ SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, #endif +SYSCTL_DECL(_net_inet6_ip6); VNET_DEFINE(struct pfil_head, inet6_pfil_hook); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); @@ -230,6 +231,9 @@ ip6_init(void) if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + else + pfil_head_export_sysctl(&V_inet6_pfil_hook, + SYSCTL_STATIC_CHILDREN(_net_inet6_ip6)); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index e120f65..fd53da9 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -149,6 +149,15 @@ sysctl_def_policy(SYSCTL_HANDLER_ARGS) * 0 take anything */ VNET_DEFINE(int, crypto_support) = CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE; + +/* + * Use asynchronous mode to parallelize crypto jobs: + * + * 0 - disabled + * 1 - enabled + */ +VNET_DEFINE(int, async_crypto) = 0; + /* * TCP/UDP checksum handling policy for transport mode NAT-T (RFC3948) * @@ -195,6 +204,9 @@ SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, SYSCTL_INT(_net_inet_ipsec, OID_AUTO, crypto_support, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(crypto_support), 0, "Crypto driver selection."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, async_crypto, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(async_crypto), 0, + "Use asynchronous mode to parallelize crypto jobs."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, check_policy_history, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(check_policy_history), 0, "Use strict check of inbound packets to security policy compliance."); @@ -563,7 +575,8 @@ ipsec4_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) } static struct secpolicy * -ipsec4_getpolicy(const struct mbuf *m, 
struct inpcb *inp, u_int dir) +ipsec4_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir, + int needport) { struct secpolicyindex spidx; struct secpolicy *sp; @@ -572,8 +585,7 @@ ipsec4_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) if (sp == NULL && key_havesp(dir)) { /* Make an index to look for a policy. */ ipsec4_setspidx_ipaddr(m, &spidx); - /* Fill ports in spidx if we have inpcb. */ - ipsec4_get_ulp(m, &spidx, inp != NULL); + ipsec4_get_ulp(m, &spidx, needport); spidx.dir = dir; sp = key_allocsp(&spidx, dir); } @@ -586,12 +598,13 @@ ipsec4_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) * Check security policy for *OUTBOUND* IPv4 packet. */ struct secpolicy * -ipsec4_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) +ipsec4_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error, + int needport) { struct secpolicy *sp; *error = 0; - sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_OUTBOUND, needport); if (sp != NULL) sp = ipsec_checkpolicy(sp, inp, error); if (sp == NULL) { @@ -623,7 +636,7 @@ ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) struct secpolicy *sp; int result; - sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND); + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); if (result != 0) @@ -731,7 +744,8 @@ ipsec6_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) } static struct secpolicy * -ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) +ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir, + int needport) { struct secpolicyindex spidx; struct secpolicy *sp; @@ -740,8 +754,7 @@ ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) if (sp == NULL && key_havesp(dir)) { /* Make an index to look for a policy. */ ipsec6_setspidx_ipaddr(m, &spidx); - /* Fill ports in spidx if we have inpcb. 
*/ - ipsec6_get_ulp(m, &spidx, inp != NULL); + ipsec6_get_ulp(m, &spidx, needport); spidx.dir = dir; sp = key_allocsp(&spidx, dir); } @@ -754,12 +767,13 @@ ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) * Check security policy for *OUTBOUND* IPv6 packet. */ struct secpolicy * -ipsec6_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) +ipsec6_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error, + int needport) { struct secpolicy *sp; *error = 0; - sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_OUTBOUND, needport); if (sp != NULL) sp = ipsec_checkpolicy(sp, inp, error); if (sp == NULL) { @@ -791,7 +805,7 @@ ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) struct secpolicy *sp; int result; - sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND); + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); if (result) diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index 147412f..1e73c01 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -253,8 +253,9 @@ struct ipsecstat { #include <sys/counter.h> struct ipsec_ctx_data; -#define IPSEC_INIT_CTX(_ctx, _mp, _sav, _af, _enc) do { \ +#define IPSEC_INIT_CTX(_ctx, _mp, _inp, _sav, _af, _enc) do { \ (_ctx)->mp = (_mp); \ + (_ctx)->inp = (_inp); \ (_ctx)->sav = (_sav); \ (_ctx)->af = (_af); \ (_ctx)->enc = (_enc); \ @@ -282,6 +283,7 @@ VNET_DECLARE(int, ip4_ipsec_dfbit); VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); +VNET_DECLARE(int, async_crypto); VNET_DECLARE(int, natt_cksum_policy); #define IPSECSTAT_INC(name) \ @@ -295,6 +297,7 @@ VNET_DECLARE(int, natt_cksum_policy); #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) +#define V_async_crypto VNET(async_crypto) #define V_natt_cksum_policy VNET(natt_cksum_policy) 
#define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) @@ -319,7 +322,7 @@ int ipsec_if_input(struct mbuf *, struct secasvar *, uint32_t); struct ipsecrequest *ipsec_newisr(void); void ipsec_delisr(struct ipsecrequest *); struct secpolicy *ipsec4_checkpolicy(const struct mbuf *, struct inpcb *, - int *); + int *, int); u_int ipsec_get_reqlevel(struct secpolicy *, u_int); diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h index a5fae4d..33aa30f 100644 --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -60,7 +60,7 @@ VNET_DECLARE(int, ip6_ipsec_ecn); struct inpcb; struct secpolicy *ipsec6_checkpolicy(const struct mbuf *, - struct inpcb *, int *); + struct inpcb *, int *, int); void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, union sockaddr_union *); diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index f30a017..9814495 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -325,7 +325,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, (prot == IPPROTO_UDP || prot == IPPROTO_TCP)) udp_ipsec_adjust_cksum(m, sav, prot, skip); - IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); + IPSEC_INIT_CTX(&ctx, &m, NULL, sav, AF_INET, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; ip = mtod(m, struct ip *); /* update pointer */ @@ -416,7 +416,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, goto bad; } - IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_AFTER); + IPSEC_INIT_CTX(&ctx, &m, NULL, sav, af, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; @@ -522,7 +522,7 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, goto bad; } - IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_BEFORE); + IPSEC_INIT_CTX(&ctx, &m, NULL, sav, af, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; @@ -593,7 +593,7 
@@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, else #endif af = AF_INET6; - IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_AFTER); + IPSEC_INIT_CTX(&ctx, &m, NULL, sav, af, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; if (skip == 0) { diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index ee45ce2..e64b52e 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -181,7 +181,8 @@ next: * IPsec output logic for IPv4. */ static int -ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp, u_int idx) { struct ipsec_ctx_data ctx; union sockaddr_union *dst; @@ -211,7 +212,7 @@ ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) /* * XXXAE: most likely ip_sum at this point is wrong. */ - IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); + IPSEC_INIT_CTX(&ctx, &m, inp, sav, AF_INET, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; @@ -235,9 +236,10 @@ ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ goto bad; } + inp = NULL; } - IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + IPSEC_INIT_CTX(&ctx, &m, inp, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; @@ -285,7 +287,7 @@ ipsec4_process_packet(struct mbuf *m, struct secpolicy *sp, struct inpcb *inp) { - return (ipsec4_perform_request(m, sp, 0)); + return (ipsec4_perform_request(m, sp, inp, 0)); } static int @@ -295,7 +297,7 @@ ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) int error; /* Lookup for the corresponding outbound security policy */ - sp = ipsec4_checkpolicy(m, inp, &error); + sp = ipsec4_checkpolicy(m, inp, &error, !forwarding); if (sp == NULL) { if (error == 
-EINVAL) { /* Discarded by policy. */ @@ -491,7 +493,8 @@ next: * IPsec output logic for IPv6. */ static int -ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp, u_int idx) { struct ipsec_ctx_data ctx; union sockaddr_union *dst; @@ -514,7 +517,7 @@ ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); - IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET6, IPSEC_ENC_BEFORE); + IPSEC_INIT_CTX(&ctx, &m, inp, sav, AF_INET6, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; @@ -540,9 +543,10 @@ ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ goto bad; } + inp = NULL; } - IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + IPSEC_INIT_CTX(&ctx, &m, inp, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; @@ -585,7 +589,7 @@ ipsec6_process_packet(struct mbuf *m, struct secpolicy *sp, struct inpcb *inp) { - return (ipsec6_perform_request(m, sp, 0)); + return (ipsec6_perform_request(m, sp, inp, 0)); } static int @@ -595,7 +599,7 @@ ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) int error; /* Lookup for the corresponding outbound security policy */ - sp = ipsec6_checkpolicy(m, inp, &error); + sp = ipsec6_checkpolicy(m, inp, &error, !forwarding); if (sp == NULL) { if (error == -EINVAL) { /* Discarded by policy. 
*/ @@ -750,14 +754,14 @@ ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, case AF_INET: key_freesav(&sav); IPSECSTAT_INC(ips_out_bundlesa); - return (ipsec4_perform_request(m, sp, idx)); + return (ipsec4_perform_request(m, sp, NULL, idx)); /* NOTREACHED */ #endif #ifdef INET6 case AF_INET6: key_freesav(&sav); IPSEC6STAT_INC(ips_out_bundlesa); - return (ipsec6_perform_request(m, sp, idx)); + return (ipsec6_perform_request(m, sp, NULL, idx)); /* NOTREACHED */ #endif /* INET6 */ default: diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c index 721cd2e..a2de282 100644 --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -5084,7 +5084,7 @@ key_updateaddresses(struct socket *so, struct mbuf *m, newsav->natt = NULL; newsav->sah = sah; newsav->state = SADB_SASTATE_MATURE; - error = key_setnatt(sav, mhp); + error = key_setnatt(newsav, mhp); if (error != 0) goto fail; @@ -8129,7 +8129,10 @@ key_destroy(void) TAILQ_CONCAT(&drainq, &V_sptree[i], chain); TAILQ_CONCAT(&drainq, &V_sptree_ifnet[i], chain); } + for (i = 0; i < V_sphash_mask + 1; i++) + LIST_INIT(&V_sphashtbl[i]); SPTREE_WUNLOCK(); + sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); @@ -8180,6 +8183,10 @@ key_destroy(void) free(acq, M_IPSEC_SAQ); acq = nextacq; } + for (i = 0; i < V_acqaddrhash_mask + 1; i++) + LIST_INIT(&V_acqaddrhashtbl[i]); + for (i = 0; i < V_acqseqhash_mask + 1; i++) + LIST_INIT(&V_acqseqhashtbl[i]); ACQ_UNLOCK(); SPACQ_LOCK(); @@ -8195,6 +8202,18 @@ key_destroy(void) hashdestroy(V_acqaddrhashtbl, M_IPSEC_SAQ, V_acqaddrhash_mask); hashdestroy(V_acqseqhashtbl, M_IPSEC_SAQ, V_acqseqhash_mask); uma_zdestroy(V_key_lft_zone); + + if (!IS_DEFAULT_VNET(curvnet)) + return; +#ifndef IPSEC_DEBUG2 + callout_drain(&key_timer); +#endif + XFORMS_LOCK_DESTROY(); + SPTREE_LOCK_DESTROY(); + REGTREE_LOCK_DESTROY(); + SAHTREE_LOCK_DESTROY(); + ACQ_LOCK_DESTROY(); + SPACQ_LOCK_DESTROY(); } #endif diff --git a/sys/netipsec/xform_ah.c 
b/sys/netipsec/xform_ah.c index 6716e70..44d4b1b 100644 --- a/sys/netipsec/xform_ah.c +++ b/sys/netipsec/xform_ah.c @@ -654,6 +654,8 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; + if (V_async_crypto) + crp->crp_flags |= CRYPTO_F_ASYNC | CRYPTO_F_ASYNC_KEEPORDER; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_input_cb; crp->crp_sid = cryptoid; @@ -1030,6 +1032,8 @@ ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; + if (V_async_crypto) + crp->crp_flags |= CRYPTO_F_ASYNC | CRYPTO_F_ASYNC_KEEPORDER; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_output_cb; crp->crp_sid = cryptoid; diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c index 39d5b8c..02064a9 100644 --- a/sys/netipsec/xform_esp.c +++ b/sys/netipsec/xform_esp.c @@ -385,6 +385,8 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; + if (V_async_crypto) + crp->crp_flags |= CRYPTO_F_ASYNC | CRYPTO_F_ASYNC_KEEPORDER; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_input_cb; crp->crp_sid = cryptoid; @@ -841,6 +843,8 @@ esp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. 
*/ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; + if (V_async_crypto) + crp->crp_flags |= CRYPTO_F_ASYNC | CRYPTO_F_ASYNC_KEEPORDER; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_output_cb; crp->crp_opaque = (caddr_t) xd; diff --git a/sys/netpfil/ipfw/ip_dn_glue.c b/sys/netpfil/ipfw/ip_dn_glue.c index 4c4659a..7686155 100644 --- a/sys/netpfil/ipfw/ip_dn_glue.c +++ b/sys/netpfil/ipfw/ip_dn_glue.c @@ -164,7 +164,7 @@ struct dn_pipe7 { /* a pipe */ SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */ int pipe_nr ; /* number */ - int bandwidth; /* really, bytes/tick. */ + uint32_t bandwidth; /* really, bytes/tick. */ int delay ; /* really, ticks */ struct mbuf *head, *tail ; /* packets in delay line */ @@ -230,7 +230,7 @@ struct dn_pipe8 { /* a pipe */ SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */ int pipe_nr ; /* number */ - int bandwidth; /* really, bytes/tick. */ + uint32_t bandwidth; /* really, bytes/tick. */ int delay ; /* really, ticks */ struct mbuf *head, *tail ; /* packets in delay line */ diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c index 831b909..4a8843f 100644 --- a/sys/netpfil/ipfw/ip_dn_io.c +++ b/sys/netpfil/ipfw/ip_dn_io.c @@ -618,7 +618,8 @@ serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now) struct dn_schk *s = si->sched; struct mbuf *m = NULL; int delay_line_idle = (si->dline.mq.head == NULL); - int done, bw; + int done; + uint32_t bw; if (q == NULL) { q = &def_q; @@ -776,6 +777,7 @@ dummynet_send(struct mbuf *m) dst = DIR_DROP; } else { dst = pkt->dn_dir; + pkt->rule.info |= IPFW_IS_DUMMYNET; ifp = pkt->ifp; tag->m_tag_cookie = MTAG_IPFW_RULE; tag->m_tag_id = 0; diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c index 7240a99..9fdcc13 100644 --- a/sys/netpfil/ipfw/ip_dummynet.c +++ b/sys/netpfil/ipfw/ip_dummynet.c @@ -153,7 +153,7 @@ ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) op = "Clamp"; } else return *v; - if (op && msg) + 
if (op && msg && bootverbose) printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); return *v; } @@ -2682,7 +2682,6 @@ static moduledata_t dummynet_mod = { #define DN_SI_SUB SI_SUB_PROTO_FIREWALL #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); -MODULE_DEPEND(dummynet, ipfw, 3, 3, 3); MODULE_VERSION(dummynet, 3); /* diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index a66d5e7..058b184 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -373,7 +373,7 @@ tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) static int iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, - uint32_t *tablearg) + uint32_t *tablearg, void **te) { if (ifp == NULL) /* no iface with this packet, match fails */ @@ -383,7 +383,7 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, if (cmd->name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ return ipfw_lookup_table(chain, cmd->p.kidx, 0, - &ifp->if_index, tablearg); + &ifp->if_index, tablearg, NULL, te); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) @@ -976,6 +976,12 @@ ipfw_chk(struct ip_fw_args *args) struct ip_fw_chain *chain = &V_layer3_chain; /* + * Table match pointers. + */ + void *te = NULL; /* table entry */ + uint16_t tidx, tkeylen; + + /* * We store in ulp a pointer to the upper layer protocol header. * In the ipv4 case this is easy to determine from the header, * but for ipv6 we might have some additional headers in the middle. 
@@ -1301,11 +1307,17 @@ do { \ uint32_t tablearg = 0; int l, cmdlen, skip_or; /* skip rest of OR block */ struct ip_fw *f; + uint8_t *ea; f = chain->map[f_pos]; if (V_set_disable & (1 << f->set) ) continue; + ea = NULL; + te = NULL; + tidx = 0; + tkeylen = 0; + skip_or = 0; for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; l -= cmdlen, cmd += cmdlen) { @@ -1373,19 +1385,63 @@ do { \ break; case O_RECV: + { + void *ifte = NULL; + match = iface_match(m->m_pkthdr.rcvif, - (ipfw_insn_if *)cmd, chain, &tablearg); + (ipfw_insn_if *)cmd, chain, &tablearg, + &ifte); + if (match && ifte != NULL) { + te = ifte; + tkeylen = 0; + tidx = ((ipfw_insn_if *)cmd)->p.kidx; + } break; + } case O_XMIT: + { + void *ifte = NULL; + match = iface_match(oif, (ipfw_insn_if *)cmd, - chain, &tablearg); + chain, &tablearg, &ifte); + if (match && ifte != NULL) { + te = ifte; + tkeylen = 0; + tidx = ((ipfw_insn_if *)cmd)->p.kidx; + } break; + } case O_VIA: + { + void *ifte = NULL; + match = iface_match(oif ? oif : m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, - chain, &tablearg); + chain, &tablearg, &ifte); + if (match && ifte != NULL) { + te = ifte; + tkeylen = 0; + tidx = ((ipfw_insn_if *)cmd)->p.kidx; + } + break; + } + + case O_MACADDR2_LOOKUP: + if (args->eh != NULL) { /* have MAC header */ + uint32_t v = 0; + match = ipfw_lookup_table(chain, + cmd->arg1, 0, args->eh, &v, NULL, + &te); + if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) + match = ((ipfw_insn_u32 *)cmd)->d[0] == v; + if (match) { + tablearg = v; + tkeylen = 0; + tidx = cmd->arg1; + } + } break; case O_MACADDR2: @@ -1530,11 +1586,16 @@ do { \ #endif /* !USERSPACE */ else break; + if (args->eh != NULL) /* have MAC header */ + ea = (uint8_t *)args->eh->ether_dhost; match = ipfw_lookup_table(chain, - cmd->arg1, keylen, pkey, &vidx); + cmd->arg1, keylen, pkey, &vidx, + ea, &te); if (!match) break; tablearg = vidx; + tidx = cmd->arg1; + tkeylen = keylen; break; } /* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */ @@ -1560,8 +1621,14 @@ do { \ pkey = 
&args->f_id.src_ip6; } else break; + if (args->eh != NULL) { /* have MAC header */ + if (cmd->opcode == O_IP_DST_LOOKUP) + ea = (uint8_t *)args->eh->ether_dhost; + else + ea = (uint8_t *)args->eh->ether_shost; + } match = ipfw_lookup_table(chain, cmd->arg1, - keylen, pkey, &vidx); + keylen, pkey, &vidx, ea, &te); if (!match) break; if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) { @@ -1571,6 +1638,8 @@ do { \ break; } tablearg = vidx; + tidx = cmd->arg1; + tkeylen = keylen; break; } @@ -1578,12 +1647,16 @@ do { \ { uint32_t v = 0; match = ipfw_lookup_table(chain, - cmd->arg1, 0, &args->f_id, &v); + cmd->arg1, 0, &args->f_id, &v, + NULL, &te); if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == TARG_VAL(chain, v, tag); - if (match) + if (match) { tablearg = v; + tidx = cmd->arg1; + tkeylen = 0; + } } break; case O_IP_SRC_MASK: @@ -2254,11 +2327,19 @@ do { \ case O_COUNT: IPFW_INC_RULE_COUNTER(f, pktlen); + if (te != NULL) { + ipfw_cnt_update_tentry(chain, tidx, + tkeylen, te, pktlen); + } l = 0; /* exit inner loop */ break; case O_SKIPTO: IPFW_INC_RULE_COUNTER(f, pktlen); + if (te != NULL) { + ipfw_cnt_update_tentry(chain, tidx, + tkeylen, te, pktlen); + } f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); /* * Skip disabled rules, and re-enter @@ -2335,6 +2416,10 @@ do { \ } IPFW_INC_RULE_COUNTER(f, pktlen); + if (te != NULL) { + ipfw_cnt_update_tentry(chain, tidx, + tkeylen, te, pktlen); + } stack = (uint16_t *)(mtag + 1); /* @@ -2415,8 +2500,6 @@ do { \ break; case O_FORWARD_IP: - if (args->eh) /* not valid on layer2 pkts */ - break; if (q == NULL || q->rule != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in *sa; @@ -2473,8 +2556,6 @@ do { \ #ifdef INET6 case O_FORWARD_IP6: - if (args->eh) /* not valid on layer2 pkts */ - break; if (q == NULL || q->rule != f || dyn_dir == MATCH_FORWARD) { struct sockaddr_in6 *sin6; @@ -2504,6 +2585,10 @@ do { \ uint32_t fib; IPFW_INC_RULE_COUNTER(f, pktlen); + if (te != NULL) { + 
ipfw_cnt_update_tentry(chain, tidx, + tkeylen, te, pktlen); + } fib = TARG(cmd->arg1, fib) & 0x7FFF; if (fib >= rt_numfibs) fib = 0; @@ -2537,6 +2622,10 @@ do { \ break; IPFW_INC_RULE_COUNTER(f, pktlen); + if (te != NULL) { + ipfw_cnt_update_tentry(chain, tidx, + tkeylen, te, pktlen); + } break; } @@ -2576,6 +2665,10 @@ do { \ int ip_off; IPFW_INC_RULE_COUNTER(f, pktlen); + if (te != NULL) { + ipfw_cnt_update_tentry(chain, tidx, + tkeylen, te, pktlen); + } l = 0; /* in any case exit inner loop */ ip_off = ntohs(ip->ip_off); @@ -2617,6 +2710,10 @@ do { \ */ if (retval == 0 && done == 0) { IPFW_INC_RULE_COUNTER(f, pktlen); + if (te != NULL) { + ipfw_cnt_update_tentry(chain, + tidx, tkeylen, te, pktlen); + } /* * Reset the result of the last * dynamic state lookup. @@ -2660,6 +2757,10 @@ do { \ struct ip_fw *rule = chain->map[f_pos]; /* Update statistics */ IPFW_INC_RULE_COUNTER(rule, pktlen); + if (te != NULL) { + ipfw_cnt_update_tentry(chain, tidx, tkeylen, te, + pktlen); + } } else { retval = IP_FW_DENY; printf("ipfw: ouch!, skip past end of rules, denying packet\n"); diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c index 4316526..f014dfb 100644 --- a/sys/netpfil/ipfw/ip_fw_pfil.c +++ b/sys/netpfil/ipfw/ip_fw_pfil.c @@ -113,6 +113,74 @@ SYSEND #endif /* SYSCTL_NODE */ +static int +ipfw_check_next_hop(struct ip_fw_args *args, int dir, struct mbuf *m) +{ + struct m_tag *fwd_tag; + size_t len; + +#if (!defined(INET6) && !defined(INET)) + return (EACCES); +#else + + KASSERT(args->next_hop == NULL || args->next_hop6 == NULL, + ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__, + args->next_hop, args->next_hop6)); +#ifdef INET6 + if (args->next_hop6 != NULL) + len = sizeof(struct sockaddr_in6); +#endif +#ifdef INET + if (args->next_hop != NULL) + len = sizeof(struct sockaddr_in); +#endif + + /* Incoming packets should not be tagged so we do not + * m_tag_find. Outgoing packets may be tagged, so we + * reuse the tag if present. 
+ */ + fwd_tag = (dir == DIR_IN) ? NULL : + m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag != NULL) { + m_tag_unlink(m, fwd_tag); + } else { + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, len, + M_NOWAIT); + if (fwd_tag == NULL) + return (EACCES); + } +#ifdef INET6 + if (args->next_hop6 != NULL) { + struct sockaddr_in6 *sa6; + + sa6 = (struct sockaddr_in6 *)(fwd_tag + 1); + bcopy(args->next_hop6, sa6, len); + /* + * If nh6 address is link-local we should convert + * it to kernel internal form before doing any + * comparisons. + */ + if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) + return (EACCES); + if (in6_localip(&sa6->sin6_addr)) + m->m_flags |= M_FASTFWD_OURS; + m->m_flags |= M_IP6_NEXTHOP; + } +#endif +#ifdef INET + if (args->next_hop != NULL) { + bcopy(args->next_hop, (fwd_tag+1), len); + if (in_localip(args->next_hop->sin_addr)) + m->m_flags |= M_FASTFWD_OURS; + m->m_flags |= M_IP_NEXTHOP; + } +#endif + m_tag_prepend(m, fwd_tag); +#endif /* INET || INET6 */ + + return (IP_FW_PASS); +} + /* * The pfilter hook to pass packets to ipfw_chk and then to * dummynet, divert, netgraph or other modules. @@ -161,72 +229,7 @@ again: /* next_hop may be set by ipfw_chk */ if (args.next_hop == NULL && args.next_hop6 == NULL) break; /* pass */ -#if (!defined(INET6) && !defined(INET)) - ret = EACCES; -#else - { - struct m_tag *fwd_tag; - size_t len; - - KASSERT(args.next_hop == NULL || args.next_hop6 == NULL, - ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__, - args.next_hop, args.next_hop6)); -#ifdef INET6 - if (args.next_hop6 != NULL) - len = sizeof(struct sockaddr_in6); -#endif -#ifdef INET - if (args.next_hop != NULL) - len = sizeof(struct sockaddr_in); -#endif - - /* Incoming packets should not be tagged so we do not - * m_tag_find. Outgoing packets may be tagged, so we - * reuse the tag if present. - */ - fwd_tag = (dir == DIR_IN) ? 
NULL : - m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); - if (fwd_tag != NULL) { - m_tag_unlink(*m0, fwd_tag); - } else { - fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, len, - M_NOWAIT); - if (fwd_tag == NULL) { - ret = EACCES; - break; /* i.e. drop */ - } - } -#ifdef INET6 - if (args.next_hop6 != NULL) { - struct sockaddr_in6 *sa6; - - sa6 = (struct sockaddr_in6 *)(fwd_tag + 1); - bcopy(args.next_hop6, sa6, len); - /* - * If nh6 address is link-local we should convert - * it to kernel internal form before doing any - * comparisons. - */ - if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { - ret = EACCES; - break; - } - if (in6_localip(&sa6->sin6_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - (*m0)->m_flags |= M_IP6_NEXTHOP; - } -#endif -#ifdef INET - if (args.next_hop != NULL) { - bcopy(args.next_hop, (fwd_tag+1), len); - if (in_localip(args.next_hop->sin_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - (*m0)->m_flags |= M_IP_NEXTHOP; - } -#endif - m_tag_prepend(*m0, fwd_tag); - } -#endif /* INET || INET6 */ + ret = ipfw_check_next_hop(&args, dir, *m0); break; case IP_FW_DENY: @@ -368,6 +371,10 @@ ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, /* Check result of ipfw_chk() */ switch (i) { case IP_FW_PASS: + /* next_hop may be set by ipfw_chk */ + if (args.next_hop == NULL && args.next_hop6 == NULL) + break; /* pass */ + ret = ipfw_check_next_hop(&args, dir, *m0); break; case IP_FW_DENY: @@ -505,7 +512,11 @@ ipfw_hook(int onoff, int pf) hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet; - (void) (onoff ? 
pfil_add_hook : pfil_remove_hook) + if (onoff) + (void) pfil_add_named_hook + (hook_func, NULL, "ipfw", PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); + else + (void) pfil_remove_hook (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); return 0; diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index b6471a0..9973aef 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -735,10 +735,12 @@ int ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, struct table_info; typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); + uint32_t *val, uint8_t *ea, void **te); int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, - void *paddr, uint32_t *val); + void *paddr, uint32_t *val, uint8_t *ea, void **te); +void ipfw_cnt_update_tentry(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, + void *e, int pktlen); struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx); int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx); diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index e6d7487..a9f8e70 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -1857,10 +1857,22 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) goto bad_size; ci->object_opcodes++; break; + case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) goto bad_size; break; + case O_MACADDR2_LOOKUP: + if (cmd->arg1 >= V_fw_tables_max) { + printf("ipfw: invalid table number %d\n", + cmd->arg1); + return (EINVAL); + } + if (cmdlen != F_INSN_SIZE(ipfw_insn) && + cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + ci->object_opcodes++; + break; case O_NOP: case O_IPID: diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c index 4445683..d4a52b0 100644 --- a/sys/netpfil/ipfw/ip_fw_table.c +++ 
b/sys/netpfil/ipfw/ip_fw_table.c @@ -185,6 +185,66 @@ get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx) return (&pval[kidx]); } +static int +zero_cnt_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_tentry *tent; + ipfw_obj_header *oh; + struct tid_info ti; + struct table_config *tc; + struct table_algo *ta; + struct table_info *kti; + struct namedobj_instance *ni; + int error; + size_t sz; + + /* Check minimum header size */ + sz = sizeof(*oh) + sizeof(*tent); + if (sd->valsize != sz) + return (EINVAL); + + oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + tent = (ipfw_obj_tentry *)(oh + 1); + + /* Basic length checks for TLVs */ + if (oh->ntlv.head.length != sizeof(oh->ntlv)) + return (EINVAL); + + objheader_to_ti(oh, &ti); + ti.type = oh->ntlv.type; + ti.uidx = tent->idx; + + IPFW_UH_RLOCK(ch); + ni = CHAIN_TO_NI(ch); + + /* + * Find existing table and check its type . + */ + ta = NULL; + if ((tc = find_table(ni, &ti)) == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + + /* check table type */ + if (tc->no.subtype != ti.type) { + IPFW_UH_RUNLOCK(ch); + return (EINVAL); + } + + kti = KIDX_TO_TI(ch, tc->no.kidx); + ta = tc->ta; + + if (ta->zero_cnt_tentry == NULL) + return (ENOTSUP); + + error = ta->zero_cnt_tentry(tc->astate, kti, tent); + IPFW_UH_RUNLOCK(ch); + + return (error); +} + /* * Checks if we're able to insert/update entry @tei into table @@ -1029,6 +1089,7 @@ manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, ptei = tei_buf; ptent = tent; for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { + ptei->mac = ptent->mac; ptei->paddr = &ptent->k; ptei->subtype = ptent->subtype; ptei->masklen = ptent->masklen; @@ -1089,6 +1150,7 @@ find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct table_algo *ta; struct table_info *kti; struct table_value *pval; + struct timeval boottime; struct namedobj_instance *ni; int error; size_t sz; @@ 
-1137,6 +1199,10 @@ find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, if (error == 0) { pval = get_table_value(ch, tc, tent->v.kidx); ipfw_export_table_value_v1(pval, &tent->v.value); + if (tent->timestamp != 0) { + getboottime(&boottime); + tent->timestamp += boottime.tv_sec; + } } IPFW_UH_RUNLOCK(ch); @@ -1665,13 +1731,37 @@ ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx) */ int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, - void *paddr, uint32_t *val) + void *paddr, uint32_t *val, uint8_t *ea, void **te) { struct table_info *ti; ti = KIDX_TO_TI(ch, tbl); - return (ti->lookup(ti, paddr, plen, val)); + return (ti->lookup(ti, paddr, plen, val, ea, te)); +} + +/* + * Update the table entry counter. + */ +void +ipfw_cnt_update_tentry(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, + void *e, int pktlen) +{ + struct namedobj_instance *ni; + struct table_algo *ta; + struct table_config *tc; + struct table_info *ti; + + ni = CHAIN_TO_NI(ch); + tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, tbl); + if (tc == NULL) + return; + ta = tc->ta; + if (ta->cnt_tentry == NULL) + return; + + ti = KIDX_TO_TI(ch, tbl); + ta->cnt_tentry(tc->astate, ti, plen, e, pktlen); } /* @@ -2013,6 +2103,7 @@ struct dump_args { ta_foreach_f *f; void *farg; ipfw_obj_tentry tent; + time_t boottime; }; static int @@ -2171,6 +2262,7 @@ dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, struct tid_info ti; struct table_config *tc; struct table_algo *ta; + struct timeval boottime; struct dump_args da; uint32_t sz; @@ -2209,6 +2301,8 @@ dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, da.ti = KIDX_TO_TI(ch, tc->no.kidx); da.tc = tc; da.sd = sd; + getboottime(&boottime); + da.boottime = boottime.tv_sec; ta = tc->ta; @@ -2449,6 +2543,9 @@ dump_table_tentry(void *e, void *arg) pval = get_table_value(da->ch, da->tc, tent->v.kidx); ipfw_export_table_value_v1(pval, &tent->v.value); + if (tent->timestamp != 0) + tent->timestamp 
+= da->boottime; + return (0); } @@ -2808,6 +2905,15 @@ classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) return (0); } +static int +classify_mac(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + *puidx = cmd->arg1; + *ptype = IPFW_TABLE_MAC2; + + return (0); +} + static void update_arg1(ipfw_insn *cmd, uint16_t idx) { @@ -2959,6 +3065,16 @@ static struct opcode_obj_rewrite opcodes[] = { .manage_sets = table_manage_sets, }, { + .opcode = O_MACADDR2_LOOKUP, + .etlv = IPFW_TLV_TBL_NAME, + .classifier = classify_mac, + .update = update_arg1, + .find_byname = table_findbyname, + .find_bykidx = table_findbykidx, + .create_object = create_table_compat, + .manage_sets = table_manage_sets, + }, + { .opcode = O_XMIT, .etlv = IPFW_TLV_TBL_NAME, .classifier = classify_via, @@ -3293,6 +3409,7 @@ static struct ipfw_sopt_handler scodes[] = { { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table }, { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo }, { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size }, + { IP_FW_TABLE_XZEROCNT, 0, HDIR_SET, zero_cnt_entry }, }; static int diff --git a/sys/netpfil/ipfw/ip_fw_table.h b/sys/netpfil/ipfw/ip_fw_table.h index d657848..cf0309c 100644 --- a/sys/netpfil/ipfw/ip_fw_table.h +++ b/sys/netpfil/ipfw/ip_fw_table.h @@ -62,6 +62,7 @@ struct tentry_info { uint8_t subtype; uint16_t flags; /* record flags */ uint32_t value; /* value index */ + uint64_t mac; }; #define TEI_FLAGS_UPDATE 0x0001 /* Add or update rec if exists */ #define TEI_FLAGS_UPDATED 0x0002 /* Entry has been updated */ @@ -111,6 +112,10 @@ typedef int ta_find_tentry(void *ta_state, struct table_info *ti, typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo); typedef uint32_t ta_get_count(void *ta_state, struct table_info *ti); +typedef void ta_cnt_tentry(void *ta_state, struct table_info *ti, + uint32_t keylen, void *e, int pktlen); +typedef int ta_zero_cnt_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); struct 
table_algo { char name[16]; @@ -139,6 +144,8 @@ struct table_algo { ta_print_config *print_config; ta_dump_tinfo *dump_tinfo; ta_get_count *get_count; + ta_cnt_tentry *cnt_tentry; + ta_zero_cnt_tentry *zero_cnt_tentry; }; #define TA_FLAG_DEFAULT 0x01 /* Algo is default for given type */ #define TA_FLAG_READONLY 0x02 /* Algo does not support modifications*/ diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index 97bc879..dfa80c6 100644 --- a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rmlock.h> #include <sys/socket.h> #include <sys/queue.h> +#include <net/ethernet.h> #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ #include <net/radix.h> #include <net/route.h> @@ -71,7 +72,8 @@ __FBSDID("$FreeBSD$"); * Algo init: * * struct table_algo has to be filled with: * name: "type:algoname" format, e.g. "addr:radix". Currently - * there are the following types: "addr", "iface", "number" and "flow". + * there are the following types: "addr", "iface", "mac", "number" and + * "flow". * type: one of IPFW_TABLE_* types * flags: one or more TA_FLAGS_* * ta_buf_size: size of structure used to store add/del item state. 
@@ -328,6 +330,10 @@ struct radix_addr_entry { struct radix_node rn[2]; struct sockaddr_in addr; uint32_t value; + uint64_t bcnt; + uint64_t mac; + uint64_t pcnt; + time_t timestamp; uint8_t masklen; }; @@ -342,6 +348,10 @@ struct radix_addr_xentry { struct radix_node rn[2]; struct sa_in6 addr6; uint32_t value; + uint64_t bcnt; + uint64_t mac; + uint64_t pcnt; + time_t timestamp; uint8_t masklen; }; @@ -370,7 +380,7 @@ struct ta_buf_radix }; static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); + uint32_t *val, uint8_t *ea, void **te); static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static int flush_radix_entry(struct radix_node *rn, void *arg); @@ -397,10 +407,14 @@ static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei void *ta_buf); static int ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, uint64_t *pflags); +static void ta_cnt_radix_tentry(void *ta_state, struct table_info *ti, + uint32_t keylen, void *e, int pktlen); +static int ta_zero_cnt_radix_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { struct radix_node_head *rnh; @@ -412,7 +426,14 @@ ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, rnh = (struct radix_node_head *)ti->state; ent = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&sa, &rnh->rh)); if (ent != NULL) { + if (ent->mac != 0 && ea == NULL) + return (0); + if (ent->mac != 0 && + memcmp(ea, &ent->mac, ETHER_ADDR_LEN) != 0) + return (0); *val = ent->value; + if (te != NULL) + *te = (void *)ent; return (1); } } else { @@ -423,7 +444,14 @@ ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, rnh = (struct radix_node_head *)ti->xstate; xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, 
&rnh->rh)); if (xent != NULL) { + if (xent->mac != 0 && ea == NULL) + return (0); + if (xent->mac != 0 && + memcmp(ea, &xent->mac, ETHER_ADDR_LEN) != 0) + return (0); *val = xent->value; + if (te != NULL) + *te = (void *)xent; return (1); } } @@ -523,6 +551,10 @@ ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, tent->masklen = n->masklen; tent->subtype = AF_INET; tent->v.kidx = n->value; + tent->mac = n->mac; + tent->bcnt = n->bcnt; + tent->pcnt = n->pcnt; + tent->timestamp = n->timestamp; #ifdef INET6 } else { xn = (struct radix_addr_xentry *)e; @@ -530,6 +562,10 @@ ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, tent->masklen = xn->masklen; tent->subtype = AF_INET6; tent->v.kidx = xn->value; + tent->mac = n->mac; + tent->bcnt = n->bcnt; + tent->pcnt = n->pcnt; + tent->timestamp = n->timestamp; #endif } @@ -717,9 +753,11 @@ ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, /* Save current entry value from @tei */ if (tei->subtype == AF_INET) { rnh = ti->state; + ((struct radix_addr_entry *)tb->ent_ptr)->mac = tei->mac; ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value; } else { rnh = ti->xstate; + ((struct radix_addr_xentry *)tb->ent_ptr)->mac = tei->mac; ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value; } @@ -867,6 +905,63 @@ ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, return (0); } +static void +ta_cnt_radix_tentry(void *ta_state, struct table_info *ti, uint32_t keylen, + void *e, int pktlen) +{ + + if (keylen == sizeof(in_addr_t)) { + struct radix_addr_entry *ent; + ent = (struct radix_addr_entry *)e; + ent->pcnt++; + ent->bcnt += pktlen; + ent->timestamp = time_uptime; + } else { + struct radix_addr_xentry *xent; + xent = (struct radix_addr_xentry *)e; + xent->pcnt++; + xent->bcnt += pktlen; + xent->timestamp = time_uptime; + } +} + +static int +ta_zero_cnt_radix_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + 
struct radix_node_head *rnh; + + if (tent->subtype == AF_INET) { + struct radix_addr_entry *ent; + struct sockaddr_in sa; + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = tent->k.addr.s_addr; + rnh = (struct radix_node_head *)ti->state; + ent = (struct radix_addr_entry *)rnh->rnh_matchaddr(&sa, + &rnh->rh); + if (ent == NULL) + return (ENOENT); + ent->pcnt = 0; + ent->bcnt = 0; + ent->timestamp = 0; + } else { + struct radix_addr_xentry *xent; + struct sa_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); + rnh = (struct radix_node_head *)ti->xstate; + xent = (struct radix_addr_xentry *)rnh->rnh_matchaddr(&sa6, + &rnh->rh); + if (xent == NULL) + return (ENOENT); + xent->pcnt = 0; + xent->bcnt = 0; + xent->timestamp = 0; + } + + return (0); +} + struct table_algo addr_radix = { .name = "addr:radix", .type = IPFW_TABLE_ADDR, @@ -884,6 +979,8 @@ struct table_algo addr_radix = { .find_tentry = ta_find_radix_tentry, .dump_tinfo = ta_dump_radix_tinfo, .need_modify = ta_need_modify_radix, + .cnt_tentry = ta_cnt_radix_tentry, + .zero_cnt_tentry = ta_zero_cnt_radix_tentry, }; @@ -926,6 +1023,9 @@ struct chashentry { SLIST_ENTRY(chashentry) next; uint32_t value; uint32_t type; + uint64_t bcnt; + uint64_t pcnt; + time_t timestamp; union { uint32_t a4; /* Host format */ struct in6_addr a6; /* Network format */ @@ -950,11 +1050,11 @@ static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask int hsize); #endif static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); + uint32_t *val, uint8_t *ea, void **te); static int ta_lookup_chash_aligned(struct table_info *ti, void *key, - uint32_t keylen, uint32_t *val); + uint32_t keylen, uint32_t *val, uint8_t *ea, void **te); static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); + uint32_t *val, uint8_t *ea, void **te); static int chash_parse_opts(struct chash_cfg 
*cfg, char *data); static void ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize); @@ -991,7 +1091,10 @@ static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_chash(void *ta_buf); - +static void ta_cnt_chash_tentry(void *ta_state, struct table_info *ti, + uint32_t keylen, void *e, int pktlen); +static int ta_zero_cnt_chash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); #ifdef INET static __inline uint32_t @@ -1052,7 +1155,7 @@ hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize) static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { struct chashbhead *head; struct chashentry *ent; @@ -1071,6 +1174,8 @@ ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; + if (te != NULL) + *te = (void *)ent; return (1); } } @@ -1086,6 +1191,8 @@ ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, SLIST_FOREACH(ent, &head[hash], next) { if (memcmp(&ent->a.a6, &addr6, 16) == 0) { *val = ent->value; + if (te != NULL) + *te = (void *)ent; return (1); } } @@ -1097,7 +1204,7 @@ ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, static int ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { struct chashbhead *head; struct chashentry *ent; @@ -1116,6 +1223,8 @@ ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; + if (te != NULL) + *te = (void *)ent; return (1); } } @@ -1135,6 +1244,8 @@ ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t 
keylen, ptmp = (uint64_t *)&ent->a.a6; if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) { *val = ent->value; + if (te != NULL) + *te = (void *)ent; return (1); } } @@ -1146,7 +1257,7 @@ ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { struct chashbhead *head; struct chashentry *ent; @@ -1165,6 +1276,8 @@ ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, SLIST_FOREACH(ent, &head[hash], next) { if (ent->a.a4 == a) { *val = ent->value; + if (te != NULL) + *te = (void *)ent; return (1); } } @@ -1182,6 +1295,8 @@ ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, paddr = (uint64_t *)&ent->a.a6; if (a6 == *paddr) { *val = ent->value; + if (te != NULL) + *te = (void *)ent; return (1); } } @@ -1379,12 +1494,18 @@ ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, tent->masklen = cfg->mask4; tent->subtype = AF_INET; tent->v.kidx = ent->value; + tent->bcnt = ent->bcnt; + tent->pcnt = ent->pcnt; + tent->timestamp = ent->timestamp; #ifdef INET6 } else { memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr)); tent->masklen = cfg->mask6; tent->subtype = AF_INET6; tent->v.kidx = ent->value; + tent->bcnt = ent->bcnt; + tent->pcnt = ent->pcnt; + tent->timestamp = ent->timestamp; #endif } @@ -1860,6 +1981,82 @@ ta_flush_mod_chash(void *ta_buf) free(mi->main_ptr6, M_IPFW); } +static void +ta_cnt_chash_tentry(void *ta_state, struct table_info *ti, uint32_t keylen, + void *e, int pktlen) +{ + struct chashentry *ent; + + ent = (struct chashentry *)e; + ent->pcnt++; + ent->bcnt += pktlen; + ent->timestamp = time_uptime; +} + +static int +ta_zero_cnt_chash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct chash_cfg *cfg; + struct chashbhead *head; + struct chashentry ent, *tmp; + struct tentry_info tei; + int error; + uint32_t hash; + 
bool done; + + cfg = (struct chash_cfg *)ta_state; + + done = false; + memset(&ent, 0, sizeof(ent)); + memset(&tei, 0, sizeof(tei)); + + if (tent->subtype == AF_INET) { + tei.paddr = &tent->k.addr; + tei.masklen = cfg->mask4; + tei.subtype = AF_INET; + + if ((error = tei_to_chash_ent(&tei, &ent)) != 0) + return (error); + + head = cfg->head4; + hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4); + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (tmp->a.a4 != ent.a.a4) + continue; + done = true; + break; + } + } else { + tei.paddr = &tent->k.addr6; + tei.masklen = cfg->mask6; + tei.subtype = AF_INET6; + + if ((error = tei_to_chash_ent(&tei, &ent)) != 0) + return (error); + + head = cfg->head6; + hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6); + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0) + continue; + done = true; + break; + } + } + + if (!done) + return (ENOENT); + + tmp->pcnt = 0; + tmp->bcnt = 0; + tmp->timestamp = 0; + + return (0); +} + struct table_algo addr_hash = { .name = "addr:hash", .type = IPFW_TABLE_ADDR, @@ -1881,6 +2078,8 @@ struct table_algo addr_hash = { .fill_mod = ta_fill_mod_chash, .modify = ta_modify_chash, .flush_mod = ta_flush_mod_chash, + .cnt_tentry = ta_cnt_chash_tentry, + .zero_cnt_tentry = ta_zero_cnt_chash_tentry, }; @@ -1906,6 +2105,9 @@ struct ifidx { uint16_t kidx; uint16_t spare; uint32_t value; + uint64_t bcnt; + uint64_t pcnt; + time_t timestamp; }; #define DEFAULT_IFIDX_SIZE 64 @@ -1938,7 +2140,7 @@ struct ta_buf_ifidx int compare_ifidx(const void *k, const void *v); static struct ifidx * ifidx_find(struct table_info *ti, void *key); static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); + uint32_t *val, uint8_t *ea, void **te); static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_change_ti_ifidx(void 
*ta_state, struct table_info *ti); @@ -1974,6 +2176,10 @@ static int foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, void *arg); static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg); +static void ta_cnt_ifidx_tentry(void *ta_state, struct table_info *ti, + uint32_t keylen, void *e, int pktlen); +static int ta_zero_cnt_ifidx_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); int compare_ifidx(const void *k, const void *v) @@ -2080,7 +2286,7 @@ ifidx_find(struct table_info *ti, void *key) static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { struct ifidx *ifi; @@ -2088,6 +2294,8 @@ ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, if (ifi != NULL) { *val = ifi->value; + if (te != NULL) + *te = ifi; return (1); } @@ -2395,6 +2603,9 @@ if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex) ifi.kidx = ifindex; ifi.spare = 0; ifi.value = ife->value; + ifi.bcnt = 0; + ifi.pcnt = 0; + ifi.timestamp = 0; res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used, sizeof(struct ifidx), compare_ifidx); KASSERT(res == 1, ("index %d already exists", ifindex)); @@ -2523,6 +2734,7 @@ ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, ipfw_obj_tentry *tent) { struct ifentry *ife; + struct ifidx *ifi; ife = (struct ifentry *)e; @@ -2530,6 +2742,13 @@ ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, memcpy(&tent->k, ife->no.name, IF_NAMESIZE); tent->v.kidx = ife->value; + ifi = ifidx_find(ti, &ife->ic.iface->ifindex); + if (ifi != NULL) { + tent->bcnt = ifi->bcnt; + tent->pcnt = ifi->pcnt; + tent->timestamp = ifi->timestamp; + } + return (0); } @@ -2591,6 +2810,47 @@ ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa); } +static void +ta_cnt_ifidx_tentry(void *ta_state, struct 
table_info *ti, uint32_t keylen, + void *e, int pktlen) +{ + struct ifidx *ifi; + + ifi = (struct ifidx *)e; + ifi->pcnt++; + ifi->bcnt += pktlen; + ifi->timestamp = time_uptime; +} + +static int +ta_zero_cnt_ifidx_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct iftable_cfg *icfg; + struct ifentry *ife; + struct ifidx *ifi; + char *ifname; + + icfg = (struct iftable_cfg *)ta_state; + ifname = tent->k.iface; + + if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) + return (EINVAL); + + ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); + if (ife == NULL) + return (ENOENT); + + ifi = ifidx_find(ti, &ife->ic.iface->ifindex); + if (ifi == NULL) + return (ENOENT); + ifi->pcnt = 0; + ifi->bcnt = 0; + ifi->timestamp = 0; + + return (0); +} + struct table_algo iface_idx = { .name = "iface:array", .type = IPFW_TABLE_INTERFACE, @@ -2613,6 +2873,8 @@ struct table_algo iface_idx = { .modify = ta_modify_ifidx, .flush_mod = ta_flush_mod_ifidx, .change_ti = ta_change_ti_ifidx, + .cnt_tentry = ta_cnt_ifidx_tentry, + .zero_cnt_tentry = ta_zero_cnt_ifidx_tentry, }; /* @@ -2630,6 +2892,9 @@ struct table_algo iface_idx = { struct numarray { uint32_t number; uint32_t value; + uint64_t bcnt; + uint64_t pcnt; + time_t timestamp; }; struct numarray_cfg { @@ -2646,7 +2911,7 @@ struct ta_buf_numarray int compare_numarray(const void *k, const void *v); static struct numarray *numarray_find(struct table_info *ti, void *key); static int ta_lookup_numarray(struct table_info *ti, void *key, - uint32_t keylen, uint32_t *val); + uint32_t keylen, uint32_t *val, uint8_t *ea, void **te); static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_numarray(void *ta_state, struct table_info *ti); @@ -2674,6 +2939,10 @@ static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, ipfw_obj_tentry *tent); static void ta_foreach_numarray(void *ta_state, struct 
table_info *ti, ta_foreach_f *f, void *arg); +static void ta_cnt_numarray_tentry(void *ta_state, struct table_info *ti, + uint32_t keylen, void *e, int pktlen); +static int ta_zero_cnt_numarray_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); int compare_numarray(const void *k, const void *v) @@ -2705,7 +2974,7 @@ numarray_find(struct table_info *ti, void *key) static int ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { struct numarray *ri; @@ -2713,6 +2982,8 @@ ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, if (ri != NULL) { *val = ri->value; + if (te != NULL) + *te = ri; return (1); } @@ -2986,6 +3257,9 @@ ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, tent->k.key = na->number; tent->v.kidx = na->value; + tent->bcnt = na->bcnt; + tent->pcnt = na->pcnt; + tent->timestamp = na->timestamp; return (0); } @@ -3024,6 +3298,37 @@ ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, f(&array[i], arg); } +static void +ta_cnt_numarray_tentry(void *ta_state, struct table_info *ti, uint32_t keylen, + void *e, int pktlen) +{ + struct numarray *na; + + na = (struct numarray *)e; + na->pcnt++; + na->bcnt += pktlen; + na->timestamp = time_uptime; +} + +static int +ta_zero_cnt_numarray_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct numarray_cfg *cfg; + struct numarray *na; + + cfg = (struct numarray_cfg *)ta_state; + + na = numarray_find(ti, &tent->k.key); + if (na == NULL) + return (ENOENT); + na->pcnt = 0; + na->bcnt = 0; + na->timestamp = 0; + + return (0); +} + struct table_algo number_array = { .name = "number:array", .type = IPFW_TABLE_NUMBER, @@ -3044,6 +3349,8 @@ struct table_algo number_array = { .fill_mod = ta_fill_mod_numarray, .modify = ta_modify_numarray, .flush_mod = ta_flush_mod_numarray, + .cnt_tentry = ta_cnt_numarray_tentry, + .zero_cnt_tentry = 
ta_zero_cnt_numarray_tentry, }; /* @@ -3078,6 +3385,9 @@ struct fhashentry { uint16_t dport; uint16_t sport; uint32_t value; + uint64_t bcnt; + uint64_t pcnt; + time_t timestamp; uint32_t spare1; }; @@ -3112,7 +3422,7 @@ static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize); static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize); static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size); static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); + uint32_t *val, uint8_t *ea, void **te); static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, char *data, uint8_t tflags); static void ta_destroy_fhash(void *ta_state, struct table_info *ti); @@ -3143,6 +3453,10 @@ static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti, static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, uint64_t pflags); static void ta_flush_mod_fhash(void *ta_buf); +static void ta_cnt_fhash_tentry(void *ta_state, struct table_info *ti, + uint32_t keylen, void *e, int pktlen); +static int ta_zero_cnt_fhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); static __inline int cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) @@ -3198,7 +3512,7 @@ hash_flow_ent(struct fhashentry *ent, uint32_t size) static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { struct fhashbhead *head; struct fhashentry *ent; @@ -3226,6 +3540,8 @@ ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { *val = ent->value; + if (te != NULL) + *te = ent; return (1); } } @@ -3251,6 +3567,8 @@ ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, SLIST_FOREACH(ent, &head[hash], next) { if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { *val = ent->value; + if (te 
!= NULL) + *te = ent; return (1); } } @@ -3373,6 +3691,9 @@ ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, tfe->sport = htons(ent->sport); tent->v.kidx = ent->value; tent->subtype = ent->af; + tent->bcnt = ent->bcnt; + tent->pcnt = ent->pcnt; + tent->timestamp = ent->timestamp; if (ent->af == AF_INET) { fe4 = (struct fhashentry4 *)ent; @@ -3750,6 +4071,65 @@ ta_flush_mod_fhash(void *ta_buf) free(mi->main_ptr, M_IPFW); } +static void +ta_cnt_fhash_tentry(void *ta_state, struct table_info *ti, uint32_t keylen, + void *e, int pktlen) +{ + struct fhashentry *ent; + + ent = (struct fhashentry *)e; + ent->pcnt++; + ent->bcnt += pktlen; + ent->timestamp = time_uptime; +} + +static int +ta_zero_cnt_fhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct fhashentry6 fe6; + struct tentry_info tei; + int error; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + + ent = &fe6.e; + + memset(&fe6, 0, sizeof(fe6)); + memset(&tei, 0, sizeof(tei)); + + tei.paddr = &tent->k.flow; + tei.subtype = tent->subtype; + + if ((error = tei_to_fhash_ent(&tei, ent)) != 0) + return (error); + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei.subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + ent->pcnt = 0; + ent->bcnt = 0; + ent->timestamp = 0; + return (0); + } + } + + return (ENOENT); +} + struct table_algo flow_hash = { .name = "flow:hash", .type = IPFW_TABLE_FLOW, @@ -3771,6 +4151,8 @@ struct table_algo flow_hash = { .fill_mod = ta_fill_mod_fhash, .modify = ta_modify_fhash, .flush_mod = ta_flush_mod_fhash, + .cnt_tentry = ta_cnt_fhash_tentry, + .zero_cnt_tentry = ta_zero_cnt_fhash_tentry, }; /* @@ -3785,7 +4167,7 @@ struct table_algo flow_hash = { */ 
static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); + uint32_t *val, uint8_t *ea, void **te); static int kfib_parse_opts(int *pfib, char *data); static void ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf, size_t bufsize); @@ -3807,7 +4189,7 @@ static void ta_foreach_kfib(void *ta_state, struct table_info *ti, static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) + uint32_t *val, uint8_t *ea, void **te) { #ifdef INET struct nhop4_basic nh4; @@ -3836,6 +4218,8 @@ ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, return (0); *val = 0; + if (te != NULL) + *te = NULL; return (1); } @@ -4078,6 +4462,509 @@ struct table_algo addr_kfib = { .print_config = ta_print_kfib_config, }; +/* + * mac:hash cmds + * + * ti->data: + * [unused][log2hsize] + * [ 24][ 8] + * + */ + +struct mhashentry; + +SLIST_HEAD(mhashbhead, mhashentry); + +struct mhash_cfg { + struct mhashbhead *head; + size_t size; + size_t items; +}; + +struct macdata { + u_char addr[12]; /* dst[6] + src[6] */ + u_char mask[12]; /* dst[6] + src[6] */ + uint32_t value; + uint64_t bcnt; + uint64_t pcnt; + time_t timestamp; +}; + +struct mhashentry { + SLIST_ENTRY(mhashentry) next; + struct macdata *mac; +}; + +struct ta_buf_mhash { + void *ent_ptr; + struct macdata mac; +}; + +static __inline uint32_t +hash_mac2(u_char *mac, int hsize) +{ + uint32_t i; + + i = ((mac[2] << 16) | (mac[1] << 8) | (mac[0] << 0)) ^ + ((mac[5] << 16) | (mac[4] << 8) | (mac[3] << 0)) ^ + ((mac[8] << 16) | (mac[7] << 8) | (mac[6] << 0)) ^ + ((mac[11] << 16) | (mac[10] << 8) | (mac[9] << 0)); + + return (i % (hsize - 1)); +} + +static void +ta_print_mhash_config(void *ta_state, struct table_info *ti, char *buf, + size_t bufsize) +{ + snprintf(buf, bufsize, "%s", "mac:hash"); +} + +static __inline int +ta_lookup_find_mhash(struct mhashbhead *head, uint32_t hash2, + struct macdata *mac, uint32_t *val, uint8_t *ea, void **te) 
+{ + struct mhashentry *ent; + + SLIST_FOREACH(ent, &head[hash2], next) { + if (memcmp(&ent->mac->addr, mac->addr, sizeof(mac->addr)) != 0) + continue; + *val = ent->mac->value; + if (te != NULL) + *te = (void *)ent; + return (1); + } + + return (0); +} + +static int +ta_lookup_mhash(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val, uint8_t *ea, void **te) +{ + struct macdata mac; + struct mhashbhead *head; + uint32_t hash2, hsize; + + /* any any always match. */ + if (ti->xstate != NULL) { + *te = ti->xstate; + return (1); + } + + /* + * Look three times for a MAC is still faster than looking at whole + * table (128 entries by default). + */ + head = (struct mhashbhead *)ti->state; + hsize = 1 << (ti->data & 0xFF); + hash2 = hash_mac2(key, hsize); + if (ta_lookup_find_mhash(head, hash2, + (struct macdata *)key, val, ea, te) == 1) + return (1); + + /* src any */ + memcpy(mac.addr, key, 6); + memset(mac.addr + 6, 0, 6); + hash2 = hash_mac2(mac.addr, hsize); + if (ta_lookup_find_mhash(head, hash2, &mac, val, ea, te) == 1) + return (1); + + /* dst any */ + memset(mac.addr, 0, 6); + memcpy(mac.addr + 6, (uint8_t *)key + 6, 6); + hash2 = hash_mac2(mac.addr, hsize); + if (ta_lookup_find_mhash(head, hash2, &mac, val, ea, te) == 1) + return (1); + + return (0); +} + +static int +ta_init_mhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + int i; + struct mhash_cfg *cfg; + + cfg = malloc(sizeof(struct mhash_cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->size = 128; + cfg->head = malloc(sizeof(struct mhashbhead) * cfg->size, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < cfg->size; i++) + SLIST_INIT(&cfg->head[i]); + *ta_state = cfg; + ti->xstate = NULL; + ti->state = cfg->head; + ti->data = ta_log2(cfg->size); + ti->lookup = ta_lookup_mhash; + + return (0); +} + +static void +ta_destroy_mhash(void *ta_state, struct table_info *ti) +{ + int i; + struct mhash_cfg *cfg; + struct mhashentry *ent, *ent_next; + + 
cfg = (struct mhash_cfg *)ta_state; + + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) { + free(ent->mac, M_IPFW_TBL); + free(ent, M_IPFW_TBL); + } + + free(cfg->head, M_IPFW); + + free(cfg, M_IPFW); +} + +static void +ta_foreach_mhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct mhash_cfg *cfg; + struct mhashentry *ent, *ent_next; + int i; + + cfg = (struct mhash_cfg *)ta_state; + + if (ti->xstate != NULL) + f(ti->xstate, arg); + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) + f(ent, arg); +} + +static int +ta_dump_mhash_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct macdata *mac; + struct mhash_cfg *cfg; + + cfg = (struct mhash_cfg *)ta_state; + mac = ((struct mhashentry *)e)->mac; + + memcpy(&tent->k.mac, mac->addr, sizeof(mac->addr) + sizeof(mac->mask)); + tent->masklen = ETHER_ADDR_LEN * 8; + tent->subtype = AF_LINK; + tent->v.kidx = mac->value; + tent->bcnt = mac->bcnt; + tent->pcnt = mac->pcnt; + tent->timestamp = mac->timestamp; + + return (0); +} + +static int +ta_find_mhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct macdata mac; + struct mhash_cfg *cfg; + struct mhashentry *ent; + struct tentry_info tei; + uint32_t hash2; + u_char any[12]; + + cfg = (struct mhash_cfg *)ta_state; + + memset(&mac, 0, sizeof(mac)); + memset(&tei, 0, sizeof(tei)); + + tei.paddr = &tent->k.mac; + tei.subtype = AF_LINK; + + memcpy(mac.addr, tei.paddr, sizeof(mac.addr) + sizeof(mac.mask)); + + /* any any */ + memset(any, 0, sizeof(any)); + if (memcmp(mac.addr, any, sizeof(mac.addr)) == 0 && + ti->xstate != NULL) { + ta_dump_mhash_tentry(ta_state, ti, ti->xstate, tent); + return (0); + } + + /* Check for existence */ + hash2 = hash_mac2(mac.addr, cfg->size); + SLIST_FOREACH(ent, &cfg->head[hash2], next) { + if (memcmp(&ent->mac->addr, &mac.addr, sizeof(mac.addr)) != 0) + 
continue; + ta_dump_mhash_tentry(ta_state, ti, ent, tent); + return (0); + } + + return (ENOENT); +} + +static void +ta_dump_mhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct mhash_cfg *cfg; + + cfg = (struct mhash_cfg *)ta_state; + + tinfo->taclass4 = IPFW_TACLASS_HASH; + tinfo->size4 = cfg->size; + tinfo->count4 = cfg->items; + tinfo->itemsize4 = sizeof(struct mhashentry) + sizeof(struct macdata) - + sizeof(void *); +} + +static int +ta_prepare_add_mhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_mhash *tb; + struct mhashentry *ent; + struct macdata *mac; + + if (tei->subtype != AF_LINK) + return (EINVAL); + + tb = (struct ta_buf_mhash *)ta_buf; + ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); + mac = malloc(sizeof(*mac), M_IPFW_TBL, M_WAITOK | M_ZERO); + memcpy(mac->addr, tei->paddr, sizeof(mac->addr) + sizeof(mac->mask)); + + ent->mac = mac; + tb->ent_ptr = ent; + + return (0); +} + +static int +ta_add_mhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + int exists; + struct macdata *mac; + struct mhash_cfg *cfg; + struct mhashentry *ent, *tmp; + struct ta_buf_mhash *tb; + uint32_t hash2, value; + u_char any[12]; + + cfg = (struct mhash_cfg *)ta_state; + tb = (struct ta_buf_mhash *)ta_buf; + ent = (struct mhashentry *)tb->ent_ptr; + mac = ent->mac; + exists = 0; + + /* Read current value from @tei */ + mac->value = tei->value; + + if (tei->subtype != AF_LINK) + return (EINVAL); + + /* any any */ + memset(any, 0, sizeof(any)); + if (memcmp(mac->addr, any, sizeof(mac->addr)) == 0) { + if (ti->xstate != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. 
Update value if we're asked to */ + value = ((struct mhashentry *)ti->xstate)->mac->value; + ((struct mhashentry *)ti->xstate)->mac->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + } else { + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + ti->xstate = ent; + tb->ent_ptr = NULL; + *pnum = 1; + + /* Update counters */ + cfg->items++; + } + return (0); + } + + /* Check for existence */ + hash2 = hash_mac2(mac->addr, cfg->size); + SLIST_FOREACH(tmp, &cfg->head[hash2], next) { + if (memcmp(&tmp->mac->addr, &mac->addr, + sizeof(mac->addr)) == 0) { + exists = 1; + break; + } + } + + if (exists == 1) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. Update value if we're asked to */ + value = tmp->mac->value; + tmp->mac->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + } else { + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + SLIST_INSERT_HEAD(&cfg->head[hash2], ent, next); + tb->ent_ptr = NULL; + *pnum = 1; + + /* Update counters */ + cfg->items++; + } + + return (0); +} + +static int +ta_prepare_del_mhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_mhash *tb; + + tb = (struct ta_buf_mhash *)ta_buf; + + memcpy(tb->mac.addr, tei->paddr, sizeof(tb->mac.addr)); + + return (0); +} + +static int +ta_del_mhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct macdata *mac; + struct mhash_cfg *cfg; + struct mhashentry *tmp, *tmp_next; + struct ta_buf_mhash *tb; + uint32_t hash2; + u_char any[12]; + + cfg = (struct mhash_cfg *)ta_state; + tb = (struct ta_buf_mhash *)ta_buf; + mac = &tb->mac; + + if (tei->masklen != ETHER_ADDR_LEN * 8) + return (EINVAL); + + /* any any */ + memset(any, 0, sizeof(any)); + 
if (memcmp(mac->addr, any, sizeof(mac->addr)) == 0 && + ti->xstate != NULL) { + cfg->items--; + tb->ent_ptr = ti->xstate; + tei->value = ((struct mhashentry *)ti->xstate)->mac->value; + ti->xstate = NULL; + *pnum = 1; + return (0); + } + + hash2 = hash_mac2(mac->addr, cfg->size); + SLIST_FOREACH_SAFE(tmp, &cfg->head[hash2], next, tmp_next) { + if (memcmp(&tmp->mac->addr, &mac->addr, sizeof(mac->addr)) != 0) + continue; + + SLIST_REMOVE(&cfg->head[hash2], tmp, mhashentry, next); + cfg->items--; + tb->ent_ptr = tmp; + tei->value = tmp->mac->value; + *pnum = 1; + return (0); + } + + return (ENOENT); +} + +static void +ta_flush_mhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct mhashentry *ent; + struct ta_buf_mhash *tb; + + tb = (struct ta_buf_mhash *)ta_buf; + + if (tb->ent_ptr != NULL) { + ent = (struct mhashentry *)tb->ent_ptr; + free(ent->mac, M_IPFW_TBL); + free(tb->ent_ptr, M_IPFW_TBL); + tb->ent_ptr = NULL; + } +} + +static void +ta_cnt_mhash_tentry(void *ta_state, struct table_info *ti, uint32_t keylen, + void *e, int pktlen) +{ + struct mhashentry *ent; + + ent = (struct mhashentry *)e; + ent->mac->pcnt++; + ent->mac->bcnt += pktlen; + ent->mac->timestamp = time_uptime; +} + +static int +ta_zero_cnt_mhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct macdata mac; + struct mhash_cfg *cfg; + struct mhashentry *ent; + struct tentry_info tei; + uint32_t hash2; + + cfg = (struct mhash_cfg *)ta_state; + + memset(&mac, 0, sizeof(mac)); + memset(&tei, 0, sizeof(tei)); + + tei.paddr = &tent->k.mac; + tei.subtype = AF_LINK; + + memcpy(mac.addr, tei.paddr, sizeof(mac.addr) + sizeof(mac.mask)); + + /* Check for existence */ + hash2 = hash_mac2(mac.addr, cfg->size); + SLIST_FOREACH(ent, &cfg->head[hash2], next) { + if (memcmp(&ent->mac->addr, &mac.addr, sizeof(mac.addr)) != 0) + continue; + ent->mac->pcnt = 0; + ent->mac->bcnt = 0; + ent->mac->timestamp = 0; + return (0); + } + + return (ENOENT); 
+} + +struct table_algo mac_hash = { + .name = "mac:hash", + .type = IPFW_TABLE_MAC2, + .flags = TA_FLAG_DEFAULT, + .ta_buf_size = sizeof(struct ta_buf_mhash), + .print_config = ta_print_mhash_config, + .init = ta_init_mhash, + .destroy = ta_destroy_mhash, + .prepare_add = ta_prepare_add_mhash, + .prepare_del = ta_prepare_del_mhash, + .add = ta_add_mhash, + .del = ta_del_mhash, + .flush_entry = ta_flush_mhash_entry, + .foreach = ta_foreach_mhash, + .dump_tentry = ta_dump_mhash_tentry, + .find_tentry = ta_find_mhash_tentry, + .dump_tinfo = ta_dump_mhash_tinfo, + .cnt_tentry = ta_cnt_mhash_tentry, + .zero_cnt_tentry = ta_zero_cnt_mhash_tentry, +}; + void ipfw_table_algo_init(struct ip_fw_chain *ch) { @@ -4090,6 +4977,7 @@ ipfw_table_algo_init(struct ip_fw_chain *ch) ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx); ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx); ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx); + ipfw_add_table_algo(ch, &mac_hash, sz, &mac_hash.idx); ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx); ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx); ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx); @@ -4102,9 +4990,8 @@ ipfw_table_algo_destroy(struct ip_fw_chain *ch) ipfw_del_table_algo(ch, addr_radix.idx); ipfw_del_table_algo(ch, addr_hash.idx); ipfw_del_table_algo(ch, iface_idx.idx); + ipfw_del_table_algo(ch, mac_hash.idx); ipfw_del_table_algo(ch, number_array.idx); ipfw_del_table_algo(ch, flow_hash.idx); ipfw_del_table_algo(ch, addr_kfib.idx); } - - diff --git a/sys/netpfil/ipfw/nat64/nat64stl.c b/sys/netpfil/ipfw/nat64/nat64stl.c index 3a13aba..f12a9a6 100644 --- a/sys/netpfil/ipfw/nat64/nat64stl.c +++ b/sys/netpfil/ipfw/nat64/nat64stl.c @@ -184,8 +184,8 @@ nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg, * IPv4 mapped address. 
*/ ip6i = mtodo(m, hlen); - if (ipfw_lookup_table(chain, cfg->map64, - sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) { + if (ipfw_lookup_table(chain, cfg->map64, sizeof(struct in6_addr), + &ip6i->ip6_dst, &tablearg, NULL, NULL) == 0) { m_freem(m); return (NAT64RETURN); } @@ -222,11 +222,12 @@ ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, case 4: dst4 = htonl(args->f_id.dst_ip); ret = ipfw_lookup_table(chain, cfg->map46, sizeof(in_addr_t), - &dst4, &tablearg); + &dst4, &tablearg, NULL, NULL); break; case 6: ret = ipfw_lookup_table(chain, cfg->map64, - sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg); + sizeof(struct in6_addr), &args->f_id.src_ip6, + &tablearg, NULL, NULL); break; default: return (0); diff --git a/sys/netpfil/pf/if_pflog.c b/sys/netpfil/pf/if_pflog.c index cbf596b..3e5d7c4 100644 --- a/sys/netpfil/pf/if_pflog.c +++ b/sys/netpfil/pf/if_pflog.c @@ -222,13 +222,16 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, if (am == NULL) { hdr.rulenr = htonl(rm->nr); hdr.subrulenr = -1; + hdr.ridentifier = rm->cuid; } else { hdr.rulenr = htonl(am->nr); hdr.subrulenr = htonl(rm->nr); + hdr.ridentifier = rm->cuid; if (ruleset != NULL && ruleset->anchor != NULL) strlcpy(hdr.ruleset, ruleset->anchor->name, sizeof(hdr.ruleset)); } +#ifdef PF_USER_INFO /* * XXXGL: we avoid pf_socket_lookup() when we are holding * state lock, since this leads to unsafe LOR. 
@@ -243,6 +246,7 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, hdr.pid = NO_PID; hdr.rule_uid = rm->cuid; hdr.rule_pid = rm->cpid; +#endif hdr.dir = dir; #ifdef INET diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index e6561ee..acddac8 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -188,9 +188,6 @@ struct pfsync_softc { struct ip_moptions sc_imo; struct in_addr sc_sync_peer; uint32_t sc_flags; -#define PFSYNCF_OK 0x00000001 -#define PFSYNCF_DEFER 0x00000002 -#define PFSYNCF_PUSH 0x00000004 uint8_t sc_maxupdates; struct ip sc_template; struct callout sc_tmo; @@ -368,7 +365,7 @@ pfsync_clone_destroy(struct ifnet *ifp) callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); @@ -1156,7 +1153,7 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; @@ -1314,8 +1311,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == - (sc->sc_flags & PFSYNCF_DEFER)); + pfsyncr.pfsyncr_defer = sc->sc_flags; PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); @@ -1407,7 +1403,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; /* Request a full state table update. 
*/ - if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; @@ -1637,6 +1633,7 @@ pfsync_sendout(int schedswi) if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); sc->sc_len = PFSYNC_MINPKT; + /* XXX: Sould not drop voluntarily update packets! */ if (!_IF_QFULL(&sc->sc_ifp->if_snd)) _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); else { @@ -2162,7 +2159,7 @@ pfsync_bulk_fail(void *arg) sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); - if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) + if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p && V_pfsync_carp_adj > 0) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 1fa0b7a..6df9986 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -89,6 +89,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/udp_var.h> #include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */ +#include <netinet/ip_fw.h> +#include <netinet/ip_dummynet.h> #ifdef INET6 #include <netinet/ip6.h> @@ -231,6 +233,8 @@ static int pf_state_key_attach(struct pf_state_key *, static void pf_state_key_detach(struct pf_state *, int); static int pf_state_key_ctor(void *, int, void *, int); static u_int32_t pf_tcp_iss(struct pf_pdesc *); +void pf_rule_to_actions(struct pf_rule *, + struct pf_rule_actions *); static int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, struct pf_rule **, @@ -263,7 +267,8 @@ static int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_other(struct pf_state **, int, - struct pfi_kif *, struct mbuf *, struct pf_pdesc *); + struct pfi_kif *, struct mbuf *, int, + struct 
pf_pdesc *); static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, @@ -286,6 +291,8 @@ static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); +static void pf_packet_rework_nat(struct mbuf *, struct pf_pdesc *, + int, struct pf_state_key *); #ifdef INET static void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, @@ -302,31 +309,53 @@ static void pf_route6(struct mbuf **, struct pf_rule *, int, int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); extern int pf_end_threads; +extern struct proc *pf_purge_proc; VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); -#define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ - (pd)->pf_mtag->flags & PF_PACKET_LOOPED) +#define PACKET_UNDO_NAT(_m, _pd, _off, _s, _dir) \ + do { \ + struct pf_state_key *nk; \ + if ((_dir) == PF_OUT) \ + nk = (_s)->key[PF_SK_STACK]; \ + else \ + nk = (_s)->key[PF_SK_WIRE]; \ + pf_packet_rework_nat(_m, _pd, _off, nk); \ + } while (0) +#define PACKET_REDO_NAT(_m, _pd, _off, _s, _dir) \ + do { \ + struct pf_state_key *nk; \ + if ((_dir) == PF_OUT) \ + nk = (_s)->key[PF_SK_WIRE]; \ + else \ + nk = (_s)->key[PF_SK_STACK]; \ + pf_packet_rework_nat(_m, _pd, _off, nk); \ + } while (0) + + +#define PACKET_LOOPED(pd) (((pd)->pf_mtag && \ + (pd)->pf_mtag->flags & PF_PACKET_LOOPED) ? 
1 : 0) #define STATE_LOOKUP(i, k, d, s, pd) \ do { \ (s) = pf_find_state((i), (k), (d)); \ if ((s) == NULL) \ return (PF_DROP); \ - if (PACKET_LOOPED(pd)) \ + if (PACKET_LOOPED(pd)) { \ + if ((s)->key[PF_SK_WIRE] != (s)->key[PF_SK_STACK]) { \ + PACKET_REDO_NAT(m, pd, off, s, direction); \ + } \ return (PF_PASS); \ + } \ if ((d) == PF_OUT && \ (((s)->rule.ptr->rt == PF_ROUTETO && \ - (s)->rule.ptr->direction == PF_OUT) || \ - ((s)->rule.ptr->rt == PF_REPLYTO && \ - (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rule.ptr->direction == PF_OUT)) && \ (s)->rt_kif != NULL && \ (s)->rt_kif != (i)) \ return (PF_PASS); \ } while (0) -#define BOUND_IFACE(r, k) \ - ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all +#define BOUND_IFACE(r, k) k #define STATE_INC_COUNTERS(s) \ do { \ @@ -412,6 +441,72 @@ pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) return (0); } +static void +pf_packet_rework_nat(struct mbuf *m, struct pf_pdesc *pd, int off, + struct pf_state_key *nk) +{ + + switch (pd->proto) { + case IPPROTO_TCP: { + struct tcphdr *th = pd->hdr.tcp; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) + pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) + pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + m_copyback(m, off, sizeof(*th), (caddr_t)th); + } + break; + case IPPROTO_UDP: { + struct udphdr *uh = pd->hdr.udp; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) + pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) + pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); + m_copyback(m, off, sizeof(*uh), (caddr_t)uh); + } + break; + /* case IPPROTO_ICMP: */ + 
/* XXX: If we want to do this for icmp is probably wrong!?! */ + /* break; */ + default: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + break; + } + } + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) { + switch (pd->af) { + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, nk->addr[pd->didx].v4.s_addr, + 0); + break; + case AF_INET6: + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + break; + } + } + break; + } +} + static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { @@ -1293,7 +1388,8 @@ pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) /* List is sorted, if-bound states before floating ones. */ TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) - if (s->kif == V_pfi_all || s->kif == kif) { + /* if (s->kif == V_pfi_all || s->kif == kif) { */ + { PF_STATE_LOCK(s); PF_HASHROW_UNLOCK(kh); if (s->timeout >= PFTM_MAX) { @@ -1428,48 +1524,44 @@ pf_purge_thread(void *unused __unused) VNET_ITERATOR_DECL(vnet_iter); u_int idx = 0; - for (;;) { - PF_RULES_RLOCK(); - rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10); - PF_RULES_RUNLOCK(); + sx_xlock(&pf_end_lock); + while (pf_end_threads == 0) { + sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", hz / 10); VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - if (pf_end_threads) { - pf_end_threads++; - wakeup(pf_purge_thread); - kproc_exit(0); - } - - /* Wait while V_pf_default_rule.timeout is initialized. */ - if (V_pf_vnet_active == 0) { - CURVNET_RESTORE(); - continue; - } + /* Wait while V_pf_default_rule.timeout is initialized. */ + if (V_pf_vnet_active == 0) { + CURVNET_RESTORE(); + continue; + } - /* Process 1/interval fraction of the state table every run. 
*/ - idx = pf_purge_expired_states(idx, pf_hashmask / + /* Process 1/interval fraction of the state table every run. */ + idx = pf_purge_expired_states(idx, pf_hashmask / (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); - /* Purge other expired types every PFTM_INTERVAL seconds. */ - if (idx == 0) { - /* - * Order is important: - * - states and src nodes reference rules - * - states and rules reference kifs - */ - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(); - pf_purge_unlinked_rules(); - pfi_kif_purge(); - } - CURVNET_RESTORE(); + /* Purge other expired types every PFTM_INTERVAL seconds. */ + if (idx == 0) { + /* + * Order is important: + * - states and src nodes reference rules + * - states and rules reference kifs + */ + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + pf_purge_unlinked_rules(); + pfi_kif_purge(); + } + CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); } - /* not reached */ + + pf_end_threads++; + sx_xunlock(&pf_end_lock); + kproc_exit(0); } void @@ -2671,6 +2763,7 @@ pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) return (pf_match(op, a1, a2, p)); } +#ifdef PF_USER_INFO static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { @@ -2686,6 +2779,7 @@ pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) return (0); return (pf_match(op, a1, a2, g)); } +#endif int pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) @@ -2879,6 +2973,22 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) } #endif /* INET6 */ +void +pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) +{ + if (r->qid) + a->qid = r->qid; + if (r->pqid) + a->pqid = r->pqid; + if (r->pdnpipe) + a->pdnpipe = r->pdnpipe; + if (r->dnpipe) + a->dnpipe = r->dnpipe; + if (r->free_flags & PFRULE_DN_IS_PIPE) + a->flags |= PFRULE_DN_IS_PIPE; +} + +#ifdef PF_USER_INFO int pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) { @@ -2958,6 +3068,7 @@ pf_socket_lookup(int direction, struct pf_pdesc 
*pd, struct mbuf *m) return (1); } +#endif static u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) @@ -3135,12 +3246,14 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, PF_RULES_RASSERT(); +#ifdef PF_USER_INFO if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; pd->lookup.done = 1; } +#endif switch (pd->proto) { case IPPROTO_TCP: @@ -3351,7 +3464,11 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, /* icmp only. type always 0 in other cases */ else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); - else if (r->tos && !(r->tos == pd->tos)) + else if ((r->rule_flag & PFRULE_TOS) && r->tos && + !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if ((r->rule_flag & PFRULE_DSCP) && r->tos && + !(r->tos == (pd->tos & DSCP_MASK))) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); @@ -3359,6 +3476,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); /* tcp/udp only. 
uid.op always 0 in other cases */ +#ifdef PF_USER_INFO else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(direction, pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], @@ -3370,6 +3488,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); +#endif else if (r->prio && !pf_match_ieee8021q_pcp(r->prio, m)) r = TAILQ_NEXT(r, entries); @@ -3390,10 +3509,20 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (r->rtableid >= 0) rtableid = r->rtableid; if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; + if (r->action == PF_MATCH) { + r->packets[direction == PF_OUT]++; + r->bytes[direction == PF_OUT] += pd->tot_len; + pf_rule_to_actions(r, &pd->act); + if (r->log) + PFLOG_PACKET(kif, m, af, + direction, PFRES_MATCH, r, + a, ruleset, pd, 1); + } else { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); @@ -3412,6 +3541,9 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, REASON_SET(&reason, PFRES_MATCH); + /* apply actions for last matching pass/block rule */ + pf_rule_to_actions(r, &pd->act); + if (r->log || (nr != NULL && nr->log)) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); @@ -3585,6 +3717,11 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; + s->qid = pd->act.qid; + s->pqid = pd->act.pqid; + s->pdnpipe = pd->act.pdnpipe; + s->dnpipe = pd->act.dnpipe; + s->state_flags |= pd->act.flags; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; switch (pd->proto) { @@ -3823,6 +3960,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, 
entries); + else if ((r->rule_flag & PFRULE_DSCP) && r->tos && + !(r->tos == (pd->tos & DSCP_MASK))) + r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_UDP && @@ -3846,10 +3986,20 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { - match = 1; - *rm = r; - *am = a; - *rsm = ruleset; + if (r->action == PF_MATCH) { + r->packets[direction == PF_OUT]++; + r->bytes[direction == PF_OUT] += pd->tot_len; + pf_rule_to_actions(r, &pd->act); + if (r->log) + PFLOG_PACKET(kif, m, af, + direction, PFRES_MATCH, r, + a, ruleset, pd, 1); + } else { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + } if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); @@ -3868,6 +4018,9 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, REASON_SET(&reason, PFRES_MATCH); + /* apply actions for last matching pass/block rule */ + pf_rule_to_actions(r, &pd->act); + if (r->log) PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, 1); @@ -5106,7 +5259,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, static int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, - struct mbuf *m, struct pf_pdesc *pd) + struct mbuf *m, int off, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; @@ -5442,6 +5595,12 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip = mtod(m0, struct ip *); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + bzero(&dst, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); @@ -5486,7 +5645,72 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ifp == NULL) goto bad; - if (oifp != ifp) { + else if (r->rt == PF_REPLYTO || (r->rt == PF_ROUTETO && ifp->if_type == IFT_ENC)) { + /* 
XXX: Copied from ifaof_ifpforaddr() since it mostly will not return NULL! */ + struct sockaddr_in inaddr; + struct sockaddr *addr; + struct ifaddr *ifa; + char *cp, *cp2, *cp3; + char *cplim; + + inaddr.sin_addr = ip->ip_dst; + inaddr.sin_family = AF_INET; + inaddr.sin_len = sizeof(inaddr); + inaddr.sin_port = 0; + addr = (struct sockaddr *)&inaddr; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + if (ifa->ifa_netmask == 0) { + if ((bcmp(addr, ifa->ifa_addr, addr->sa_len) == 0) || + (ifa->ifa_dstaddr && + (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0))) { + IF_ADDR_RUNLOCK(ifp); + return; + } + continue; + } + if (ifp->if_flags & IFF_POINTOPOINT) { + if (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } else { + cp = addr->sa_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; + for (; cp3 < cplim; cp3++) + if ((*cp++ ^ *cp2++) & *cp3) + break; + if (cp3 == cplim) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } + } + IF_ADDR_RUNLOCK(ifp); + } + else if (r->rt == PF_ROUTETO && r->direction == dir && in_localip(ip->ip_dst)) + return; + + if (s != NULL && r->rt == PF_REPLYTO) { + /* + * Send it out since it came from state recorded ifp(rt_addr). + * Routing table lookup might have chosen not correct interface! + */ + } else if (oifp != ifp) { + if (in_broadcast(ip->ip_dst, oifp)) /* XXX: LOCKING of address list?! 
*/ + return; + + if (s && r->rt == PF_ROUTETO && pd->nat_rule != NULL && + r->direction == PF_OUT && r->direction == dir && + pd->pf_mtag->routed < 2) { + PACKET_UNDO_NAT(m0, pd, ntohs(ip->ip_off), s, dir); + } + if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) @@ -5539,6 +5763,9 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); if (r->rt != PF_DUPTO) { + if (s && pd->nat_rule != NULL) + PACKET_UNDO_NAT(m0, pd, ntohs(ip->ip_off), s, dir); + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); goto done; @@ -5618,6 +5845,12 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip6 = mtod(m0, struct ip6_hdr *); + if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { + if (s) + PF_STATE_UNLOCK(s); + return; + } + bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); @@ -5657,8 +5890,70 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (ifp == NULL) goto bad; + else if (r->rt == PF_REPLYTO) { + /* XXX: Copied from ifaof_ifpforaddr() since it mostly will not return NULL! 
*/ + struct sockaddr_in6 inaddr6; + struct sockaddr *addr; + struct ifaddr *ifa; + char *cp, *cp2, *cp3; + char *cplim; + + inaddr6.sin6_addr = ip6->ip6_dst; + inaddr6.sin6_family = AF_INET6; + inaddr6.sin6_len = sizeof(inaddr6); + inaddr6.sin6_port = 0; + inaddr6.sin6_flowinfo = 0; + addr = (struct sockaddr *)&inaddr6; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + if (ifa->ifa_netmask == 0) { + if ((bcmp(addr, ifa->ifa_addr, addr->sa_len) == 0) || + (ifa->ifa_dstaddr && + (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0))) { + IF_ADDR_RUNLOCK(ifp); + return; + } + continue; + } + if (ifp->if_flags & IFF_POINTOPOINT) { + if (bcmp(addr, ifa->ifa_dstaddr, addr->sa_len) == 0) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } else { + cp = addr->sa_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; + for (; cp3 < cplim; cp3++) + if ((*cp++ ^ *cp2++) & *cp3) + break; + if (cp3 == cplim) { + IF_ADDR_RUNLOCK(ifp); + return; + } + } + } + IF_ADDR_RUNLOCK(ifp); + } else if (r->rt == PF_ROUTETO && r->direction == dir && in6_localaddr(&ip6->ip6_dst)) + return; + + if (s != NULL && r->rt == PF_REPLYTO) { + /* + * Send it out since it came from state recorded ifp(rt_addr). + * Routing table lookup might have chosen not correct interface! 
+ */ + } else if (oifp != ifp) { + if (s && r->rt == PF_ROUTETO && pd->nat_rule != NULL && + r->direction == PF_OUT && r->direction == dir && + pd->pf_mtag->routed < 2) { + int ip_off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); + PACKET_UNDO_NAT(m0, pd, ip_off, s, dir); + } - if (oifp != ifp) { if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) @@ -5692,9 +5987,12 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, nd6_output_ifp(ifp, ifp, m0, &dst, NULL); else { in6_ifstat_inc(ifp, ifs6_in_toobig); - if (r->rt != PF_DUPTO) + if (r->rt != PF_DUPTO) { + if (s && pd->nat_rule != NULL) + PACKET_UNDO_NAT(m0, pd, ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr), s, dir); + icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); - else + } else goto bad; } @@ -5860,7 +6158,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; - int off, dirndx, pqid = 0; + int off = 0, dirndx, pqid = 0; + struct ip_fw_args dnflow; M_ASSERTPKTHDR(m); @@ -5886,22 +6185,19 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) PF_RULES_RLOCK(); - if (ip_divert_ptr != NULL && + if ((ip_divert_ptr != NULL || ip_dn_io_ptr != NULL) && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { - struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); - if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { - if (pd.pf_mtag == NULL && - ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { - action = PF_DROP; - goto done; - } - pd.pf_mtag->flags |= PF_PACKET_LOOPED; - m_tag_delete(m, ipfwtag); + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + goto done; } + pd.pf_mtag->flags |= PF_PACKET_LOOPED; if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; } + m_tag_delete(m, 
ipfwtag); } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ action = PF_DROP; @@ -5928,7 +6224,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) pd.sidx = (dir == PF_IN) ? 0 : 1; pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET; - pd.tos = h->ip_tos; + pd.tos = h->ip_tos & ~IPTOS_ECN_MASK; pd.tot_len = ntohs(h->ip_len); /* handle fragments that didn't get reassembled by normalization */ @@ -5949,6 +6245,9 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) log = action != PF_PASS; goto done; } + dnflow.f_id._flags = th.th_flags; + dnflow.f_id.dst_port = ntohs(th.th_dport); + dnflow.f_id.src_port = ntohs(th.th_sport); pd.p_len = pd.tot_len - off - (th.th_off << 2); if ((th.th_flags & TH_ACK) && pd.p_len == 0) pqid = 1; @@ -5958,6 +6257,20 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { + if (dir == PF_IN && s != NULL && + s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) { + dnflow.f_id.dst_port = + ntohs(s->key[(s->direction == PF_IN)]-> + port[(s->direction == PF_OUT)]); + } + if (dir == PF_OUT && s != NULL && + s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action != PF_NAT) { + dnflow.f_id.src_port = + ntohs(s->key[(s->direction == PF_OUT)]-> + port[(s->direction == PF_IN)]); + } if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; @@ -5978,6 +6291,8 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) log = action != PF_PASS; goto done; } + dnflow.f_id.dst_port = ntohs(uh.uh_dport); + dnflow.f_id.src_port = ntohs(uh.uh_sport); if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { @@ -5987,6 +6302,20 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } action = 
pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { + if (dir == PF_IN && s != NULL && + s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) { + dnflow.f_id.dst_port = + ntohs(s->key[(s->direction == PF_IN)]-> + port[(s->direction == PF_OUT)]); + } + if (dir == PF_OUT && s != NULL && + s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action != PF_NAT) { + dnflow.f_id.src_port = + ntohs(s->key[(s->direction == PF_OUT)]-> + port[(s->direction == PF_IN)]); + } if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; @@ -6031,7 +6360,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) #endif default: - action = pf_test_state_other(&s, dir, kif, m, &pd); + action = pf_test_state_other(&s, dir, kif, m, off, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); @@ -6075,7 +6404,14 @@ done: } #ifdef ALTQ - if (action == PF_PASS && r->qid) { + if (s && s->qid) { + pd.act.pqid = s->pqid; + pd.act.qid = s->qid; + } else if (r->qid) { + pd.act.pqid = r->pqid; + pd.act.qid = r->qid; + } + if (action == PF_PASS && pd.act.qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; @@ -6084,9 +6420,9 @@ done: if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); if (pqid || (pd.tos & IPTOS_LOWDELAY)) - pd.pf_mtag->qid = r->pqid; + pd.pf_mtag->qid = pd.act.pqid; else - pd.pf_mtag->qid = r->qid; + pd.pf_mtag->qid = pd.act.qid; /* Add hints for ecn. 
*/ pd.pf_mtag->hdr = h; } @@ -6094,8 +6430,78 @@ done: } #endif /* ALTQ */ + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (s && (s->dnpipe || s->pdnpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.pdnpipe = s->pdnpipe; + pd.act.flags = s->state_flags; + } else if (r->dnpipe || r->pdnpipe) { + pd.act.dnpipe = r->dnpipe; + pd.act.pdnpipe = r->pdnpipe; + pd.act.flags = r->free_flags; + } + if ((pd.act.dnpipe || pd.act.pdnpipe) && ip_dn_io_ptr == NULL) { + /* XXX: ipfw has the same behaviour! */ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } else if (action == PF_PASS && + (pd.act.dnpipe || pd.act.pdnpipe) && !PACKET_LOOPED(&pd)) { + if (dir != r->direction && pd.act.pdnpipe) { + dnflow.rule.info = pd.act.pdnpipe; + } else if (dir == r->direction) { + dnflow.rule.info = pd.act.dnpipe; + } else + goto continueprocessing; + + if (pd.act.flags & PFRULE_DN_IS_PIPE) + dnflow.rule.info |= IPFW_IS_PIPE; + dnflow.f_id.addr_type = 4; /* IPv4 type */ + dnflow.f_id.proto = pd.proto; + if (dir == PF_OUT && s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) + dnflow.f_id.src_ip = + ntohl(s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)].v4.s_addr); + else + dnflow.f_id.src_ip = ntohl(h->ip_src.s_addr); + if (dir == PF_IN && s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action != PF_NAT) + dnflow.f_id.dst_ip = + ntohl(s->key[(s->direction == PF_OUT)]-> + addr[(s->direction == PF_IN)].v4.s_addr); + else + dnflow.f_id.dst_ip = ntohl(h->ip_dst.s_addr); + dnflow.f_id.extra = dnflow.rule.info; + + if (m->m_flags & M_FASTFWD_OURS) { + pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; + m->m_flags &= ~M_FASTFWD_OURS; + } + + if (s != NULL && s->nat_rule.ptr) + PACKET_UNDO_NAT(m, &pd, off, s, dir); + + ip_dn_io_ptr(m0, (dir == PF_IN) ? 
DIR_IN : DIR_OUT, &dnflow); + if (*m0 == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return (action); + } + /* This is dummynet fast io processing */ + ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); + if (ipfwtag != NULL) + m_tag_delete(*m0, ipfwtag); + if (s != NULL && s->nat_rule.ptr) + PACKET_REDO_NAT(m, &pd, off, s, dir); + } +continueprocessing: + /* - * connections redirected to loopback should not match sockets + * Connections redirected to loopback should match sockets * bound specifically to loopback due to security implications, * see tcp_input() and in_pcblookup_listen(). */ @@ -6104,7 +6510,7 @@ done: (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) - m->m_flags |= M_SKIP_FIREWALL; + m->m_flags |= M_FASTFWD_OURS; if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && !PACKET_LOOPED(&pd)) { @@ -6148,6 +6554,9 @@ done: } } + if (PACKET_LOOPED(&pd)) + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + if (log) { struct pf_rule *lr; @@ -6247,7 +6656,9 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; - int off, terminal = 0, dirndx, rh_cnt = 0, pqid = 0; + int off = 0, terminal = 0, dirndx, rh_cnt = 0, pqid = 0; + struct m_tag *dn_tag; + struct ip_fw_args dnflow; int fwdir = dir; M_ASSERTPKTHDR(m); @@ -6293,8 +6704,22 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) PF_RULES_RLOCK(); + if (ip_dn_io_ptr != NULL && + ((dn_tag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + goto done; + } + pd.pf_mtag->flags |= PF_PACKET_LOOPED; + if (pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { + m->m_flags |= M_FASTFWD_OURS; + pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; + } + m_tag_delete(m, dn_tag); + } /* We do IP header 
normalization and packet reassembly here */ - if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { + else if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } @@ -6408,6 +6833,9 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) log = action != PF_PASS; goto done; } + dnflow.f_id._flags = th.th_flags; + dnflow.f_id.dst_port = th.th_dport; + dnflow.f_id.src_port = th.th_sport; pd.p_len = pd.tot_len - off - (th.th_off << 2); action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) @@ -6435,6 +6863,8 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) log = action != PF_PASS; goto done; } + dnflow.f_id.dst_port = uh.uh_dport; + dnflow.f_id.src_port = uh.uh_sport; if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { @@ -6486,7 +6916,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) } default: - action = pf_test_state_other(&s, dir, kif, m, &pd); + action = pf_test_state_other(&s, dir, kif, m, off, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); @@ -6536,7 +6966,14 @@ done: } #ifdef ALTQ - if (action == PF_PASS && r->qid) { + if (s && s->qid) { + pd.act.pqid = s->pqid; + pd.act.qid = s->qid; + } else if (r->qid) { + pd.act.pqid = r->pqid; + pd.act.qid = r->qid; + } + if (action == PF_PASS && pd.act.qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; @@ -6545,26 +6982,88 @@ done: if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); if (pd.tos & IPTOS_LOWDELAY) - pd.pf_mtag->qid = r->pqid; + pd.pf_mtag->qid = pd.act.pqid; else - pd.pf_mtag->qid = r->qid; + pd.pf_mtag->qid = pd.act.qid; /* Add hints for ecn. 
*/ pd.pf_mtag->hdr = h; } } #endif /* ALTQ */ + if (pd.pf_mtag == NULL && + ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } + if (s && (s->dnpipe || s->pdnpipe)) { + pd.act.dnpipe = s->dnpipe; + pd.act.pdnpipe = s->pdnpipe; + pd.act.flags = s->state_flags; + } else if (r->dnpipe || r->pdnpipe) { + pd.act.dnpipe = r->dnpipe; + pd.act.pdnpipe = r->pdnpipe; + pd.act.flags = r->free_flags; + } + if ((pd.act.dnpipe || pd.act.pdnpipe) && ip_dn_io_ptr == NULL) { + /* XXX: ipfw has the same behaviour! */ + action = PF_DROP; + REASON_SET(&reason, PFRES_MEMORY); + } else if (action == PF_PASS && + (pd.act.dnpipe || pd.act.pdnpipe) && !PACKET_LOOPED(&pd)) { + if (dir != r->direction && pd.act.pdnpipe) { + dnflow.rule.info = pd.act.pdnpipe; + } else if (dir == r->direction && pd.act.dnpipe) { + dnflow.rule.info = pd.act.dnpipe; + } else + goto continueprocessing6; + + if (pd.act.flags & PFRULE_DN_IS_PIPE) + dnflow.rule.info |= IPFW_IS_PIPE; + dnflow.f_id.addr_type = 6; /* IPv4 type */ + dnflow.f_id.proto = pd.proto; + dnflow.f_id.src_ip = 0; + dnflow.f_id.dst_ip = 0; + if (dir == PF_OUT && s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->action == PF_NAT) + dnflow.f_id.src_ip6 = s->key[(s->direction == PF_IN)]->addr[0].v6; + else + dnflow.f_id.src_ip6 = h->ip6_src; + dnflow.f_id.dst_ip6 = h->ip6_dst; + + if (s != NULL && s->nat_rule.ptr) + PACKET_UNDO_NAT(m, &pd, off, s, dir); + + ip_dn_io_ptr(m0, + ((dir == PF_IN) ? 
DIR_IN : DIR_OUT) | PROTO_IPV6, &dnflow); + if (*m0 == NULL) { + if (s) + PF_STATE_UNLOCK(s); + return (action); + } + /* This is dummynet fast io processing */ + dn_tag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); + if (dn_tag != NULL) + m_tag_delete(*m0, dn_tag); + if (s != NULL && s->nat_rule.ptr) + PACKET_REDO_NAT(m, &pd, off, s, dir); + } +continueprocessing6: + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) - m->m_flags |= M_SKIP_FIREWALL; + m->m_flags |= M_FASTFWD_OURS; /* XXX: Anybody working on it?! */ if (r->divert.port) printf("pf: divert(9) is not supported for IPv6\n"); + if (PACKET_LOOPED(&pd)) + pd.pf_mtag->flags &= ~PF_PACKET_LOOPED; + if (log) { struct pf_rule *lr; diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h index ac0e0fb..16e60eb 100644 --- a/sys/netpfil/pf/pf.h +++ b/sys/netpfil/pf/pf.h @@ -45,7 +45,8 @@ enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, - PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER }; + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER, + PF_MATCH }; enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index cf7f6f2..8ca8184 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -198,9 +198,11 @@ VNET_DEFINE(int, pf_vnet_active); #define V_pf_vnet_active VNET(pf_vnet_active) int pf_end_threads; +struct proc *pf_purge_proc; struct rwlock pf_rules_lock; struct sx pf_ioctl_lock; +struct sx pf_end_lock; /* pfsync */ pfsync_state_import_t *pfsync_state_import_ptr = NULL; @@ -1168,7 +1170,9 @@ pfioctl(struct cdev *dev, u_long cmd, 
caddr_t addr, int flags, struct thread *td rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); rule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifdef PF_USER_INFO rule->cuid = td->td_ucred->cr_ruid; +#endif rule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&rule->rpool.list); @@ -1194,7 +1198,6 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td V_ticket_pabuf)); ERROUT(EBUSY); } - tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_rulequeue); if (tail) @@ -1278,8 +1281,29 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td } rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list); +#ifndef PF_USER_INFO + if (rule->cuid) { + tail = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((tail != NULL) && (tail->cuid != rule->cuid)) + tail = TAILQ_NEXT(tail, entries); + if (tail != NULL) { + rule->evaluations = tail->evaluations; + rule->packets[0] = tail->packets[0]; + rule->packets[1] = tail->packets[1]; + rule->bytes[0] = tail->bytes[0]; + rule->bytes[1] = tail->bytes[1]; + } else { + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + } + } else { + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + } +#else rule->evaluations = rule->packets[0] = rule->packets[1] = rule->bytes[0] = rule->bytes[1] = 0; +#endif TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; @@ -1429,7 +1453,9 @@ DIOCADDRULE_error: newrule->states_cur = counter_u64_alloc(M_WAITOK); newrule->states_tot = counter_u64_alloc(M_WAITOK); newrule->src_nodes = counter_u64_alloc(M_WAITOK); +#ifdef PF_USER_INFO newrule->cuid = td->td_ucred->cr_ruid; +#endif newrule->cpid = td->td_proc ? 
td->td_proc->p_pid : 0; TAILQ_INIT(&newrule->rpool.list); } @@ -1717,6 +1743,30 @@ relock_DIOCKILLSTATES: break; } + case DIOCKILLSCHEDULE: { + struct pf_state *state; + struct pfioc_schedule_kill *psk = (struct pfioc_schedule_kill *)addr; + int killed = 0; + u_int i; + + for (i = 0; i <= pf_hashmask; i++) { + struct pf_idhash *ih = &V_pf_idhash[i]; + +relock_DIOCKILLSCHEDULE: + PF_HASHROW_LOCK(ih); + LIST_FOREACH(state, &ih->states, entry) { + if (!strcmp(psk->schedule, state->rule.ptr->schedule)) { + pf_unlink_state(state, PF_ENTER_LOCKED); + killed++; + goto relock_DIOCKILLSCHEDULE; + } + } + PF_HASHROW_UNLOCK(ih); + } + psk->numberkilled = killed; + break; + } + case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pfsync_state *sp = &ps->state; @@ -3646,8 +3696,8 @@ hook_pf(void) pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (pfh_inet == NULL) return (ESRCH); /* XXX */ - pfil_add_hook(pf_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); - pfil_add_hook(pf_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + pfil_add_named_hook(pf_check_in, NULL, "pf", PFIL_IN | PFIL_WAITOK, pfh_inet); + pfil_add_named_hook(pf_check_out, NULL, "pf", PFIL_OUT | PFIL_WAITOK, pfh_inet); #endif #ifdef INET6 pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); @@ -3660,8 +3710,10 @@ hook_pf(void) #endif return (ESRCH); /* XXX */ } - pfil_add_hook(pf_check6_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); - pfil_add_hook(pf_check6_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + pfil_add_named_hook(pf_check6_in, NULL, "pf", PFIL_IN | PFIL_WAITOK, + pfh_inet6); + pfil_add_named_hook(pf_check6_out, NULL, "pf", PFIL_OUT | PFIL_WAITOK, + pfh_inet6); #endif V_pf_pfil_hooked = 1; @@ -3730,6 +3782,7 @@ pf_load(void) rw_init(&pf_rules_lock, "pf rulesets"); sx_init(&pf_ioctl_lock, "pf ioctl"); + sx_init(&pf_end_lock, "pf end thread"); pf_mtag_initialize(); @@ -3738,7 +3791,7 @@ pf_load(void) return (ENOMEM); pf_end_threads = 0; - error = 
kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pf purge"); + error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge"); if (error != 0) return (error); @@ -3766,12 +3819,12 @@ pf_unload_vnet(void) return; } - pf_unload_vnet_purge(); - PF_RULES_WLOCK(); shutdown_pf(); PF_RULES_WUNLOCK(); + pf_unload_vnet_purge(); + pf_normalize_cleanup(); PF_RULES_WLOCK(); pfi_cleanup_vnet(); @@ -3788,11 +3841,13 @@ pf_unload(void) { int error = 0; + sx_xlock(&pf_end_lock); pf_end_threads = 1; while (pf_end_threads < 2) { wakeup_one(pf_purge_thread); - rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0); + sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0); } + sx_xunlock(&pf_end_lock); if (pf_dev != NULL) destroy_dev(pf_dev); @@ -3801,6 +3856,7 @@ pf_unload(void) rw_destroy(&pf_rules_lock); sx_destroy(&pf_ioctl_lock); + sx_destroy(&pf_end_lock); return (error); } diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c index f54e038..81ef54d 100644 --- a/sys/netpfil/pf/pf_norm.c +++ b/sys/netpfil/pf/pf_norm.c @@ -1815,7 +1815,7 @@ pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos) u_int16_t ov, nv; ov = *(u_int16_t *)h; - h->ip_tos = tos; + h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK); nv = *(u_int16_t *)h; h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0); diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 61da586..5bb3be6 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -121,6 +121,7 @@ pf_get_ruleset_number(u_int8_t action) return (PF_RULESET_SCRUB); break; case PF_PASS: + case PF_MATCH: case PF_DROP: return (PF_RULESET_FILTER); break; diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c index 44fbdf8..2071437 100644 --- a/sys/opencrypto/crypto.c +++ b/sys/opencrypto/crypto.c @@ -69,7 +69,9 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/proc.h> #include <sys/sdt.h> +#include <sys/smp.h> #include <sys/sysctl.h> +#include 
<sys/taskqueue.h> #include <ddb/ddb.h> @@ -133,26 +135,51 @@ static int crypto_drivers_num = 0; * operations. */ static int crp_sleep = 0; -static TAILQ_HEAD(,cryptop) crp_q; /* request queues */ +static TAILQ_HEAD(cryptop_q ,cryptop) crp_q; /* request queues */ static TAILQ_HEAD(,cryptkop) crp_kq; static struct mtx crypto_q_mtx; #define CRYPTO_Q_LOCK() mtx_lock(&crypto_q_mtx) #define CRYPTO_Q_UNLOCK() mtx_unlock(&crypto_q_mtx) /* - * There are two queues for processing completed crypto requests; one - * for the symmetric and one for the asymmetric ops. We only need one - * but have two to avoid type futzing (cryptop vs. cryptkop). A single - * mutex is used to lock access to both queues. Note that this lock - * must be separate from the lock on request queues to insure driver - * callbacks don't generate lock order reversals. + * Taskqueue used to dispatch the crypto requests + * that have the CRYPTO_F_ASYNC flag */ -static TAILQ_HEAD(,cryptop) crp_ret_q; /* callback queues */ -static TAILQ_HEAD(,cryptkop) crp_ret_kq; -static struct mtx crypto_ret_q_mtx; -#define CRYPTO_RETQ_LOCK() mtx_lock(&crypto_ret_q_mtx) -#define CRYPTO_RETQ_UNLOCK() mtx_unlock(&crypto_ret_q_mtx) -#define CRYPTO_RETQ_EMPTY() (TAILQ_EMPTY(&crp_ret_q) && TAILQ_EMPTY(&crp_ret_kq)) +static struct taskqueue *crypto_tq; + +/* + * Crypto seq numbers are operated on with modular arithmetic + */ +#define CRYPTO_SEQ_GT(a,b) ((int)((a)-(b)) > 0) + +struct crypto_ret_worker { + struct mtx crypto_ret_mtx; + + TAILQ_HEAD(,cryptop) crp_ordered_ret_q; /* ordered callback queue for symetric jobs */ + TAILQ_HEAD(,cryptop) crp_ret_q; /* callback queue for symetric jobs */ + TAILQ_HEAD(,cryptkop) crp_ret_kq; /* callback queue for asym jobs */ + + u_int32_t reorder_ops; /* total ordered sym jobs received */ + u_int32_t reorder_cur_seq; /* current sym job dispatched */ + + struct proc *cryptoretproc; +}; +static struct crypto_ret_worker *crypto_ret_workers = NULL; + +#define CRYPTO_RETW(i) (&crypto_ret_workers[i]) 
+#define CRYPTO_RETW_ID(w) ((w) - crypto_ret_workers) +#define FOREACH_CRYPTO_RETW(w) \ + for (w = crypto_ret_workers; w < crypto_ret_workers + crypto_workers_num; ++w) + +#define CRYPTO_RETW_LOCK(w) mtx_lock(&w->crypto_ret_mtx) +#define CRYPTO_RETW_UNLOCK(w) mtx_unlock(&w->crypto_ret_mtx) +#define CRYPTO_RETW_EMPTY(w) \ + (TAILQ_EMPTY(&w->crp_ret_q) && TAILQ_EMPTY(&w->crp_ret_kq) && TAILQ_EMPTY(&w->crp_ordered_ret_q)) + +static int crypto_workers_num = 0; +SYSCTL_INT(_kern, OID_AUTO, crypto_workers_num, CTLFLAG_RDTUN, + &crypto_workers_num, 0, + "Number of crypto workers used to dispatch crypto jobs"); static uma_zone_t cryptop_zone; static uma_zone_t cryptodesc_zone; @@ -170,11 +197,12 @@ MALLOC_DEFINE(M_CRYPTO_DATA, "crypto", "crypto session records"); static void crypto_proc(void); static struct proc *cryptoproc; -static void crypto_ret_proc(void); -static struct proc *cryptoretproc; +static void crypto_ret_proc(struct crypto_ret_worker *ret_worker); static void crypto_destroy(void); static int crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint); static int crypto_kinvoke(struct cryptkop *krp, int flags); +static void crypto_task_invoke(void *ctx, int pending); +static void crypto_batch_enqueue(struct cryptop *crp); static struct cryptostats cryptostats; SYSCTL_STRUCT(_kern, OID_AUTO, crypto_stats, CTLFLAG_RW, &cryptostats, @@ -189,6 +217,7 @@ SYSCTL_INT(_debug, OID_AUTO, crypto_timing, CTLFLAG_RW, static int crypto_init(void) { + struct crypto_ret_worker *ret_worker; int error; mtx_init(&crypto_drivers_mtx, "crypto", "crypto driver table", @@ -198,10 +227,6 @@ crypto_init(void) TAILQ_INIT(&crp_kq); mtx_init(&crypto_q_mtx, "crypto", "crypto op queues", MTX_DEF); - TAILQ_INIT(&crp_ret_q); - TAILQ_INIT(&crp_ret_kq); - mtx_init(&crypto_ret_q_mtx, "crypto", "crypto return queues", MTX_DEF); - cryptop_zone = uma_zcreate("cryptop", sizeof (struct cryptop), 0, 0, 0, 0, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); @@ -223,6 +248,20 @@ crypto_init(void) goto bad; } + 
if (crypto_workers_num < 1 || crypto_workers_num > mp_ncpus) + crypto_workers_num = mp_ncpus; + + crypto_tq = taskqueue_create("crypto", M_WAITOK|M_ZERO, + taskqueue_thread_enqueue, &crypto_tq); + if (crypto_tq == NULL) { + printf("crypto init: cannot setup crypto taskqueue\n"); + error = ENOMEM; + goto bad; + } + + taskqueue_start_threads(&crypto_tq, crypto_workers_num, PRI_MIN_KERN, + "crypto"); + error = kproc_create((void (*)(void *)) crypto_proc, NULL, &cryptoproc, 0, 0, "crypto"); if (error) { @@ -231,13 +270,33 @@ crypto_init(void) goto bad; } - error = kproc_create((void (*)(void *)) crypto_ret_proc, NULL, - &cryptoretproc, 0, 0, "crypto returns"); - if (error) { - printf("crypto_init: cannot start cryptoret thread; error %d", - error); + crypto_ret_workers = malloc(crypto_workers_num * sizeof(struct crypto_ret_worker), + M_CRYPTO_DATA, M_NOWAIT|M_ZERO); + if (crypto_ret_workers == NULL) { + error = ENOMEM; + printf("crypto_init: cannot allocate ret workers\n"); goto bad; } + + FOREACH_CRYPTO_RETW(ret_worker) { + TAILQ_INIT(&ret_worker->crp_ordered_ret_q); + TAILQ_INIT(&ret_worker->crp_ret_q); + TAILQ_INIT(&ret_worker->crp_ret_kq); + + ret_worker->reorder_ops = 0; + ret_worker->reorder_cur_seq = 0; + + mtx_init(&ret_worker->crypto_ret_mtx, "crypto", "crypto return queues", MTX_DEF); + + error = kproc_create((void (*)(void *)) crypto_ret_proc, ret_worker, + &ret_worker->cryptoretproc, 0, 0, "crypto returns %td", CRYPTO_RETW_ID(ret_worker)); + if (error) { + printf("crypto_init: cannot start cryptoret thread; error %d", + error); + goto bad; + } + } + return 0; bad: crypto_destroy(); @@ -272,12 +331,17 @@ crypto_terminate(struct proc **pp, void *q) static void crypto_destroy(void) { + struct crypto_ret_worker *ret_worker; + /* * Terminate any crypto threads. 
*/ + if (crypto_tq != NULL) + taskqueue_drain_all(crypto_tq); CRYPTO_DRIVER_LOCK(); crypto_terminate(&cryptoproc, &crp_q); - crypto_terminate(&cryptoretproc, &crp_ret_q); + FOREACH_CRYPTO_RETW(ret_worker) + crypto_terminate(&ret_worker->cryptoretproc, &ret_worker->crp_ret_q); CRYPTO_DRIVER_UNLOCK(); /* XXX flush queues??? */ @@ -293,7 +357,11 @@ crypto_destroy(void) if (cryptop_zone != NULL) uma_zdestroy(cryptop_zone); mtx_destroy(&crypto_q_mtx); - mtx_destroy(&crypto_ret_q_mtx); + FOREACH_CRYPTO_RETW(ret_worker) + mtx_destroy(&ret_worker->crypto_ret_mtx); + free(crypto_ret_workers, M_CRYPTO_DATA); + if (crypto_tq != NULL) + taskqueue_free(crypto_tq); mtx_destroy(&crypto_drivers_mtx); } @@ -792,9 +860,26 @@ crypto_dispatch(struct cryptop *crp) binuptime(&crp->crp_tstamp); #endif - hid = CRYPTO_SESID2HID(crp->crp_sid); + if (CRYPTOP_ASYNC(crp)) { + if (crp->crp_flags & CRYPTO_F_ASYNC_KEEPORDER) { + struct crypto_ret_worker *ret_worker; + + crp->crp_retw_id = crp->crp_sid % crypto_workers_num; + ret_worker = CRYPTO_RETW(crp->crp_retw_id); + + CRYPTO_RETW_LOCK(ret_worker); + crp->crp_seq = ret_worker->reorder_ops++; + CRYPTO_RETW_UNLOCK(ret_worker); + } + + TASK_INIT(&crp->crp_task, 0, crypto_task_invoke, crp); + taskqueue_enqueue(crypto_tq, &crp->crp_task); + return (0); + } if ((crp->crp_flags & CRYPTO_F_BATCH) == 0) { + hid = CRYPTO_SESID2HID(crp->crp_sid); + /* * Caller marked the request to be processed * immediately; dispatch it directly to the @@ -813,12 +898,19 @@ crypto_dispatch(struct cryptop *crp) */ } } + crypto_batch_enqueue(crp); + return 0; +} + +void +crypto_batch_enqueue(struct cryptop *crp) +{ + CRYPTO_Q_LOCK(); TAILQ_INSERT_TAIL(&crp_q, crp, crp_next); if (crp_sleep) wakeup_one(&crp_q); CRYPTO_Q_UNLOCK(); - return 0; } /* @@ -999,6 +1091,23 @@ crypto_tstat(struct cryptotstat *ts, struct bintime *bt) } #endif +static void +crypto_task_invoke(void *ctx, int pending) +{ + struct cryptocap *cap; + struct cryptop *crp; + int hid, result; + + crp = 
(struct cryptop *)ctx; + + hid = CRYPTO_SESID2HID(crp->crp_sid); + cap = crypto_checkdriver(hid); + + result = crypto_invoke(cap, crp, 0); + if (result == ERESTART) + crypto_batch_enqueue(crp); +} + /* * Dispatch a crypto request to the appropriate crypto devices. */ @@ -1061,6 +1170,7 @@ crypto_freereq(struct cryptop *crp) #ifdef DIAGNOSTIC { struct cryptop *crp2; + struct crypto_ret_worker *ret_worker; CRYPTO_Q_LOCK(); TAILQ_FOREACH(crp2, &crp_q, crp_next) { @@ -1069,13 +1179,16 @@ crypto_freereq(struct cryptop *crp) crp)); } CRYPTO_Q_UNLOCK(); - CRYPTO_RETQ_LOCK(); - TAILQ_FOREACH(crp2, &crp_ret_q, crp_next) { - KASSERT(crp2 != crp, - ("Freeing cryptop from the return queue (%p).", - crp)); + + FOREACH_CRYPTO_RETW(ret_worker) { + CRYPTO_RETW_LOCK(ret_worker); + TAILQ_FOREACH(crp2, &ret_worker->crp_ret_q, crp_next) { + KASSERT(crp2 != crp, + ("Freeing cryptop from the return queue (%p).", + crp)); + } + CRYPTO_RETW_UNLOCK(ret_worker); } - CRYPTO_RETQ_UNLOCK(); } #endif @@ -1133,9 +1246,10 @@ crypto_done(struct cryptop *crp) * doing extraneous context switches; the latter is mostly * used with the software crypto driver. */ - if ((crp->crp_flags & CRYPTO_F_CBIMM) || + if (!CRYPTOP_ASYNC_KEEPORDER(crp) && + ((crp->crp_flags & CRYPTO_F_CBIMM) || ((crp->crp_flags & CRYPTO_F_CBIFSYNC) && - (CRYPTO_SESID2CAPS(crp->crp_sid) & CRYPTOCAP_F_SYNC))) { + (CRYPTO_SESID2CAPS(crp->crp_sid) & CRYPTOCAP_F_SYNC)))) { /* * Do the callback directly. This is ok when the * callback routine does very little (e.g. the @@ -1156,14 +1270,45 @@ crypto_done(struct cryptop *crp) #endif crp->crp_callback(crp); } else { + struct crypto_ret_worker *ret_worker; + bool wake; + + ret_worker = CRYPTO_RETW(crp->crp_retw_id); + wake = false; + /* * Normal case; queue the callback for the thread. 
*/ - CRYPTO_RETQ_LOCK(); - if (CRYPTO_RETQ_EMPTY()) - wakeup_one(&crp_ret_q); /* shared wait channel */ - TAILQ_INSERT_TAIL(&crp_ret_q, crp, crp_next); - CRYPTO_RETQ_UNLOCK(); + CRYPTO_RETW_LOCK(ret_worker); + if (CRYPTOP_ASYNC_KEEPORDER(crp)) { + struct cryptop *tmp; + + TAILQ_FOREACH_REVERSE(tmp, &ret_worker->crp_ordered_ret_q, + cryptop_q, crp_next) { + if (CRYPTO_SEQ_GT(crp->crp_seq, tmp->crp_seq)) { + TAILQ_INSERT_AFTER(&ret_worker->crp_ordered_ret_q, + tmp, crp, crp_next); + break; + } + } + if (tmp == NULL) { + TAILQ_INSERT_HEAD(&ret_worker->crp_ordered_ret_q, + crp, crp_next); + } + + if (crp->crp_seq == ret_worker->reorder_cur_seq) + wake = true; + } + else { + if (CRYPTO_RETW_EMPTY(ret_worker)) + wake = true; + + TAILQ_INSERT_TAIL(&ret_worker->crp_ret_q, crp, crp_next); + } + + if (wake) + wakeup_one(&ret_worker->crp_ret_q); /* shared wait channel */ + CRYPTO_RETW_UNLOCK(ret_worker); } } @@ -1173,6 +1318,7 @@ crypto_done(struct cryptop *crp) void crypto_kdone(struct cryptkop *krp) { + struct crypto_ret_worker *ret_worker; struct cryptocap *cap; if (krp->krp_status != 0) @@ -1187,11 +1333,14 @@ crypto_kdone(struct cryptkop *krp) crypto_remove(cap); } CRYPTO_DRIVER_UNLOCK(); - CRYPTO_RETQ_LOCK(); - if (CRYPTO_RETQ_EMPTY()) - wakeup_one(&crp_ret_q); /* shared wait channel */ - TAILQ_INSERT_TAIL(&crp_ret_kq, krp, krp_next); - CRYPTO_RETQ_UNLOCK(); + + ret_worker = CRYPTO_RETW(0); + + CRYPTO_RETW_LOCK(ret_worker); + if (CRYPTO_RETW_EMPTY(ret_worker)) + wakeup_one(&ret_worker->crp_ret_q); /* shared wait channel */ + TAILQ_INSERT_TAIL(&ret_worker->crp_ret_kq, krp, krp_next); + CRYPTO_RETW_UNLOCK(ret_worker); } int @@ -1391,24 +1540,36 @@ crypto_proc(void) * callbacks typically are expensive and would slow interrupt handling. 
*/ static void -crypto_ret_proc(void) +crypto_ret_proc(struct crypto_ret_worker *ret_worker) { struct cryptop *crpt; struct cryptkop *krpt; - CRYPTO_RETQ_LOCK(); + CRYPTO_RETW_LOCK(ret_worker); for (;;) { /* Harvest return q's for completed ops */ - crpt = TAILQ_FIRST(&crp_ret_q); - if (crpt != NULL) - TAILQ_REMOVE(&crp_ret_q, crpt, crp_next); + crpt = TAILQ_FIRST(&ret_worker->crp_ordered_ret_q); + if (crpt != NULL) { + if (crpt->crp_seq == ret_worker->reorder_cur_seq) { + TAILQ_REMOVE(&ret_worker->crp_ordered_ret_q, crpt, crp_next); + ret_worker->reorder_cur_seq++; + } else { + crpt = NULL; + } + } - krpt = TAILQ_FIRST(&crp_ret_kq); + if (crpt == NULL) { + crpt = TAILQ_FIRST(&ret_worker->crp_ret_q); + if (crpt != NULL) + TAILQ_REMOVE(&ret_worker->crp_ret_q, crpt, crp_next); + } + + krpt = TAILQ_FIRST(&ret_worker->crp_ret_kq); if (krpt != NULL) - TAILQ_REMOVE(&crp_ret_kq, krpt, krp_next); + TAILQ_REMOVE(&ret_worker->crp_ret_kq, krpt, krp_next); if (crpt != NULL || krpt != NULL) { - CRYPTO_RETQ_UNLOCK(); + CRYPTO_RETW_UNLOCK(ret_worker); /* * Run callbacks unlocked. */ @@ -1430,22 +1591,22 @@ crypto_ret_proc(void) } if (krpt != NULL) krpt->krp_callback(krpt); - CRYPTO_RETQ_LOCK(); + CRYPTO_RETW_LOCK(ret_worker); } else { /* * Nothing more to be processed. Sleep until we're * woken because there are more returns to process. 
*/ - msleep(&crp_ret_q, &crypto_ret_q_mtx, PWAIT, + msleep(&ret_worker->crp_ret_q, &ret_worker->crypto_ret_mtx, PWAIT, "crypto_ret_wait", 0); - if (cryptoretproc == NULL) + if (ret_worker->cryptoretproc == NULL) break; cryptostats.cs_rets++; } } - CRYPTO_RETQ_UNLOCK(); + CRYPTO_RETW_UNLOCK(ret_worker); - crypto_finis(&crp_ret_q); + crypto_finis(&ret_worker->crp_ret_q); } #ifdef DDB @@ -1480,6 +1641,7 @@ db_show_drivers(void) DB_SHOW_COMMAND(crypto, db_show_crypto) { struct cryptop *crp; + struct crypto_ret_worker *ret_worker; db_show_drivers(); db_printf("\n"); @@ -1498,16 +1660,19 @@ DB_SHOW_COMMAND(crypto, db_show_crypto) , crp->crp_callback ); } - if (!TAILQ_EMPTY(&crp_ret_q)) { - db_printf("\n%4s %4s %4s %8s\n", - "HID", "Etype", "Flags", "Callback"); - TAILQ_FOREACH(crp, &crp_ret_q, crp_next) { - db_printf("%4u %4u %04x %8p\n" - , (int) CRYPTO_SESID2HID(crp->crp_sid) - , crp->crp_etype - , crp->crp_flags - , crp->crp_callback - ); + FOREACH_CRYPTO_RETW(ret_worker) { + db_printf("\n%8s %4s %4s %4s %8s\n", + "ret_worker", "HID", "Etype", "Flags", "Callback"); + if (!TAILQ_EMPTY(&ret_worker->crp_ret_q)) { + TAILQ_FOREACH(crp, &ret_worker->crp_ret_q, crp_next) { + db_printf("%8td %4u %4u %04x %8p\n" + , CRYPTO_RETW_ID(ret_worker) + , (int) CRYPTO_SESID2HID(crp->crp_sid) + , crp->crp_etype + , crp->crp_flags + , crp->crp_callback + ); + } } } } @@ -1515,6 +1680,7 @@ DB_SHOW_COMMAND(crypto, db_show_crypto) DB_SHOW_COMMAND(kcrypto, db_show_kcrypto) { struct cryptkop *krp; + struct crypto_ret_worker *ret_worker; db_show_drivers(); db_printf("\n"); @@ -1530,10 +1696,12 @@ DB_SHOW_COMMAND(kcrypto, db_show_kcrypto) , krp->krp_callback ); } - if (!TAILQ_EMPTY(&crp_ret_q)) { + + ret_worker = CRYPTO_RETW(0); + if (!TAILQ_EMPTY(&ret_worker->crp_ret_q)) { db_printf("%4s %5s %8s %4s %8s\n", "Op", "Status", "CRID", "HID", "Callback"); - TAILQ_FOREACH(krp, &crp_ret_kq, krp_next) { + TAILQ_FOREACH(krp, &ret_worker->crp_ret_kq, krp_next) { db_printf("%4u %5u %08x %4u %8p\n" , 
krp->krp_op , krp->krp_status diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index d14fb3a..91bec56 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -63,6 +63,7 @@ #define _CRYPTO_CRYPTO_H_ #include <sys/ioccom.h> +#include <sys/_task.h> /* Some initial values */ #define CRYPTO_DRIVERS_INITIAL 4 @@ -395,6 +396,8 @@ struct cryptodesc { struct cryptop { TAILQ_ENTRY(cryptop) crp_next; + struct task crp_task; + u_int64_t crp_sid; /* Session ID */ int crp_ilen; /* Input data total length */ int crp_olen; /* Result total length */ @@ -417,6 +420,14 @@ struct cryptop { #define CRYPTO_F_CBIMM 0x0010 /* Do callback immediately */ #define CRYPTO_F_DONE 0x0020 /* Operation completed */ #define CRYPTO_F_CBIFSYNC 0x0040 /* Do CBIMM if op is synchronous */ +#define CRYPTO_F_ASYNC 0x0080 /* Dispatch crypto jobs on several threads + * if op is synchronous + */ +#define CRYPTO_F_ASYNC_KEEPORDER 0x0100 /* + * Dispatch the crypto jobs in the same + * order there are submitted. 
Applied only + * if CRYPTO_F_ASYNC flags is set + */ caddr_t crp_buf; /* Data to be processed */ caddr_t crp_opaque; /* Opaque pointer, passed along */ @@ -425,8 +436,20 @@ struct cryptop { int (*crp_callback)(struct cryptop *); /* Callback function */ struct bintime crp_tstamp; /* performance time stamp */ + uint32_t crp_seq; /* used for ordered dispatch */ + uint32_t crp_retw_id; /* + * the return worker to be used, + * used for ordered dispatch + */ }; +#define CRYPTOP_ASYNC(crp) \ + (((crp)->crp_flags & CRYPTO_F_ASYNC) && \ + CRYPTO_SESID2CAPS((crp)->crp_sid) & CRYPTOCAP_F_SYNC) +#define CRYPTOP_ASYNC_KEEPORDER(crp) \ + (CRYPTOP_ASYNC(crp) && \ + (crp)->crp_flags & CRYPTO_F_ASYNC_KEEPORDER) + #define CRYPTO_BUF_CONTIG 0x0 #define CRYPTO_BUF_IOV 0x1 #define CRYPTO_BUF_MBUF 0x2 diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 71ce72a..aa96807 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1513,7 +1513,7 @@ moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, struct pvo_entry *pvo; vm_offset_t va; vm_page_t m; - int pflags, needed_lock; + int needed_lock; /* * This entire routine is a horrible hack to avoid bothering kmem @@ -1524,17 +1524,11 @@ moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); - pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; - - for (;;) { - m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); - if (m == NULL) { - if (wait & M_NOWAIT) - return (NULL); - VM_WAIT; - } else - break; - } + + m = vm_page_alloc(NULL, 0, + malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); + if (m == NULL) + return (NULL); va = VM_PAGE_TO_PHYS(m); diff --git a/sys/powerpc/aim/slb.c b/sys/powerpc/aim/slb.c index 72a595b..caf6f4f 100644 --- a/sys/powerpc/aim/slb.c +++ b/sys/powerpc/aim/slb.c @@ -483,24 +483,16 @@ slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) static 
vm_offset_t realmax = 0; void *va; vm_page_t m; - int pflags; if (realmax == 0) realmax = platform_real_maxaddr(); *flags = UMA_SLAB_PRIV; - pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; - - for (;;) { - m = vm_page_alloc_contig(NULL, 0, pflags, 1, 0, realmax, - PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT); - if (m == NULL) { - if (wait & M_NOWAIT) - return (NULL); - VM_WAIT; - } else - break; - } + m = vm_page_alloc_contig(NULL, 0, + malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, + 1, 0, realmax, PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT); + if (m == NULL) + return (NULL); va = (void *) VM_PAGE_TO_PHYS(m); diff --git a/sys/powerpc/conf/GENERIC b/sys/powerpc/conf/GENERIC index b0703f2..787aaa5 100644 --- a/sys/powerpc/conf/GENERIC +++ b/sys/powerpc/conf/GENERIC @@ -38,6 +38,7 @@ options PREEMPTION #Enable kernel thread preemption options INET #InterNETworking options INET6 #IPv6 communications protocols options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options SCTP #Stream Control Transmission Protocol options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support diff --git a/sys/powerpc/include/proc.h b/sys/powerpc/include/proc.h index 4981581..d11cf88 100644 --- a/sys/powerpc/include/proc.h +++ b/sys/powerpc/include/proc.h @@ -53,6 +53,13 @@ struct mdproc { #define KINFO_PROC_SIZE 768 #endif +struct syscall_args { + u_int code; + struct sysent *callp; + register_t args[10]; + int narg; +}; + #ifdef _KERNEL #include <machine/pcb.h> @@ -65,13 +72,6 @@ struct mdproc { td->td_kstack_pages * PAGE_SIZE - \ (char *)&td; \ } while (0) - -struct syscall_args { - u_int code; - struct sysent *callp; - register_t args[10]; - int narg; -}; #endif #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/powerpc/powerpc/trap.c b/sys/powerpc/powerpc/trap.c index 4464dd8..95fb554 100644 --- a/sys/powerpc/powerpc/trap.c +++ b/sys/powerpc/powerpc/trap.c @@ -484,16 +484,18 @@ 
handle_onfault(struct trapframe *frame) } int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; caddr_t params; size_t argsz; int error, n, i; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; sa->code = frame->fixreg[0]; params = (caddr_t)(frame->fixreg + FIRSTARG); @@ -575,7 +577,6 @@ void syscall(struct trapframe *frame) { struct thread *td; - struct syscall_args sa; int error; td = curthread; @@ -590,8 +591,8 @@ syscall(struct trapframe *frame) "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #endif - error = syscallenter(td, &sa); - syscallret(td, error, &sa); + error = syscallenter(td); + syscallret(td, error); } #ifdef __powerpc64__ diff --git a/sys/powerpc/powerpc/uma_machdep.c b/sys/powerpc/powerpc/uma_machdep.c index d5a458f..e9be5ca 100644 --- a/sys/powerpc/powerpc/uma_machdep.c +++ b/sys/powerpc/powerpc/uma_machdep.c @@ -55,20 +55,13 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) void *va; vm_paddr_t pa; vm_page_t m; - int pflags; *flags = UMA_SLAB_PRIV; - pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; - for (;;) { - m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); - if (m == NULL) { - if (wait & M_NOWAIT) - return (NULL); - VM_WAIT; - } else - break; - } + m = vm_page_alloc(NULL, 0, + malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); + if (m == NULL) + return (NULL); pa = VM_PAGE_TO_PHYS(m); diff --git a/sys/riscv/conf/GENERIC b/sys/riscv/conf/GENERIC index 13f595a4..fd4951f 100644 --- a/sys/riscv/conf/GENERIC +++ b/sys/riscv/conf/GENERIC @@ -32,6 +32,7 @@ options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission 
Protocol options FFS # Berkeley Fast Filesystem diff --git a/sys/riscv/include/proc.h b/sys/riscv/include/proc.h index 6732681..644bb91 100644 --- a/sys/riscv/include/proc.h +++ b/sys/riscv/include/proc.h @@ -45,8 +45,6 @@ struct mdproc { #define KINFO_PROC_SIZE 1088 -#ifdef _KERNEL - #define MAXARGS 8 struct syscall_args { u_int code; @@ -55,6 +53,4 @@ struct syscall_args { int narg; }; -#endif - #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/riscv/riscv/trap.c b/sys/riscv/riscv/trap.c index 22d27f4..7e85c16 100644 --- a/sys/riscv/riscv/trap.c +++ b/sys/riscv/riscv/trap.c @@ -89,14 +89,16 @@ call_trapsignal(struct thread *td, int sig, int code, void *addr) } int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; + struct syscall_args *sa; int nap; nap = 8; p = td->td_proc; + sa = &td->td_sa; ap = &td->td_frame->tf_a[0]; sa->code = td->td_frame->tf_t[0]; @@ -151,15 +153,14 @@ dump_regs(struct trapframe *frame) static void svc_handler(struct trapframe *frame) { - struct syscall_args sa; struct thread *td; int error; td = curthread; td->td_frame = frame; - error = syscallenter(td, &sa); - syscallret(td, error, &sa); + error = syscallenter(td); + syscallret(td, error); } static void diff --git a/sys/sparc64/conf/GENERIC b/sys/sparc64/conf/GENERIC index e593e18..96a1edb 100644 --- a/sys/sparc64/conf/GENERIC +++ b/sys/sparc64/conf/GENERIC @@ -31,6 +31,7 @@ options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support diff --git a/sys/sparc64/include/proc.h b/sys/sparc64/include/proc.h index 4b5e1c9..8d1dec3 100644 --- a/sys/sparc64/include/proc.h +++ 
b/sys/sparc64/include/proc.h @@ -53,6 +53,13 @@ struct mdproc { #define KINFO_PROC_SIZE 1088 +struct syscall_args { + u_int code; + struct sysent *callp; + register_t args[8]; + int narg; +}; + #ifdef _KERNEL #include <machine/pcb.h> @@ -66,13 +73,6 @@ struct mdproc { (char *)&td; \ } while (0) -struct syscall_args { - u_int code; - struct sysent *callp; - register_t args[8]; - int narg; -}; - #endif #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 0f06fd1..cf5c9d8 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -1230,7 +1230,6 @@ int pmap_pinit(pmap_t pm) { vm_page_t ma[TSB_PAGES]; - vm_page_t m; int i; /* @@ -1253,14 +1252,11 @@ pmap_pinit(pmap_t pm) CPU_ZERO(&pm->pm_active); VM_OBJECT_WLOCK(pm->pm_tsb_obj); - for (i = 0; i < TSB_PAGES; i++) { - m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY | - VM_ALLOC_WIRED | VM_ALLOC_ZERO); - m->valid = VM_PAGE_BITS_ALL; - m->md.pmap = pm; - ma[i] = m; - } + (void)vm_page_grab_pages(pm->pm_tsb_obj, 0, VM_ALLOC_NORMAL | + VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | VM_ALLOC_ZERO, ma, TSB_PAGES); VM_OBJECT_WUNLOCK(pm->pm_tsb_obj); + for (i = 0; i < TSB_PAGES; i++) + ma[i]->md.pmap = pm; pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); diff --git a/sys/sparc64/sparc64/trap.c b/sys/sparc64/sparc64/trap.c index 73719f2..e77ccf7 100644 --- a/sys/sparc64/sparc64/trap.c +++ b/sys/sparc64/sparc64/trap.c @@ -538,17 +538,19 @@ trap_pfault(struct thread *td, struct trapframe *tf) #define REG_MAXARGS 6 int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct trapframe *tf; struct proc *p; register_t *argp; + struct syscall_args *sa; int reg; int regcnt; int error; p = td->td_proc; tf = td->td_frame; + sa = &td->td_sa; reg = 0; regcnt = REG_MAXARGS; @@ -596,7 +598,6 @@ void syscall(struct trapframe *tf) { struct thread *td; - struct syscall_args 
sa; int error; td = curthread; @@ -612,6 +613,6 @@ syscall(struct trapframe *tf) td->td_pcb->pcb_tpc = tf->tf_tpc; TF_DONE(tf); - error = syscallenter(td, &sa); - syscallret(td, error, &sa); + error = syscallenter(td); + syscallret(td, error); } diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c index a8b244c..10d1d36 100644 --- a/sys/sparc64/sparc64/vm_machdep.c +++ b/sys/sparc64/sparc64/vm_machdep.c @@ -394,24 +394,16 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) { vm_paddr_t pa; vm_page_t m; - int pflags; void *va; PMAP_STATS_INC(uma_nsmall_alloc); *flags = UMA_SLAB_PRIV; - pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; - - for (;;) { - m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); - if (m == NULL) { - if (wait & M_NOWAIT) - return (NULL); - else - VM_WAIT; - } else - break; - } + + m = vm_page_alloc(NULL, 0, + malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); + if (m == NULL) + return (NULL); pa = VM_PAGE_TO_PHYS(m); if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) { diff --git a/sys/sys/_pctrie.h b/sys/sys/_pctrie.h index 45f69b2..c6d13ba 100644 --- a/sys/sys/_pctrie.h +++ b/sys/sys/_pctrie.h @@ -38,14 +38,4 @@ struct pctrie { uintptr_t pt_root; }; -#ifdef _KERNEL - -static __inline boolean_t -pctrie_is_empty(struct pctrie *ptree) -{ - - return (ptree->pt_root == 0); -} - -#endif /* _KERNEL */ #endif /* !__SYS_PCTRIE_H_ */ diff --git a/sys/sys/gpio.h b/sys/sys/gpio.h index 9b0a1b5..4b63baa 100644 --- a/sys/sys/gpio.h +++ b/sys/sys/gpio.h @@ -56,6 +56,11 @@ #define GPIO_PIN_LOW 0x00 /* low level (logical 0) */ #define GPIO_PIN_HIGH 0x01 /* high level (logical 1) */ +/* GPIO PWM settings */ +#define GPIO_PWM_DUTY 0x01 /* PWM duty cycle */ +#define GPIO_PWM_FREQ 0x02 /* PWM frequency */ +#define GPIO_PWM_PERIOD 0x04 /* PWM period */ + /* Max name length of a pin */ #define GPIOMAXNAME 64 @@ -70,6 +75,8 @@ #define GPIO_PIN_INVIN 0x00000080 /* invert input */ #define 
GPIO_PIN_INVOUT 0x00000100 /* invert output */ #define GPIO_PIN_PULSATE 0x00000200 /* pulsate in hardware */ +#define GPIO_PIN_PWM 0x00000400 /* pwm output */ +#define GPIO_PIN_MUX 0x00000800 /* pin mux */ /* GPIO interrupt capabilities */ #define GPIO_INTR_NONE 0x00000000 /* no interrupt support */ #define GPIO_INTR_LEVEL_LOW 0x00010000 /* level trigger, low */ @@ -86,6 +93,7 @@ struct gpio_pin { char gp_name[GPIOMAXNAME]; /* human-readable name */ uint32_t gp_caps; /* capabilities */ uint32_t gp_flags; /* current flags */ + uint32_t gp_pwm_caps; /* pwm capabilities */ }; /* GPIO pin request (read/write/toggle) */ @@ -94,6 +102,15 @@ struct gpio_req { uint32_t gp_value; /* value */ }; +/* GPIO pwm request (read/write) */ +struct gpio_pwm_req { + int32_t gp_pwm; /* pwm number */ + uint32_t gp_pwm_pin; /* pin number */ + uint32_t gp_pwm_reg; /* register */ + uint32_t gp_pwm_value; /* value */ + uint32_t gp_pwm_caps; /* pwm capabilities */ +}; + /* * ioctls */ @@ -104,5 +121,9 @@ struct gpio_req { #define GPIOSET _IOW('G', 4, struct gpio_req) #define GPIOTOGGLE _IOWR('G', 5, struct gpio_req) #define GPIOSETNAME _IOW('G', 6, struct gpio_pin) +#define GPIOMAXPWM _IOR('G', 7, int) +#define GPIOPWMGETCONFIG _IOWR('G', 8, struct gpio_pwm_req) +#define GPIOPWMGET _IOWR('G', 9, struct gpio_pwm_req) +#define GPIOPWMSET _IOW('G', 10, struct gpio_pwm_req) #endif /* __GPIO_H__ */ diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index 7124349..c9478fd 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -400,13 +400,16 @@ struct tunable_str { #define TUNABLE_STR_FETCH(path, var, size) \ getenv_string((path), (var), (size)) +typedef void (*ich_func_t)(void *_arg); + struct intr_config_hook { TAILQ_ENTRY(intr_config_hook) ich_links; - void (*ich_func)(void *arg); - void *ich_arg; + ich_func_t ich_func; + void *ich_arg; }; int config_intrhook_establish(struct intr_config_hook *hook); void config_intrhook_disestablish(struct intr_config_hook *hook); +void 
config_intrhook_oneshot(ich_func_t _func, void *_arg); #endif /* !_SYS_KERNEL_H_*/ diff --git a/sys/sys/param.h b/sys/sys/param.h index 16d3d6f..ad25a3c 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -85,6 +85,9 @@ #define P_OSREL_MAP_GUARD 1200035 #define P_OSREL_MAP_GUARD_11 1101501 #define P_OSREL_MAP_GUARD_11_1 1101001 +#define P_OSREL_WRFSBASE 1200041 +#define P_OSREL_WRFSBASE_11 1101503 +#define P_OSREL_WRFSBASE_11_1 1101001 #define P_OSREL_MAJOR(x) ((x) / 100000) #endif diff --git a/sys/sys/pctrie.h b/sys/sys/pctrie.h index f736877..1fd0b34 100644 --- a/sys/sys/pctrie.h +++ b/sys/sys/pctrie.h @@ -76,7 +76,7 @@ name##_PCTRIE_LOOKUP(struct pctrie *ptree, uint64_t key) \ return name##_PCTRIE_VAL2PTR(pctrie_lookup(ptree, key)); \ } \ \ -static __inline struct type * \ +static __inline __unused struct type * \ name##_PCTRIE_LOOKUP_LE(struct pctrie *ptree, uint64_t key) \ { \ \ @@ -119,5 +119,32 @@ void pctrie_remove(struct pctrie *ptree, uint64_t key, size_t pctrie_node_size(void); int pctrie_zone_init(void *mem, int size, int flags); +static __inline void +pctrie_init(struct pctrie *ptree) +{ + + ptree->pt_root = 0; +} + +static __inline boolean_t +pctrie_is_empty(struct pctrie *ptree) +{ + + return (ptree->pt_root == 0); +} + +/* + * These widths should allow the pointers to a node's children to fit within + * a single cache line. The extra levels from a narrow width should not be + * a problem thanks to path compression. 
+ */ +#ifdef __LP64__ +#define PCTRIE_WIDTH 4 +#else +#define PCTRIE_WIDTH 3 +#endif + +#define PCTRIE_COUNT (1 << PCTRIE_WIDTH) + #endif /* _KERNEL */ #endif /* !_SYS_PCTRIE_H_ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index d32807a..11947db6 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -142,6 +142,7 @@ struct pargs { * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt + * kx- only accessed by curthread and by debugger * l - the attaching proc or attaching proc parent * m - Giant * n - not locked, lazy @@ -295,8 +296,8 @@ struct thread { u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. */ u_char td_base_user_pri; /* (t) Base user pri */ - u_int td_dbg_sc_code; /* (c) Syscall code to debugger. */ - u_int td_dbg_sc_narg; /* (c) Syscall arg count to debugger.*/ + u_int td_padding3; + u_int td_padding4; uintptr_t td_rb_list; /* (k) Robust list head. */ uintptr_t td_rbp_list; /* (k) Robust priv list head. */ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ @@ -343,6 +344,8 @@ struct thread { sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals + struct syscall_args td_sa; /* (kx) Syscall parameters. Copied on + fork for child tracing. 
*/ }; struct thread0_storage { @@ -1051,7 +1054,7 @@ void userret(struct thread *, struct trapframe *); void cpu_exit(struct thread *); void exit1(struct thread *, int, int) __dead2; void cpu_copy_thread(struct thread *td, struct thread *td0); -int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa); +int cpu_fetch_syscall_args(struct thread *td); void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *); void cpu_set_syscall_retval(struct thread *, int); diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 8313fc0..f1f4590 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -119,8 +119,7 @@ struct sysentvec { u_long *sv_maxssiz; u_int sv_flags; void (*sv_set_syscall_retval)(struct thread *, int); - int (*sv_fetch_syscall_args)(struct thread *, struct - syscall_args *); + int (*sv_fetch_syscall_args)(struct thread *); const char **sv_syscallnames; vm_offset_t sv_timekeep_base; vm_offset_t sv_shared_page_base; diff --git a/sys/vm/_vm_radix.h b/sys/vm/_vm_radix.h index f066462..f061a5f 100644 --- a/sys/vm/_vm_radix.h +++ b/sys/vm/_vm_radix.h @@ -38,14 +38,4 @@ struct vm_radix { uintptr_t rt_root; }; -#ifdef _KERNEL - -static __inline boolean_t -vm_radix_is_empty(struct vm_radix *rtree) -{ - - return (rtree->rt_root == 0); -} - -#endif /* _KERNEL */ #endif /* !__VM_RADIX_H_ */ diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c index 7adc7c6..0977cfd 100644 --- a/sys/vm/phys_pager.c +++ b/sys/vm/phys_pager.c @@ -209,13 +209,10 @@ retry: if (m == NULL) { ahead = MIN(end - i, PHYSALLOC); m = vm_page_alloc(object, i, VM_ALLOC_NORMAL | - VM_ALLOC_ZERO | VM_ALLOC_COUNT(ahead)); - if (m == NULL) { - VM_OBJECT_WUNLOCK(object); - VM_WAIT; - VM_OBJECT_WLOCK(object); + VM_ALLOC_ZERO | VM_ALLOC_WAITFAIL | + VM_ALLOC_COUNT(ahead)); + if (m == NULL) goto retry; - } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; diff --git a/sys/vm/swap_pager.c 
b/sys/vm/swap_pager.c index 86c246c..0dfc98d 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$"); #include <sys/namei.h> #include <sys/vnode.h> #include <sys/malloc.h> +#include <sys/pctrie.h> #include <sys/racct.h> #include <sys/resource.h> #include <sys/resourcevar.h> @@ -119,30 +120,24 @@ __FBSDID("$FreeBSD$"); * The 64-page limit is due to the radix code (kern/subr_blist.c). */ #ifndef MAX_PAGEOUT_CLUSTER -#define MAX_PAGEOUT_CLUSTER 16 +#define MAX_PAGEOUT_CLUSTER 32 #endif #if !defined(SWB_NPAGES) #define SWB_NPAGES MAX_PAGEOUT_CLUSTER #endif +#define SWAP_META_PAGES PCTRIE_COUNT + /* - * The swblock structure maps an object and a small, fixed-size range - * of page indices to disk addresses within a swap area. - * The collection of these mappings is implemented as a hash table. - * Unused disk addresses within a swap area are allocated and managed - * using a blist. + * A swblk structure maps each page index within a + * SWAP_META_PAGES-aligned and sized range to the address of an + * on-disk swap block (or SWAPBLK_NONE). The collection of these + * mappings for an entire vm object is implemented as a pc-trie. 
*/ -#define SWCORRECT(n) (sizeof(void *) * (n) / sizeof(daddr_t)) -#define SWAP_META_PAGES (SWB_NPAGES * 2) -#define SWAP_META_MASK (SWAP_META_PAGES - 1) - -struct swblock { - struct swblock *swb_hnext; - vm_object_t swb_object; - vm_pindex_t swb_index; - int swb_count; - daddr_t swb_pages[SWAP_META_PAGES]; +struct swblk { + vm_pindex_t p; + daddr_t d[SWAP_META_PAGES]; }; static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data"); @@ -315,7 +310,7 @@ swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) #define SWM_FREE 0x02 /* free, period */ #define SWM_POP 0x04 /* pop out */ -int swap_pager_full = 2; /* swap space exhaustion (task killing) */ +static int swap_pager_full = 2; /* swap space exhaustion (task killing) */ static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ static int nsw_rcount; /* free read buffers */ static int nsw_wcount_sync; /* limit write buffers / synchronous */ @@ -328,10 +323,6 @@ SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I", "Maximum running async swap ops"); -static struct swblock **swhash; -static int swhash_mask; -static struct mtx swhash_mtx; - static struct sx sw_alloc_sx; /* @@ -345,7 +336,8 @@ static struct sx sw_alloc_sx; (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) static struct pagerlst swap_pager_object_list[NOBJLISTS]; -static uma_zone_t swap_zone; +static uma_zone_t swblk_zone; +static uma_zone_t swpctrie_zone; /* * pagerops for OBJT_SWAP - "swap pager". 
Some ops are also global procedure @@ -403,12 +395,28 @@ static daddr_t swp_pager_getswapspace(int npages); /* * Metadata functions */ -static struct swblock **swp_pager_hash(vm_object_t object, vm_pindex_t index); static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); +static void * +swblk_trie_alloc(struct pctrie *ptree) +{ + + return (uma_zalloc(swpctrie_zone, M_NOWAIT | (curproc == pageproc ? + M_USE_RESERVE : 0))); +} + +static void +swblk_trie_free(struct pctrie *ptree, void *node) +{ + + uma_zfree(swpctrie_zone, node); +} + +PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free); + /* * SWP_SIZECHECK() - update swap_pager_full indication * @@ -437,33 +445,6 @@ swp_sizecheck(void) } /* - * SWP_PAGER_HASH() - hash swap meta data - * - * This is an helper function which hashes the swapblk given - * the object and page index. It returns a pointer to a pointer - * to the object, or a pointer to a NULL pointer if it could not - * find a swapblk. - */ -static struct swblock ** -swp_pager_hash(vm_object_t object, vm_pindex_t index) -{ - struct swblock **pswap; - struct swblock *swap; - - index &= ~(vm_pindex_t)SWAP_META_MASK; - pswap = &swhash[(index ^ (int)(intptr_t)object) & swhash_mask]; - while ((swap = *pswap) != NULL) { - if (swap->swb_object == object && - swap->swb_index == index - ) { - break; - } - pswap = &swap->swb_hnext; - } - return (pswap); -} - -/* * SWAP_PAGER_INIT() - initialize the swap pager! * * Expected to be started from system init. NOTE: This code is run @@ -528,21 +509,25 @@ swap_pager_swap_init(void) mtx_unlock(&pbuf_mtx); /* - * Initialize our zone. Right now I'm just guessing on the number - * we need based on the number of pages in the system. Each swblock - * can hold 32 pages, so this is probably overkill. 
This reservation - * is typically limited to around 32MB by default. + * Initialize our zone, guessing on the number we need based + * on the number of pages in the system. */ n = vm_cnt.v_page_count / 2; - if (maxswzone && n > maxswzone / sizeof(struct swblock)) - n = maxswzone / sizeof(struct swblock); + if (maxswzone && n > maxswzone / sizeof(struct swblk)) + n = maxswzone / sizeof(struct swblk); + swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL, + pctrie_zone_init, NULL, UMA_ALIGN_PTR, + UMA_ZONE_NOFREE | UMA_ZONE_VM); + if (swpctrie_zone == NULL) + panic("failed to create swap pctrie zone."); + swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL, + NULL, NULL, _Alignof(struct swblk) - 1, + UMA_ZONE_NOFREE | UMA_ZONE_VM); + if (swblk_zone == NULL) + panic("failed to create swap blk zone."); n2 = n; - swap_zone = uma_zcreate("SWAPMETA", sizeof(struct swblock), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); - if (swap_zone == NULL) - panic("failed to create swap_zone."); do { - if (uma_zone_reserve_kva(swap_zone, n)) + if (uma_zone_reserve_kva(swblk_zone, n)) break; /* * if the allocation failed, try a zone two thirds the @@ -551,24 +536,13 @@ swap_pager_swap_init(void) n -= ((n + 2) / 3); } while (n > 0); if (n2 != n) - printf("Swap zone entries reduced from %lu to %lu.\n", n2, n); + printf("Swap blk zone entries reduced from %lu to %lu.\n", + n2, n); swap_maxpages = n * SWAP_META_PAGES; - swzone = n * sizeof(struct swblock); - n2 = n; - - /* - * Initialize our meta-data hash table. The swapper does not need to - * be quite as efficient as the VM system, so we do not use an - * oversized hash table. 
- * - * n: size of hash table, must be power of 2 - * swhash_mask: hash table index mask - */ - for (n = 1; n < n2 / 8; n *= 2) - ; - swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO); - swhash_mask = n - 1; - mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF); + swzone = n * sizeof(struct swblk); + if (!uma_zone_reserve_kva(swpctrie_zone, n)) + printf("Cannot reserve swap pctrie zone, " + "reduce kern.maxswzone.\n"); } static vm_object_t @@ -582,14 +556,20 @@ swap_pager_alloc_init(void *handle, struct ucred *cred, vm_ooffset_t size, return (NULL); crhold(cred); } + + /* + * The un_pager.swp.swp_blks trie is initialized by + * vm_object_allocate() to ensure the correct order of + * visibility to other threads. + */ object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset + PAGE_MASK + size)); + object->handle = handle; if (cred != NULL) { object->cred = cred; object->charge = size; } - object->un_pager.swp.swp_bcount = 0; return (object); } @@ -1371,7 +1351,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count, mreq->pindex, blk + j ); - vm_page_dirty(mreq); + MPASS(mreq->dirty == VM_PAGE_BITS_ALL); mreq->oflags |= VPO_SWAPINPROG; bp->b_pages[j] = mreq; } @@ -1586,43 +1566,6 @@ swp_pager_async_iodone(struct buf *bp) } /* - * swap_pager_isswapped: - * - * Return 1 if at least one page in the given object is paged - * out to the given swap device. - * - * This routine may not sleep. 
- */ -int -swap_pager_isswapped(vm_object_t object, struct swdevt *sp) -{ - daddr_t index = 0; - int bcount; - int i; - - VM_OBJECT_ASSERT_WLOCKED(object); - if (object->type != OBJT_SWAP) - return (0); - - mtx_lock(&swhash_mtx); - for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) { - struct swblock *swap; - - if ((swap = *swp_pager_hash(object, index)) != NULL) { - for (i = 0; i < SWAP_META_PAGES; ++i) { - if (swp_pager_isondev(swap->swb_pages[i], sp)) { - mtx_unlock(&swhash_mtx); - return (1); - } - } - } - index += SWAP_META_PAGES; - } - mtx_unlock(&swhash_mtx); - return (0); -} - -/* * SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in * * This routine dissociates the page at the given index within an object @@ -1681,50 +1624,56 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) static void swap_pager_swapoff(struct swdevt *sp) { - struct swblock *swap; - vm_object_t locked_obj, object; - vm_pindex_t pindex; - int i, j, retries; + struct swblk *sb; + vm_object_t object; + vm_pindex_t pi; + int i, retries; sx_assert(&swdev_syscall_lock, SA_XLOCKED); retries = 0; - locked_obj = NULL; full_rescan: - mtx_lock(&swhash_mtx); - for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */ -restart: - for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) { - object = swap->swb_object; - pindex = swap->swb_index; - for (j = 0; j < SWAP_META_PAGES; ++j) { - if (!swp_pager_isondev(swap->swb_pages[j], sp)) + mtx_lock(&vm_object_list_mtx); + TAILQ_FOREACH(object, &vm_object_list, object_list) { + if (object->type != OBJT_SWAP) + continue; + mtx_unlock(&vm_object_list_mtx); + /* Depends on type-stability. */ + VM_OBJECT_WLOCK(object); + + /* + * Dead objects are eventually terminated on their own. + */ + if ((object->flags & OBJ_DEAD) != 0) + goto next_obj; + + /* + * Sync with fences placed after pctrie + * initialization. We must not access pctrie below + * unless we checked that our object is swap and not + * dead. 
+ */ + atomic_thread_fence_acq(); + if (object->type != OBJT_SWAP) + goto next_obj; + + for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( + &object->un_pager.swp.swp_blks, pi)) != NULL; ) { + pi = sb->p + SWAP_META_PAGES; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] == SWAPBLK_NONE) continue; - if (locked_obj != object) { - if (locked_obj != NULL) - VM_OBJECT_WUNLOCK(locked_obj); - locked_obj = object; - if (!VM_OBJECT_TRYWLOCK(object)) { - mtx_unlock(&swhash_mtx); - /* Depends on type-stability. */ - VM_OBJECT_WLOCK(object); - mtx_lock(&swhash_mtx); - goto restart; - } - } - MPASS(locked_obj == object); - mtx_unlock(&swhash_mtx); - swp_pager_force_pagein(object, pindex + j); - mtx_lock(&swhash_mtx); - goto restart; + if (swp_pager_isondev(sb->d[i], sp)) + swp_pager_force_pagein(object, + sb->p + i); } } +next_obj: + VM_OBJECT_WUNLOCK(object); + mtx_lock(&vm_object_list_mtx); } - mtx_unlock(&swhash_mtx); - if (locked_obj != NULL) { - VM_OBJECT_WUNLOCK(locked_obj); - locked_obj = NULL; - } + mtx_unlock(&vm_object_list_mtx); + if (sp->sw_used) { /* * Objects may be locked or paging to the device being @@ -1767,85 +1716,120 @@ restart: static void swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) { - static volatile int exhausted; - struct swblock *swap; - struct swblock **pswap; - int idx; + static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted; + struct swblk *sb, *sb1; + vm_pindex_t modpi, rdpi; + int error, i; VM_OBJECT_ASSERT_WLOCKED(object); + /* * Convert default object to swap object if necessary */ if (object->type != OBJT_SWAP) { + pctrie_init(&object->un_pager.swp.swp_blks); + + /* + * Ensure that swap_pager_swapoff()'s iteration over + * object_list does not see a garbage pctrie. + */ + atomic_thread_fence_rel(); + object->type = OBJT_SWAP; - object->un_pager.swp.swp_bcount = 0; KASSERT(object->handle == NULL, ("default pager with handle")); } - /* - * Locate hash entry. 
If not found create, but if we aren't adding - * anything just return. If we run out of space in the map we wait - * and, since the hash table may have changed, retry. - */ -retry: - mtx_lock(&swhash_mtx); - pswap = swp_pager_hash(object, pindex); - - if ((swap = *pswap) == NULL) { - int i; - + rdpi = rounddown(pindex, SWAP_META_PAGES); + sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); + if (sb == NULL) { if (swapblk == SWAPBLK_NONE) - goto done; - - swap = *pswap = uma_zalloc(swap_zone, M_NOWAIT | - (curproc == pageproc ? M_USE_RESERVE : 0)); - if (swap == NULL) { - mtx_unlock(&swhash_mtx); + return; + for (;;) { + sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc == + pageproc ? M_USE_RESERVE : 0)); + if (sb != NULL) { + sb->p = rdpi; + for (i = 0; i < SWAP_META_PAGES; i++) + sb->d[i] = SWAPBLK_NONE; + if (atomic_cmpset_int(&swblk_zone_exhausted, + 1, 0)) + printf("swblk zone ok\n"); + break; + } VM_OBJECT_WUNLOCK(object); - if (uma_zone_exhausted(swap_zone)) { - if (atomic_cmpset_int(&exhausted, 0, 1)) - printf("swap zone exhausted, " + if (uma_zone_exhausted(swblk_zone)) { + if (atomic_cmpset_int(&swblk_zone_exhausted, + 0, 1)) + printf("swap blk zone exhausted, " "increase kern.maxswzone\n"); vm_pageout_oom(VM_OOM_SWAPZ); - pause("swzonex", 10); + pause("swzonxb", 10); } else - VM_WAIT; + uma_zwait(swblk_zone); VM_OBJECT_WLOCK(object); - goto retry; + sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, + rdpi); + if (sb != NULL) + /* + * Somebody swapped out a nearby page, + * allocating swblk at the rdpi index, + * while we dropped the object lock. 
+ */ + goto allocated; + } + for (;;) { + error = SWAP_PCTRIE_INSERT( + &object->un_pager.swp.swp_blks, sb); + if (error == 0) { + if (atomic_cmpset_int(&swpctrie_zone_exhausted, + 1, 0)) + printf("swpctrie zone ok\n"); + break; + } + VM_OBJECT_WUNLOCK(object); + if (uma_zone_exhausted(swpctrie_zone)) { + if (atomic_cmpset_int(&swpctrie_zone_exhausted, + 0, 1)) + printf("swap pctrie zone exhausted, " + "increase kern.maxswzone\n"); + vm_pageout_oom(VM_OOM_SWAPZ); + pause("swzonxp", 10); + } else + uma_zwait(swpctrie_zone); + VM_OBJECT_WLOCK(object); + sb1 = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, + rdpi); + if (sb1 != NULL) { + uma_zfree(swblk_zone, sb); + sb = sb1; + goto allocated; + } } - - if (atomic_cmpset_int(&exhausted, 1, 0)) - printf("swap zone ok\n"); - - swap->swb_hnext = NULL; - swap->swb_object = object; - swap->swb_index = pindex & ~(vm_pindex_t)SWAP_META_MASK; - swap->swb_count = 0; - - ++object->un_pager.swp.swp_bcount; - - for (i = 0; i < SWAP_META_PAGES; ++i) - swap->swb_pages[i] = SWAPBLK_NONE; } +allocated: + MPASS(sb->p == rdpi); - /* - * Delete prior contents of metadata - */ - idx = pindex & SWAP_META_MASK; - - if (swap->swb_pages[idx] != SWAPBLK_NONE) { - swp_pager_freeswapspace(swap->swb_pages[idx], 1); - --swap->swb_count; - } + modpi = pindex % SWAP_META_PAGES; + /* Delete prior contents of metadata. */ + if (sb->d[modpi] != SWAPBLK_NONE) + swp_pager_freeswapspace(sb->d[modpi], 1); + /* Enter block into metadata. */ + sb->d[modpi] = swapblk; /* - * Enter block into metadata + * Free the swblk if we end up with the empty page run. 
*/ - swap->swb_pages[idx] = swapblk; - if (swapblk != SWAPBLK_NONE) - ++swap->swb_count; -done: - mtx_unlock(&swhash_mtx); + if (swapblk == SWAPBLK_NONE) { + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + break; + } + if (i == SWAP_META_PAGES) { + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, + rdpi); + uma_zfree(swblk_zone, sb); + } + } } /* @@ -1859,42 +1843,40 @@ done: * with resident pages. */ static void -swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count) +swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) { - struct swblock **pswap, *swap; - vm_pindex_t c; - daddr_t v; - int n, sidx; + struct swblk *sb; + vm_pindex_t last; + int i; + bool empty; - VM_OBJECT_ASSERT_LOCKED(object); + VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP || count == 0) return; - mtx_lock(&swhash_mtx); - for (c = 0; c < count;) { - pswap = swp_pager_hash(object, index); - sidx = index & SWAP_META_MASK; - n = SWAP_META_PAGES - sidx; - index += n; - if ((swap = *pswap) == NULL) { - c += n; - continue; - } - for (; c < count && sidx < SWAP_META_PAGES; ++c, ++sidx) { - if ((v = swap->swb_pages[sidx]) == SWAPBLK_NONE) + last = pindex + count - 1; + for (;;) { + sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + if (sb == NULL || sb->p > last) + break; + empty = true; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] == SWAPBLK_NONE) continue; - swp_pager_freeswapspace(v, 1); - swap->swb_pages[sidx] = SWAPBLK_NONE; - if (--swap->swb_count == 0) { - *pswap = swap->swb_hnext; - uma_zfree(swap_zone, swap); - --object->un_pager.swp.swp_bcount; - c += SWAP_META_PAGES - sidx; - break; - } + if (pindex <= sb->p + i && sb->p + i <= last) { + swp_pager_freeswapspace(sb->d[i], 1); + sb->d[i] = SWAPBLK_NONE; + } else + empty = false; + } + pindex = sb->p + SWAP_META_PAGES; + if (empty) { + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, + 
sb->p); + uma_zfree(swblk_zone, sb); } } - mtx_unlock(&swhash_mtx); } /* @@ -1906,36 +1888,23 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count) static void swp_pager_meta_free_all(vm_object_t object) { - struct swblock **pswap, *swap; - vm_pindex_t index; - daddr_t v; + struct swblk *sb; + vm_pindex_t pindex; int i; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP) return; - index = 0; - while (object->un_pager.swp.swp_bcount != 0) { - mtx_lock(&swhash_mtx); - pswap = swp_pager_hash(object, index); - if ((swap = *pswap) != NULL) { - for (i = 0; i < SWAP_META_PAGES; ++i) { - v = swap->swb_pages[i]; - if (v != SWAPBLK_NONE) { - --swap->swb_count; - swp_pager_freeswapspace(v, 1); - } - } - if (swap->swb_count != 0) - panic( - "swap_pager_meta_free_all: swb_count != 0"); - *pswap = swap->swb_hnext; - uma_zfree(swap_zone, swap); - --object->un_pager.swp.swp_bcount; + for (pindex = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( + &object->un_pager.swp.swp_blks, pindex)) != NULL;) { + pindex = sb->p + SWAP_META_PAGES; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + swp_pager_freeswapspace(sb->d[i], 1); } - mtx_unlock(&swhash_mtx); - index += SWAP_META_PAGES; + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); + uma_zfree(swblk_zone, sb); } } @@ -1949,9 +1918,6 @@ swp_pager_meta_free_all(vm_object_t object) * was invalid. This routine will automatically free any invalid * meta-data swapblks. * - * It is not possible to store invalid swapblks in the swap meta data - * (other then a literal 'SWAPBLK_NONE'), so we don't bother checking. - * * When acting on a busy resident page and paging is in progress, we * have to wait until paging is complete but otherwise can act on the * busy page. 
@@ -1962,43 +1928,45 @@ swp_pager_meta_free_all(vm_object_t object) static daddr_t swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags) { - struct swblock **pswap; - struct swblock *swap; + struct swblk *sb; daddr_t r1; - int idx; + int i; + + if ((flags & (SWM_FREE | SWM_POP)) != 0) + VM_OBJECT_ASSERT_WLOCKED(object); + else + VM_OBJECT_ASSERT_LOCKED(object); - VM_OBJECT_ASSERT_LOCKED(object); /* - * The meta data only exists of the object is OBJT_SWAP + * The meta data only exists if the object is OBJT_SWAP * and even then might not be allocated yet. */ if (object->type != OBJT_SWAP) return (SWAPBLK_NONE); - r1 = SWAPBLK_NONE; - mtx_lock(&swhash_mtx); - pswap = swp_pager_hash(object, pindex); - - if ((swap = *pswap) != NULL) { - idx = pindex & SWAP_META_MASK; - r1 = swap->swb_pages[idx]; - - if (r1 != SWAPBLK_NONE) { - if (flags & SWM_FREE) { - swp_pager_freeswapspace(r1, 1); - r1 = SWAPBLK_NONE; - } - if (flags & (SWM_FREE|SWM_POP)) { - swap->swb_pages[idx] = SWAPBLK_NONE; - if (--swap->swb_count == 0) { - *pswap = swap->swb_hnext; - uma_zfree(swap_zone, swap); - --object->un_pager.swp.swp_bcount; - } - } + sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + if (sb == NULL) + return (SWAPBLK_NONE); + r1 = sb->d[pindex % SWAP_META_PAGES]; + if (r1 == SWAPBLK_NONE) + return (SWAPBLK_NONE); + if ((flags & (SWM_FREE | SWM_POP)) != 0) { + sb->d[pindex % SWAP_META_PAGES] = SWAPBLK_NONE; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + break; + } + if (i == SWAP_META_PAGES) { + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + uma_zfree(swblk_zone, sb); } } - mtx_unlock(&swhash_mtx); + if ((flags & SWM_FREE) != 0) { + swp_pager_freeswapspace(r1, 1); + r1 = SWAPBLK_NONE; + } return (r1); } @@ -2012,32 +1980,38 @@ swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags) vm_pindex_t swap_pager_find_least(vm_object_t object, 
vm_pindex_t pindex) { - struct swblock **pswap, *swap; - vm_pindex_t i, j, lim; - int idx; + struct swblk *sb; + int i; VM_OBJECT_ASSERT_LOCKED(object); - if (object->type != OBJT_SWAP || object->un_pager.swp.swp_bcount == 0) + if (object->type != OBJT_SWAP) return (object->size); - mtx_lock(&swhash_mtx); - for (j = pindex; j < object->size; j = lim) { - pswap = swp_pager_hash(object, j); - lim = rounddown2(j + SWAP_META_PAGES, SWAP_META_PAGES); - if (lim > object->size) - lim = object->size; - if ((swap = *pswap) != NULL) { - for (idx = j & SWAP_META_MASK, i = j; i < lim; - i++, idx++) { - if (swap->swb_pages[idx] != SWAPBLK_NONE) - goto found; - } + sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + if (sb == NULL) + return (object->size); + if (sb->p < pindex) { + for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + return (sb->p + i); } + sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + roundup(pindex, SWAP_META_PAGES)); + if (sb == NULL) + return (object->size); } - i = object->size; -found: - mtx_unlock(&swhash_mtx); - return (i); + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + return (sb->p + i); + } + + /* + * We get here if a swblk is present in the trie but it + * doesn't map any blocks. + */ + MPASS(0); + return (object->size); } /* @@ -2073,7 +2047,7 @@ sys_swapon(struct thread *td, struct swapon_args *uap) * Swap metadata may not fit in the KVM if we have physical * memory of >1GB. */ - if (swap_zone == NULL) { + if (swblk_zone == NULL) { error = ENOMEM; goto done; } @@ -2109,15 +2083,16 @@ done: /* * Check that the total amount of swap currently configured does not * exceed half the theoretical maximum. If it does, print a warning - * message and return -1; otherwise, return 0. + * message. 
*/ -static int -swapon_check_swzone(unsigned long npages) +static void +swapon_check_swzone(void) { - unsigned long maxpages; + unsigned long maxpages, npages; + npages = swap_total / PAGE_SIZE; /* absolute maximum we can handle assuming 100% efficiency */ - maxpages = uma_zone_get_max(swap_zone) * SWAP_META_PAGES; + maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES; /* recommend using no more than half that amount */ if (npages > maxpages / 2) { @@ -2126,9 +2101,7 @@ swapon_check_swzone(unsigned long npages) npages, maxpages / 2); printf("warning: increase kern.maxswzone " "or reduce amount of swap.\n"); - return (-1); } - return (0); } static void @@ -2196,7 +2169,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, nswapdev++; swap_pager_avail += nblks - 2; swap_total += (vm_ooffset_t)nblks * PAGE_SIZE; - swapon_check_swzone(swap_total / PAGE_SIZE); + swapon_check_swzone(); swp_sizecheck(); mtx_unlock(&sw_dev_mtx); } @@ -2417,15 +2390,9 @@ SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD | CTLFLAG_MPSAFE, "Swap statistics by device"); /* - * vmspace_swap_count() - count the approximate swap usage in pages for a - * vmspace. - * - * The map must be locked. - * - * Swap usage is determined by taking the proportional swap used by - * VM objects backing the VM map. To make up for fractional losses, - * if the VM object has any swap use at all the associated map entries - * count for at least 1 swap page. + * Count the approximate swap usage in pages for a vmspace. The + * shadowed or not yet copied on write swap blocks are not accounted. + * The map must be locked. 
*/ long vmspace_swap_count(struct vmspace *vmspace) @@ -2433,23 +2400,38 @@ vmspace_swap_count(struct vmspace *vmspace) vm_map_t map; vm_map_entry_t cur; vm_object_t object; - long count, n; + struct swblk *sb; + vm_pindex_t e, pi; + long count; + int i; map = &vmspace->vm_map; count = 0; for (cur = map->header.next; cur != &map->header; cur = cur->next) { - if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && - (object = cur->object.vm_object) != NULL) { - VM_OBJECT_WLOCK(object); - if (object->type == OBJT_SWAP && - object->un_pager.swp.swp_bcount != 0) { - n = (cur->end - cur->start) / PAGE_SIZE; - count += object->un_pager.swp.swp_bcount * - SWAP_META_PAGES * n / object->size + 1; + if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) + continue; + object = cur->object.vm_object; + if (object == NULL || object->type != OBJT_SWAP) + continue; + VM_OBJECT_RLOCK(object); + if (object->type != OBJT_SWAP) + goto unlock; + pi = OFF_TO_IDX(cur->offset); + e = pi + OFF_TO_IDX(cur->end - cur->start); + for (;; pi = sb->p + SWAP_META_PAGES) { + sb = SWAP_PCTRIE_LOOKUP_GE( + &object->un_pager.swp.swp_blks, pi); + if (sb == NULL || sb->p >= e) + break; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->p + i < e && + sb->d[i] != SWAPBLK_NONE) + count++; } - VM_OBJECT_WUNLOCK(object); } +unlock: + VM_OBJECT_RUNLOCK(object); } return (count); } diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 83567f4..1abded5 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -73,7 +73,6 @@ struct swdevt { #ifdef _KERNEL -extern int swap_pager_full; extern int swap_pager_avail; struct xswdev; @@ -82,7 +81,6 @@ void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int); vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex); void swap_pager_freespace(vm_object_t, vm_pindex_t, vm_size_t); void swap_pager_swap_init(void); -int swap_pager_isswapped(vm_object_t, struct swdevt *); int swap_pager_reserve(vm_object_t, vm_pindex_t, vm_size_t); void 
swap_pager_status(int *total, int *used); void swapoff_all(void); diff --git a/sys/vm/uma.h b/sys/vm/uma.h index f4c2de8..569b951 100644 --- a/sys/vm/uma.h +++ b/sys/vm/uma.h @@ -365,6 +365,11 @@ uma_zfree(uma_zone_t zone, void *item) } /* + * Wait until the specified zone can allocate an item. + */ +void uma_zwait(uma_zone_t zone); + +/* * XXX The rest of the prototypes in this header are h0h0 magic for the VM. * If you think you need to use it for a normal zone you're probably incorrect. */ diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 26439dc..295e4e2 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -138,7 +138,7 @@ static LIST_HEAD(,uma_zone) uma_cachezones = LIST_HEAD_INITIALIZER(uma_cachezones); /* This RW lock protects the keg list */ -static struct rwlock_padalign uma_rwlock; +static struct rwlock_padalign __exclusive_cache_line uma_rwlock; /* Linked list of boot time pages */ static LIST_HEAD(,uma_slab) uma_boot_pages = @@ -1127,7 +1127,9 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) npages = howmany(bytes, PAGE_SIZE); while (npages > 0) { p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | - VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); + VM_ALLOC_WIRED | VM_ALLOC_NOOBJ | + ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK : + VM_ALLOC_NOWAIT)); if (p != NULL) { /* * Since the page does not belong to an object, its @@ -1137,11 +1139,6 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) npages--; continue; } - if (wait & M_WAITOK) { - VM_WAIT; - continue; - } - /* * Page allocation failed, free intermediate pages and * exit. @@ -1326,10 +1323,6 @@ keg_large_init(uma_keg_t keg) keg->uk_ipers = 1; keg->uk_rsize = keg->uk_size; - /* We can't do OFFPAGE if we're internal, bail out here. */ - if (keg->uk_flags & UMA_ZFLAG_INTERNAL) - return; - /* Check whether we have enough space to not do OFFPAGE. 
*/ if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) { shsize = sizeof(struct uma_slab); @@ -1337,8 +1330,17 @@ keg_large_init(uma_keg_t keg) shsize = (shsize & ~UMA_ALIGN_PTR) + (UMA_ALIGN_PTR + 1); - if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize) - keg->uk_flags |= UMA_ZONE_OFFPAGE; + if (PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < shsize) { + /* + * We can't do OFFPAGE if we're internal, in which case + * we need an extra page per allocation to contain the + * slab header. + */ + if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0) + keg->uk_flags |= UMA_ZONE_OFFPAGE; + else + keg->uk_ppera++; + } } if ((keg->uk_flags & UMA_ZONE_OFFPAGE) && @@ -2077,6 +2079,15 @@ uma_zdestroy(uma_zone_t zone) sx_sunlock(&uma_drain_lock); } +void +uma_zwait(uma_zone_t zone) +{ + void *item; + + item = uma_zalloc_arg(zone, NULL, M_WAITOK); + uma_zfree(zone, item); +} + /* See uma.h */ void * uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index d2147f6..d9e2366 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -236,14 +236,15 @@ vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot, * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * - * Also tell the backing pager, if any, that it should remove - * any swap backing since the page is now dirty. + * Also, since the page is now dirty, we can possibly tell + * the pager to release any swap backing the page. Calling + * the pager requires a write lock on the object. 
*/ if (need_dirty) vm_page_dirty(m); if (!set_wd) vm_page_unlock(m); - if (need_dirty) + else if (need_dirty) vm_pager_page_unswapped(m); } diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 60b822e..941e7e8 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -322,7 +322,7 @@ vm_thread_new(struct thread *td, int pages) { vm_object_t ksobj; vm_offset_t ks; - vm_page_t m, ma[KSTACK_MAX_PAGES]; + vm_page_t ma[KSTACK_MAX_PAGES]; struct kstack_cache_entry *ks_ce; int i; @@ -391,15 +391,10 @@ vm_thread_new(struct thread *td, int pages) * page of stack. */ VM_OBJECT_WLOCK(ksobj); - for (i = 0; i < pages; i++) { - /* - * Get a kernel stack page. - */ - m = vm_page_grab(ksobj, i, VM_ALLOC_NOBUSY | - VM_ALLOC_NORMAL | VM_ALLOC_WIRED); - ma[i] = m; - m->valid = VM_PAGE_BITS_ALL; - } + (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED, ma, pages); + for (i = 0; i < pages; i++) + ma[i]->valid = VM_PAGE_BITS_ALL; VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(ks, ma, pages); return (1); @@ -573,9 +568,8 @@ vm_thread_swapin(struct thread *td) pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_WLOCK(ksobj); - for (int i = 0; i < pages; i++) - ma[i] = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | - VM_ALLOC_WIRED); + (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED, ma, + pages); for (int i = 0; i < pages;) { int j, a, count, rv; diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index af678ac..5a0eb4d 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> +#include <vm/vm_radix.h> #include <vm/vm_extern.h> #include <vm/uma.h> @@ -171,6 +172,8 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low, return (0); offset = addr - VM_MIN_KERNEL_ADDRESS; pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED; + pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); + 
pflags |= VM_ALLOC_NOWAIT; VM_OBJECT_WLOCK(object); for (i = 0; i < size; i += PAGE_SIZE) { tries = 0; @@ -226,6 +229,8 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low, return (0); offset = addr - VM_MIN_KERNEL_ADDRESS; pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED; + pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); + pflags |= VM_ALLOC_NOWAIT; npages = atop(size); VM_OBJECT_WLOCK(object); tries = 0; @@ -329,7 +334,7 @@ int kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags) { vm_offset_t offset, i; - vm_page_t m; + vm_page_t m, mpred; int pflags; KASSERT(object == kmem_object || object == kernel_object, @@ -337,11 +342,17 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags) offset = addr - VM_MIN_KERNEL_ADDRESS; pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED; + pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); + if (flags & M_WAITOK) + pflags |= VM_ALLOC_WAITFAIL; + i = 0; VM_OBJECT_WLOCK(object); - for (i = 0; i < size; i += PAGE_SIZE) { retry: - m = vm_page_alloc(object, atop(offset + i), pflags); + mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i)); + for (; i < size; i += PAGE_SIZE, mpred = m) { + m = vm_page_alloc_after(object, atop(offset + i), pflags, + mpred); /* * Ran out of space, free everything up and return. Don't need @@ -349,12 +360,9 @@ retry: * aren't on any queues. 
*/ if (m == NULL) { - VM_OBJECT_WUNLOCK(object); - if ((flags & M_NOWAIT) == 0) { - VM_WAIT; - VM_OBJECT_WLOCK(object); + if ((flags & M_NOWAIT) == 0) goto retry; - } + VM_OBJECT_WUNLOCK(object); kmem_unback(object, addr, i); return (KERN_NO_SPACE); } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 5dc76d1..1290b21 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1962,7 +1962,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, (pagesizes[p->psind] - 1)) == 0) { mask = atop(pagesizes[p->psind]) - 1; if (tmpidx + mask < psize && - vm_page_ps_is_valid(p)) { + vm_page_ps_test(p, PS_ALL_VALID, NULL)) { p += mask; threshold += mask; } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 6db1ac4..1e11078 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mman.h> #include <sys/mount.h> #include <sys/kernel.h> +#include <sys/pctrie.h> #include <sys/sysctl.h> #include <sys/mutex.h> #include <sys/proc.h> /* for curproc, pageproc */ @@ -204,10 +205,11 @@ vm_object_zinit(void *mem, int size, int flags) /* These are true for any object that has been freed */ object->type = OBJT_DEAD; object->ref_count = 0; - object->rtree.rt_root = 0; + vm_radix_init(&object->rtree); object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; + object->flags = OBJ_DEAD; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); @@ -223,6 +225,16 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) LIST_INIT(&object->shadow_head); object->type = type; + if (type == OBJT_SWAP) + pctrie_init(&object->un_pager.swp.swp_blks); + + /* + * Ensure that swap_pager_swapoff() iteration over object_list + * sees up to date type and pctrie head if it observed + * non-dead object. 
+ */ + atomic_thread_fence_rel(); + switch (type) { case OBJT_DEAD: panic("_vm_object_allocate: can't create OBJT_DEAD"); @@ -301,7 +313,7 @@ vm_object_init(void) #endif vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); - vm_radix_init(); + vm_radix_zinit(); } void @@ -1075,6 +1087,33 @@ vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, } /* + * Determine whether the given advice can be applied to the object. Advice is + * not applied to unmanaged pages since they never belong to page queues, and + * since MADV_FREE is destructive, it can apply only to anonymous pages that + * have been mapped at most once. + */ +static bool +vm_object_advice_applies(vm_object_t object, int advice) +{ + + if ((object->flags & OBJ_UNMANAGED) != 0) + return (false); + if (advice != MADV_FREE) + return (true); + return ((object->type == OBJT_DEFAULT || object->type == OBJT_SWAP) && + (object->flags & OBJ_ONEMAPPING) != 0); +} + +static void +vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex, + vm_size_t size) +{ + + if (advice == MADV_FREE && object->type == OBJT_SWAP) + swap_pager_freespace(object, pindex, size); +} + +/* * vm_object_madvise: * * Implements the madvise function at the object/page level. 
@@ -1097,96 +1136,109 @@ vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, */ void vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, - int advise) + int advice) { vm_pindex_t tpindex; vm_object_t backing_object, tobject; - vm_page_t m; + vm_page_t m, tm; if (object == NULL) return; - VM_OBJECT_WLOCK(object); - /* - * Locate and adjust resident pages - */ - for (; pindex < end; pindex += 1) { + relookup: + VM_OBJECT_WLOCK(object); + if (!vm_object_advice_applies(object, advice)) { + VM_OBJECT_WUNLOCK(object); + return; + } + for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) { tobject = object; - tpindex = pindex; -shadowlookup: + /* - * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages - * and those pages must be OBJ_ONEMAPPING. + * If the next page isn't resident in the top-level object, we + * need to search the shadow chain. When applying MADV_FREE, we + * take care to release any swap space used to store + * non-resident pages. */ - if (advise == MADV_FREE) { - if ((tobject->type != OBJT_DEFAULT && - tobject->type != OBJT_SWAP) || - (tobject->flags & OBJ_ONEMAPPING) == 0) { - goto unlock_tobject; - } - } else if ((tobject->flags & OBJ_UNMANAGED) != 0) - goto unlock_tobject; - m = vm_page_lookup(tobject, tpindex); - if (m == NULL) { - /* - * There may be swap even if there is no backing page - */ - if (advise == MADV_FREE && tobject->type == OBJT_SWAP) - swap_pager_freespace(tobject, tpindex, 1); + if (m == NULL || pindex < m->pindex) { /* - * next object + * Optimize a common case: if the top-level object has + * no backing object, we can skip over the non-resident + * range in constant time. 
*/ - backing_object = tobject->backing_object; - if (backing_object == NULL) - goto unlock_tobject; - VM_OBJECT_WLOCK(backing_object); - tpindex += OFF_TO_IDX(tobject->backing_object_offset); - if (tobject != object) - VM_OBJECT_WUNLOCK(tobject); - tobject = backing_object; - goto shadowlookup; - } else if (m->valid != VM_PAGE_BITS_ALL) - goto unlock_tobject; + if (object->backing_object == NULL) { + tpindex = (m != NULL && m->pindex < end) ? + m->pindex : end; + vm_object_madvise_freespace(object, advice, + pindex, tpindex - pindex); + if ((pindex = tpindex) == end) + break; + goto next_page; + } + + tpindex = pindex; + do { + vm_object_madvise_freespace(tobject, advice, + tpindex, 1); + /* + * Prepare to search the next object in the + * chain. + */ + backing_object = tobject->backing_object; + if (backing_object == NULL) + goto next_pindex; + VM_OBJECT_WLOCK(backing_object); + tpindex += + OFF_TO_IDX(tobject->backing_object_offset); + if (tobject != object) + VM_OBJECT_WUNLOCK(tobject); + tobject = backing_object; + if (!vm_object_advice_applies(tobject, advice)) + goto next_pindex; + } while ((tm = vm_page_lookup(tobject, tpindex)) == + NULL); + } else { +next_page: + tm = m; + m = TAILQ_NEXT(m, listq); + } + /* * If the page is not in a normal state, skip it. 
*/ - vm_page_lock(m); - if (m->hold_count != 0 || m->wire_count != 0) { - vm_page_unlock(m); - goto unlock_tobject; + if (tm->valid != VM_PAGE_BITS_ALL) + goto next_pindex; + vm_page_lock(tm); + if (tm->hold_count != 0 || tm->wire_count != 0) { + vm_page_unlock(tm); + goto next_pindex; } - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("vm_object_madvise: page %p is fictitious", m)); - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("vm_object_madvise: page %p is not managed", m)); - if (vm_page_busied(m)) { - if (advise == MADV_WILLNEED) { + KASSERT((tm->flags & PG_FICTITIOUS) == 0, + ("vm_object_madvise: page %p is fictitious", tm)); + KASSERT((tm->oflags & VPO_UNMANAGED) == 0, + ("vm_object_madvise: page %p is not managed", tm)); + if (vm_page_busied(tm)) { + if (object != tobject) + VM_OBJECT_WUNLOCK(tobject); + VM_OBJECT_WUNLOCK(object); + if (advice == MADV_WILLNEED) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less - * likely to reclaim it. + * likely to reclaim it. 
*/ - vm_page_aflag_set(m, PGA_REFERENCED); + vm_page_aflag_set(tm, PGA_REFERENCED); } - if (object != tobject) - VM_OBJECT_WUNLOCK(object); - VM_OBJECT_WUNLOCK(tobject); - vm_page_busy_sleep(m, "madvpo", false); - VM_OBJECT_WLOCK(object); + vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } - if (advise == MADV_WILLNEED) { - vm_page_activate(m); - } else { - vm_page_advise(m, advise); - } - vm_page_unlock(m); - if (advise == MADV_FREE && tobject->type == OBJT_SWAP) - swap_pager_freespace(tobject, tpindex, 1); -unlock_tobject: + vm_page_advise(tm, advice); + vm_page_unlock(tm); + vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); +next_pindex: if (tobject != object) VM_OBJECT_WUNLOCK(tobject); - } + } VM_OBJECT_WUNLOCK(object); } @@ -1360,7 +1412,7 @@ retry: if (vm_page_rename(m, new_object, idx)) { VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); - VM_WAIT; + vm_radix_wait(); VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; @@ -1422,8 +1474,9 @@ vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, vm_page_lock(p); VM_OBJECT_WUNLOCK(object); VM_OBJECT_WUNLOCK(backing_object); + /* The page is only NULL when rename fails. */ if (p == NULL) - VM_WAIT; + vm_radix_wait(); else vm_page_busy_sleep(p, "vmocol", false); VM_OBJECT_WLOCK(object); @@ -1858,6 +1911,7 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { vm_page_t p, next; + struct mtx *mtx; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || @@ -1868,6 +1922,7 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); + mtx = NULL; /* * Here, the variable "p" is either (1) the page with the least pindex @@ -1884,7 +1939,7 @@ again: * however, be invalidated if the option OBJPR_CLEANONLY is * not specified. 
*/ - vm_page_lock(p); + vm_page_change_lock(p, &mtx); if (vm_page_xbusied(p)) { VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmopax", true); @@ -1892,13 +1947,14 @@ again: goto again; } if (p->wire_count != 0) { - if ((options & OBJPR_NOTMAPPED) == 0) + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0) pmap_remove_all(p); if ((options & OBJPR_CLEANONLY) == 0) { p->valid = 0; vm_page_undirty(p); } - goto next; + continue; } if (vm_page_busied(p)) { VM_OBJECT_WUNLOCK(object); @@ -1909,17 +1965,18 @@ again: KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { - if ((options & OBJPR_NOTMAPPED) == 0) + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0) pmap_remove_write(p); - if (p->dirty) - goto next; + if (p->dirty != 0) + continue; } - if ((options & OBJPR_NOTMAPPED) == 0) + if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) pmap_remove_all(p); vm_page_free(p); -next: - vm_page_unlock(p); } + if (mtx != NULL) + mtx_unlock(mtx); vm_object_pip_wakeup(object); } @@ -1942,7 +1999,7 @@ next: void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { - struct mtx *mtx, *new_mtx; + struct mtx *mtx; vm_page_t p, next; VM_OBJECT_ASSERT_LOCKED(object); @@ -1959,17 +2016,7 @@ vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) mtx = NULL; for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); - - /* - * Avoid releasing and reacquiring the same page lock. 
- */ - new_mtx = vm_page_lockptr(p); - if (mtx != new_mtx) { - if (mtx != NULL) - mtx_unlock(mtx); - mtx = new_mtx; - mtx_lock(mtx); - } + vm_page_change_lock(p, &mtx); vm_page_deactivate_noreuse(p); } if (mtx != NULL) diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 9b2192e..285b3e0 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -70,6 +70,7 @@ #include <sys/queue.h> #include <sys/_lock.h> #include <sys/_mutex.h> +#include <sys/_pctrie.h> #include <sys/_rwlock.h> #include <vm/_vm_radix.h> @@ -151,13 +152,12 @@ struct vm_object { * the handle changed and hash-chain * invalid. * - * swp_bcount - number of swap 'swblock' metablocks, each - * contains up to 16 swapblk assignments. - * see vm/swap_pager.h + * swp_blks - pc-trie of the allocated swap blocks. + * */ struct { void *swp_tmpfs; - int swp_bcount; + struct pctrie swp_blks; } swp; } un_pager; struct ucred *cred; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 9f4fe44..49398f2 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -127,9 +127,9 @@ __FBSDID("$FreeBSD$"); */ struct vm_domain vm_dom[MAXMEMDOM]; -struct mtx_padalign vm_page_queue_free_mtx; +struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx; -struct mtx_padalign pa_lock[PA_LOCK_COUNT]; +struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT]; vm_page_t vm_page_array; long vm_page_array_size; @@ -158,6 +158,7 @@ static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); +static void vm_page_free_phys(vm_page_t m); static void vm_page_free_wakeup(void); static void vm_page_init_fakepg(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, @@ -166,6 +167,7 @@ static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, 
vm_paddr_t high); +static int vm_page_alloc_fail(vm_object_t object, int req); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL); @@ -407,6 +409,29 @@ vm_page_domain_init(struct vm_domain *vmd) } /* + * Initialize a physical page in preparation for adding it to the free + * lists. + */ +static void +vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) +{ + + m->object = NULL; + m->wire_count = 0; + m->busy_lock = VPB_UNBUSIED; + m->hold_count = 0; + m->flags = 0; + m->phys_addr = pa; + m->queue = PQ_NONE; + m->psind = 0; + m->segind = segind; + m->order = VM_NFREEORDER; + m->pool = VM_FREEPOOL_DEFAULT; + m->valid = m->dirty = 0; + pmap_page_init(m); +} + +/* * vm_page_startup: * * Initializes the resident memory module. Allocates physical memory for @@ -417,17 +442,15 @@ vm_page_domain_init(struct vm_domain *vmd) vm_offset_t vm_page_startup(vm_offset_t vaddr) { - vm_offset_t mapped; - vm_paddr_t high_avail, low_avail, page_range, size; - vm_paddr_t new_end; - int i; - vm_paddr_t pa; - vm_paddr_t last_pa; + struct vm_domain *vmd; + struct vm_phys_seg *seg; + vm_page_t m; char *list, *listend; - vm_paddr_t end; - vm_paddr_t biggestsize; - int biggestone; - int pages_per_zone; + vm_offset_t mapped; + vm_paddr_t end, high_avail, low_avail, new_end, page_range, size; + vm_paddr_t biggestsize, last_pa, pa; + u_long pagecount; + int biggestone, i, pages_per_zone, segind; biggestsize = 0; biggestone = 0; @@ -463,7 +486,8 @@ vm_page_startup(vm_offset_t vaddr) * in proportion to the zone structure size. */ pages_per_zone = howmany(sizeof(struct uma_zone) + - sizeof(struct uma_cache) * (mp_maxid + 1), UMA_SLAB_SIZE); + sizeof(struct uma_cache) * (mp_maxid + 1) + + roundup2(sizeof(struct uma_slab), sizeof(void *)), UMA_SLAB_SIZE); if (pages_per_zone > 1) { /* Reserve more pages so that we don't run out. 
*/ boot_pages = UMA_BOOT_PAGES_ZONES * pages_per_zone; @@ -507,6 +531,8 @@ vm_page_startup(vm_offset_t vaddr) vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)vm_page_dump, vm_page_dump_size); +#else + (void)last_pa; #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) /* @@ -611,7 +637,9 @@ vm_page_startup(vm_offset_t vaddr) new_end = trunc_page(end - page_range * sizeof(struct vm_page)); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); - vm_page_array = (vm_page_t) mapped; + vm_page_array = (vm_page_t)mapped; + vm_page_array_size = page_range; + #if VM_NRESERVLEVEL > 0 /* * Allocate physical memory for the reservation management system's @@ -638,33 +666,53 @@ vm_page_startup(vm_offset_t vaddr) vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* - * Clear all of the page structures - */ - bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); - for (i = 0; i < page_range; i++) - vm_page_array[i].order = VM_NFREEORDER; - vm_page_array_size = page_range; - - /* * Initialize the physical memory allocator. */ vm_phys_init(); /* - * Add every available physical page that is not blacklisted to - * the free lists. + * Initialize the page structures and add every available page to the + * physical memory allocator's free lists. */ vm_cnt.v_page_count = 0; vm_cnt.v_free_count = 0; - for (i = 0; phys_avail[i + 1] != 0; i += 2) { - pa = phys_avail[i]; - last_pa = phys_avail[i + 1]; - while (pa < last_pa) { - vm_phys_add_page(pa); - pa += PAGE_SIZE; + for (segind = 0; segind < vm_phys_nsegs; segind++) { + seg = &vm_phys_segs[segind]; + for (m = seg->first_page, pa = seg->start; pa < seg->end; + m++, pa += PAGE_SIZE) + vm_page_init_page(m, pa, segind); + + /* + * Add the segment to the free lists only if it is covered by + * one of the ranges in phys_avail. 
Because we've added the + * ranges to the vm_phys_segs array, we can assume that each + * segment is either entirely contained in one of the ranges, + * or doesn't overlap any of them. + */ + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + if (seg->start < phys_avail[i] || + seg->end > phys_avail[i + 1]) + continue; + + m = seg->first_page; + pagecount = (u_long)atop(seg->end - seg->start); + + mtx_lock(&vm_page_queue_free_mtx); + vm_phys_free_contig(m, pagecount); + vm_phys_freecnt_adj(m, (int)pagecount); + mtx_unlock(&vm_page_queue_free_mtx); + vm_cnt.v_page_count += (u_int)pagecount; + + vmd = &vm_dom[seg->domain]; + vmd->vmd_page_count += (u_int)pagecount; + vmd->vmd_segs |= 1UL << m->segind; + break; } } + /* + * Remove blacklisted pages from the physical memory allocator. + */ TAILQ_INIT(&blacklist_head); vm_page_blacklist_load(&list, &listend); vm_page_blacklist_check(list, listend); @@ -745,6 +793,7 @@ vm_page_sunbusy(vm_page_t m) { u_int x; + vm_page_lock_assert(m, MA_NOTOWNED); vm_page_assert_sbusied(m); for (;;) { @@ -904,6 +953,23 @@ vm_page_flash(vm_page_t m) } /* + * Avoid releasing and reacquiring the same page lock. + */ +void +vm_page_change_lock(vm_page_t m, struct mtx **mtx) +{ + struct mtx *mtx1; + + mtx1 = vm_page_lockptr(m); + if (*mtx == mtx1) + return; + if (*mtx != NULL) + mtx_unlock(*mtx); + *mtx = mtx1; + mtx_lock(mtx1); +} + +/* * Keep page from being freed by the page daemon * much of the same effect as wiring, except much lower * overhead and should be used only for *very* temporary @@ -936,20 +1002,11 @@ vm_page_unhold(vm_page_t mem) void vm_page_unhold_pages(vm_page_t *ma, int count) { - struct mtx *mtx, *new_mtx; + struct mtx *mtx; mtx = NULL; for (; count != 0; count--) { - /* - * Avoid releasing and reacquiring the same page lock. 
- */ - new_mtx = vm_page_lockptr(*ma); - if (mtx != new_mtx) { - if (mtx != NULL) - mtx_unlock(mtx); - mtx = new_mtx; - mtx_lock(mtx); - } + vm_page_change_lock(*ma, &mtx); vm_page_unhold(*ma); ma++; } @@ -1527,15 +1584,34 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) vm_page_t vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) { - vm_page_t m, mpred; + + return (vm_page_alloc_after(object, pindex, req, object != NULL ? + vm_radix_lookup_le(&object->rtree, pindex) : NULL)); +} + +/* + * Allocate a page in the specified object with the given page index. To + * optimize insertion of the page into the object, the caller must also specifiy + * the resident page in the object with largest index smaller than the given + * page index, or NULL if no such page exists. + */ +vm_page_t +vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req, + vm_page_t mpred) +{ + vm_page_t m; int flags, req_class; - mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), - ("vm_page_alloc: inconsistent object(%p)/req(%x)", object, req)); + ("inconsistent object(%p)/req(%x)", object, req)); + KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0, + ("Can't sleep and retry object insertion.")); + KASSERT(mpred == NULL || mpred->pindex < pindex, + ("mpred %p doesn't precede pindex 0x%jx", mpred, + (uintmax_t)pindex)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); @@ -1550,16 +1626,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; - if (object != NULL) { - mpred = vm_radix_lookup_le(&object->rtree, pindex); - KASSERT(mpred == NULL || mpred->pindex != pindex, - ("vm_page_alloc: pindex already allocated")); - } - /* * Allocate a page if the number of 
free pages exceeds the minimum * for the request class. */ +again: mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && @@ -1592,17 +1663,15 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) /* * Not allocatable, give up. */ - mtx_unlock(&vm_page_queue_free_mtx); - atomic_add_int(&vm_pageout_deficit, - max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); - pagedaemon_wakeup(); + if (vm_page_alloc_fail(object, req)) + goto again; return (NULL); } /* * At this point we had better have found a good page. */ - KASSERT(m != NULL, ("vm_page_alloc: missing page")); + KASSERT(m != NULL, ("missing page")); vm_phys_freecnt_adj(m, -1); if ((m->flags & PG_ZERO) != 0) vm_page_zero_count--; @@ -1649,6 +1718,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ vm_page_free_toq(m); + if (req & VM_ALLOC_WAITFAIL) { + VM_OBJECT_WUNLOCK(object); + vm_radix_wait(); + VM_OBJECT_WLOCK(object); + } return (NULL); } @@ -1726,6 +1800,8 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object, req)); + KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0, + ("Can't sleep and retry object insertion.")); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_FICTITIOUS) == 0, @@ -1751,6 +1827,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, * Can we allocate the pages without the number of free pages falling * below the lower bound for the allocation class? 
*/ +again: mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && @@ -1772,9 +1849,8 @@ retry: m_ret = vm_phys_alloc_contig(npages, low, high, alignment, boundary); } else { - mtx_unlock(&vm_page_queue_free_mtx); - atomic_add_int(&vm_pageout_deficit, npages); - pagedaemon_wakeup(); + if (vm_page_alloc_fail(object, req)) + goto again; return (NULL); } if (m_ret != NULL) { @@ -1843,6 +1919,11 @@ retry: /* Don't change PG_ZERO. */ vm_page_free_toq(m); } + if (req & VM_ALLOC_WAITFAIL) { + VM_OBJECT_WUNLOCK(object); + vm_radix_wait(); + VM_OBJECT_WLOCK(object); + } return (NULL); } mpred = m; @@ -1915,18 +1996,17 @@ vm_page_alloc_freelist(int flind, int req) /* * Do not allocate reserved pages unless the req has asked for it. */ +again: mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count > 0)) + vm_cnt.v_free_count > 0)) { m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); - else { - mtx_unlock(&vm_page_queue_free_mtx); - atomic_add_int(&vm_pageout_deficit, - max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); - pagedaemon_wakeup(); + } else { + if (vm_page_alloc_fail(NULL, req)) + goto again; return (NULL); } if (m == NULL) { @@ -1988,7 +2068,7 @@ vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options) { - struct mtx *m_mtx, *new_mtx; + struct mtx *m_mtx; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_run; @@ -2004,8 +2084,10 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, run_len = 0; m_mtx = NULL; for (m = m_start; m < m_end && run_len < npages; m += m_inc) { - KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, - ("page %p is PG_FICTITIOUS or PG_MARKER", m)); + KASSERT((m->flags & PG_MARKER) == 0, + 
("page %p is PG_MARKER", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1, + ("fictitious page %p has invalid wire count", m)); /* * If the current page would be the start of a run, check its @@ -2031,16 +2113,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, } else KASSERT(m_run != NULL, ("m_run == NULL")); - /* - * Avoid releasing and reacquiring the same page lock. - */ - new_mtx = vm_page_lockptr(m); - if (m_mtx != new_mtx) { - if (m_mtx != NULL) - mtx_unlock(m_mtx); - m_mtx = new_mtx; - mtx_lock(m_mtx); - } + vm_page_change_lock(m, &m_mtx); m_inc = 1; retry: if (m->wire_count != 0 || m->hold_count != 0) @@ -2190,7 +2263,7 @@ static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, vm_paddr_t high) { - struct mtx *m_mtx, *new_mtx; + struct mtx *m_mtx; struct spglist free; vm_object_t object; vm_paddr_t pa; @@ -2211,13 +2284,7 @@ vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, /* * Avoid releasing and reacquiring the same page lock. */ - new_mtx = vm_page_lockptr(m); - if (m_mtx != new_mtx) { - if (m_mtx != NULL) - mtx_unlock(m_mtx); - m_mtx = new_mtx; - mtx_lock(m_mtx); - } + vm_page_change_lock(m, &m_mtx); retry: if (m->wire_count != 0 || m->hold_count != 0) error = EBUSY; @@ -2330,12 +2397,7 @@ retry: * The new page must be deactivated * before the object is unlocked. 
*/ - new_mtx = vm_page_lockptr(m_new); - if (m_mtx != new_mtx) { - mtx_unlock(m_mtx); - m_mtx = new_mtx; - mtx_lock(m_mtx); - } + vm_page_change_lock(m_new, &m_mtx); vm_page_deactivate(m_new); } else { m->flags &= ~PG_ZERO; @@ -2378,13 +2440,7 @@ unlock: mtx_lock(&vm_page_queue_free_mtx); do { SLIST_REMOVE_HEAD(&free, plinks.s.ss); - vm_phys_freecnt_adj(m, 1); -#if VM_NRESERVLEVEL > 0 - if (!vm_reserv_free_page(m)) -#else - if (true) -#endif - vm_phys_free_pages(m, 0); + vm_page_free_phys(m); } while ((m = SLIST_FIRST(&free)) != NULL); vm_page_zero_idle_wakeup(); vm_page_free_wakeup(); @@ -2514,11 +2570,11 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, * Sleep until free pages are available for allocation. * - Called in various places before memory allocations. */ -void -vm_wait(void) +static void +_vm_wait(void) { - mtx_lock(&vm_page_queue_free_mtx); + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); if (curproc == pageproc) { vm_pageout_pages_needed = 1; msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx, @@ -2536,6 +2592,46 @@ vm_wait(void) } } +void +vm_wait(void) +{ + + mtx_lock(&vm_page_queue_free_mtx); + _vm_wait(); +} + +/* + * vm_page_alloc_fail: + * + * Called when a page allocation function fails. Informs the + * pagedaemon and performs the requested wait. Requires the + * page_queue_free and object lock on entry. Returns with the + * object lock held and free lock released. Returns an error when + * retry is necessary. 
+ * + */ +static int +vm_page_alloc_fail(vm_object_t object, int req) +{ + + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + + atomic_add_int(&vm_pageout_deficit, + max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); + pagedaemon_wakeup(); + if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) { + if (object != NULL) + VM_OBJECT_WUNLOCK(object); + _vm_wait(); + if (object != NULL) + VM_OBJECT_WLOCK(object); + if (req & VM_ALLOC_WAITOK) + return (EAGAIN); + } else + mtx_unlock(&vm_page_queue_free_mtx); + return (0); +} + /* * vm_waitpfault: (also see VM_WAITPFAULT macro) * @@ -2721,7 +2817,7 @@ vm_page_activate(vm_page_t m) * * The page queues must be locked. */ -static inline void +static void vm_page_free_wakeup(void) { @@ -2747,17 +2843,30 @@ vm_page_free_wakeup(void) } /* - * vm_page_free_toq: + * vm_page_free_prep: * - * Returns the given page to the free list, - * disassociating it with any VM object. + * Prepares the given page to be put on the free list, + * disassociating it from any VM object. The caller may return + * the page to the free list only if this function returns true. * - * The object must be locked. The page must be locked if it is managed. + * The object must be locked. The page must be locked if it is + * managed. For a queued managed page, the pagequeue_locked + * argument specifies whether the page queue is already locked. 
*/ -void -vm_page_free_toq(vm_page_t m) +bool +vm_page_free_prep(vm_page_t m, bool pagequeue_locked) { +#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) + if ((m->flags & PG_ZERO) != 0) { + uint64_t *p; + int i; + p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++) + KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx", + m, i, (uintmax_t)*p)); + } +#endif if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), @@ -2776,16 +2885,20 @@ vm_page_free_toq(vm_page_t m) * callback routine until after we've put the page on the * appropriate free queue. */ - vm_page_remque(m); + if (m->queue != PQ_NONE) { + if (pagequeue_locked) + vm_page_dequeue_locked(m); + else + vm_page_dequeue(m); + } vm_page_remove(m); /* * If fictitious remove object association and * return, otherwise delay object association removal. */ - if ((m->flags & PG_FICTITIOUS) != 0) { - return; - } + if ((m->flags & PG_FICTITIOUS) != 0) + return (false); m->valid = 0; vm_page_undirty(m); @@ -2797,32 +2910,72 @@ vm_page_free_toq(vm_page_t m) KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); m->flags |= PG_UNHOLDFREE; - } else { - /* - * Restore the default memory attribute to the page. - */ - if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) - pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); + return (false); + } - /* - * Insert the page into the physical memory allocator's free - * page queues. - */ - mtx_lock(&vm_page_queue_free_mtx); - vm_phys_freecnt_adj(m, 1); + /* + * Restore the default memory attribute to the page. + */ + if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) + pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); + + return (true); +} + +/* + * Insert the page into the physical memory allocator's free page + * queues. This is the last step to free a page. 
+ */ +static void +vm_page_free_phys(vm_page_t m) +{ + + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + + vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 - if (!vm_reserv_free_page(m)) -#else - if (TRUE) + if (!vm_reserv_free_page(m)) #endif vm_phys_free_pages(m, 0); - if ((m->flags & PG_ZERO) != 0) - ++vm_page_zero_count; - else - vm_page_zero_idle_wakeup(); - vm_page_free_wakeup(); - mtx_unlock(&vm_page_queue_free_mtx); - } + if ((m->flags & PG_ZERO) != 0) + ++vm_page_zero_count; + else + vm_page_zero_idle_wakeup(); +} + +void +vm_page_free_phys_pglist(struct pglist *tq) +{ + vm_page_t m; + + if (TAILQ_EMPTY(tq)) + return; + mtx_lock(&vm_page_queue_free_mtx); + TAILQ_FOREACH(m, tq, listq) + vm_page_free_phys(m); + vm_page_free_wakeup(); + mtx_unlock(&vm_page_queue_free_mtx); +} + +/* + * vm_page_free_toq: + * + * Returns the given page to the free list, disassociating it + * from any VM object. + * + * The object must be locked. The page must be locked if it is + * managed. + */ +void +vm_page_free_toq(vm_page_t m) +{ + + if (!vm_page_free_prep(m, false)) + return; + mtx_lock(&vm_page_queue_free_mtx); + vm_page_free_phys(m); + vm_page_free_wakeup(); + mtx_unlock(&vm_page_queue_free_mtx); } /* @@ -3005,29 +3158,31 @@ vm_page_launder(vm_page_t m) * vm_page_try_to_free() * * Attempt to free the page. If we cannot free it, we do nothing. - * 1 is returned on success, 0 on failure. + * true is returned on success, false on failure. 
*/ -int +bool vm_page_try_to_free(vm_page_t m) { - vm_page_lock_assert(m, MA_OWNED); + vm_page_assert_locked(m); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->dirty || m->hold_count || m->wire_count || + if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0 || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) - return (0); - pmap_remove_all(m); - if (m->dirty) - return (0); + return (false); + if (m->object != NULL && m->object->ref_count != 0) { + pmap_remove_all(m); + if (m->dirty != 0) + return (false); + } vm_page_free(m); - return (1); + return (true); } /* * vm_page_advise * - * Deactivate or do nothing, as appropriate. + * Apply the specified advice to the given page. * * The object and page must be locked. */ @@ -3045,8 +3200,11 @@ vm_page_advise(vm_page_t m, int advice) * would result in a page fault on a later access. */ vm_page_undirty(m); - else if (advice != MADV_DONTNEED) + else if (advice != MADV_DONTNEED) { + if (advice == MADV_WILLNEED) + vm_page_activate(m); return; + } /* * Clear any references to the page. Otherwise, the page daemon will @@ -3085,11 +3243,16 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; int sleep; + int pflags; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); + pflags = allocflags & + ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); + if ((allocflags & VM_ALLOC_NOWAIT) == 0) + pflags |= VM_ALLOC_WAITFAIL; retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? 
@@ -3123,13 +3286,10 @@ retrylookup: return (m); } } - m = vm_page_alloc(object, pindex, allocflags); + m = vm_page_alloc(object, pindex, pflags); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); - VM_OBJECT_WUNLOCK(object); - VM_WAIT; - VM_OBJECT_WLOCK(object); goto retrylookup; } if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) @@ -3138,6 +3298,114 @@ retrylookup: } /* + * Return the specified range of pages from the given object. For each + * page offset within the range, if a page already exists within the object + * at that offset and it is busy, then wait for it to change state. If, + * instead, the page doesn't exist, then allocate it. + * + * The caller must always specify an allocation class. + * + * allocation classes: + * VM_ALLOC_NORMAL normal process request + * VM_ALLOC_SYSTEM system *really* needs the pages + * + * The caller must always specify that the pages are to be busied and/or + * wired. + * + * optional allocation flags: + * VM_ALLOC_IGN_SBUSY do not sleep on soft busy pages + * VM_ALLOC_NOBUSY do not exclusive busy the page + * VM_ALLOC_NOWAIT do not sleep + * VM_ALLOC_SBUSY set page to sbusy state + * VM_ALLOC_WIRED wire the pages + * VM_ALLOC_ZERO zero and validate any invalid pages + * + * If VM_ALLOC_NOWAIT is not specified, this routine may sleep. Otherwise, it + * may return a partial prefix of the requested range. 
+ */ +int +vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, + vm_page_t *ma, int count) +{ + vm_page_t m, mpred; + int pflags; + int i; + bool sleep; + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0, + ("vm_page_grap_pages: VM_ALLOC_COUNT() is not allowed")); + KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 || + (allocflags & VM_ALLOC_WIRED) != 0, + ("vm_page_grab_pages: the pages must be busied or wired")); + KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || + (allocflags & VM_ALLOC_IGN_SBUSY) != 0, + ("vm_page_grab_pages: VM_ALLOC_SBUSY/IGN_SBUSY mismatch")); + if (count == 0) + return (0); + pflags = allocflags & ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | + VM_ALLOC_WAITFAIL | VM_ALLOC_IGN_SBUSY); + if ((allocflags & VM_ALLOC_NOWAIT) == 0) + pflags |= VM_ALLOC_WAITFAIL; + i = 0; +retrylookup: + m = vm_radix_lookup_le(&object->rtree, pindex + i); + if (m == NULL || m->pindex != pindex + i) { + mpred = m; + m = NULL; + } else + mpred = TAILQ_PREV(m, pglist, listq); + for (; i < count; i++) { + if (m != NULL) { + sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? + vm_page_xbusied(m) : vm_page_busied(m); + if (sleep) { + if ((allocflags & VM_ALLOC_NOWAIT) != 0) + break; + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. 
+ */ + vm_page_aflag_set(m, PGA_REFERENCED); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(object); + vm_page_busy_sleep(m, "grbmaw", (allocflags & + VM_ALLOC_IGN_SBUSY) != 0); + VM_OBJECT_WLOCK(object); + goto retrylookup; + } + if ((allocflags & VM_ALLOC_WIRED) != 0) { + vm_page_lock(m); + vm_page_wire(m); + vm_page_unlock(m); + } + if ((allocflags & (VM_ALLOC_NOBUSY | + VM_ALLOC_SBUSY)) == 0) + vm_page_xbusy(m); + if ((allocflags & VM_ALLOC_SBUSY) != 0) + vm_page_sbusy(m); + } else { + m = vm_page_alloc_after(object, pindex + i, + pflags | VM_ALLOC_COUNT(count - i), mpred); + if (m == NULL) { + if ((allocflags & VM_ALLOC_NOWAIT) != 0) + break; + goto retrylookup; + } + } + if (m->valid == 0 && (allocflags & VM_ALLOC_ZERO) != 0) { + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + m->valid = VM_PAGE_BITS_ALL; + } + ma[i] = mpred = m; + m = vm_page_next(m); + } + return (i); +} + +/* * Mapping function for valid or dirty bits in a page. * * Inputs are required to range within a page. @@ -3455,16 +3723,17 @@ vm_page_is_valid(vm_page_t m, int base, int size) } /* - * vm_page_ps_is_valid: - * - * Returns TRUE if the entire (super)page is valid and FALSE otherwise. + * Returns true if all of the specified predicates are true for the entire + * (super)page and false otherwise. */ -boolean_t -vm_page_ps_is_valid(vm_page_t m) +bool +vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m) { + vm_object_t object; int i, npages; - VM_OBJECT_ASSERT_LOCKED(m->object); + object = m->object; + VM_OBJECT_ASSERT_LOCKED(object); npages = atop(pagesizes[m->psind]); /* @@ -3473,10 +3742,28 @@ vm_page_ps_is_valid(vm_page_t m) * occupy adjacent entries in vm_page_array[]. */ for (i = 0; i < npages; i++) { - if (m[i].valid != VM_PAGE_BITS_ALL) - return (FALSE); + /* Always test object consistency, including "skip_m". 
*/ + if (m[i].object != object) + return (false); + if (&m[i] == skip_m) + continue; + if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(&m[i])) + return (false); + if ((flags & PS_ALL_DIRTY) != 0) { + /* + * Calling vm_page_test_dirty() or pmap_is_modified() + * might stop this case from spuriously returning + * "false". However, that would require a write lock + * on the object containing "m[i]". + */ + if (m[i].dirty != VM_PAGE_BITS_ALL) + return (false); + } + if ((flags & PS_ALL_VALID) != 0 && + m[i].valid != VM_PAGE_BITS_ALL) + return (false); } - return (TRUE); + return (true); } /* diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 1ee8dde..f9db153 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -387,6 +387,9 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); * vm_page_alloc_freelist(). Some functions support only a subset * of the flags, and ignore others, see the flags legend. * + * The meaning of VM_ALLOC_ZERO differs slightly between the vm_page_alloc*() + * and the vm_page_grab*() functions. See these functions for details. + * * Bits 0 - 1 define class. * Bits 2 - 15 dedicated for flags. * Legend: @@ -394,6 +397,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); * (c) - vm_page_alloc_contig() supports the flag. * (f) - vm_page_alloc_freelist() supports the flag. * (g) - vm_page_grab() supports the flag. + * (p) - vm_page_grab_pages() supports the flag. * Bits above 15 define the count of additional pages that the caller * intends to allocate. 
*/ @@ -401,16 +405,18 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 #define VM_ALLOC_CLASS_MASK 3 -#define VM_ALLOC_WIRED 0x0020 /* (acfg) Allocate non pageable page */ -#define VM_ALLOC_ZERO 0x0040 /* (acfg) Try to obtain a zeroed page */ +#define VM_ALLOC_WAITOK 0x0008 /* (acf) Sleep and retry */ +#define VM_ALLOC_WAITFAIL 0x0010 /* (acf) Sleep and return error */ +#define VM_ALLOC_WIRED 0x0020 /* (acfgp) Allocate a wired page */ +#define VM_ALLOC_ZERO 0x0040 /* (acfgp) Allocate a prezeroed page */ #define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */ -#define VM_ALLOC_NOBUSY 0x0200 /* (acg) Do not busy the page */ +#define VM_ALLOC_NOBUSY 0x0200 /* (acgp) Do not excl busy the page */ #define VM_ALLOC_IFCACHED 0x0400 #define VM_ALLOC_IFNOTCACHED 0x0800 -#define VM_ALLOC_IGN_SBUSY 0x1000 /* (g) Ignore shared busy flag */ +#define VM_ALLOC_IGN_SBUSY 0x1000 /* (gp) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ -#define VM_ALLOC_SBUSY 0x4000 /* (acg) Shared busy the page */ -#define VM_ALLOC_NOWAIT 0x8000 /* (g) Do not sleep, return NULL */ +#define VM_ALLOC_SBUSY 0x4000 /* (acgp) Shared busy the page */ +#define VM_ALLOC_NOWAIT 0x8000 /* (acfgp) Do not sleep */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) @@ -429,10 +435,26 @@ malloc2vm_flags(int malloc_flags) pflags |= VM_ALLOC_ZERO; if ((malloc_flags & M_NODUMP) != 0) pflags |= VM_ALLOC_NODUMP; + if ((malloc_flags & M_NOWAIT)) + pflags |= VM_ALLOC_NOWAIT; + if ((malloc_flags & M_WAITOK)) + pflags |= VM_ALLOC_WAITOK; return (pflags); } #endif +/* + * Predicates supported by vm_page_ps_test(): + * + * PS_ALL_DIRTY is true only if the entire (super)page is dirty. + * However, it can be spuriously false when the (super)page has become + * dirty in the pmap but that information has not been propagated to the + * machine-independent layer. 
+ */ +#define PS_ALL_DIRTY 0x1 +#define PS_ALL_VALID 0x2 +#define PS_NONE_BUSY 0x4 + void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared); void vm_page_flash(vm_page_t m); @@ -443,18 +465,23 @@ void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); -vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); +vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int); +vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); +void vm_page_change_lock(vm_page_t m, struct mtx **mtx); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); -int vm_page_try_to_free (vm_page_t); +int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, + vm_page_t *ma, int count); void vm_page_deactivate (vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_locked(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); +void vm_page_free_phys_pglist(struct pglist *tq); +bool vm_page_free_prep(vm_page_t m, bool pagequeue_locked); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); @@ -464,7 +491,7 @@ vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); struct vm_pagequeue *vm_page_pagequeue(vm_page_t m); vm_page_t vm_page_prev(vm_page_t m); -boolean_t vm_page_ps_is_valid(vm_page_t m); +bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); bool 
vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, @@ -483,6 +510,7 @@ void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); +bool vm_page_try_to_free(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); boolean_t vm_page_unwire(vm_page_t m, uint8_t queue); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index a8a19f3..199f2d9 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -250,8 +250,7 @@ static u_int vm_background_launder_max = 20 * 1024; SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RW, &vm_background_launder_max, 0, "background laundering cap, in kilobytes"); -#define VM_PAGEOUT_PAGE_COUNT 16 -int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; +int vm_pageout_page_count = 32; int vm_page_max_wired; /* XXX max # of wired pages system-wide */ SYSCTL_INT(_vm, OID_AUTO, max_wired, diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 45d0c27..3625b41 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -165,7 +165,7 @@ struct pagerops *pagertab[] = { * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size * (MAXPHYS == 64k) if you want to get the most efficiency. 
*/ -struct mtx_padalign pbuf_mtx; +struct mtx_padalign __exclusive_cache_line pbuf_mtx; static TAILQ_HEAD(swqueue, buf) bswlist; static int bswneeded; vm_offset_t swapbkva; /* swap buffers kva */ diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index 484417b..60b452f 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -175,7 +175,6 @@ static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, vm_paddr_t boundary); static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); -static int vm_phys_paddr_to_segind(vm_paddr_t pa); static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order); @@ -731,35 +730,6 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) } /* - * Initialize a physical page and add it to the free lists. - */ -void -vm_phys_add_page(vm_paddr_t pa) -{ - vm_page_t m; - struct vm_domain *vmd; - - vm_cnt.v_page_count++; - m = vm_phys_paddr_to_vm_page(pa); - m->busy_lock = VPB_UNBUSIED; - m->phys_addr = pa; - m->queue = PQ_NONE; - m->segind = vm_phys_paddr_to_segind(pa); - vmd = vm_phys_domain(m); - vmd->vmd_page_count++; - vmd->vmd_segs |= 1UL << m->segind; - KASSERT(m->order == VM_NFREEORDER, - ("vm_phys_add_page: page %p has unexpected order %d", - m, m->order)); - m->pool = VM_FREEPOOL_DEFAULT; - pmap_page_init(m); - mtx_lock(&vm_page_queue_free_mtx); - vm_phys_freecnt_adj(m, 1); - vm_phys_free_pages(m, 0); - mtx_unlock(&vm_page_queue_free_mtx); -} - -/* * Allocate a contiguous, power of two-sized set of physical pages * from the free lists. 
* @@ -912,6 +882,7 @@ vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, { long i; + bzero(range, page_count * sizeof(*range)); for (i = 0; i < page_count; i++) { vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr); range[i].oflags &= ~VPO_UNMANAGED; @@ -986,7 +957,7 @@ vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, alloc: #endif fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, - M_WAITOK | M_ZERO); + M_WAITOK); #ifdef VM_PHYSSEG_DENSE } #endif @@ -1067,24 +1038,6 @@ vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) } /* - * Find the segment containing the given physical address. - */ -static int -vm_phys_paddr_to_segind(vm_paddr_t pa) -{ - struct vm_phys_seg *seg; - int segind; - - for (segind = 0; segind < vm_phys_nsegs; segind++) { - seg = &vm_phys_segs[segind]; - if (pa >= seg->start && pa < seg->end) - return (segind); - } - panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , - (uintmax_t)pa); -} - -/* * Free a contiguous, power of two-sized set of physical pages. * * The free page queues must be locked. diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h index ee4aa2d..c5dd58d 100644 --- a/sys/vm/vm_phys.h +++ b/sys/vm/vm_phys.h @@ -69,7 +69,6 @@ extern int vm_phys_nsegs; /* * The following functions are only to be used by the virtual memory system. */ -void vm_phys_add_page(vm_paddr_t pa); void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end); vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c index 4f0a575..546d316 100644 --- a/sys/vm/vm_radix.c +++ b/sys/vm/vm_radix.c @@ -310,7 +310,7 @@ SYSINIT(vm_radix_reserve_kva, SI_SUB_KMEM, SI_ORDER_THIRD, * Initialize the UMA slab zone. 
*/ void -vm_radix_init(void) +vm_radix_zinit(void) { vm_radix_node_zone = uma_zcreate("RADIX NODE", @@ -775,6 +775,12 @@ vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage) panic("%s: original replacing page not found", __func__); } +void +vm_radix_wait(void) +{ + uma_zwait(vm_radix_node_zone); +} + #ifdef DDB /* * Show details about the given radix node. diff --git a/sys/vm/vm_radix.h b/sys/vm/vm_radix.h index b8a722d..df81180 100644 --- a/sys/vm/vm_radix.h +++ b/sys/vm/vm_radix.h @@ -35,8 +35,8 @@ #ifdef _KERNEL -void vm_radix_init(void); int vm_radix_insert(struct vm_radix *rtree, vm_page_t page); +void vm_radix_wait(void); boolean_t vm_radix_is_singleton(struct vm_radix *rtree); vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index); vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index); @@ -44,6 +44,21 @@ vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index); void vm_radix_reclaim_allnodes(struct vm_radix *rtree); vm_page_t vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index); vm_page_t vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage); +void vm_radix_zinit(void); + +static __inline void +vm_radix_init(struct vm_radix *rtree) +{ + + rtree->rt_root = 0; +} + +static __inline boolean_t +vm_radix_is_empty(struct vm_radix *rtree) +{ + + return (rtree->rt_root == 0); +} #endif /* _KERNEL */ #endif /* !_VM_RADIX_H_ */ diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index 04b2489..9ca3d1a 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -74,6 +74,7 @@ #define CR4_PCIDE 0x00020000 /* Enable Context ID */ #define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */ #define CR4_SMEP 0x00100000 /* Supervisor-Mode Execution Prevention */ +#define CR4_SMAP 0x00200000 /* Supervisor-Mode Access Prevention */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. 
@@ -322,6 +323,13 @@ #define AMDPM_CPB 0x00000200 /* + * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions) + */ +#define AMDFEID_CLZERO 0x00000001 +#define AMDFEID_IRPERF 0x00000002 +#define AMDFEID_XSAVEERPTR 0x00000004 + +/* * AMD extended function 8000_0008h ecx info */ #define AMDID_CMP_CORES 0x000000ff @@ -362,6 +370,7 @@ #define CPUID_STDEXT_AVX512CD 0x10000000 #define CPUID_STDEXT_SHA 0x20000000 #define CPUID_STDEXT_AVX512BW 0x40000000 +#define CPUID_STDEXT_AVX512VL 0x80000000 /* * CPUID instruction 7 Structured Extended Features, leaf 0 ecx info diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h index dc7d424..a4bb7f3 100644 --- a/sys/x86/include/x86_var.h +++ b/sys/x86/include/x86_var.h @@ -45,6 +45,7 @@ extern u_int cpu_feature2; extern u_int amd_feature; extern u_int amd_feature2; extern u_int amd_pminfo; +extern u_int amd_extended_feature_extensions; extern u_int via_feature_rng; extern u_int via_feature_xcrypt; extern u_int cpu_clflush_line_size; @@ -121,6 +122,7 @@ void dump_drop_page(vm_paddr_t); void finishidentcpu(void); void identify_cpu1(void); void identify_cpu2(void); +void identify_hypervisor(void); void initializecpu(void); void initializecpucache(void); bool fix_cpuid(void); diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c index 0dcb618..96ee694 100644 --- a/sys/x86/iommu/intel_utils.c +++ b/sys/x86/iommu/intel_utils.c @@ -257,9 +257,12 @@ vm_page_t dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) { vm_page_t m; - int zeroed; + int zeroed, aflags; zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0; + aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | + ((flags & DMAR_PGF_WAITOK) != 0 ? 
VM_ALLOC_WAITFAIL : + VM_ALLOC_NOWAIT); for (;;) { if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WLOCK(obj); @@ -269,8 +272,7 @@ dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) VM_OBJECT_WUNLOCK(obj); break; } - m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY | - VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0, + m = vm_page_alloc_contig(obj, idx, aflags, 1, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if ((flags & DMAR_PGF_OBJL) == 0) VM_OBJECT_WUNLOCK(obj); @@ -282,11 +284,6 @@ dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) } if ((flags & DMAR_PGF_WAITOK) == 0) break; - if ((flags & DMAR_PGF_OBJL) != 0) - VM_OBJECT_WUNLOCK(obj); - VM_WAIT; - if ((flags & DMAR_PGF_OBJL) != 0) - VM_OBJECT_WLOCK(obj); } return (m); } diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index dd58037..63cd2cb 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -92,6 +92,7 @@ u_int cpu_feature2; /* Feature flags */ u_int amd_feature; /* AMD feature flags */ u_int amd_feature2; /* AMD feature flags */ u_int amd_pminfo; /* AMD advanced power management info */ +u_int amd_extended_feature_extensions; u_int via_feature_rng; /* VIA RNG features */ u_int via_feature_xcrypt; /* VIA ACE features */ u_int cpu_high; /* Highest arg to CPUID */ @@ -963,6 +964,7 @@ printcpuinfo(void) "\035AVX512CD" "\036SHA" "\037AVX512BW" + "\040AVX512VL" ); } @@ -1012,6 +1014,16 @@ printcpuinfo(void) ); } + if (amd_extended_feature_extensions != 0) { + printf("\n " + "AMD Extended Feature Extensions ID EBX=" + "0x%b", amd_extended_feature_extensions, + "\020" + "\001CLZERO" + "\002IRPerf" + "\003XSaveErPtr"); + } + if (via_feature_rng != 0 || via_feature_xcrypt != 0) print_via_padlock_info(); @@ -1273,7 +1285,7 @@ static const char *const vm_pnames[] = { NULL }; -static void +void identify_hypervisor(void) { u_int regs[4]; @@ -1380,7 +1392,8 @@ fix_cpuid(void) * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h * Models 60h-6Fh Processors, 
Publication # 50742. */ - if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x15) { + if (vm_guest == VM_GUEST_NO && cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) == 0x15) { msr = rdmsr(MSR_EXTFEATURES); if ((msr & ((uint64_t)1 << 54)) == 0) { msr |= (uint64_t)1 << 54; @@ -1448,7 +1461,6 @@ finishidentcpu(void) u_char ccr3; #endif - identify_hypervisor(); cpu_vendor_id = find_cpu_vendor_id(); if (fix_cpuid()) { @@ -1496,6 +1508,7 @@ finishidentcpu(void) if (cpu_exthigh >= 0x80000008) { do_cpuid(0x80000008, regs); cpu_maxphyaddr = regs[0] & 0xff; + amd_extended_feature_extensions = regs[1]; cpu_procinfo2 = regs[2]; } else { cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32; |