diff options
author | jimharris <jimharris@FreeBSD.org> | 2012-01-26 15:23:45 +0000 |
---|---|---|
committer | jimharris <jimharris@FreeBSD.org> | 2012-01-26 15:23:45 +0000 |
commit | bcd0e15cf642d6e5bf78ee585ad282b0e3061864 (patch) | |
tree | 4b7b6096856cdeb36fcba0adf3f4d121a52cce21 /sys/i386 | |
parent | 0cf3f853641c2ededc243c9f2139ac6a0b681fe4 (diff) | |
parent | a479ceccdf3646c7bc3ff60796c765650b3ffc03 (diff) | |
download | FreeBSD-src-bcd0e15cf642d6e5bf78ee585ad282b0e3061864.zip FreeBSD-src-bcd0e15cf642d6e5bf78ee585ad282b0e3061864.tar.gz |
Rebase user/jimharris/isci branch from head.
Diffstat (limited to 'sys/i386')
40 files changed, 513 insertions, 277 deletions
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index 3c851fa..0db2bbf 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -61,6 +61,8 @@ options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing +options CAPABILITY_MODE # Capsicum capability mode +options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework #options KDTRACE_HOOKS # Kernel DTrace hooks options INCLUDE_CONFIG_FILE # Include this file in kernel @@ -137,7 +139,8 @@ device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) -device ses # SCSI Environmental Services (and SAF-TE) +device ses # Enclosure Services (SES and SAF-TE) +device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES index 8b69e45..0cd0080 100644 --- a/sys/i386/conf/NOTES +++ b/sys/i386/conf/NOTES @@ -706,6 +706,10 @@ device aacp # SCSI Passthrough interface (optional, CAM required) device asr # +# Highpoint RocketRAID 27xx. +device hpt27xx + +# # Highpoint RocketRAID 182x. device hptmv @@ -833,9 +837,11 @@ hint.pcf.0.irq="5" # # ichwd: Intel ICH watchdog timer # amdsbwd: AMD SB7xx watchdog timer +# viawd: VIA south bridge watchdog timer # device ichwd device amdsbwd +device viawd # # Temperature sensors: diff --git a/sys/i386/conf/PAE b/sys/i386/conf/PAE index 613b7d7..ebf010e 100644 --- a/sys/i386/conf/PAE +++ b/sys/i386/conf/PAE @@ -23,6 +23,7 @@ device ispfw # address properly may cause data corruption when used in a machine with more # than 4 gigabytes of memory. + nodevice ahb nodevice sym nodevice trm @@ -37,6 +38,8 @@ nodevice ncv nodevice nsp nodevice stg +nodevice ctl + nodevice asr nodevice dpt nodevice mly diff --git a/sys/i386/conf/XEN b/sys/i386/conf/XEN index ad453bf..61fdbb4 100644 --- a/sys/i386/conf/XEN +++ b/sys/i386/conf/XEN @@ -83,6 +83,10 @@ device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device faith # IPv6-to-IPv4 relaying (translation) +# Wireless cards +options IEEE80211_SUPPORT_MESH +options AH_SUPPORT_AR5416 + # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. diff --git a/sys/i386/conf/XENHVM b/sys/i386/conf/XENHVM new file mode 100644 index 0000000..672716b --- /dev/null +++ b/sys/i386/conf/XENHVM @@ -0,0 +1,24 @@ +# +# XENHVM -- Xen HVM kernel configuration file for FreeBSD/i386 +# +# $FreeBSD$ +# +include GENERIC +ident XENHVM + +makeoptions MODULES_OVERRIDE="" + +# +# Adaptive locks rely on a lock-free pointer read to determine the run state +# of the thread holding a lock when under contention; under a virtualisation +# system, the thread run state may not accurately reflect whether the thread +# (or rather its host VCPU) is actually executing. As such, disable this +# optimisation. +# +options NO_ADAPTIVE_MUTEXES +options NO_ADAPTIVE_RWLOCKS +options NO_ADAPTIVE_SX + +# Xen HVM support +options XENHVM +device xenpci diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index 107cbcf..68cb430 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -775,8 +775,7 @@ no_kernend: * if we've enabled PSE above, we'll just switch the corresponding kernel * PDEs before we turn on paging. * - * XXX: We waste some pages here in the PSE case! DON'T BLINDLY REMOVE - * THIS! SMP needs the page table to be there to map the kernel P==V. + * XXX: We waste some pages here in the PSE case! */ xorl %eax, %eax movl R(KERNend),%ecx diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index c7352d9..126b618 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -653,8 +653,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(sf.sf_uc.uc_mcontext.mc_spare1, - sizeof(sf.sf_uc.uc_mcontext.mc_spare1)); + sf.sf_uc.uc_mcontext.mc_flags = 0; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); @@ -2369,10 +2368,13 @@ physmap_done: Maxmem = atop(physmap[physmap_idx + 1]); /* - * By default keep the memtest enabled. Use a general name so that + * By default enable the memory test on real hardware, and disable + * it if we appear to be running in a VM. This avoids touching all + * pages unnecessarily, which doesn't matter on real hardware but is + * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ - memtest = 1; + memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && @@ -3379,7 +3381,7 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; - bzero(mcp->mc_spare1, sizeof(mcp->mc_spare1)); + mcp->mc_flags = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 11d23ba..b67a39f 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -145,9 +145,6 @@ static int bootAP; void *bootstacks[MAXCPU]; static void *dpcpu; -/* Hotwire a 0->4MB V==P mapping */ -extern pt_entry_t *KPTphys; - struct pcb stoppcbs[MAXCPU]; /* Variables needed for SMP tlb shootdown. */ @@ -931,7 +928,6 @@ start_all_aps(void) #ifndef PC98 u_char mpbiosreason; #endif - uintptr_t kptbase; u_int32_t mpbioswarmvec; int apic_id, cpu, i; @@ -949,11 +945,8 @@ start_all_aps(void) /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ - - kptbase = (uintptr_t)(void *)KPTphys; for (i = TMPMAP_START; i < NKPT; i++) - PTD[i] = (pd_entry_t)(PG_V | PG_RW | - ((kptbase + i * PAGE_SIZE) & PG_FRAME)); + PTD[i] = PTD[KPTDI + i]; invltlb(); /* start each AP */ diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 3c9d83b..cdafc43 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -330,7 +330,7 @@ static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); -static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); +static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags); static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); @@ -340,6 +340,8 @@ static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int w #endif static void pmap_set_pg(void); +static __inline void pagezero(void *page); + CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); @@ -1216,7 +1218,7 @@ pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || - (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && + (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); } @@ -1366,8 +1368,8 @@ retry: if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | - (va & PDRMASK), &pa)) + if (vm_page_pa_tryrelock(pmap, (pde & + PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); @@ -1379,7 +1381,8 @@ retry: pmap_pte_release(ptep); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, + &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); @@ -1450,12 +1453,40 @@ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; + vm_paddr_t superpage_offset; + pd_entry_t newpde; - va = sva = *virt; + va = *virt; + /* + * Does the physical address range's size and alignment permit at + * least one superpage mapping to be created? + */ + superpage_offset = start & PDRMASK; + if ((end - start) - ((NBPDR - superpage_offset) & PDRMASK) >= NBPDR) { + /* + * Increase the starting virtual address so that its alignment + * does not preclude the use of superpage mappings. + */ + if ((va & PDRMASK) < superpage_offset) + va = (va & ~PDRMASK) + superpage_offset; + else if ((va & PDRMASK) > superpage_offset) + va = ((va + PDRMASK) & ~PDRMASK) + superpage_offset; + } + sva = va; while (start < end) { - pmap_kenter(va, start); - va += PAGE_SIZE; - start += PAGE_SIZE; + if ((start & PDRMASK) == 0 && end - start >= NBPDR && + pseflag) { + KASSERT((va & PDRMASK) == 0, + ("pmap_map: misaligned va %#x", va)); + newpde = start | PG_PS | pgeflag | PG_RW | PG_V; + pmap_kenter_pde(va, newpde); + va += NBPDR; + start += NBPDR; + } else { + pmap_kenter(va, start); + va += PAGE_SIZE; + start += PAGE_SIZE; + } } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; @@ -1731,7 +1762,6 @@ pmap_pinit(pmap_t pmap) if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, NBPTD); - if (pmap->pm_pdir == NULL) { PMAP_LOCK_DESTROY(pmap); return (0); @@ -1764,10 +1794,9 @@ pmap_pinit(pmap_t pmap) pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); - for (i = 0; i < NPGPTD; i++) { + for (i = 0; i < NPGPTD; i++) if ((ptdpg[i]->flags & PG_ZERO) == 0) - bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); - } + pagezero(pmap->pm_pdir + (i * NPDEPG)); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); @@ -1796,7 +1825,7 @@ pmap_pinit(pmap_t pmap) * mapped correctly. */ static vm_page_t -_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) +_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags) { vm_paddr_t ptepa; vm_page_t m; @@ -1844,7 +1873,7 @@ _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) { - unsigned ptepindex; + u_int ptepindex; pd_entry_t ptepa; vm_page_t m; @@ -1992,7 +2021,7 @@ pmap_lazyfix(pmap_t pmap) cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); - CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); } } #endif /* SMP */ @@ -2821,7 +2850,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } for (; sva < eva; sva = pdnxt) { - unsigned pdirindex; + u_int pdirindex; /* * Calculate index for next page table. @@ -3042,7 +3071,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; - unsigned pdirindex; + u_int pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) @@ -3568,7 +3597,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, m = TAILQ_NEXT(m, listq); } vm_page_unlock_queues(); - PMAP_UNLOCK(pmap); + PMAP_UNLOCK(pmap); } /* @@ -3610,7 +3639,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { - unsigned ptepindex; + u_int ptepindex; pd_entry_t ptepa; /* @@ -3876,7 +3905,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; - unsigned ptepindex; + u_int ptepindex; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); @@ -5216,7 +5245,7 @@ pmap_pid_dump(int pid) #if defined(DEBUG) static void pads(pmap_t pm); -void pmap_pvdump(vm_offset_t pa); +void pmap_pvdump(vm_paddr_t pa); /* print address space of pmap*/ static void diff --git a/sys/i386/i386/sys_machdep.c b/sys/i386/i386/sys_machdep.c index beffff2..374690f 100644 --- a/sys/i386/i386/sys_machdep.c +++ b/sys/i386/i386/sys_machdep.c @@ -553,7 +553,7 @@ user_ldt_free(struct thread *td) return; } - if (td == PCPU_GET(curthread)) { + if (td == curthread) { #ifdef XEN i386_reset_ldt(&default_proc_ldt); PCPU_SET(currentldt, (int)&default_proc_ldt); diff --git a/sys/i386/ibcs2/coff.h b/sys/i386/ibcs2/coff.h index a0eecd9..5527515 100644 --- a/sys/i386/ibcs2/coff.h +++ b/sys/i386/ibcs2/coff.h @@ -1,6 +1,6 @@ /*- * Copyright (c) 1994 Sean Eric Fagan - * Copyright (c) 1994 Søren Schmidt + * Copyright (c) 1994 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/i386/ibcs2/ibcs2_isc.c b/sys/i386/ibcs2/ibcs2_isc.c index adafc99..5b25c12 100644 --- a/sys/i386/ibcs2/ibcs2_isc.c +++ b/sys/i386/ibcs2/ibcs2_isc.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1994 Søren Schmidt + * Copyright (c) 1994 Søren Schmidt * Copyright (c) 1994 Sean Eric Fagan * Copyright (c) 1995 Steven Wallace * All rights reserved. diff --git a/sys/i386/ibcs2/ibcs2_socksys.c b/sys/i386/ibcs2/ibcs2_socksys.c index ce96fc9..80b1216 100644 --- a/sys/i386/ibcs2/ibcs2_socksys.c +++ b/sys/i386/ibcs2/ibcs2_socksys.c @@ -152,7 +152,7 @@ ibcs2_getipdomainname(td, uap) /* Get the domain name. */ getcredhostname(td->td_ucred, hname, sizeof(hname)); - dptr = index(hname, '.'); + dptr = strchr(hname, '.'); if ( dptr ) dptr++; else @@ -182,7 +182,7 @@ ibcs2_setipdomainname(td, uap) return EINVAL; /* Get the host's unqualified name (strip off the domain) */ - ptr = index(hname, '.'); + ptr = strchr(hname, '.'); if ( ptr != NULL ) { ptr++; *ptr = '\0'; diff --git a/sys/i386/ibcs2/ibcs2_stat.c b/sys/i386/ibcs2/ibcs2_stat.c index b61e45e..c1097a3 100644 --- a/sys/i386/ibcs2/ibcs2_stat.c +++ b/sys/i386/ibcs2/ibcs2_stat.c @@ -212,7 +212,7 @@ ibcs2_utssys(td, uap) IBCS2_UNAME_VERSION, sizeof(sut.version) - 1); getcredhostname(td->td_ucred, machine_name, sizeof(machine_name) - 1); - p = index(machine_name, '.'); + p = strchr(machine_name, '.'); if ( p ) *p = '\0'; strncpy(sut.nodename, machine_name, sizeof(sut.nodename) - 1); diff --git a/sys/i386/ibcs2/ibcs2_sysi86.c b/sys/i386/ibcs2/ibcs2_sysi86.c index 8d85bc1..17964af 100644 --- a/sys/i386/ibcs2/ibcs2_sysi86.c +++ b/sys/i386/ibcs2/ibcs2_sysi86.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1994 Søren Schmidt + * Copyright (c) 1994 Søren Schmidt * Copyright (c) 1995 Steven Wallace * All rights reserved. * diff --git a/sys/i386/ibcs2/ibcs2_xenix.c b/sys/i386/ibcs2/ibcs2_xenix.c index 74abbb1..c5416fb 100644 --- a/sys/i386/ibcs2/ibcs2_xenix.c +++ b/sys/i386/ibcs2/ibcs2_xenix.c @@ -1,6 +1,6 @@ /*- * Copyright (c) 1994 Sean Eric Fagan - * Copyright (c) 1994 Søren Schmidt + * Copyright (c) 1994 Søren Schmidt * Copyright (c) 1995 Steven Wallace * All rights reserved. * diff --git a/sys/i386/ibcs2/imgact_coff.c b/sys/i386/ibcs2/imgact_coff.c index 44a243c..abafe54 100644 --- a/sys/i386/ibcs2/imgact_coff.c +++ b/sys/i386/ibcs2/imgact_coff.c @@ -1,6 +1,6 @@ /*- * Copyright (c) 1994 Sean Eric Fagan - * Copyright (c) 1994 Søren Schmidt + * Copyright (c) 1994 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h index 3194fd6..9f5ab90 100644 --- a/sys/i386/include/_types.h +++ b/sys/i386/include/_types.h @@ -48,7 +48,7 @@ /* * Basic types upon which most other types are built. */ -typedef __signed char __int8_t; +typedef signed char __int8_t; typedef unsigned char __uint8_t; typedef short __int16_t; typedef unsigned short __uint16_t; diff --git a/sys/i386/include/asm.h b/sys/i386/include/asm.h index 3148bb7..7ce3d57 100644 --- a/sys/i386/include/asm.h +++ b/sys/i386/include/asm.h @@ -89,6 +89,20 @@ #define ENTRY(x) _ENTRY(x) #endif +/* + * WEAK_ALIAS: create a weak alias. + */ +#define WEAK_ALIAS(alias,sym) \ + .weak alias; \ + alias = sym + +/* + * STRONG_ALIAS: create a strong alias. + */ +#define STRONG_ALIAS(alias,sym) \ + .globl alias; \ + alias = sym + #define RCSID(x) .text; .asciz x #undef __FBSDID diff --git a/sys/i386/include/float.h b/sys/i386/include/float.h index 52b899f..2669298 100644 --- a/sys/i386/include/float.h +++ b/sys/i386/include/float.h @@ -55,6 +55,11 @@ __END_DECLS #define FLT_MAX_EXP 128 /* emax */ #define FLT_MAX 3.40282347E+38F /* (1-b**(-p))*b**emax */ #define FLT_MAX_10_EXP 38 /* floor(log10((1-b**(-p))*b**emax)) */ +#if __ISO_C_VISIBLE >= 2011 +#define FLT_TRUE_MIN 1.40129846E-45F /* b**(emin-p) */ +#define FLT_DECIMAL_DIG 9 /* ceil(1+p*log10(b)) */ +#define FLT_HAS_SUBNORM 1 +#endif /* __ISO_C_VISIBLE >= 2011 */ #define DBL_MANT_DIG 53 #define DBL_EPSILON 2.2204460492503131E-16 @@ -65,6 +70,11 @@ __END_DECLS #define DBL_MAX_EXP 1024 #define DBL_MAX 1.7976931348623157E+308 #define DBL_MAX_10_EXP 308 +#if __ISO_C_VISIBLE >= 2011 +#define DBL_TRUE_MIN 4.9406564584124654E-324 +#define DBL_DECIMAL_DIG 17 +#define DBL_HAS_SUBNORM 1 +#endif /* __ISO_C_VISIBLE >= 2011 */ #define LDBL_MANT_DIG 64 #define LDBL_EPSILON 1.0842021724855044340E-19L @@ -75,4 +85,10 @@ __END_DECLS #define LDBL_MAX_EXP 16384 #define LDBL_MAX 1.1897314953572317650E+4932L #define LDBL_MAX_10_EXP 4932 +#if __ISO_C_VISIBLE >= 2011 +#define LDBL_TRUE_MIN 3.6451995318824746025E-4951L +#define LDBL_DECIMAL_DIG 21 +#define LDBL_HAS_SUBNORM 1 +#endif /* __ISO_C_VISIBLE >= 2011 */ + #endif /* _MACHINE_FLOAT_H_ */ diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h index 6b56bb4..932ba67 100644 --- a/sys/i386/include/npx.h +++ b/sys/i386/include/npx.h @@ -101,6 +101,11 @@ struct xmmacc { u_char xmm_bytes[16]; }; +/* Contents of the upper 16 bytes of each AVX extended accumulator */ +struct ymmacc { + uint8_t ymm_bytes[16]; +}; + struct savexmm { struct envxmm sv_env; struct { @@ -116,6 +121,28 @@ union savefpu { struct savexmm sv_xmm; }; +struct xstate_hdr { + uint64_t xstate_bv; + uint8_t xstate_rsrv0[16]; + uint8_t xstate_rsrv[40]; +}; + +struct savexmm_xstate { + struct xstate_hdr sx_hd; + struct ymmacc sx_ymm[16]; +}; + +struct savexmm_ymm { + struct envxmm sv_env; + struct { + struct fpacc87 fp_acc; + int8_t fp_pad[6]; /* padding */ + } sv_fp[8]; + struct xmmacc sv_xmm[16]; + uint8_t sv_pad[96]; + struct savexmm_xstate sv_xstate; +} __aligned(64); + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -138,13 +165,6 @@ union savefpu { #ifdef _KERNEL -struct fpu_kern_ctx { - union savefpu hwstate; - union savefpu *prev; - uint32_t flags; -}; -#define FPU_KERN_CTX_NPXINITDONE 0x01 - #define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0) int npxdna(void); @@ -157,6 +177,8 @@ void npxsave(union savefpu *addr); void npxsetregs(struct thread *td, union savefpu *addr); int npxtrap(void); void npxuserinited(struct thread *); +struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags); +void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx); int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags); int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); @@ -167,6 +189,7 @@ int is_fpu_kern_thread(u_int flags); * Flags for fpu_kern_enter() and fpu_kern_thread(). */ #define FPU_KERN_NORMAL 0x0000 +#define FPU_KERN_NOWAIT 0x0001 #endif diff --git a/sys/i386/include/pcaudioio.h b/sys/i386/include/pcaudioio.h index 969f910..c67d866 100644 --- a/sys/i386/include/pcaudioio.h +++ b/sys/i386/include/pcaudioio.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1994 Søren Schmidt + * Copyright (c) 1994 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/i386/include/ptrace.h b/sys/i386/include/ptrace.h index 24eb411..c64d3e8 100644 --- a/sys/i386/include/ptrace.h +++ b/sys/i386/include/ptrace.h @@ -37,5 +37,7 @@ #define PT_GETXMMREGS (PT_FIRSTMACH + 0) #define PT_SETXMMREGS (PT_FIRSTMACH + 1) +#define PT_GETXSTATE (PT_FIRSTMACH + 2) +#define PT_SETXSTATE (PT_FIRSTMACH + 3) #endif diff --git a/sys/i386/include/specialreg.h b/sys/i386/include/specialreg.h index e3199f7..601b3e9 100644 --- a/sys/i386/include/specialreg.h +++ b/sys/i386/include/specialreg.h @@ -66,6 +66,7 @@ #define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ #define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ +#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. diff --git a/sys/i386/include/sysarch.h b/sys/i386/include/sysarch.h index 454d64c..316a5ab 100644 --- a/sys/i386/include/sysarch.h +++ b/sys/i386/include/sysarch.h @@ -47,6 +47,7 @@ #define I386_SET_FSBASE 8 #define I386_GET_GSBASE 9 #define I386_SET_GSBASE 10 +#define I386_GET_XFPUSTATE 11 /* These four only exist when running an i386 binary on amd64 */ #define _AMD64_GET_FSBASE 128 @@ -71,6 +72,11 @@ struct i386_vm86_args { char *sub_args; /* args */ }; +struct i386_get_xfpustate { + void *addr; + int len; +}; + #ifndef _KERNEL #include <sys/cdefs.h> diff --git a/sys/i386/include/ucontext.h b/sys/i386/include/ucontext.h index d9f8344..79aabd7 100644 --- a/sys/i386/include/ucontext.h +++ b/sys/i386/include/ucontext.h @@ -31,11 +31,18 @@ #ifndef _MACHINE_UCONTEXT_H_ #define _MACHINE_UCONTEXT_H_ +/* Keep _MC_* values similar to amd64 */ +#define _MC_HASSEGS 0x1 +#define _MC_HASBASES 0x2 +#define _MC_HASFPXSTATE 0x4 +#define _MC_FLAG_MASK (_MC_HASSEGS | _MC_HASBASES | _MC_HASFPXSTATE) + typedef struct __mcontext { /* - * The definition of mcontext_t shall match the layout of - * struct sigcontext after the sc_mask member. So that we can - * support sigcontext and ucontext_t at the same time. + * The definition of mcontext_t must match the layout of + * struct sigcontext after the sc_mask member. This is so + * that we can support sigcontext and ucontext_t at the same + * time. */ __register_t mc_onstack; /* XXX - sigcontext compat. */ __register_t mc_gs; /* machine state (struct trapframe) */ @@ -67,7 +74,7 @@ typedef struct __mcontext { #define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ #define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ int mc_ownedfp; - int mc_spare1[1]; /* align next field to 16 bytes */ + __register_t mc_flags; /* * See <machine/npx.h> for the internals of mc_fpstate[]. */ @@ -76,11 +83,13 @@ typedef struct __mcontext { __register_t mc_fsbase; __register_t mc_gsbase; - int mc_spare2[6]; + __register_t mc_xfpustate; + __register_t mc_xfpustate_len; + + int mc_spare2[4]; } mcontext_t; #if defined(_KERNEL) && defined(COMPAT_FREEBSD4) - struct mcontext4 { __register_t mc_onstack; /* XXX - sigcontext compat. */ __register_t mc_gs; /* machine state (struct trapframe) */ diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h index 8fe9b2b..56ab4b3 100644 --- a/sys/i386/include/vmparam.h +++ b/sys/i386/include/vmparam.h @@ -186,11 +186,12 @@ #endif /* - * Ceiling on amount of kmem_map kva space. + * Ceiling on the amount of kmem_map KVA space: 40% of the entire KVA space + * rounded to the nearest multiple of the superpage size. */ #ifndef VM_KMEM_SIZE_MAX -#define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ - VM_MIN_KERNEL_ADDRESS) * 2 / 5) +#define VM_KMEM_SIZE_MAX (((((VM_MAX_KERNEL_ADDRESS - \ + VM_MIN_KERNEL_ADDRESS) >> (PDRSHIFT - 2)) + 5) / 10) << PDRSHIFT) #endif /* initial pagein size of beginning of executable file */ diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index f314e44..0da580f 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -985,6 +985,50 @@ DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ +static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", + "Kernel contexts for FPU state"); + +#define XSAVE_AREA_ALIGN 64 + +#define FPU_KERN_CTX_NPXINITDONE 0x01 + +struct fpu_kern_ctx { + union savefpu *prev; + uint32_t flags; + char hwstate1[]; +}; + +struct fpu_kern_ctx * +fpu_kern_alloc_ctx(u_int flags) +{ + struct fpu_kern_ctx *res; + size_t sz; + + sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + + sizeof(union savefpu); + res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? + M_NOWAIT : M_WAITOK) | M_ZERO); + return (res); +} + +void +fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) +{ + + /* XXXKIB clear the memory ? */ + free(ctx, M_FPUKERN_CTX); +} + +static union savefpu * +fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) +{ + vm_offset_t p; + + p = (vm_offset_t)&ctx->hwstate1; + p = roundup2(p, XSAVE_AREA_ALIGN); + return ((union savefpu *)p); +} + int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { @@ -998,7 +1042,7 @@ fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; - pcb->pcb_save = &ctx->hwstate; + pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); diff --git a/sys/i386/linux/imgact_linux.c b/sys/i386/linux/imgact_linux.c index 3a66e87..3a42e81 100644 --- a/sys/i386/linux/imgact_linux.c +++ b/sys/i386/linux/imgact_linux.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1994-1996 Søren Schmidt + * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Based heavily on /sys/kern/imgact_aout.c which is: diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h index d02614b..8b5bcbf 100644 --- a/sys/i386/linux/linux.h +++ b/sys/i386/linux/linux.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1994-1996 Søren Schmidt + * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -573,6 +573,16 @@ int linux_ioctl_unregister_handler(struct linux_ioctl_handler *h); #define LINUX_F_UNLCK 2 /* + * posix_fadvise advice + */ +#define LINUX_POSIX_FADV_NORMAL 0 +#define LINUX_POSIX_FADV_RANDOM 1 +#define LINUX_POSIX_FADV_SEQUENTIAL 2 +#define LINUX_POSIX_FADV_WILLNEED 3 +#define LINUX_POSIX_FADV_DONTNEED 4 +#define LINUX_POSIX_FADV_NOREUSE 5 + +/* * mount flags */ #define LINUX_MS_RDONLY 0x0001 diff --git a/sys/i386/linux/linux_dummy.c b/sys/i386/linux/linux_dummy.c index bc300b1..054cb60 100644 --- a/sys/i386/linux/linux_dummy.c +++ b/sys/i386/linux/linux_dummy.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1994-1995 Søren Schmidt + * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -62,14 +62,12 @@ DUMMY(setfsuid); DUMMY(setfsgid); DUMMY(pivot_root); DUMMY(mincore); -DUMMY(fadvise64); DUMMY(lookup_dcookie); DUMMY(epoll_create); DUMMY(epoll_ctl); DUMMY(epoll_wait); DUMMY(remap_file_pages); DUMMY(fstatfs64); -DUMMY(fadvise64_64); DUMMY(mbind); DUMMY(get_mempolicy); DUMMY(set_mempolicy); diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h index d480012..53c804b 100644 --- a/sys/i386/linux/linux_proto.h +++ b/sys/i386/linux/linux_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 227693 2011-11-19 07:19:37Z ed + * created from FreeBSD: head/sys/i386/linux/syscalls.master 228957 2011-12-29 15:34:59Z jhb */ #ifndef _LINUX_SYSPROTO_H_ @@ -756,7 +756,10 @@ struct linux_get_thread_area_args { char desc_l_[PADL_(struct l_user_desc *)]; struct l_user_desc * desc; char desc_r_[PADR_(struct l_user_desc *)]; }; struct linux_fadvise64_args { - register_t dummy; + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset_l_[PADL_(l_loff_t)]; l_loff_t offset; char offset_r_[PADR_(l_loff_t)]; + char len_l_[PADL_(l_size_t)]; l_size_t len; char len_r_[PADR_(l_size_t)]; + char advice_l_[PADL_(int)]; int advice; char advice_r_[PADR_(int)]; }; struct linux_exit_group_args { char error_code_l_[PADL_(int)]; int error_code; char error_code_r_[PADR_(int)]; @@ -835,7 +838,10 @@ struct linux_utimes_args { char tptr_l_[PADL_(struct l_timeval *)]; struct l_timeval * tptr; char tptr_r_[PADR_(struct l_timeval *)]; }; struct linux_fadvise64_64_args { - register_t dummy; + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset_l_[PADL_(l_loff_t)]; l_loff_t offset; char offset_r_[PADR_(l_loff_t)]; + char len_l_[PADL_(l_loff_t)]; l_loff_t len; char len_r_[PADR_(l_loff_t)]; + char advice_l_[PADL_(int)]; int advice; char advice_r_[PADR_(int)]; }; struct linux_mbind_args { register_t dummy; diff --git a/sys/i386/linux/linux_syscall.h b/sys/i386/linux/linux_syscall.h index 367dbf7..c206be8 100644 --- a/sys/i386/linux/linux_syscall.h +++ b/sys/i386/linux/linux_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 227693 2011-11-19 07:19:37Z ed + * created from FreeBSD: head/sys/i386/linux/syscalls.master 228957 2011-12-29 15:34:59Z jhb */ #define LINUX_SYS_exit 1 diff --git a/sys/i386/linux/linux_syscalls.c b/sys/i386/linux/linux_syscalls.c index 5ebe4f9..53f7ca8 100644 --- a/sys/i386/linux/linux_syscalls.c +++ b/sys/i386/linux/linux_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 227693 2011-11-19 07:19:37Z ed + * created from FreeBSD: head/sys/i386/linux/syscalls.master 228957 2011-12-29 15:34:59Z jhb */ const char *linux_syscallnames[] = { diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c index 27b2d3b..27481ae 100644 --- a/sys/i386/linux/linux_sysent.c +++ b/sys/i386/linux/linux_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/i386/linux/syscalls.master 227693 2011-11-19 07:19:37Z ed + * created from FreeBSD: head/sys/i386/linux/syscalls.master 228957 2011-12-29 15:34:59Z jhb */ #include <sys/param.h> @@ -268,7 +268,7 @@ struct sysent linux_sysent[] = { { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 247 = linux_io_getevents */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 248 = linux_io_submit */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 249 = linux_io_cancel */ - { 0, (sy_call_t *)linux_fadvise64, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 250 = linux_fadvise64 */ + { AS(linux_fadvise64_args), (sy_call_t *)linux_fadvise64, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 250 = linux_fadvise64 */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 251 = */ { AS(linux_exit_group_args), (sy_call_t *)linux_exit_group, AUE_EXIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 252 = linux_exit_group */ { 0, (sy_call_t *)linux_lookup_dcookie, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 253 = linux_lookup_dcookie */ @@ -290,7 +290,7 @@ struct sysent linux_sysent[] = { { 0, (sy_call_t *)linux_fstatfs64, AUE_FSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 269 = linux_fstatfs64 */ { AS(linux_tgkill_args), (sy_call_t *)linux_tgkill, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 270 = linux_tgkill */ { AS(linux_utimes_args), (sy_call_t *)linux_utimes, AUE_UTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 271 = linux_utimes */ - { 0, (sy_call_t *)linux_fadvise64_64, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 272 = linux_fadvise64_64 */ + { AS(linux_fadvise64_64_args), (sy_call_t *)linux_fadvise64_64, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 272 = linux_fadvise64_64 */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 273 = */ { 0, (sy_call_t *)linux_mbind, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 274 = linux_mbind */ { 0, (sy_call_t *)linux_get_mempolicy, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 275 = linux_get_mempolicy */ diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c index 4c06a7f..f9c3cdc 100644 --- a/sys/i386/linux/linux_systrace_args.c +++ b/sys/i386/linux/linux_systrace_args.c @@ -1724,7 +1724,12 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_fadvise64 */ case 250: { - *n_args = 0; + struct linux_fadvise64_args *p = params; + iarg[0] = p->fd; /* int */ + iarg[1] = p->offset; /* l_loff_t */ + iarg[2] = p->len; /* l_size_t */ + iarg[3] = p->advice; /* int */ + *n_args = 4; break; } /* linux_exit_group */ @@ -1873,7 +1878,12 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_fadvise64_64 */ case 272: { - *n_args = 0; + struct linux_fadvise64_64_args *p = params; + iarg[0] = p->fd; /* int */ + iarg[1] = p->offset; /* l_loff_t */ + iarg[2] = p->len; /* l_loff_t */ + iarg[3] = p->advice; /* int */ + *n_args = 4; break; } /* linux_mbind */ @@ -4776,6 +4786,22 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_fadvise64 */ case 250: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "l_loff_t"; + break; + case 2: + p = "l_size_t"; + break; + case 3: + p = "int"; + break; + default: + break; + }; break; /* linux_exit_group */ case 252: @@ -4985,6 +5011,22 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_fadvise64_64 */ case 272: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "l_loff_t"; + break; + case 2: + p = "l_loff_t"; + break; + case 3: + p = "int"; + break; + default: + break; + }; break; /* linux_mbind */ case 274: @@ -6414,6 +6456,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_fadvise64 */ case 250: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_exit_group */ case 252: if (ndx == 0 || ndx == 1) @@ -6498,6 +6543,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_fadvise64_64 */ case 272: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_mbind */ case 274: /* linux_get_mempolicy */ diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c index a4443bd..7634138 100644 --- a/sys/i386/linux/linux_sysvec.c +++ b/sys/i386/linux/linux_sysvec.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1994-1996 Søren Schmidt + * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index 7e7965f..618d0ad 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -421,7 +421,8 @@ 247 AUE_NULL UNIMPL linux_io_getevents 248 AUE_NULL UNIMPL linux_io_submit 249 AUE_NULL UNIMPL linux_io_cancel -250 AUE_NULL STD { int linux_fadvise64(void); } +250 AUE_NULL STD { int linux_fadvise64(int fd, l_loff_t offset, \ + l_size_t len, int advice); } 251 AUE_NULL UNIMPL 252 AUE_EXIT STD { int linux_exit_group(int error_code); } 253 AUE_NULL STD { int linux_lookup_dcookie(void); } @@ -447,7 +448,9 @@ 270 AUE_NULL STD { int linux_tgkill(int tgid, int pid, int sig); } 271 AUE_UTIMES STD { int linux_utimes(char *fname, \ struct l_timeval *tptr); } -272 AUE_NULL STD { int linux_fadvise64_64(void); } +272 AUE_NULL STD { int linux_fadvise64_64(int fd, \ + l_loff_t offset, l_loff_t len, \ + int advice); } 273 AUE_NULL UNIMPL 274 AUE_NULL STD { int linux_mbind(void); } 275 AUE_NULL STD { int linux_get_mempolicy(void); } diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index a0ef1e8..b878884 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -810,12 +810,11 @@ cpu_initialize_context(unsigned int cpu) { /* vcpu_guest_context_t is too large to allocate on the stack. * Hence we allocate statically and protect it with a lock */ - vm_page_t m[4]; + vm_page_t m[NPGPTD + 2]; static vcpu_guest_context_t ctxt; vm_offset_t boot_stack; vm_offset_t newPTD; vm_paddr_t ma[NPGPTD]; - static int color; int i; /* @@ -825,15 +824,15 @@ cpu_initialize_context(unsigned int cpu) * */ for (i = 0; i < NPGPTD + 2; i++) { - m[i] = vm_page_alloc(NULL, color++, + m[i] = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); pmap_zero_page(m[i]); } - boot_stack = kmem_alloc_nofault(kernel_map, 1); - newPTD = kmem_alloc_nofault(kernel_map, NPGPTD); + boot_stack = kmem_alloc_nofault(kernel_map, PAGE_SIZE); + newPTD = kmem_alloc_nofault(kernel_map, NPGPTD * PAGE_SIZE); ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; #ifdef PAE @@ -855,7 +854,7 @@ cpu_initialize_context(unsigned int cpu) nkpt*sizeof(vm_paddr_t)); pmap_qremove(newPTD, 4); - kmem_free(kernel_map, newPTD, 4); + kmem_free(kernel_map, newPTD, 4 * PAGE_SIZE); /* * map actual idle stack to boot_stack */ diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index c81834b..189f311 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> +#else +#include <sys/cpuset.h> #endif #include <vm/vm.h> @@ -184,9 +186,6 @@ __FBSDID("$FreeBSD$"); #define PV_STAT(x) do { } while (0) #endif -#define pa_index(pa) ((pa) >> PDRSHIFT) -#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) - /* * Get PDEs and PTEs for user/kernel address space */ @@ -224,13 +223,14 @@ extern u_int32_t KERNend; pt_entry_t pg_nx; #endif +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + static int pat_works; /* Is page attribute table sane? */ /* * Data for the pv entry allocation mechanism */ static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; -static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ @@ -277,22 +277,6 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, "Number of times pmap_pte_quick didn't change PMAP1"); static struct mtx PMAP2mutex; -static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); -static int pg_ps_enabled; -SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0, - "Are large page mappings enabled?"); - -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, - "Max number of PV entries"); -SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, - "Page share factor per proc"); -static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, - "2/4MB page mapping counters"); - -static u_long pmap_pde_mappings; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, - &pmap_pde_mappings, 0, "2/4MB page mappings"); - static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); @@ -301,6 +285,8 @@ static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); +static void pmap_flush_page(vm_page_t m); +static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, vm_page_t *free); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, @@ -312,14 +298,12 @@ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); -static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); +static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags); static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *); -static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr); -static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static __inline void pagezero(void *page); @@ -333,8 +317,6 @@ CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); */ CTASSERT(KERNBASE % (1 << 24) == 0); - - void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type) { @@ -366,24 +348,6 @@ pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type) } /* - * Move the kernel virtual free pointer to the next - * 4MB. This is used to help improve performance - * by using a large (4MB) page for much of the kernel - * (.text, .data, .bss) - */ -static vm_offset_t -pmap_kmem_choose(vm_offset_t addr) -{ - vm_offset_t newaddr = addr; - -#ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) - newaddr = (addr + PDRMASK) & ~PDRMASK; -#endif - return newaddr; -} - -/* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled @@ -402,15 +366,13 @@ pmap_bootstrap(vm_paddr_t firstaddr) int i; /* - * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too - * large. It should instead be correctly calculated in locore.s and - * not based on 'first' (which is a physical address, not a virtual - * address, for the start of unused physical memory). The kernel - * page tables are NOT double mapped and thus should not be included - * in this calculation. + * Initialize the first available kernel virtual address. However, + * using "firstaddr" may waste a few pages of the kernel virtual + * address space, because locore may not have mapped every physical + * page that it allocated. Preferably, locore would provide a first + * unused virtual address in addition to "firstaddr". */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; - virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; @@ -475,8 +437,8 @@ pmap_bootstrap(vm_paddr_t firstaddr) /* * ptemap is used for pmap_pte_quick */ - SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); - SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1); + SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) + SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); @@ -636,24 +598,8 @@ pmap_ptelist_init(vm_offset_t *head, void *base, int npages) void pmap_init(void) { - vm_page_t mpte; - vm_size_t s; - int i, pv_npg; /* - * Initialize the vm page array entries for the kernel pmap's - * page table pages. - */ - for (i = 0; i < nkpt; i++) { - mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME)); - KASSERT(mpte >= vm_page_array && - mpte < &vm_page_array[vm_page_array_size], - ("pmap_init: page table page is out of range")); - mpte->pindex = i + KPTDI; - mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME); - } - - /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. @@ -664,26 +610,6 @@ pmap_init(void) pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); - /* - * Are large page mappings enabled? - */ - TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); - - /* - * Calculate the size of the pv head table for superpages. - */ - for (i = 0; phys_avail[i + 1]; i += 2); - pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR; - - /* - * Allocate memory for the pv head table for superpages. - */ - s = (vm_size_t)(pv_npg * sizeof(struct md_page)); - s = round_page(s); - pv_table = (struct md_page *)kmem_alloc(kernel_map, s); - for (i = 0; i < pv_npg; i++) - TAILQ_INIT(&pv_table[i].pv_list); - pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map, PAGE_SIZE * pv_maxchunks); @@ -693,6 +619,18 @@ pmap_init(void) } +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, + "Max number of PV entries"); +SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, + "Page share factor per proc"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, + "2/4MB page mapping counters"); + +static u_long pmap_pde_mappings; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_pde_mappings, 0, "2/4MB page mappings"); + /*************************************************** * Low level helper routines..... ***************************************************/ @@ -939,6 +877,8 @@ pmap_invalidate_cache(void) } #endif /* !SMP */ +#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) + void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) { @@ -950,7 +890,8 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) if (cpu_feature & CPUID_SS) ; /* If "Self Snoop" is supported, do nothing. */ - else if (cpu_feature & CPUID_CLFSH) { + else if ((cpu_feature & CPUID_CLFSH) != 0 && + eva - sva < PMAP_CLFLUSH_THRESHOLD) { /* * Otherwise, do per-cache line flush. Use the mfence @@ -967,12 +908,27 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) /* * No targeted cache flush methods are supported by CPU, - * globally invalidate cache as a last resort. + * or the supplied range is bigger than 2MB. + * Globally invalidate cache. */ pmap_invalidate_cache(); } } +void +pmap_invalidate_cache_pages(vm_page_t *pages, int count) +{ + int i; + + if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || + (cpu_feature & CPUID_CLFSH) == 0) { + pmap_invalidate_cache(); + } else { + for (i = 0; i < count; i++) + pmap_flush_page(pages[i]); + } +} + /* * Are we current address space or kernel? N.B. We return FALSE when * a pmap's page table is in use because a kernel thread is borrowing @@ -985,7 +941,7 @@ pmap_is_current(pmap_t pmap) return (pmap == kernel_pmap || (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && - (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); + (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); } /* @@ -1014,10 +970,9 @@ pmap_pte(pmap_t pmap, vm_offset_t va) CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x", pmap, va, (*PMAP2 & 0xffffffff)); } - return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } - return (0); + return (NULL); } /* @@ -1108,7 +1063,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va) pt_entry_t *pte; pd_entry_t pde; pt_entry_t pteval; - + rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; @@ -1167,7 +1122,7 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; - pt_entry_t pte; + pt_entry_t pte, *ptep; vm_page_t m; vm_paddr_t pa; @@ -1179,26 +1134,25 @@ retry: if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | - (va & PDRMASK), &pa)) + if (vm_page_pa_tryrelock(pmap, (pde & + PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { - sched_pin(); - pte = PT_GET(pmap_pte_quick(pmap, va)); - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - if ((pte & PG_V) && + ptep = pmap_pte(pmap, va); + pte = PT_GET(ptep); + pmap_pte_release(ptep); + if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, + &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } - sched_unpin(); } } PA_UNLOCK_COND(pa); @@ -1213,10 +1167,13 @@ retry: /* * Add a wired page to the kva. * Note: not SMP coherent. + * + * This function may be used before pmap_bootstrap() is called. */ void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { + PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag); } @@ -1229,16 +1186,18 @@ pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma) pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag); } - -static __inline void +static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { + PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. + * + * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) @@ -1335,7 +1294,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) #endif } - /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. @@ -1385,9 +1343,9 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) --m->wire_count; if (m->wire_count == 0) - return _pmap_unwire_pte_hold(pmap, m, free); + return (_pmap_unwire_pte_hold(pmap, m, free)); else - return 0; + return (0); } static int @@ -1428,7 +1386,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) m->right = *free; *free = m; - return 1; + return (1); } /* @@ -1442,17 +1400,25 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) - return 0; + return (0); ptepde = PT_GET(pmap_pde(pmap, va)); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); - return pmap_unwire_pte_hold(pmap, mpte, free); + return (pmap_unwire_pte_hold(pmap, mpte, free)); } +/* + * Initialize the pmap for the swapper process. + */ void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); + /* + * Since the page table directory is shared with the kernel pmap, + * which is already included in the list "allpmaps", this pmap does + * not need to be inserted into that list. + */ pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); @@ -1461,9 +1427,6 @@ pmap_pinit0(pmap_t pmap) PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); - mtx_lock_spin(&allpmaps_lock); - LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); - mtx_unlock_spin(&allpmaps_lock); } /* @@ -1514,18 +1477,19 @@ pmap_pinit(pmap_t pmap) ptdpg[i++] = m; } } + pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); - for (i = 0; i < NPGPTD; i++) { + + for (i = 0; i < NPGPTD; i++) if ((ptdpg[i]->flags & PG_ZERO) == 0) - pagezero(&pmap->pm_pdir[i*NPTEPG]); - } + pagezero(pmap->pm_pdir + (i * NPDEPG)); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); + /* Copy the kernel page table directory entries. */ + bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); mtx_unlock_spin(&allpmaps_lock); - /* Wire in kernel global address entries. */ - bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); #ifdef PAE pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1); if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0) @@ -1577,7 +1541,7 @@ pmap_pinit(pmap_t pmap) * mapped correctly. */ static vm_page_t -_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags) +_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags) { vm_paddr_t ptema; vm_page_t m; @@ -1612,6 +1576,7 @@ _pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags) * Map the pagetable page into the process address space, if * it isn't already there. */ + pmap->pm_stats.resident_count++; ptema = VM_PAGE_TO_MACH(m); @@ -1627,7 +1592,7 @@ _pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags) static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) { - unsigned ptepindex; + u_int ptepindex; pd_entry_t ptema; vm_page_t m; @@ -1805,6 +1770,7 @@ pmap_release(pmap_t pmap) #else int npgptd = NPGPTD; #endif + KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); @@ -1860,7 +1826,7 @@ kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - return sysctl_handle_long(oidp, &ksize, 0, req); + return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); @@ -1870,7 +1836,7 @@ kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - return sysctl_handle_long(oidp, &kfree, 0, req); + return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); @@ -1899,12 +1865,12 @@ pmap_growkernel(vm_offset_t addr) } } } - addr = roundup2(addr, PAGE_SIZE * NPTEPG); + addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; @@ -1912,17 +1878,16 @@ pmap_growkernel(vm_offset_t addr) continue; } - /* - * This index is bogus, but out of the way - */ - nkpg = vm_page_alloc(NULL, nkpt, - VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); - if (!nkpg) + nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, + VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; - pmap_zero_page(nkpg); + if ((nkpg->flags & PG_ZERO) == 0) + pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); vm_page_lock_queues(); @@ -1934,7 +1899,7 @@ pmap_growkernel(vm_offset_t addr) mtx_unlock_spin(&allpmaps_lock); vm_page_unlock_queues(); - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; @@ -1954,7 +1919,7 @@ static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { - return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); + return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) @@ -2076,15 +2041,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv) pc->pc_map[field] |= 1ul << bit; /* move to head of list */ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); - TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); for (idx = 0; idx < _NPCM; idx++) - if (pc->pc_map[idx] != pc_freemask[idx]) + if (pc->pc_map[idx] != pc_freemask[idx]) { + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); return; + } PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ - TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, 0); @@ -2313,10 +2278,10 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) pt_entry_t *pte; vm_page_t free = NULL; int anyvalid; - + CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); - + /* * Perform an unsynchronized read. This is, however, safe. */ @@ -2341,12 +2306,14 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } for (; sva < eva; sva = pdnxt) { - unsigned pdirindex; + u_int pdirindex; /* * Calculate index for next page table. */ pdnxt = (sva + NBPDR) & ~PDRMASK; + if (pdnxt < sva) + pdnxt = eva; if (pmap->pm_stats.resident_count == 0) break; @@ -2436,7 +2403,6 @@ pmap_remove_all(vm_page_t m) PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pmap, pv->pv_va); - tpte = *pte; PT_SET_VA_MA(pte, 0, TRUE); if (tpte & PG_W) @@ -2500,9 +2466,11 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; - unsigned pdirindex; + u_int pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; + if (pdnxt < sva) + pdnxt = eva; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; @@ -2612,7 +2580,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); - KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0, + KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || + VM_OBJECT_LOCKED(m->object), ("pmap_enter: page %p is not busy", m)); mpte = NULL; @@ -2815,10 +2784,9 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, multicall_entry_t mcl[16]; multicall_entry_t *mclp = mcl; int error, count = 0; - + VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); psize = atop(end - start); - mpte = NULL; m = m_start; vm_page_lock_queues(); @@ -2857,7 +2825,7 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) multicall_entry_t mcl, *mclp; int count = 0; mclp = &mcl; - + CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", pmap, va, m, prot); @@ -2908,7 +2876,7 @@ pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_o vm_paddr_t pa; vm_page_t free; multicall_entry_t *mcl = *mclpp; - + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); @@ -2920,7 +2888,7 @@ pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_o * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { - unsigned ptepindex; + u_int ptepindex; pd_entry_t ptema; /* @@ -3024,7 +2992,7 @@ pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_o *mclpp = mcl + 1; *count = *count + 1; #endif - return mpte; + return (mpte); } /* @@ -3049,9 +3017,8 @@ pmap_kenter_temporary(vm_paddr_t pa, int i) * are taken, but the code works. */ void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, - vm_object_t object, vm_pindex_t pindex, - vm_size_t size) +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, + vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; @@ -3069,6 +3036,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; + /* * Abort the mapping if the first page is not physically * aligned to a 2/4MB page boundary. @@ -3076,6 +3044,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; + /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing @@ -3091,7 +3060,12 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, return; p = TAILQ_NEXT(p, listq); } - /* Map using 2/4MB pages. */ + + /* + * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and + * "size" is a multiple of 2/4M, adding the PAT setting to + * "pa" will not affect the termination of this loop. + */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + size; pa += NBPDR) { @@ -3155,7 +3129,7 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, - vm_offset_t src_addr) + vm_offset_t src_addr) { vm_page_t free; vm_offset_t addr; @@ -3192,12 +3166,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; - unsigned ptepindex; + u_int ptepindex; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pdnxt = (addr + NBPDR) & ~PDRMASK; + if (pdnxt < addr) + pdnxt = end_addr; ptepindex = addr >> PDRSHIFT; srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); @@ -3231,7 +3207,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, dstmpte = pmap_allocpte(dst_pmap, addr, M_NOWAIT); if (dstmpte == NULL) - break; + goto out; dst_pte = pmap_pte_quick(dst_pmap, addr); if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, @@ -3255,6 +3231,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, addr); pmap_free_zero_pages(free); } + goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; @@ -3263,6 +3240,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, src_pte++; } } +out: PT_UPDATES_FLUSH(); sched_unpin(); vm_page_unlock_queues(); @@ -3325,7 +3303,7 @@ pmap_zero_page_area(vm_page_t m, int off, int size) sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) - panic("pmap_zero_page: CMAP2 busy"); + panic("pmap_zero_page_area: CMAP2 busy"); sched_pin(); PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); @@ -3349,7 +3327,7 @@ pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) - panic("pmap_zero_page: CMAP3 busy"); + panic("pmap_zero_page_idle: CMAP3 busy"); sched_pin(); PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); pagezero(CADDR3); @@ -3448,21 +3426,15 @@ pmap_page_wired_mappings(vm_page_t m) } /* - * Returns TRUE if the given page is mapped individually or as part of - * a 4mpage. Otherwise, returns FALSE. + * Returns TRUE if the given page is mapped. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { - boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); - vm_page_lock_queues(); - rv = !TAILQ_EMPTY(&m->md.pv_list) || - !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); - vm_page_unlock_queues(); - return (rv); + return (!TAILQ_EMPTY(&m->md.pv_list)); } /* @@ -3819,7 +3791,6 @@ pmap_ts_referenced(vm_page_t m) PT_UPDATES_FLUSH(); if (*PMAP1) PT_SET_MA(PADDR1, 0); - sched_unpin(); vm_page_unlock_queues(); return (rtval); @@ -3854,7 +3825,7 @@ pmap_clear_modify(vm_page_t m) pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); - if ((*pte & PG_M) != 0) { + if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_M is among the least significant @@ -3976,8 +3947,6 @@ pmap_unmapdev(vm_offset_t va, vm_size_t size) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { - struct sysmaps *sysmaps; - vm_offset_t sva, eva; m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) @@ -4000,11 +3969,21 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) * invalidation. In the worst case, whole cache is flushed by * pmap_invalidate_cache_range(). */ - if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) { + if ((cpu_feature & CPUID_SS) == 0) + pmap_flush_page(m); +} + +static void +pmap_flush_page(vm_page_t m) +{ + struct sysmaps *sysmaps; + vm_offset_t sva, eva; + + if ((cpu_feature & CPUID_CLFSH) != 0) { sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) - panic("pmap_page_set_memattr: CMAP2 busy"); + panic("pmap_flush_page: CMAP2 busy"); sched_pin(); PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M | @@ -4012,21 +3991,35 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) invlcaddr(sysmaps->CADDR2); sva = (vm_offset_t)sysmaps->CADDR2; eva = sva + PAGE_SIZE; - } else - sva = eva = 0; /* gcc */ - pmap_invalidate_cache_range(sva, eva); - if (sva != 0) { + + /* + * Use mfence despite the ordering implied by + * mtx_{un,}lock() because clflush is not guaranteed + * to be ordered by any other instruction. + */ + mfence(); + for (; sva < eva; sva += cpu_clflush_line_size) + clflush(sva); + mfence(); PT_SET_MA(sysmaps->CADDR2, 0); sched_unpin(); mtx_unlock(&sysmaps->lock); - } + } else + pmap_invalidate_cache(); } +/* + * Changes the specified virtual address range's memory type to that given by + * the parameter "mode". The specified virtual address range must be + * completely contained within either the kernel map. + * + * Returns zero if the change completed successfully, and either EINVAL or + * ENOMEM if the change failed. Specifically, EINVAL is returned if some part + * of the virtual address range was not mapped, and ENOMEM is returned if + * there was insufficient memory available to complete the change. + */ int -pmap_change_attr(va, size, mode) - vm_offset_t va; - vm_size_t size; - int mode; +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; pt_entry_t *pte; @@ -4080,8 +4073,8 @@ pmap_change_attr(va, size, mode) } /* - * Flush CPU caches to make sure any data isn't cached that shouldn't - * be, etc. + * Flush CPU caches to make sure any data isn't cached that + * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); @@ -4099,7 +4092,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) pt_entry_t *ptep, pte; vm_paddr_t pa; int val; - + PMAP_LOCK(pmap); retry: ptep = pmap_pte(pmap, addr); @@ -4282,7 +4275,7 @@ pmap_pid_dump(int pid) printf("\n"); } sx_sunlock(&allproc_lock); - return npte; + return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { @@ -4307,7 +4300,7 @@ pmap_pid_dump(int pid) } } sx_sunlock(&allproc_lock); - return npte; + return (npte); } #endif |