summary | refs | log | tree | commit | diff | stats
path: root/sys/amd64/amd64/pmap.c
diff options
context:
space:
mode:
author: jkim <jkim@FreeBSD.org> 2016-05-25 23:06:52 +0000
committer: jkim <jkim@FreeBSD.org> 2016-05-25 23:06:52 +0000
commit: 45ae491494493d7935a06c1ad0e655d974d1cf22 (patch)
tree: 18b691be837ea394d57aa5fb5d89808be9d3964c /sys/amd64/amd64/pmap.c
parent: dad0f4b1ee3df3366262c758edb7d3b2b37678f4 (diff)
downloadFreeBSD-src-45ae491494493d7935a06c1ad0e655d974d1cf22.zip
FreeBSD-src-45ae491494493d7935a06c1ad0e655d974d1cf22.tar.gz
Neither Clang nor GCC can generate efficient code for reserve_pv_entries().
http://docs.freebsd.org/cgi/mid.cgi?552BFEB2.8040407

Re-implement it entirely in inline assembly so that compilers do not do silly spilling to memory. For the non-POPCNT case, use the newly added bit_count(3).

Reported by: alc
Reviewed by: alc, kib
Differential Revision: https://reviews.freebsd.org/D6541
Diffstat (limited to 'sys/amd64/amd64/pmap.c')
-rw-r--r-- sys/amd64/amd64/pmap.c 31
1 files changed, 15 insertions, 16 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 7132c57..cc76974 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -104,6 +104,7 @@ __FBSDID("$FreeBSD$");
#include "opt_vm.h"
#include <sys/param.h>
+#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -585,7 +586,7 @@ static caddr_t crashdumpmap;
static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
-static int popcnt_pc_map_elem_pq(uint64_t elem);
+static int popcnt_pc_map_pq(uint64_t *map);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void reserve_pv_entries(pmap_t pmap, int needed,
struct rwlock **lockp);
@@ -3126,7 +3127,7 @@ retry:
}
/*
- * Returns the number of one bits within the given PV chunk map element.
+ * Returns the number of one bits within the given PV chunk map.
*
* The erratas for Intel processors state that "POPCNT Instruction May
* Take Longer to Execute Than Expected". It is believed that the
@@ -3142,12 +3143,15 @@ retry:
* 6th Gen Core: SKL029
*/
static int
-popcnt_pc_map_elem_pq(uint64_t elem)
+popcnt_pc_map_pq(uint64_t *map)
{
- u_long result;
+ u_long result, tmp;
- __asm __volatile("xorl %k0,%k0;popcntq %1,%0"
- : "=&r" (result) : "rm" (elem));
+ __asm __volatile("xorl %k0,%k0;popcntq %2,%0;"
+ "xorl %k1,%k1;popcntq %3,%1;addl %k1,%k0;"
+ "xorl %k1,%k1;popcntq %4,%1;addl %k1,%k0"
+ : "=&r" (result), "=&r" (tmp)
+ : "m" (map[0]), "m" (map[1]), "m" (map[2]));
return (result);
}
@@ -3179,17 +3183,12 @@ retry:
avail = 0;
TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
#ifndef __POPCNT__
- if ((cpu_feature2 & CPUID2_POPCNT) == 0) {
- free = bitcount64(pc->pc_map[0]);
- free += bitcount64(pc->pc_map[1]);
- free += bitcount64(pc->pc_map[2]);
- } else
+ if ((cpu_feature2 & CPUID2_POPCNT) == 0)
+ bit_count((bitstr_t *)pc->pc_map, 0,
+ sizeof(pc->pc_map) * NBBY, &free);
+ else
#endif
- {
- free = popcnt_pc_map_elem_pq(pc->pc_map[0]);
- free += popcnt_pc_map_elem_pq(pc->pc_map[1]);
- free += popcnt_pc_map_elem_pq(pc->pc_map[2]);
- }
+ free = popcnt_pc_map_pq(pc->pc_map);
if (free == 0)
break;
avail += free;
OpenPOWER on IntegriCloud