summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/base/node.c22
-rw-r--r--include/linux/mmzone.h25
-rw-r--r--include/linux/vmstat.h29
-rw-r--r--mm/page_alloc.c10
-rw-r--r--mm/vmstat.c161
5 files changed, 220 insertions, 27 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c
index d8dc830..3855902 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -160,12 +160,12 @@ static ssize_t node_read_numastat(struct device *dev,
"interleave_hit %lu\n"
"local_node %lu\n"
"other_node %lu\n",
- sum_zone_node_page_state(dev->id, NUMA_HIT),
- sum_zone_node_page_state(dev->id, NUMA_MISS),
- sum_zone_node_page_state(dev->id, NUMA_FOREIGN),
- sum_zone_node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
- sum_zone_node_page_state(dev->id, NUMA_LOCAL),
- sum_zone_node_page_state(dev->id, NUMA_OTHER));
+ sum_zone_numa_state(dev->id, NUMA_HIT),
+ sum_zone_numa_state(dev->id, NUMA_MISS),
+ sum_zone_numa_state(dev->id, NUMA_FOREIGN),
+ sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
+ sum_zone_numa_state(dev->id, NUMA_LOCAL),
+ sum_zone_numa_state(dev->id, NUMA_OTHER));
}
static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
@@ -181,9 +181,17 @@ static ssize_t node_read_vmstat(struct device *dev,
n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
sum_zone_node_page_state(nid, i));
- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
n += sprintf(buf+n, "%s %lu\n",
vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+ sum_zone_numa_state(nid, i));
+#endif
+
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ n += sprintf(buf+n, "%s %lu\n",
+ vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_STAT_ITEMS],
node_page_state(pgdat, i));
return n;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e7e92c8..e65d91c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -114,6 +114,20 @@ struct zone_padding {
#define ZONE_PADDING(name)
#endif
+#ifdef CONFIG_NUMA
+enum numa_stat_item {
+ NUMA_HIT, /* allocated in intended node */
+ NUMA_MISS, /* allocated in non intended node */
+ NUMA_FOREIGN, /* was intended here, hit elsewhere */
+ NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
+ NUMA_LOCAL, /* allocation from local node */
+ NUMA_OTHER, /* allocation from other node */
+ NR_VM_NUMA_STAT_ITEMS
+};
+#else
+#define NR_VM_NUMA_STAT_ITEMS 0
+#endif
+
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
@@ -132,14 +146,6 @@ enum zone_stat_item {
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
-#ifdef CONFIG_NUMA
- NUMA_HIT, /* allocated in intended node */
- NUMA_MISS, /* allocated in non intended node */
- NUMA_FOREIGN, /* was intended here, hit elsewhere */
- NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
- NUMA_LOCAL, /* allocation from local node */
- NUMA_OTHER, /* allocation from other node */
-#endif
NR_FREE_CMA_PAGES,
NR_VM_ZONE_STAT_ITEMS };
@@ -276,6 +282,8 @@ struct per_cpu_pageset {
struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
s8 expire;
+ s8 numa_stat_threshold;
+ s8 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
#endif
#ifdef CONFIG_SMP
s8 stat_threshold;
@@ -496,6 +504,7 @@ struct zone {
ZONE_PADDING(_pad3_)
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+ atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 97e11ab..9ac82e2 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -107,8 +107,33 @@ static inline void vm_events_fold_cpu(int cpu)
* Zone and node-based page accounting with per cpu differentials.
*/
extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
+#ifdef CONFIG_NUMA
+static inline void zone_numa_state_add(long x, struct zone *zone,
+ enum numa_stat_item item)
+{
+ atomic_long_add(x, &zone->vm_numa_stat[item]);
+ atomic_long_add(x, &vm_numa_stat[item]);
+}
+
+static inline unsigned long global_numa_state(enum numa_stat_item item)
+{
+ long x = atomic_long_read(&vm_numa_stat[item]);
+
+ return x;
+}
+
+static inline unsigned long zone_numa_state(struct zone *zone,
+ enum numa_stat_item item)
+{
+ long x = atomic_long_read(&zone->vm_numa_stat[item]);
+
+ return x;
+}
+#endif /* CONFIG_NUMA */
+
static inline void zone_page_state_add(long x, struct zone *zone,
enum zone_stat_item item)
{
@@ -194,8 +219,10 @@ static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
#ifdef CONFIG_NUMA
+extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item);
extern unsigned long sum_zone_node_page_state(int node,
- enum zone_stat_item item);
+ enum zone_stat_item item);
+extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item);
extern unsigned long node_page_state(struct pglist_data *pgdat,
enum node_stat_item item);
#else
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a9add06..45583cd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2741,18 +2741,18 @@ int __isolate_free_page(struct page *page, unsigned int order)
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
{
#ifdef CONFIG_NUMA
- enum zone_stat_item local_stat = NUMA_LOCAL;
+ enum numa_stat_item local_stat = NUMA_LOCAL;
if (z->node != numa_node_id())
local_stat = NUMA_OTHER;
if (z->node == preferred_zone->node)
- __inc_zone_state(z, NUMA_HIT);
+ __inc_numa_state(z, NUMA_HIT);
else {
- __inc_zone_state(z, NUMA_MISS);
- __inc_zone_state(preferred_zone, NUMA_FOREIGN);
+ __inc_numa_state(z, NUMA_MISS);
+ __inc_numa_state(preferred_zone, NUMA_FOREIGN);
}
- __inc_zone_state(z, local_stat);
+ __inc_numa_state(z, local_stat);
#endif
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c7e4b84..daea028 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -87,8 +87,10 @@ void vm_events_fold_cpu(int cpu)
* vm_stat contains the global counters
*/
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
+EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);
#ifdef CONFIG_SMP
@@ -192,7 +194,10 @@ void refresh_zone_stat_thresholds(void)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
-
+#ifdef CONFIG_NUMA
+ per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold
+ = threshold;
+#endif
/* Base nodestat threshold on the largest populated zone. */
pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
@@ -226,9 +231,14 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
continue;
threshold = (*calculate_pressure)(zone);
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
+#ifdef CONFIG_NUMA
+ per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold
+ = threshold;
+#endif
+ }
}
}
@@ -604,6 +614,32 @@ EXPORT_SYMBOL(dec_node_page_state);
* Fold a differential into the global counters.
* Returns the number of counters updated.
*/
+#ifdef CONFIG_NUMA
+static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+{
+ int i;
+ int changes = 0;
+
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+ if (zone_diff[i]) {
+ atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
+ changes++;
+ }
+
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+ if (numa_diff[i]) {
+ atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
+ changes++;
+ }
+
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ if (node_diff[i]) {
+ atomic_long_add(node_diff[i], &vm_node_stat[i]);
+ changes++;
+ }
+ return changes;
+}
+#else
static int fold_diff(int *zone_diff, int *node_diff)
{
int i;
@@ -622,6 +658,7 @@ static int fold_diff(int *zone_diff, int *node_diff)
}
return changes;
}
+#endif /* CONFIG_NUMA */
/*
* Update the zone counters for the current cpu.
@@ -645,6 +682,9 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
struct zone *zone;
int i;
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+#ifdef CONFIG_NUMA
+ int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
+#endif
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
int changes = 0;
@@ -666,6 +706,18 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
}
}
#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
+ int v;
+
+ v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
+ if (v) {
+
+ atomic_long_add(v, &zone->vm_numa_stat[i]);
+ global_numa_diff[i] += v;
+ __this_cpu_write(p->expire, 3);
+ }
+ }
+
if (do_pagesets) {
cond_resched();
/*
@@ -712,7 +764,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
}
}
+#ifdef CONFIG_NUMA
+ changes += fold_diff(global_zone_diff, global_numa_diff,
+ global_node_diff);
+#else
changes += fold_diff(global_zone_diff, global_node_diff);
+#endif
return changes;
}
@@ -727,6 +784,9 @@ void cpu_vm_stats_fold(int cpu)
struct zone *zone;
int i;
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+#ifdef CONFIG_NUMA
+ int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
+#endif
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
for_each_populated_zone(zone) {
@@ -743,6 +803,18 @@ void cpu_vm_stats_fold(int cpu)
atomic_long_add(v, &zone->vm_stat[i]);
global_zone_diff[i] += v;
}
+
+#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+ if (p->vm_numa_stat_diff[i]) {
+ int v;
+
+ v = p->vm_numa_stat_diff[i];
+ p->vm_numa_stat_diff[i] = 0;
+ atomic_long_add(v, &zone->vm_numa_stat[i]);
+ global_numa_diff[i] += v;
+ }
+#endif
}
for_each_online_pgdat(pgdat) {
@@ -761,7 +833,11 @@ void cpu_vm_stats_fold(int cpu)
}
}
+#ifdef CONFIG_NUMA
+ fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
+#else
fold_diff(global_zone_diff, global_node_diff);
+#endif
}
/*
@@ -779,10 +855,38 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
atomic_long_add(v, &zone->vm_stat[i]);
atomic_long_add(v, &vm_zone_stat[i]);
}
+
+#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+ if (pset->vm_numa_stat_diff[i]) {
+ int v = pset->vm_numa_stat_diff[i];
+
+ pset->vm_numa_stat_diff[i] = 0;
+ atomic_long_add(v, &zone->vm_numa_stat[i]);
+ atomic_long_add(v, &vm_numa_stat[i]);
+ }
+#endif
}
#endif
#ifdef CONFIG_NUMA
+void __inc_numa_state(struct zone *zone,
+ enum numa_stat_item item)
+{
+ struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ s8 __percpu *p = pcp->vm_numa_stat_diff + item;
+ s8 v, t;
+
+ v = __this_cpu_inc_return(*p);
+ t = __this_cpu_read(pcp->numa_stat_threshold);
+ if (unlikely(v > t)) {
+ s8 overstep = t >> 1;
+
+ zone_numa_state_add(v + overstep, zone, item);
+ __this_cpu_write(*p, -overstep);
+ }
+}
+
/*
* Determine the per node value of a stat item. This function
* is called frequently in a NUMA machine, so try to be as
@@ -801,6 +905,19 @@ unsigned long sum_zone_node_page_state(int node,
return count;
}
+unsigned long sum_zone_numa_state(int node,
+ enum numa_stat_item item)
+{
+ struct zone *zones = NODE_DATA(node)->node_zones;
+ int i;
+ unsigned long count = 0;
+
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ count += zone_numa_state(zones + i, item);
+
+ return count;
+}
+
/*
* Determine the per node value of a stat item.
*/
@@ -937,6 +1054,9 @@ const char * const vmstat_text[] = {
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
#endif
+ "nr_free_cma",
+
+ /* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
"numa_hit",
"numa_miss",
@@ -945,7 +1065,6 @@ const char * const vmstat_text[] = {
"numa_local",
"numa_other",
#endif
- "nr_free_cma",
/* Node-based counters */
"nr_inactive_anon",
@@ -1106,7 +1225,6 @@ const char * const vmstat_text[] = {
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
-
#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
defined(CONFIG_PROC_FS)
static void *frag_start(struct seq_file *m, loff_t *pos)
@@ -1384,7 +1502,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
seq_printf(m, "\n per-node stats");
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
seq_printf(m, "\n %-12s %lu",
- vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+ vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_STAT_ITEMS],
node_page_state(pgdat, i));
}
}
@@ -1421,6 +1540,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
seq_printf(m, "\n %-12s %lu", vmstat_text[i],
zone_page_state(zone, i));
+#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+ seq_printf(m, "\n %-12s %lu",
+ vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+ zone_numa_state(zone, i));
+#endif
+
seq_printf(m, "\n pagesets");
for_each_online_cpu(i) {
struct per_cpu_pageset *pageset;
@@ -1497,6 +1623,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
if (*pos >= ARRAY_SIZE(vmstat_text))
return NULL;
stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
+ NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
@@ -1512,6 +1639,12 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
v[i] = global_zone_page_state(i);
v += NR_VM_ZONE_STAT_ITEMS;
+#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+ v[i] = global_numa_state(i);
+ v += NR_VM_NUMA_STAT_ITEMS;
+#endif
+
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
v[i] = global_node_page_state(i);
v += NR_VM_NODE_STAT_ITEMS;
@@ -1613,6 +1746,16 @@ int vmstat_refresh(struct ctl_table *table, int write,
err = -EINVAL;
}
}
+#ifdef CONFIG_NUMA
+ for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
+ val = atomic_long_read(&vm_numa_stat[i]);
+ if (val < 0) {
+ pr_warn("%s: %s %ld\n",
+ __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
+ err = -EINVAL;
+ }
+ }
+#endif
if (err)
return err;
if (write)
@@ -1654,13 +1797,19 @@ static bool need_update(int cpu)
struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+#ifdef CONFIG_NUMA
+ BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 1);
+#endif
/*
* The fast way of checking if there are any vmstat diffs.
* This works because the diffs are byte sized items.
*/
if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
return true;
-
+#ifdef CONFIG_NUMA
+ if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
+ return true;
+#endif
}
return false;
}
OpenPOWER on IntegriCloud