summaryrefslogtreecommitdiff
path: root/mm/percpu-stats.c
diff options
context:
space:
mode:
author: Roman Gushchin <guro@fb.com> 2021-06-02 18:09:31 -0700
committer: Dennis Zhou <dennis@kernel.org> 2021-06-05 20:43:15 +0000
commit: faf65dde844affa9e360ccaa4bd231c2a04b87ea (patch)
tree: 7c4741eb522798b08d75e9b0d5f209ce706fe49c /mm/percpu-stats.c
parent: 4d5c8aedc8aa6a1f5d1b06eb4f5517dc60dd9440 (diff)
downloadlwn-faf65dde844affa9e360ccaa4bd231c2a04b87ea.tar.gz
lwn-faf65dde844affa9e360ccaa4bd231c2a04b87ea.zip
percpu: rework memcg accounting
The current implementation of the memcg accounting of the percpu memory is based on the idea of having two separate sets of chunks for accounted and non-accounted memory. This approach has the advantage of not wasting any extra memory for memcg data for non-accounted chunks; however, it complicates the code and leads to a higher number of chunks due to lower chunk utilization.

Instead of having two chunk types, it's possible to declare all* chunks memcg-aware unless the kernel memory accounting is disabled globally by a boot option. The size of objcg_array is usually small in comparison to the chunks themselves (it obviously depends on the number of CPUs), so even if some chunks have no accounted allocations, the memory waste isn't significant and will likely be compensated for by higher chunk utilization. Also, over time more and more percpu allocations will likely become accounted.

* The first chunk is initialized before the memory cgroup subsystem, so we don't know for sure whether we need to allocate obj_cgroups. Because it's small, let's make it free for use. Then we don't need to allocate obj_cgroups for it.

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Diffstat (limited to 'mm/percpu-stats.c')
-rw-r--r--mm/percpu-stats.c46
1 files changed, 15 insertions, 31 deletions
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 2125981acfb9..c6bd092ff7a3 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -34,15 +34,11 @@ static int find_max_nr_alloc(void)
{
struct pcpu_chunk *chunk;
int slot, max_nr_alloc;
- enum pcpu_chunk_type type;
max_nr_alloc = 0;
- for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
- for (slot = 0; slot < pcpu_nr_slots; slot++)
- list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
- list)
- max_nr_alloc = max(max_nr_alloc,
- chunk->nr_alloc);
+ for (slot = 0; slot < pcpu_nr_slots; slot++)
+ list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list)
+ max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc);
return max_nr_alloc;
}
@@ -133,9 +129,6 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
P("cur_min_alloc", cur_min_alloc);
P("cur_med_alloc", cur_med_alloc);
P("cur_max_alloc", cur_max_alloc);
-#ifdef CONFIG_MEMCG_KMEM
- P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)));
-#endif
seq_putc(m, '\n');
}
@@ -144,8 +137,6 @@ static int percpu_stats_show(struct seq_file *m, void *v)
struct pcpu_chunk *chunk;
int slot, max_nr_alloc;
int *buffer;
- enum pcpu_chunk_type type;
- int nr_empty_pop_pages;
alloc_buffer:
spin_lock_irq(&pcpu_lock);
@@ -166,10 +157,6 @@ alloc_buffer:
goto alloc_buffer;
}
- nr_empty_pop_pages = 0;
- for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
- nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
-
#define PL(X) \
seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
@@ -201,7 +188,7 @@ alloc_buffer:
PU(nr_max_chunks);
PU(min_alloc_size);
PU(max_alloc_size);
- P("empty_pop_pages", nr_empty_pop_pages);
+ P("empty_pop_pages", pcpu_nr_empty_pop_pages);
seq_putc(m, '\n');
#undef PU
@@ -215,20 +202,17 @@ alloc_buffer:
chunk_map_stats(m, pcpu_reserved_chunk, buffer);
}
- for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
- for (slot = 0; slot < pcpu_nr_slots; slot++) {
- list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
- list) {
- if (chunk == pcpu_first_chunk)
- seq_puts(m, "Chunk: <- First Chunk\n");
- else if (slot == pcpu_to_depopulate_slot)
- seq_puts(m, "Chunk (to_depopulate)\n");
- else if (slot == pcpu_sidelined_slot)
- seq_puts(m, "Chunk (sidelined):\n");
- else
- seq_puts(m, "Chunk:\n");
- chunk_map_stats(m, chunk, buffer);
- }
+ for (slot = 0; slot < pcpu_nr_slots; slot++) {
+ list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
+ if (chunk == pcpu_first_chunk)
+ seq_puts(m, "Chunk: <- First Chunk\n");
+ else if (slot == pcpu_to_depopulate_slot)
+ seq_puts(m, "Chunk (to_depopulate)\n");
+ else if (slot == pcpu_sidelined_slot)
+ seq_puts(m, "Chunk (sidelined):\n");
+ else
+ seq_puts(m, "Chunk:\n");
+ chunk_map_stats(m, chunk, buffer);
}
}