mm/oom_kill: count global and memory cgroup oom kills
author    Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
          Thu, 6 Jul 2017 22:40:28 +0000 (15:40 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 6 Jul 2017 23:24:35 +0000 (16:24 -0700)

Show the count of OOM killer invocations in /proc/vmstat and the count
of processes killed in a memory cgroup in the "memory.events" knob (in
memory.oom_control for cgroup v1).

Also describe the difference between "oom" and "oom_kill" in the memory
cgroup documentation.  Currently, OOM in a memory cgroup kills tasks iff
the shortage has happened inside a page fault.

These counters help in monitoring OOM kills - until now the only way
has been grepping for magic words in the kernel log.
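
For illustration only (not part of this patch), a minimal userspace
sketch that pulls the new global counter out of /proc/vmstat:

#include <stdio.h>

int main(void)
{
        char line[128];
        unsigned long count;
        FILE *f = fopen("/proc/vmstat", "r");

        if (!f) {
                perror("/proc/vmstat");
                return 1;
        }
        /* "oom_kill" is the field name added by this patch */
        while (fgets(line, sizeof(line), f))
                if (sscanf(line, "oom_kill %lu", &count) == 1)
                        printf("global oom kills: %lu\n", count);
        fclose(f);
        return 0;
}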

[akpm@linux-foundation.org: fix for mem_cgroup_count_vm_event() rename]
[akpm@linux-foundation.org: fix comment, per Konstantin]
Link: http://lkml.kernel.org/r/149570810989.203600.9492483715840752937.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Roman Guschin <guroan@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/cgroup-v2.txt
include/linux/memcontrol.h
include/linux/vm_event_item.h
mm/memcontrol.c
mm/oom_kill.c
mm/vmstat.c

index 5ac2fbde97e6aad20b8916d9904cb9e0d03e1831..e6101976e0f18595d77e1242ab3f5680396fbfd9 100644 (file)
@@ -852,13 +852,25 @@ PAGE_SIZE multiple when read back.
 
                The number of times the cgroup's memory usage was
                about to go over the max boundary.  If direct reclaim
-               fails to bring it down, the OOM killer is invoked.
+               fails to bring it down, the cgroup enters OOM state.
 
          oom
 
-               The number of times the OOM killer has been invoked in
-               the cgroup.  This may not exactly match the number of
-               processes killed but should generally be close.
+               The number of times the cgroup's memory usage
+               reached the limit and allocation was about to fail.
+
+               Depending on context, the result could be invoking the
+               OOM killer and retrying allocation, or failing allocation.
+
+               A failed allocation, in its turn, could be returned to
+               userspace as -ENOMEM or silently ignored in cases like
+               disk readahead.  For now, OOM in a memory cgroup kills
+               tasks iff the shortage has happened inside a page fault.
+
+         oom_kill
+
+               The number of processes belonging to this cgroup
+               killed by any kind of OOM killer.
 
   memory.stat
 
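
As a usage sketch of the documented interface (the cgroup path
"/sys/fs/cgroup/test" is a hypothetical example), a reader that dumps
all memory.events fields, including the new oom_kill line:

#include <stdio.h>

int main(void)
{
        char line[128];
        /* hypothetical cgroup v2 path; adjust to a real group */
        FILE *f = fopen("/sys/fs/cgroup/test/memory.events", "r");

        if (!f) {
                perror("memory.events");
                return 1;
        }
        /* prints low/high/max/oom and, with this patch, oom_kill */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}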
index b2a5b1cd4e552bbb188476275d97a68aec41decd..72d0853beb31fbbf6658954962cfb86465c70982 100644 (file)
@@ -582,8 +582,11 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 
        rcu_read_lock();
        memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-       if (likely(memcg))
+       if (likely(memcg)) {
                this_cpu_inc(memcg->stat->events[idx]);
+               if (idx == OOM_KILL)
+                       cgroup_file_notify(&memcg->events_file);
+       }
        rcu_read_unlock();
 }
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
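
The cgroup_file_notify() call added above wakes anyone polling
memory.events.  A sketch of such a watcher, assuming the usual POLLPRI
semantics of cgroup event files and a hypothetical group path:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[256];
        ssize_t n;
        /* hypothetical cgroup v2 path */
        int fd = open("/sys/fs/cgroup/test/memory.events", O_RDONLY);
        struct pollfd pfd = { .fd = fd, .events = POLLPRI };

        if (fd < 0) {
                perror("open");
                return 1;
        }
        while (poll(&pfd, 1, -1) > 0) {
                /* re-read the whole file after each notification */
                lseek(fd, 0, SEEK_SET);
                n = read(fd, buf, sizeof(buf) - 1);
                if (n > 0) {
                        buf[n] = '\0';
                        printf("memory.events changed:\n%s", buf);
                }
        }
        close(fd);
        return 0;
}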
index be3ab2d13adf52a5d834e87bacf9de7f45103342..37e8d31a4632dbbdcb22b492153cb36897057687 100644 (file)
@@ -41,6 +41,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
                PAGEOUTRUN, PGROTATED,
                DROP_PAGECACHE, DROP_SLAB,
+               OOM_KILL,
 #ifdef CONFIG_NUMA_BALANCING
                NUMA_PTE_UPDATES,
                NUMA_HUGE_PTE_UPDATES,
index 3e2f8cf85b4c471fb5661b00f02a65b84ab5d7a3..4f686fc1c5fac144769a278315d9074cee4d6cb5 100644 (file)
@@ -3573,6 +3573,7 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
 
        seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
        seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
+       seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
        return 0;
 }
 
@@ -5164,6 +5165,7 @@ static int memory_events_show(struct seq_file *m, void *v)
        seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
        seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
        seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+       seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 
        return 0;
 }
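
For the v1 interface touched above, a sketch (again with a hypothetical
group path) that parses the key/value pairs of memory.oom_control,
which now includes oom_kill:

#include <stdio.h>

int main(void)
{
        char key[32];
        unsigned long val;
        /* hypothetical cgroup v1 path; adjust to a real group */
        FILE *f = fopen("/sys/fs/cgroup/memory/test/memory.oom_control", "r");

        if (!f) {
                perror("memory.oom_control");
                return 1;
        }
        /* keys: oom_kill_disable, under_oom and, now, oom_kill */
        while (fscanf(f, "%31s %lu", key, &val) == 2)
                printf("%s = %lu\n", key, val);
        fclose(f);
        return 0;
}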
index 04c9143a86255a179aa40c06c2f36d203608117b..0e2c925e7826fe302e5db43c5940237f1c6eb0d6 100644 (file)
@@ -876,6 +876,11 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
        /* Get a reference to safely compare mm after task_unlock(victim) */
        mm = victim->mm;
        mmgrab(mm);
+
+       /* Raise event before sending signal: task reaper must see this */
+       count_vm_event(OOM_KILL);
+       count_memcg_event_mm(mm, OOM_KILL);
+
        /*
         * We should send SIGKILL before setting TIF_MEMDIE in order to prevent
         * the OOM victim from depleting the memory reserves from the user
index 6dae6b240b21ffee5ce49bdfb7735fbb2c8cd02b..46281825c71054451d43d2f0256c421f1a171de4 100644 (file)
@@ -1018,6 +1018,7 @@ const char * const vmstat_text[] = {
 
        "drop_pagecache",
        "drop_slab",
+       "oom_kill",
 
 #ifdef CONFIG_NUMA_BALANCING
        "numa_pte_updates",