[PATCH] drop-pagecache
author Andrew Morton <akpm@osdl.org>
Sun, 8 Jan 2006 09:00:39 +0000 (01:00 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Mon, 9 Jan 2006 04:12:40 +0000 (20:12 -0800)
Add /proc/sys/vm/drop_caches.  When written to, this will cause the kernel to
discard as much pagecache and/or reclaimable slab objects as it can.  This
operation requires root permissions.

It won't drop dirty data, so the user should run `sync' first.

Caveats:

a) Holds inode_lock for exorbitant amounts of time.

b) Needs to be taught about NUMA nodes: propagate these all the way through
   so the discarding can be controlled on a per-node basis.

This is a debugging feature: useful for getting consistent results between
filesystem benchmarks.  We could possibly put it under a config option, but
it's less than 300 bytes.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Documentation/filesystems/proc.txt
Documentation/sysctl/vm.txt
fs/Makefile
fs/drop_caches.c [new file with mode: 0644]
include/linux/mm.h
include/linux/sysctl.h
kernel/sysctl.c
mm/truncate.c
mm/vmscan.c

index d4773565ea2f20fabf868505f8b48f2ea7b6295a..a4dcf42c2fd93f1e1177aad2abe567030ecbcdde 100644 (file)
@@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent
 unnecessary page faults in thrashing situation. The unit of the value is
 second. The value would be useful to tune thrashing behavior.
 
+drop_caches
+-----------
+
+Writing to this will cause the kernel to drop clean caches, dentries and
+inodes from memory, causing that memory to become free.
+
+To free pagecache:
+       echo 1 > /proc/sys/vm/drop_caches
+To free dentries and inodes:
+       echo 2 > /proc/sys/vm/drop_caches
+To free pagecache, dentries and inodes:
+       echo 3 > /proc/sys/vm/drop_caches
+
+As this is a non-destructive operation and dirty objects are not freeable, the
+user should run `sync' first.
+
+
 2.5 /proc/sys/dev - Device specific parameters
 ----------------------------------------------
 
index 2f1aae32a5d9dfc2c0f71bb0400ce4a714d49c07..89ba1a42a17da4c1c17c153897c45731572d2a04 100644 (file)
@@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm:
 - min_free_kbytes
 - laptop_mode
 - block_dump
+- drop_caches
 
 ==============================================================
 
 dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
 dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
-block_dump, swap_token_timeout:
+block_dump, swap_token_timeout, drop_caches:
 
 See Documentation/filesystems/proc.txt
 
index 73676111ebbe763b2f23b51ff27131589cff6ae3..35e9aec608e4945565a5f20bc829006da5a9d72d 100644 (file)
@@ -10,7 +10,7 @@ obj-y :=      open.o read_write.o file_table.o buffer.o  bio.o super.o \
                ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
                attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
                seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-               ioprio.o pnode.o
+               ioprio.o pnode.o drop_caches.o
 
 obj-$(CONFIG_INOTIFY)          += inotify.o
 obj-$(CONFIG_EPOLL)            += eventpoll.o
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
new file mode 100644 (file)
index 0000000..4e47623
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Implement the manual drop-all-pagecache function
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/writeback.h>
+#include <linux/sysctl.h>
+#include <linux/gfp.h>
+
+/* A global variable is a bit ugly, but it keeps the code simple */
+int sysctl_drop_caches;
+
+/*
+ * Call invalidate_inode_pages() on the mapping of every inode found on
+ * @sb's s_inodes list, skipping inodes whose i_state has I_FREEING or
+ * I_WILL_FREE set.  The entire list walk runs with inode_lock held.
+ */
+static void drop_pagecache_sb(struct super_block *sb)
+{
+       struct inode *ino;
+
+       spin_lock(&inode_lock);
+       list_for_each_entry(ino, &sb->s_inodes, i_sb_list) {
+               /* Only touch inodes that are not on their way out. */
+               if (!(ino->i_state & (I_FREEING|I_WILL_FREE)))
+                       invalidate_inode_pages(ino->i_mapping);
+       }
+       spin_unlock(&inode_lock);
+}
+
+/*
+ * Walk the global super_blocks list and run drop_pagecache_sb() on every
+ * superblock whose s_root is set.
+ */
+void drop_pagecache(void)
+{
+       struct super_block *super;
+
+       spin_lock(&sb_lock);
+restart:
+       list_for_each_entry(super, &super_blocks, s_list) {
+               /* Bump s_count before sb_lock is released for the real work. */
+               super->s_count++;
+               spin_unlock(&sb_lock);
+
+               /* s_umount is held for reading across the s_root check. */
+               down_read(&super->s_umount);
+               if (super->s_root)
+                       drop_pagecache_sb(super);
+               up_read(&super->s_umount);
+
+               /* Retake sb_lock; rescan from the head if the helper asks. */
+               spin_lock(&sb_lock);
+               if (__put_super_and_need_restart(super))
+                       goto restart;
+       }
+       spin_unlock(&sb_lock);
+}
+
+/*
+ * Keep calling shrink_slab() until a single pass reports that 10 or
+ * fewer slab objects were shrunk.
+ */
+void drop_slab(void)
+{
+       for (;;) {
+               int freed = shrink_slab(1000, GFP_KERNEL, 1000);
+
+               if (freed <= 10)
+                       break;
+       }
+}
+
+/*
+ * proc handler for the "drop_caches" sysctl.
+ *
+ * The integer is read or written through proc_dointvec_minmax().  On a
+ * successful write, bit 0 of sysctl_drop_caches triggers drop_pagecache()
+ * and bit 1 triggers drop_slab().
+ *
+ * Returns 0 on success, or the error reported by proc_dointvec_minmax().
+ * Nothing is dropped when that call fails, so a rejected write can no
+ * longer act on a stale sysctl_drop_caches value.
+ */
+int drop_caches_sysctl_handler(ctl_table *table, int write,
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       if (ret)
+               return ret;
+
+       if (write) {
+               if (sysctl_drop_caches & 1)
+                       drop_pagecache();
+               if (sysctl_drop_caches & 2)
+                       drop_slab();
+       }
+       return 0;
+}
index bc01fff3aa0156a4f717e831890fbceb16be6cb8..83c651f25188e6671307e6886611563d4d17aa71 100644 (file)
@@ -1036,5 +1036,12 @@ int in_gate_area_no_task(unsigned long addr);
 /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
 #define OOM_DISABLE -17
 
+int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
+                                       void __user *, size_t *, loff_t *);
+int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+                       unsigned long lru_pages);
+void drop_pagecache(void);
+void drop_slab(void);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
index a9b80fc7f0f38d31f19e0573797a91a6735a935c..4cd267fe87ecc2e16dbf8be9fd6a340a4a82e427 100644 (file)
@@ -180,6 +180,7 @@ enum
        VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
        VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
        VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
+       VM_DROP_PAGECACHE=29,   /* int: nuke lots of pagecache */
 };
 
 
index a85047bb5739f97763345bc9ef047e0fe043f33c..8dcf6fd5b0f9fad9c717756f2f93ad858372a3e6 100644 (file)
@@ -68,6 +68,7 @@ extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
 extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
+extern int sysctl_drop_caches;
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
@@ -774,6 +775,15 @@ static ctl_table vm_table[] = {
                .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
                .strategy       = &sysctl_intvec,
        },
+       {
+               .ctl_name       = VM_DROP_PAGECACHE,
+               .procname       = "drop_caches",
+               .data           = &sysctl_drop_caches,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = drop_caches_sysctl_handler,
+               .strategy       = &sysctl_intvec,
+       },
        {
                .ctl_name       = VM_MIN_FREE_KBYTES,
                .procname       = "min_free_kbytes",
index 7dee327459017f35578178f4b4b97f76d3dffa4e..b1a463d0fe713dbf671a7fa4a8e439e138ee56fa 100644 (file)
@@ -249,7 +249,6 @@ unlock:
                                break;
                }
                pagevec_release(&pvec);
-               cond_resched();
        }
        return ret;
 }
index be8235fb193945cf15129f4a68ee7e88305078e4..428c5801d4b45cf19be318079ed23c8c55a7ccfd 100644 (file)
@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker);
  *
  * Returns the number of slab objects which we shrunk.
  */
-static int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-                       unsigned long lru_pages)
+int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
 {
        struct shrinker *shrinker;
        int ret = 0;