From d2997b1042ec150616c1963b5e5e919ffd0b0ebf Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 9 Aug 2010 17:20:11 -0700 Subject: [PATCH] hibernation: freeze swap at hibernation When taking a memory snapshot in hibernate_snapshot(), all (directly called) memory allocations use GFP_ATOMIC. Hence swap misusage during hibernation never occurs. But from a pessimistic point of view, there is no guarantee that no page allcation has __GFP_WAIT. It is better to have a global indication "we enter hibernation, don't use swap!". This patch tries to freeze new-swap-allocation during hibernation. (All user processes are frozenm so swapin is not a concern). This way, no updates will happen to swap_map[] between hibernate_snapshot() and save_image(). Swap is thawed when swsusp_free() is called. We can be assured that swap corruption will not occur. Signed-off-by: KAMEZAWA Hiroyuki Cc: "Rafael J. Wysocki" Cc: Hugh Dickins Cc: KOSAKI Motohiro Cc: Ondrej Zary Cc: Balbir Singh Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 +++- kernel/power/hibernate.c | 1 + kernel/power/snapshot.c | 1 + kernel/power/swap.c | 6 +-- mm/swapfile.c | 94 ++++++++++++++++++++++++++++++---------- 5 files changed, 84 insertions(+), 26 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index ff4acea9bbdb..91c9d3fc8513 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -316,7 +316,6 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); -extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -333,6 +332,13 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; +#ifdef CONFIG_HIBERNATION +void hibernation_freeze_swap(void); +void hibernation_thaw_swap(void); +swp_entry_t get_swap_for_hibernation(int type); +void swap_free_for_hibernation(swp_entry_t val); +#endif + /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8dc31e02ae12..c77963938bca 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,6 +338,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); + hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f6cd6faf84fd..5e7edfb05e66 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,6 +1086,7 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; + hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index e6a5bdf61a37..5d0059eed3e4 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_page_of_type(swap)); + offset = swp_offset(get_swap_for_hibernation(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free(swp_entry(swap, offset)); + swap_free_for_hibernation(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free(swp_entry(swap, offset)); + swap_free_for_hibernation(swp_entry(swap, offset)); kfree(ext); } diff --git a/mm/swapfile.c b/mm/swapfile.c index f08d165871b3..1f3f9c59a73a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,6 +47,8 @@ long nr_swap_pages; long total_swap_pages; static int least_priority; +static bool swap_for_hibernation; + static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -451,6 +453,8 @@ swp_entry_t get_swap_page(void) spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; + if (swap_for_hibernation) + goto noswap; nr_swap_pages--; for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { @@ -483,28 +487,6 @@ noswap: return (swp_entry_t) {0}; } -/* The only caller of this function is now susupend routine */ -swp_entry_t get_swap_page_of_type(int type) -{ - struct swap_info_struct *si; - pgoff_t offset; - - spin_lock(&swap_lock); - si = swap_info[type]; - if (si && (si->flags & SWP_WRITEOK)) { - nr_swap_pages--; - /* This is called for allocating swap entry, not cache */ - offset = scan_swap_map(si, 1); - if (offset) { - spin_unlock(&swap_lock); - return swp_entry(type, offset); - } - nr_swap_pages++; - } - spin_unlock(&swap_lock); - return (swp_entry_t) {0}; -} - static struct swap_info_struct *swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; @@ -764,6 +746,74 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) #endif #ifdef CONFIG_HIBERNATION + +static pgoff_t hibernation_offset[MAX_SWAPFILES]; +/* + * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise, + * saved swap_map[] image to the disk will be an incomplete because it's + * changing without synchronization with hibernation snap shot. + * At resume, we just make swap_for_hibernation=false. We can forget + * used maps easily. + */ +void hibernation_freeze_swap(void) +{ + int i; + + spin_lock(&swap_lock); + + printk(KERN_INFO "PM: Freeze Swap\n"); + swap_for_hibernation = true; + for (i = 0; i < MAX_SWAPFILES; i++) + hibernation_offset[i] = 1; + spin_unlock(&swap_lock); +} + +void hibernation_thaw_swap(void) +{ + spin_lock(&swap_lock); + if (swap_for_hibernation) { + printk(KERN_INFO "PM: Thaw Swap\n"); + swap_for_hibernation = false; + } + spin_unlock(&swap_lock); +} + +/* + * Because updateing swap_map[] can make not-saved-status-change, + * we use our own easy allocator. + * Please see kernel/power/swap.c, Used swaps are recorded into + * RB-tree. + */ +swp_entry_t get_swap_for_hibernation(int type) +{ + pgoff_t off; + swp_entry_t val = {0}; + struct swap_info_struct *si; + + spin_lock(&swap_lock); + + si = swap_info[type]; + if (!si || !(si->flags & SWP_WRITEOK)) + goto done; + + for (off = hibernation_offset[type]; off < si->max; ++off) { + if (!si->swap_map[off]) + break; + } + if (off < si->max) { + val = swp_entry(type, off); + hibernation_offset[type] = off + 1; + } +done: + spin_unlock(&swap_lock); + return val; +} + +void swap_free_for_hibernation(swp_entry_t ent) +{ + /* Nothing to do */ +} + /* * Find the swap type that corresponds to given device (if any). * -- 2.30.2