mm, compaction: capture a page under direct compaction

author Mel Gorman <mgorman@techsingularity.net>

Tue, 5 Mar 2019 23:45:41 +0000 (15:45 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 6 Mar 2019 05:07:17 +0000 (21:07 -0800)
author Mel Gorman <mgorman@techsingularity.net>
Tue, 5 Mar 2019 23:45:41 +0000 (15:45 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 6 Mar 2019 05:07:17 +0000 (21:07 -0800)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h

index 70d0256edd3149e61d53cd8e07de8195eba2940c..c960923d9ec22b94a37ea7cc49a45b8f70a10725 100644 (file)
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -93,7 +93,8 @@ extern int sysctl_compact_unevictable_allowed;
  extern int fragmentation_index(struct zone *zone, unsigned int order);
  extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
                 unsigned int order, unsigned int alloc_flags,
-               const struct alloc_context *ac, enum compact_priority prio);
+               const struct alloc_context *ac, enum compact_priority prio,
+               struct page **page);
  extern void reset_isolation_suitable(pg_data_t *pgdat);
  extern enum compact_result compaction_suitable(struct zone *zone, int order,
                 unsigned int alloc_flags, int classzone_idx);
diff --git a/include/linux/sched.h b/include/linux/sched.h

index f9b43c989577fdf50da7147ee28a7b0a8c4e24ad..ebfb34fb9b30dea640a6c9cbe9c51d8c1429030e 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -47,6 +47,7 @@ struct pid_namespace;
  struct pipe_inode_info;
  struct rcu_node;
  struct reclaim_state;
+struct capture_control;
  struct robust_list_head;
  struct sched_attr;
  struct sched_param;
@@ -958,6 +959,9 @@ struct task_struct {
  
         struct io_context               *io_context;
  
+#ifdef CONFIG_COMPACTION
+       struct capture_control          *capture_control;
+#endif
         /* Ptrace state: */
         unsigned long                   ptrace_message;
         kernel_siginfo_t                *last_siginfo;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 7cbb5658be80c89d41d3a8f1d9614e7405152cb8..916e956e92bebf4cf2caed0298fba5e3da02aa30 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2190,6 +2190,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
         INIT_HLIST_HEAD(&p->preempt_notifiers);
  #endif
  
+#ifdef CONFIG_COMPACTION
+       p->capture_control = NULL;
+#endif
         init_numa_balancing(clone_flags, p);
  }
  
diff --git a/mm/compaction.c b/mm/compaction.c

index 3084cee77fdab2ed856d7b523345e48e7e6cbec7..1cc871da3fdae931350eb17c12d8b044bf73d567 100644 (file)
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2056,7 +2056,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
         return false;
  }
  
-static enum compact_result compact_zone(struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
  {
         enum compact_result ret;
         unsigned long start_pfn = cc->zone->zone_start_pfn;
@@ -2225,6 +2226,11 @@ check_drain:
                         }
                 }
  
+               /* Stop if a page has been captured */
+               if (capc && capc->page) {
+                       ret = COMPACT_SUCCESS;
+                       break;
+               }
         }
  
  out:
@@ -2258,7 +2264,8 @@ out:
  
  static enum compact_result compact_zone_order(struct zone *zone, int order,
                 gfp_t gfp_mask, enum compact_priority prio,
-               unsigned int alloc_flags, int classzone_idx)
+               unsigned int alloc_flags, int classzone_idx,
+               struct page **capture)
  {
         enum compact_result ret;
         struct compact_control cc = {
@@ -2279,14 +2286,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
                 .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
                 .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
         };
+       struct capture_control capc = {
+               .cc = &cc,
+               .page = NULL,
+       };
+
+       if (capture)
+               current->capture_control = &capc;
         INIT_LIST_HEAD(&cc.freepages);
         INIT_LIST_HEAD(&cc.migratepages);
  
-       ret = compact_zone(&cc);
+       ret = compact_zone(&cc, &capc);
  
         VM_BUG_ON(!list_empty(&cc.freepages));
         VM_BUG_ON(!list_empty(&cc.migratepages));
  
+       *capture = capc.page;
+       current->capture_control = NULL;
+
         return ret;
  }
  
@@ -2304,7 +2321,7 @@ int sysctl_extfrag_threshold = 500;
   */
  enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                 unsigned int alloc_flags, const struct alloc_context *ac,
-               enum compact_priority prio)
+               enum compact_priority prio, struct page **capture)
  {
         int may_perform_io = gfp_mask & __GFP_IO;
         struct zoneref *z;
@@ -2332,7 +2349,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                 }
  
                 status = compact_zone_order(zone, order, gfp_mask, prio,
-                                       alloc_flags, ac_classzone_idx(ac));
+                               alloc_flags, ac_classzone_idx(ac), capture);
                 rc = max(status, rc);
  
                 /* The allocation should succeed, stop compacting */
@@ -2400,7 +2417,7 @@ static void compact_node(int nid)
                 INIT_LIST_HEAD(&cc.freepages);
                 INIT_LIST_HEAD(&cc.migratepages);
  
-               compact_zone(&cc);
+               compact_zone(&cc, NULL);
  
                 VM_BUG_ON(!list_empty(&cc.freepages));
                 VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -2535,7 +2552,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
  
                 if (kthread_should_stop())
                         return;
-               status = compact_zone(&cc);
+               status = compact_zone(&cc, NULL);
  
                 if (status == COMPACT_SUCCESS) {
                         compaction_defer_reset(zone, cc.order, false);
diff --git a/mm/internal.h b/mm/internal.h

index 31bb0be6fd525b76c732d7774333da5fae1fefa0..9eeaf2b95166fc65b5c0e2f4ca47099b6cbcb442 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -209,6 +209,15 @@ struct compact_control {
         bool rescan;                    /* Rescanning the same pageblock */
  };
  
+/*
+ * Used in direct compaction when a page should be taken from the freelists
+ * immediately when one is created during the free path.
+ */
+struct capture_control {
+       struct compact_control *cc;
+       struct page *page;
+};
+
  unsigned long
  isolate_freepages_range(struct compact_control *cc,
                         unsigned long start_pfn, unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 2e132b9e7a9328600e03cf987a93b5bd5957851c..09bf2c5f8b4b938fefe726d1910580407fdd644d 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
         return 0;
  }
  
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+       struct capture_control *capc = current->capture_control;
+
+       return capc &&
+               !(current->flags & PF_KTHREAD) &&
+               !capc->page &&
+               capc->cc->zone == zone &&
+               capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+                  int order, int migratetype)
+{
+       if (!capc || order != capc->cc->order)
+               return false;
+
+       /* Do not accidentally pollute CMA or isolated regions */
+       if (is_migrate_cma(migratetype) ||
+           is_migrate_isolate(migratetype))
+               return false;
+
+       /*
+        * Do not let lower order allocations pollute a movable pageblock.
+        * This might let an unmovable request use a reclaimable pageblock
+        * and vice-versa but no more than normal fallback logic which can
+        * have trouble finding a high-order free page.
+        */
+       if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+               return false;
+
+       capc->page = page;
+       return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+       return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+                  int order, int migratetype)
+{
+       return false;
+}
+#endif /* CONFIG_COMPACTION */
+
  /*
   * Freeing function for a buddy system allocator.
   *
@@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page,
         unsigned long uninitialized_var(buddy_pfn);
         struct page *buddy;
         unsigned int max_order;
+       struct capture_control *capc = task_capc(zone);
  
         max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
  
@@ -837,6 +889,11 @@ static inline void __free_one_page(struct page *page,
  
  continue_merging:
         while (order < max_order - 1) {
+               if (compaction_capture(capc, page, order, migratetype)) {
+                       __mod_zone_freepage_state(zone, -(1 << order),
+                                                               migratetype);
+                       return;
+               }
                 buddy_pfn = __find_buddy_pfn(pfn, order);
                 buddy = page + (buddy_pfn - pfn);
  
@@ -3710,7 +3767,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                 unsigned int alloc_flags, const struct alloc_context *ac,
                 enum compact_priority prio, enum compact_result *compact_result)
  {
-       struct page *page;
+       struct page *page = NULL;
         unsigned long pflags;
         unsigned int noreclaim_flag;
  
@@ -3721,13 +3778,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         noreclaim_flag = memalloc_noreclaim_save();
  
         *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-                                                                       prio);
+                                                               prio, &page);
  
         memalloc_noreclaim_restore(noreclaim_flag);
         psi_memstall_leave(&pflags);
  
-       if (*compact_result <= COMPACT_INACTIVE)
+       if (*compact_result <= COMPACT_INACTIVE) {
+               WARN_ON_ONCE(page);
                 return NULL;
+       }
  
         /*
          * At least in one zone compaction wasn't deferred or skipped, so let's
@@ -3735,7 +3794,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
          */
         count_vm_event(COMPACTSTALL);
  
-       page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+       /* Prep a captured page if available */
+       if (page)
+               prep_new_page(page, order, gfp_mask, alloc_flags);
+
+       /* Try get a page from the freelist if available */
+       if (!page)
+               page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
  
         if (page) {
                 struct zone *zone = page_zone(page);
author	Mel Gorman <mgorman@techsingularity.net>
	Tue, 5 Mar 2019 23:45:41 +0000 (15:45 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 6 Mar 2019 05:07:17 +0000 (21:07 -0800)
include/linux/compaction.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
mm/compaction.c		patch \| blob \| history
mm/internal.h		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history