uprobe: use original page when all uprobes are removed
authorSong Liu <songliubraving@fb.com>
Mon, 23 Sep 2019 22:38:22 +0000 (15:38 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 24 Sep 2019 22:54:11 +0000 (15:54 -0700)
Currently, uprobe swaps the target page with a anonymous page in both
install_breakpoint() and remove_breakpoint().  When all uprobes on a page
are removed, the given mm is still using an anonymous page (not the
original page).

This patch allows uprobe to use original page when possible (all uprobes
on the page are already removed, and the original page is in page cache
and uptodate).

As suggested by Oleg, we unmap the old_page and let the original page
fault in.

Link: http://lkml.kernel.org/r/20190815164525.1848545-3-songliubraving@fb.com
Signed-off-by: Song Liu <songliubraving@fb.com>
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
kernel/events/uprobes.c

index 84fa00497c49f9f8d30538a75b9bb3d6727968a6..648f47553bffae552d47c934d5a99afa17725ea6 100644 (file)
@@ -143,10 +143,12 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
  *
  * @vma:      vma that holds the pte pointing to page
  * @addr:     address the old @page is mapped at
- * @page:     the cowed page we are replacing by kpage
- * @kpage:    the modified page we replace page by
+ * @old_page: the page we are replacing by new_page
+ * @new_page: the modified page we replace page by
  *
- * Returns 0 on success, -EFAULT on failure.
+ * If @new_page is NULL, only unmap @old_page.
+ *
+ * Returns 0 on success, negative error code otherwise.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                                struct page *old_page, struct page *new_page)
@@ -166,10 +168,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
 
-       err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
-                       false);
-       if (err)
-               return err;
+       if (new_page) {
+               err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+                                           &memcg, false);
+               if (err)
+                       return err;
+       }
 
        /* For try_to_free_swap() and munlock_vma_page() below */
        lock_page(old_page);
@@ -177,15 +181,20 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_invalidate_range_start(&range);
        err = -EAGAIN;
        if (!page_vma_mapped_walk(&pvmw)) {
-               mem_cgroup_cancel_charge(new_page, memcg, false);
+               if (new_page)
+                       mem_cgroup_cancel_charge(new_page, memcg, false);
                goto unlock;
        }
        VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
-       get_page(new_page);
-       page_add_new_anon_rmap(new_page, vma, addr, false);
-       mem_cgroup_commit_charge(new_page, memcg, false, false);
-       lru_cache_add_active_or_unevictable(new_page, vma);
+       if (new_page) {
+               get_page(new_page);
+               page_add_new_anon_rmap(new_page, vma, addr, false);
+               mem_cgroup_commit_charge(new_page, memcg, false, false);
+               lru_cache_add_active_or_unevictable(new_page, vma);
+       } else
+               /* no new page, just dec_mm_counter for old_page */
+               dec_mm_counter(mm, MM_ANONPAGES);
 
        if (!PageAnon(old_page)) {
                dec_mm_counter(mm, mm_counter_file(old_page));
@@ -194,8 +203,9 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
        ptep_clear_flush_notify(vma, addr, pvmw.pte);
-       set_pte_at_notify(mm, addr, pvmw.pte,
-                       mk_pte(new_page, vma->vm_page_prot));
+       if (new_page)
+               set_pte_at_notify(mm, addr, pvmw.pte,
+                                 mk_pte(new_page, vma->vm_page_prot));
 
        page_remove_rmap(old_page, false);
        if (!page_mapped(old_page))
@@ -488,6 +498,10 @@ retry:
                ref_ctr_updated = 1;
        }
 
+       ret = 0;
+       if (!is_register && !PageAnon(old_page))
+               goto put_old;
+
        ret = anon_vma_prepare(vma);
        if (ret)
                goto put_old;
@@ -501,8 +515,30 @@ retry:
        copy_highpage(new_page, old_page);
        copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
+       if (!is_register) {
+               struct page *orig_page;
+               pgoff_t index;
+
+               VM_BUG_ON_PAGE(!PageAnon(old_page), old_page);
+
+               index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+               orig_page = find_get_page(vma->vm_file->f_inode->i_mapping,
+                                         index);
+
+               if (orig_page) {
+                       if (PageUptodate(orig_page) &&
+                           pages_identical(new_page, orig_page)) {
+                               /* let go new_page */
+                               put_page(new_page);
+                               new_page = NULL;
+                       }
+                       put_page(orig_page);
+               }
+       }
+
        ret = __replace_page(vma, vaddr, old_page, new_page);
-       put_page(new_page);
+       if (new_page)
+               put_page(new_page);
 put_old:
        put_page(old_page);